Update README.md
README.md CHANGED
@@ -7,6 +7,8 @@ Known Issues:
 - While the results look pretty good, the model was not evaluated.
 - Short inputs (i.e., "articles" of one line) will yield a contextless "summary".
 
+4-bit quantized version:
+https://huggingface.co/maayanorner/hebrew-summarization-llm-4bit
 
 # Data:
 https://github.com/IAHLT/summarization_he
@@ -31,30 +33,13 @@ def summarize(text, tokenizer, model, num_beams=4, temperature=1, max_new_tokens
     generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)
 
     return generated_text
-
-
-# optional
-use_4bit = True
-bnb_4bit_compute_dtype = "float16"
-bnb_4bit_quant_type = "nf4"
-use_nested_quant = False
-compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
 
 
-#
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=use_4bit,
-    bnb_4bit_quant_type=bnb_4bit_quant_type,
-    bnb_4bit_compute_dtype=compute_dtype,
-    bnb_4bit_use_double_quant=use_nested_quant,
-)
-
-model_path = 'maayanorner/hebrew-summarization-llm'
+model_path = 'maayanorner/hebrew-summarization-llm' # or https://huggingface.co/maayanorner/hebrew-summarization-llm-4bit
 
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     trust_remote_code=True,
-    quantization_config=bnb_config # optional
 )
 model.to('cuda')
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
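For readers who want the new 4-bit path spelled out, here is a minimal loading sketch. It assumes the pre-quantized repo loads through the standard transformers API with bitsandbytes and accelerate installed; `device_map='auto'` stands in for the `model.to('cuda')` call, since quantized weights are placed on the GPU at load time.

```python
# Sketch: loading the pre-quantized 4-bit checkpoint referenced above.
# Assumes bitsandbytes + accelerate are installed; the repo id is from the README.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = 'maayanorner/hebrew-summarization-llm-4bit'

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    device_map='auto',  # quantized weights land on the GPU at load time
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
```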
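And a hypothetical end-to-end call, using the `summarize` signature visible in the second hunk header. The argument values are illustrative, and the input should be longer than one line given the known issue noted above.

```python
# Hypothetical usage; summarize() is defined earlier in the README and only
# its signature (text, tokenizer, model, num_beams=4, temperature=1, ...)
# appears in this diff.
with open('article.txt', encoding='utf-8') as f:  # a multi-line Hebrew article
    article = f.read()

summary = summarize(article, tokenizer, model, num_beams=4, temperature=1)
print(summary)
```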