maayanorner committed on
Commit
4d5f82a
verified
1 Parent(s): 17055ea

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +32 -0
README.md CHANGED
@@ -38,6 +38,38 @@ def summarize(text, tokenizer, model, num_beams=4, temperature=1, max_new_tokens
38
  return generated_text
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  model_path = 'maayanorner/hebrew-summarization-llm' # or maayanorner/hebrew-summarization-llm-4bit
42
 
43
  model = AutoModelForCausalLM.from_pretrained(
 
38
  return generated_text
39
 
40
 
41
def summarize_batch(texts, tokenizer, model, num_beams=4, temperature=1, max_new_tokens=512):
    """Generate summaries for several texts with one batched ``model.generate`` call.

    Args:
        texts: Iterable of input strings. Each must be at least 20 characters.
        tokenizer: Tokenizer used to encode the prompts and decode the outputs.
            If it has no ``pad_token``, its ``eos_token`` is assigned as one.
        model: Object exposing ``generate``. Inputs are moved to ``model.device``
            (falling back to ``'cuda'`` when that attribute is missing).
        num_beams: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate``.

    Returns:
        A list with one decoded string per generated sequence. Sequences are
        decoded with ``skip_special_tokens=False``, so special tokens are kept.
        An empty ``texts`` yields an empty list.

    Raises:
        ValueError: If any text is shorter than 20 characters.
    """
    # Materialize once so generators survive both the validation and prompt passes.
    texts = list(texts)

    if any(len(text) < 20 for text in texts):
        raise ValueError('Each text must be at least 20 characters long.')

    # Nothing to summarize; avoid handing an empty batch to the tokenizer.
    if not texts:
        return []

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    prompts = [f'{text}\n### סיכום:' for text in texts]

    # Causal LMs continue from the last position of each row, so a batch of
    # unequal-length prompts must be padded on the left. Restore the caller's
    # setting afterwards so the tokenizer is not left reconfigured.
    previous_side = getattr(tokenizer, 'padding_side', 'right')
    tokenizer.padding_side = 'left'
    try:
        inputs = tokenizer(prompts, return_tensors="pt", padding=True)
    finally:
        tokenizer.padding_side = previous_side

    # Follow the model's device instead of assuming a CUDA device is present.
    device = getattr(model, 'device', 'cuda')
    in_data = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    output_ids = model.generate(
        input_ids=in_data,
        attention_mask=attention_mask,
        num_beams=num_beams,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        early_stopping=True,
        use_cache=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode each generated sequence; special tokens are intentionally kept.
    generated_texts = [tokenizer.decode(output, skip_special_tokens=False) for output in output_ids]

    return generated_texts
71
+
72
+
73
  model_path = 'maayanorner/hebrew-summarization-llm' # or maayanorner/hebrew-summarization-llm-4bit
74
 
75
  model = AutoModelForCausalLM.from_pretrained(