Rehman1603 committed on
Commit
3a86bc0
·
1 Parent(s): 54edba8

Create summary.py

Browse files
Files changed (1) hide show
  1. summary.py +58 -0
summary.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.corpus import stopwords
3
+ from nltk.tokenize import word_tokenize, sent_tokenize
4
+ import traceback
5
+ import sys
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
+
8
+
9
+
10
# Fetch the NLTK data that summary_nlp relies on when this module is imported:
# the English stopword list and the Punkt tokenizer models.
# "punkt_tab" is requested in addition to "punkt" because NLTK 3.8.2 and later
# load the tokenizers from that resource; with only "punkt" installed,
# word_tokenize / sent_tokenize raise LookupError on those versions.
for _nltk_resource in ("stopwords", "punkt", "punkt_tab"):
    nltk.download(_nltk_resource)
12
+
13
def summary_nlp(text):
    """Build an extractive summary of *text* from its highest-scoring sentences.

    Every token that is not an English stopword is lower-cased and counted
    across the whole text. Each sentence is then scored with the sum of the
    counts of all counted tokens that occur in its lower-cased form (a
    substring test), and the sentences whose score exceeds 1.2 times the
    truncated average sentence score are kept in their original order.

    Args:
        text: The text to summarise.

    Returns:
        The selected sentences, each preceded by a single space. An empty
        string is returned when the text is empty or blank, when no sentence
        receives a score, or when no sentence clears the threshold.
    """
    # Nothing to summarise; bail out before touching the tokenizers.
    if not text or not text.strip():
        return ''

    stop_words = set(stopwords.words("english"))

    # Frequency of every non-stopword token (punctuation tokens included,
    # exactly as produced by word_tokenize).
    freq_table = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token in stop_words:
            continue
        freq_table[token] = freq_table.get(token, 0) + 1

    sentences = sent_tokenize(text)

    # Score per distinct sentence text; a sentence that appears more than
    # once accumulates its score on the same key.
    sentence_value = {}
    for sentence in sentences:
        lowered = sentence.lower()  # lower-case once per sentence, not per word
        score = sum(freq for word, freq in freq_table.items() if word in lowered)
        if score:
            sentence_value[sentence] = sentence_value.get(sentence, 0) + score

    # Without this guard the average below divides by zero, e.g. for a text
    # that consists only of stopwords.
    if not sentence_value:
        return ''

    average = int(sum(sentence_value.values()) / len(sentence_value))
    threshold = 1.2 * average

    return ''.join(
        " " + sentence
        for sentence in sentences
        if sentence in sentence_value and sentence_value[sentence] > threshold
    )
43
+
44
+
45
+
46
# Tokenizer/model pairs that have already been loaded, keyed by checkpoint name.
_BART_MODELS = {}


def _load_bart(checkpoint):
    """Return the (tokenizer, model) pair for *checkpoint*, loading it once."""
    if checkpoint not in _BART_MODELS:
        _BART_MODELS[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    return _BART_MODELS[checkpoint]


def Summary_BART(text, checkpoint="sshleifer/distilbart-cnn-12-6"):
    """Summarise *text* with a pretrained sequence-to-sequence model.

    The tokenizer and model are loaded the first time a checkpoint is used
    and reused on later calls, instead of being reloaded on every call.

    Args:
        text: The text to summarise. It is truncated to 1024 tokens before
            being passed to the model.
        checkpoint: Name or path of the pretrained checkpoint to use.

    Returns:
        The decoded summary string, or an empty string when *text* is empty
        or blank.
    """
    # There is nothing to summarise; skip loading and running the model.
    if not text or not text.strip():
        return ''

    tokenizer, model = _load_bart(checkpoint)
    inputs = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    # Pass the whole encoding so the attention mask reaches generate()
    # together with the input ids.
    summary_ids = model.generate(**inputs)
    decoded = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]