Spaces:

Rehman1603
/

YouTubeTextSummarizer

Runtime error

App Files Files Community

Rehman1603 committed on Dec 6, 2023

Commit

3a86bc0

1 Parent(s): 54edba8

Create summary.py

Browse files

Files changed (1) hide show

summary.py +58 -0

summary.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize, sent_tokenize
+import traceback
+import sys
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+nltk.download('stopwords')
+nltk.download('punkt')
def summary_nlp(text):
    """Return an extractive summary of *text* based on word frequencies.

    Every non-stop-word token is counted (case-insensitively).  Each
    sentence is then scored by adding the count of every counted token that
    occurs in the lower-cased sentence, and the sentences whose score is
    strictly greater than 1.2 times the (integer-truncated) average score
    are concatenated in their original order.

    Args:
        text: The text to summarise.

    Returns:
        The selected sentences, each preceded by a single space, or an empty
        string when the input is empty/blank, when no sentence could be
        scored, or when no sentence clears the threshold.
    """
    # Blank input produces no scored sentences; return early instead of
    # dividing by zero in the averaging step below.
    if not text or not text.strip():
        return ""

    stop_words = set(stopwords.words("english"))

    # Frequency of every lower-cased token that is not a stop word.
    freq_table = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token not in stop_words:
            freq_table[token] = freq_table.get(token, 0) + 1

    sentences = sent_tokenize(text)
    sentence_scores = {}
    for sentence in sentences:
        lowered = sentence.lower()  # lower-case once per sentence, not once per word
        # NOTE(review): this is a substring test, so a counted token also
        # matches inside longer words; kept as-is to preserve the scoring.
        score = sum(freq for word, freq in freq_table.items() if word in lowered)
        if score:
            # Identical sentences share one key, so their scores accumulate.
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + score

    # Text consisting solely of stop words yields no scores at all.
    if not sentence_scores:
        return ""

    average = int(sum(sentence_scores.values()) / len(sentence_scores))
    threshold = 1.2 * average

    return "".join(
        " " + sentence
        for sentence in sentences
        if sentence in sentence_scores and sentence_scores[sentence] > threshold
    )
+def Summary_BART(text):
+    checkpoint = "sshleifer/distilbart-cnn-12-6"
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+    inputs = tokenizer(text,
+                    max_length=1024,
+                    truncation=True,
+                    return_tensors="pt")
+    summary_ids = model.generate(inputs["input_ids"])
+    summary = tokenizer.batch_decode(summary_ids,
+                                  skip_special_tokens=True,
+                                  clean_up_tokenization_spaces=False)
+    return summary[0]