import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

nltk.download('stopwords')
nltk.download('punkt')


def summary_nlp(text):
    """Extractive summary: score sentences by word frequency and keep the above-average ones."""
    stopWords = set(stopwords.words("english"))

    # Build a frequency table of non-stopword tokens.
    words = word_tokenize(text)
    freqTable = dict()
    for word in words:
        word = word.lower()
        if word in stopWords:
            continue
        freqTable[word] = freqTable.get(word, 0) + 1

    # Score each sentence by the summed frequencies of the words it contains.
    # Note: `word in sentence.lower()` is a substring test, so short words can
    # also match inside longer ones.
    sentences = sent_tokenize(text)
    sentenceValue = dict()
    for sentence in sentences:
        for word, freq in freqTable.items():
            if word in sentence.lower():
                sentenceValue[sentence] = sentenceValue.get(sentence, 0) + freq

    # Guard against empty input, which would otherwise divide by zero below.
    if not sentenceValue:
        return ""

    # Keep only sentences scoring more than 1.2x the average sentence score.
    average = int(sum(sentenceValue.values()) / len(sentenceValue))
    summary = ''
    for sentence in sentences:
        if sentence in sentenceValue and sentenceValue[sentence] > (1.2 * average):
            summary += " " + sentence
    return summary.strip()


def Summary_BART(text):
    """Abstractive summary using a distilled BART model fine-tuned on CNN/DailyMail."""
    checkpoint = "sshleifer/distilbart-cnn-12-6"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # BART's encoder accepts at most 1024 tokens, so longer inputs are truncated.
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return summary[0]
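

# Illustrative usage (a sketch, not part of the original script): run both
# summarizers on a short sample passage. The sample text below is made up for
# demonstration, and the Summary_BART call downloads the distilbart checkpoint
# from the Hugging Face Hub on first run.
if __name__ == "__main__":
    sample_text = (
        "Automatic text summarization condenses a document into a shorter version "
        "that preserves its key information. Extractive methods select important "
        "sentences directly from the source text, often by scoring them with word "
        "frequencies. Abstractive methods instead generate new sentences with a "
        "sequence-to-sequence model such as BART. Extractive summaries are cheap "
        "to compute but can read as disjointed. Abstractive summaries are more "
        "fluent but require a large pretrained model and more compute."
    )
    print("Frequency-based summary:", summary_nlp(sample_text))
    print("BART summary:", Summary_BART(sample_text))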