Spaces:

Rehman1603
/

YouTubeTextSummarizer

Runtime error

YouTubeTextSummarizer / summary.py

Create summary.py

3a86bc0 almost 2 years ago

1.89 kB

	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize, sent_tokenize
	import traceback
	import sys
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM



	# Fetch the NLTK data that summary_nlp needs (stop-word list and the Punkt
	# sentence/word tokenizer models) when the module is imported.
	nltk.download('stopwords')
	nltk.download('punkt')
	# NLTK 3.8.2 and later load the Punkt models from the 'punkt_tab' package
	# instead of the pickled 'punkt' one; without it word_tokenize and
	# sent_tokenize raise LookupError on current NLTK releases.
	nltk.download('punkt_tab')

	def summary_nlp(text):
	    """Return an extractive, frequency-based summary of *text*.

	    Every non-stopword token of the text is counted (case-insensitively).
	    Each sentence is then scored by adding the count of every counted token
	    that occurs in the lower-cased sentence; this is a substring test, so a
	    token also matches inside longer words. Sentences whose score exceeds
	    1.2 times the (integer-truncated) average score are kept, in their
	    original order.

	    Args:
	        text: The text to summarize.

	    Returns:
	        The selected sentences concatenated, each one preceded by a single
	        space. An empty string is returned when no sentence receives a
	        score (for example for empty text), instead of failing with a
	        division by zero.
	    """
	    stop_words = set(stopwords.words("english"))

	    # Frequency table of the lower-cased, non-stopword tokens.
	    freq_table = {}
	    for token in word_tokenize(text):
	        token = token.lower()
	        if token not in stop_words:
	            freq_table[token] = freq_table.get(token, 0) + 1

	    sentences = sent_tokenize(text)

	    # Score per sentence text. A sentence that occurs more than once
	    # accumulates its score once per occurrence, and sentences that match
	    # no counted token are left out of the table entirely.
	    sentence_value = {}
	    for sentence in sentences:
	        lowered = sentence.lower()  # lower-case once, not once per token
	        score = sum(freq for word, freq in freq_table.items() if word in lowered)
	        if score:
	            sentence_value[sentence] = sentence_value.get(sentence, 0) + score

	    # Nothing scored: there is no average to compare against.
	    if not sentence_value:
	        return ''

	    average = int(sum(sentence_value.values()) / len(sentence_value))
	    threshold = 1.2 * average

	    return ''.join(
	        " " + sentence
	        for sentence in sentences
	        if sentence in sentence_value and sentence_value[sentence] > threshold
	    )



	# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the weights
	# are read only once per process instead of on every summarization call.
	_BART_MODELS = {}


	def _load_bart(checkpoint):
	    """Return the (tokenizer, model) pair for *checkpoint*, loading it once."""
	    if checkpoint not in _BART_MODELS:
	        _BART_MODELS[checkpoint] = (
	            AutoTokenizer.from_pretrained(checkpoint),
	            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
	        )
	    return _BART_MODELS[checkpoint]


	def Summary_BART(text, checkpoint="sshleifer/distilbart-cnn-12-6"):
	    """Summarize *text* with a pretrained sequence-to-sequence checkpoint.

	    Args:
	        text: The text to summarize. It is tokenized with truncation to at
	            most 1024 tokens, so anything beyond that limit is ignored.
	        checkpoint: Name or path of the checkpoint to load with
	            ``from_pretrained``. Defaults to the DistilBART checkpoint this
	            function has always used.

	    Returns:
	        The first decoded sequence produced by ``model.generate``, with
	        special tokens removed.
	    """
	    tokenizer, model = _load_bart(checkpoint)
	    inputs = tokenizer(
	        text,
	        max_length=1024,
	        truncation=True,
	        return_tensors="pt",
	    )
	    summary_ids = model.generate(inputs["input_ids"])
	    summary = tokenizer.batch_decode(
	        summary_ids,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )
	    return summary[0]