Spaces:

Amitontheweb
/

Text_Paraphraser_Title_Generator

Running

App Files Files Community

Text_Paraphraser_Title_Generator / app.py

Amitontheweb

Upload app.py

c41038b verified 5 months ago

raw

history blame contribute delete

4.5 kB

	import html
	import re

	import gradio as gr
	import numpy as np
	import pandas as pd
	import torch
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Checkpoint identifiers passed to from_pretrained below. Each one is used
	# for both the tokenizer and the model so the pair always comes from the
	# same checkpoint.
	PARAPHRASER_CHECKPOINT = "humarin/chatgpt_paraphraser_on_T5_base"
	TITLE_CHECKPOINT = "Ateeqq/news-title-generator"

	# Tokenizer/model pair used by paraphrase() to reword sentences.
	tokenizer = AutoTokenizer.from_pretrained(PARAPHRASER_CHECKPOINT)
	model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASER_CHECKPOINT)

	# Tokenizer/model pair used by generate_title() to propose titles.
	tokenizer_gen_title = AutoTokenizer.from_pretrained(TITLE_CHECKPOINT)
	model_gen_title = AutoModelForSeq2SeqLM.from_pretrained(TITLE_CHECKPOINT)

	def generate_title(input_text):
	    """Generate one candidate title for ``input_text``.

	    The text is encoded with ``tokenizer_gen_title`` and passed to
	    ``model_gen_title.generate`` with sampling enabled, so repeated calls
	    with the same text may return different titles.

	    Args:
	        input_text: Text to produce a title for.

	    Returns:
	        The first generated sequence decoded to a string, with special
	        tokens removed.
	    """
	    # Generation settings kept together so they are easy to compare with
	    # the values advertised in the interface description.
	    sampling_options = {
	        "max_new_tokens": 100,
	        "do_sample": True,
	        "temperature": 0.8,
	        "top_k": 20,
	    }

	    # The encoded ids are passed to generate() as-is; no device transfer
	    # is performed here.
	    token_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt")
	    generated = model_gen_title.generate(token_ids, **sampling_options)

	    return tokenizer_gen_title.decode(generated[0], skip_special_tokens=True)



	def split_into_sentences(paragraph):
	    """Split ``paragraph`` into a list of sentences for the paraphraser.

	    A split happens at a single whitespace character that directly follows
	    ".", "?" or "!". Two negative lookbehinds keep common abbreviations
	    together: dotted forms such as "e.g." and capitalised two-letter forms
	    such as "Mr." do not end a sentence.

	    The whitespace character at each split point is consumed (this includes
	    a newline that directly follows sentence punctuation); any other
	    whitespace, including newlines inside a sentence, is kept in the pieces.

	    Args:
	        paragraph: Text to split.

	    Returns:
	        List of sentence strings. Text without a sentence boundary, including
	        the empty string, comes back as a single-element list.
	    """
	    # The terminator lookbehind uses a character class. The earlier pattern
	    # escaped the alternation bars, which made it look for the literal text
	    # ".|?|!" and therefore never split ordinary prose.
	    sentence_endings = r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[.?!])\s'
	    return re.split(sentence_endings, paragraph)

	def paraphrase(
	    text,
	    beam_search,
	    temperature=0.8,
	    max_length=128
	):
	    """Reword ``text`` sentence by sentence and propose three titles for it.

	    The input is split with ``split_into_sentences``; every non-blank
	    sentence is sent to the paraphrasing model on its own (so sentences are
	    reworded without context) and the results are joined with single spaces.
	    ``generate_title`` is then called three times on the joined text.

	    Args:
	        text: Paragraph(s) to paraphrase.
	        beam_search: When truthy, generate with 20 beams; otherwise with 1.
	        temperature: Sampling temperature forwarded to ``model.generate``.
	        max_length: ``max_length`` forwarded to ``model.generate`` for each
	            sentence.

	    Returns:
	        A ``(titles_html, paraphrased_text)`` tuple. ``titles_html`` holds
	        three ``Title N: ...<br>`` entries with the generated titles
	        HTML-escaped. Both items are empty strings when ``text`` is empty,
	        ``None`` or only whitespace.
	    """
	    # Always hand back a two-item result so the two output components get a
	    # value even when nothing was entered.
	    if not text or not text.strip():
	        return ("", "")

	    reworded = []  # one paraphrased sentence per non-blank input sentence
	    for sentence in split_into_sentences(text):
	        # Splitting can leave blank fragments (e.g. trailing whitespace);
	        # sending those to the model would only produce filler text.
	        if not sentence.strip():
	            continue

	        input_ids = tokenizer(
	            f'paraphrase: {sentence}',  # prompt prefix used with this T5 model
	            return_tensors="pt", padding="longest",
	        ).input_ids

	        outputs = model.generate(
	            input_ids,
	            do_sample=True,
	            num_beams=20 if beam_search else 1,
	            temperature=temperature,
	            max_length=max_length,
	            no_repeat_ngram_size=4,
	        )

	        candidates = tokenizer.batch_decode(outputs, skip_special_tokens=True)
	        reworded.append(candidates[0])

	    paraphrased_text = " ".join(reworded)

	    # The titles are rendered as HTML, so escape the generated text to keep
	    # characters such as "&" or "<" from being interpreted as markup.
	    titles_list = "".join(
	        f"Title {number}: {html.escape(generate_title(paraphrased_text))}<br>"
	        for number in range(1, 4)
	    )

	    return (titles_list, paraphrased_text)


	# Gradio UI: the three inputs map positionally onto paraphrase(text,
	# beam_search, temperature); the two outputs receive the (titles, text)
	# tuple it returns.
	iface = gr.Interface(
	    fn=paraphrase,
	    inputs=[
	        gr.Textbox(label="Paste text in the input box and press 'Submit'.", lines=10),
	        "checkbox",
	        gr.Slider(0.1, 2, 0.8),
	    ],
	    outputs=[
	        gr.HTML(label="Titles:"),
	        gr.Textbox(label="Rephrased text:", lines=15),
	    ],
	    title="AI Paraphraser with Title Generator",
	    description="Sentence-to-sentence rewording backed with GPT-3.5 training set",
	    # Adjacent string literals are concatenated into one HTML string; the
	    # title model is named exactly as the checkpoint this file loads.
	    article=(
	        "<div align=left><h1>AI Paraphraser and Title Generator</h1>"
	        "<li>Each sentence is rephrased separately without context.</li>"
	        "<li>Temperature: Increase value for more creative rewordings. Higher values may corrupt the sentence. Reset value after pressing 'Clear'</li>"
	        "<li>Beam search: Try for safer and conservative rephrasing.</li>"
	        "<p>Models:<br>"
	        "<li>Training set derived by using Chat-GPT3.5. No competition intended.</li>"
	        "<li>Original models: humarin/chatgpt_paraphraser_on_T5_base and Ateeqq/news-title-generator. Deployment code modified for long text inputs.</li></p>"
	        "<p>Parameter details:<br>"
	        "<li>For rephraser: Beam search: No. of beams = 20, no_repeat_ngram_size=4, do_sample=True.</li>"
	        "<li>For title generator: do_sample=True, temperature=0.8, top_k = 20 </li></div>"
	    ),
	    flagging_mode='never'
	)

	# Start the web server only when this file is executed as a script, so
	# importing the module does not launch it as a side effect.
	if __name__ == "__main__":
	    iface.launch()