| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForSeq2SeqLM, | |
| ) | |
class Paraphraser:
    """Generate paraphrases of short English texts with a T5-based seq2seq model.

    Wraps a Hugging Face ``AutoModelForSeq2SeqLM`` checkpoint fine-tuned for
    paraphrasing and exposes a single ``paraphrase`` method that returns a list
    of candidate rewrites produced by diverse beam search.
    """

    def __init__(self, model_name='humarin/chatgpt_paraphraser_on_T5_base'):
        """Load tokenizer and model weights for *model_name* and switch to eval mode.

        Args:
            model_name: Hub identifier or local path of a seq2seq checkpoint.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        # Inference-only: disable dropout etc.
        self.model.eval()

    def paraphrase(self, text, num_return_sequences=5, num_beams=10, num_beam_groups=5, diversity_penalty=0.8):
        """Return up to *num_return_sequences* paraphrases of *text*.

        Uses diverse beam search (``num_beam_groups`` > 1 with
        ``diversity_penalty``) so the candidates differ from one another.
        ``num_beams`` must be divisible by ``num_beam_groups`` and
        ``num_return_sequences`` must not exceed ``num_beams``.

        Returns:
            A list of decoded paraphrase strings, or ``[]`` on any failure
            (best-effort contract: this method never raises).
        """
        try:
            # Call the tokenizer directly (encode_plus is deprecated). Do NOT
            # append " </s>" manually: the T5 tokenizer adds the EOS token
            # itself, so the old concatenation produced a doubled EOS.
            encoding = self.tokenizer("paraphrase: " + text, return_tensors="pt")
            outputs = self.model.generate(
                input_ids=encoding["input_ids"],
                # Pass the mask the tokenizer built; omitting it triggers
                # warnings and can mis-handle padded inputs.
                attention_mask=encoding["attention_mask"],
                max_length=256,
                num_beams=num_beams,
                num_beam_groups=num_beam_groups,
                num_return_sequences=num_return_sequences,
                diversity_penalty=diversity_penalty,
                early_stopping=True,
            )
            # batch_decode replaces the manual per-sequence decode loop.
            return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        except Exception as e:
            # Deliberate best-effort behavior: log and return an empty list so
            # callers never see an exception from paraphrasing.
            print(f"Error in paraphrasing: {e}")
            return []