File size: 1,123 Bytes
43340f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ccca0b
43340f6
1dfc0c0
898992e
43340f6
6ccca0b
43340f6
6ccca0b
43340f6
 
 
 
 
 
 
898992e
 
43340f6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Imports
import os
import torch
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import transformers
import gradio as gr


# Matches any run of whitespace (spaces, tabs, newlines, ...). Written as a raw
# string so the backslash reaches the regex engine instead of being treated as
# an (invalid) string escape, which newer Python versions warn about.
_WHITESPACE_RUN = re.compile(r"\s+")


def WHITESPACE_HANDLER(k):
    """Return *k* stripped, with every run of whitespace collapsed to one space.

    Newlines count as whitespace, so a single substitution covers both the
    newline and the general-whitespace cases.

    Args:
        k: The string to normalize.

    Returns:
        The normalized string; an empty or whitespace-only input yields ``""``.
    """
    return _WHITESPACE_RUN.sub(" ", k.strip())

# Identifier handed to from_pretrained for both the tokenizer and the model
# (presumably a Hugging Face Hub checkpoint id).
model_name = "csebuetnlp/mT5_multilingual_XLSum"
# use_fast=False asks for the Python ("slow") tokenizer implementation instead
# of the default fast one.
tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=False)
# Loaded once at module import so every generate_summary call reuses the same
# model object.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def generate_summary(text):
    """Summarize *text* with the module-level tokenizer and seq2seq model.

    The text is whitespace-normalized, tokenized (truncated and padded to 512
    tokens), passed through beam-search generation, and decoded to a string.

    Args:
        text: The text to summarize. ``None`` and whitespace-only strings are
            accepted and treated as "nothing to summarize".

    Returns:
        The decoded summary, or ``""`` when there is no text to summarize.
    """
    # Nothing to summarize: return early instead of running an expensive
    # beam search over an input that consists only of padding.
    if text is None or not text.strip():
        return ""

    encoded = tokenizer(
        [WHITESPACE_HANDLER(text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    # The batch holds a single text, so take the first generated sequence.
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        # The input is padded to max_length; pass the mask explicitly so the
        # padded positions are ignored rather than relying on generate() to
        # infer them.
        attention_mask=encoded["attention_mask"],
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

# Web UI: a tall free-text input box feeds generate_summary, and the returned
# summary is shown in a shorter output box.
input_box = gr.Textbox(lines=100, placeholder="Ingrese Texto")
output_box = gr.Textbox(lines=10)
demo = gr.Interface(fn=generate_summary, inputs=input_box, outputs=output_box)

# Launch the Gradio app as soon as this script is executed.
demo.launch()