# --- Hugging Face Space scrape residue, kept as a comment so the file parses ---
# Spaces: Sleeping | File size: 1,812 Bytes
# Commits: e75a8db 13dc820
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import gradio as gr
# Load the translation model and tokenizer once at import time.
model_name = "Moleys/hirashiba-mt-tiny-vp-zh"
# Prefer GPU when available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
def translate_text(input_text):
    """Translate each line of *input_text* to Chinese, line by line.

    Blank lines are preserved as blank lines in the output; every other
    line is stripped, lower-cased, and run through the seq2seq model.
    Returns the translated lines joined with newlines.
    """
    def _translate_line(line):
        # Normalise before tokenizing: trim whitespace, lower-case.
        text = line.strip().lower()
        if not text:
            # Keep empty lines in place so the output lines up with the input.
            return ''
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        # Inference only — no gradients needed.
        with torch.no_grad():
            generated = model.generate(**encoded, max_length=512)
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    return '\n'.join(_translate_line(line) for line in input_text.split('\n'))
if __name__ == '__main__':
    # Minimal Gradio UI: one input box, a button, and a read-only output box.
    with gr.Blocks() as app:
        gr.Markdown('## Vietphrase (Book title) to Chinese Translation')
        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(label='Input Vietnamese Text', lines=5, placeholder='Enter Vietnamese text here...')
                translate_button = gr.Button('Translate')
                output_text = gr.Textbox(label='Output Chinese Text', lines=5, interactive=False)
        # Wire the button to the translation function.
        translate_button.click(fn=translate_text, inputs=input_text, outputs=output_text)
    app.launch()