EricaCorral's picture
Update app.py
d703df5
raw
history blame
1.26 kB
import streamlit as st
from pypinyin import pinyin
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
from LAC import LAC
# Checkpoint name shared by the translation model and its tokenizer.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"

# LAC instance created in "seg" mode; the UI code below iterates over the
# pieces returned by its run() method.
lac = LAC(mode="seg")

# Translation model and matching tokenizer, both loaded from the same
# checkpoint when the script runs.
model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)

# All input handed to the tokenizer is treated as Chinese.
tokenizer.src_lang = "zh"
def make_request(chinese_text):
    """Translate Chinese text to English with the module-level M2M100 model.

    Args:
        chinese_text: Text to translate; it is passed to the tokenizer
            unchanged.

    Returns:
        Whatever ``tokenizer.batch_decode`` produces for the generated
        tokens with special tokens skipped (a list of decoded strings per
        the transformers API, not a bare string).
    """
    model_inputs = tokenizer(chinese_text, return_tensors="pt")
    # Forcing the first generated token to the "en" language id selects
    # English as the target language.
    english_bos_id = tokenizer.get_lang_id("en")
    output_ids = model.generate(**model_inputs, forced_bos_token_id=english_bos_id)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
def generatepinyin(input):
    """Return the pinyin reading of *input* as a space-separated string.

    Args:
        input: Chinese text to transliterate. The name shadows the builtin
            ``input`` but is kept so existing keyword callers keep working.

    Returns:
        The first reading that ``pinyin`` reports for each piece, joined by
        single spaces with no leading or trailing space. An empty string
        when ``pinyin`` yields no pieces.
    """
    # ``pinyin`` yields one list of candidate readings per piece; only the
    # first candidate of each is used. Joining (rather than prepending a
    # space on every iteration) avoids a stray leading space in the result.
    return " ".join(readings[0] for readings in pinyin(input))
# --- Streamlit page: one text area, one button, one result box -------------
st.title("Chinese-To-English-Tools")
string_to_translate = st.text_area(
    "Chinese Text to Translate",
    height=500,
)

if st.button("Run"):
    if not string_to_translate.strip():
        # Nothing to translate: skip the model calls instead of running the
        # translator on empty text and showing a meaningless result.
        st.warning("Please enter some Chinese text to translate.")
    else:
        # The first row covers the whole input; the following rows cover each
        # piece returned by LAC. Every row is [text, translation, pinyin].
        segments = lac.run(string_to_translate)
        response = [
            [text, make_request(text), generatepinyin(text)]
            for text in (string_to_translate, *segments)
        ]
        st.success(response)