File size: 1,257 Bytes
940743d
 
d703df5
940743d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import streamlit as st
from pypinyin import pinyin
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
from LAC import LAC

# Shared NLP objects, created once at import time and used by the code below.
# NOTE(review): Streamlit normally re-executes the whole script on every
# interaction, so these loads may be repeated per rerun -- consider caching
# them; confirm against the Streamlit version in use.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"

# LAC in 'seg' mode; used further down to split the input text into pieces.
lac = LAC(mode='seg')

# Tokenizer and model come from the same checkpoint; the tokenizer's source
# language is set to Chinese.
tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
tokenizer.src_lang = "zh"
model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)

def make_request(chinese_text):
    """Translate Chinese text to English with the module-level M2M100 model.

    Args:
        chinese_text: Chinese input, handed directly to the tokenizer.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated token ids,
        with special tokens skipped.
    """
    model_inputs = tokenizer(chinese_text, return_tensors="pt")
    # Forcing the first generated token to the English language id is what
    # selects the output language.
    english_id = tokenizer.get_lang_id("en")
    output_ids = model.generate(**model_inputs, forced_bos_token_id=english_id)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)

def generatepinyin(input):
    """Return the pinyin of ``input`` as a single space-separated string.

    Only the first item of each entry returned by ``pinyin`` is used. Every
    syllable is prefixed with a space, so a non-empty result starts with a
    leading space; an empty result is the empty string.
    """
    return "".join(" " + reading[0] for reading in pinyin(input))

# --- Streamlit page: text box plus a "Run" button -------------------------
st.title("Chinese-To-English-Tools")
string_to_translate = st.text_area("Chinese Text to Translate", height=500)

if st.button("Run"):
    # Each row is [source text, translation, pinyin]. The first row covers
    # the whole input; the remaining rows cover each piece LAC returns.
    rows = [
        [
            string_to_translate,
            make_request(string_to_translate),
            generatepinyin(string_to_translate),
        ]
    ]
    rows.extend(
        [segment, make_request(segment), generatepinyin(segment)]
        for segment in lac.run(string_to_translate)
    )
    # The nested list is passed to st.success as-is.
    st.success(rows)