EricaCorral committed on
Commit
940743d
·
1 Parent(s): 62121ed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from pypinyin import pinyin
# The M2M100 model class is exported as ``M2M100ForConditionalGeneration``;
# the previous import of ``M2MForConditionalGeneration`` did not match the
# name used below and made the app fail at start-up.
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from LAC import LAC

# Hugging Face checkpoint shared by the translation model and its tokenizer.
MODEL_NAME = "facebook/m2m100_418M"

# Module-level resources used by the helpers and the UI code in this file.
# LAC is created in 'seg' mode and is used further down to split the input
# text into pieces.
lac = LAC(mode='seg')
model = M2M100ForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = M2M100Tokenizer.from_pretrained(MODEL_NAME)
# All input handled by this app is Chinese, so the source language is set once.
tokenizer.src_lang = "zh"
10
+
11
def make_request(chinese_text):
    """Translate Chinese text into English with the module-level M2M100 model.

    Args:
        chinese_text: Text passed to the module-level ``tokenizer`` (whose
            source language is configured as ``"zh"``).

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens,
        with special tokens skipped.
    """
    # Forcing the first generated token to the English language id is what
    # selects English as the target language.
    english_id = tokenizer.get_lang_id("en")
    model_inputs = tokenizer(chinese_text, return_tensors="pt")
    output_ids = model.generate(**model_inputs, forced_bos_token_id=english_id)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
15
+
16
def generatepinyin(input):
    """Return the pinyin reading of ``input`` as one space-separated string.

    Args:
        input: Text handed to ``pypinyin.pinyin``. (The name shadows the
            ``input`` builtin but is kept so existing callers keep working.)

    Returns:
        The first reading of every item returned by ``pinyin``, joined with
        single spaces. Unlike the previous concatenation loop, the result no
        longer starts with a stray space; empty input yields ``""``.
    """
    # ``pinyin`` yields one list of candidate readings per item; only the
    # first candidate of each is used.
    return " ".join(piece[0] for piece in pinyin(input))
22
+
23
# --- Streamlit page: input box, "Run" button and result display ---
st.title("Chinese-To-English-Tools")
string_to_translate = st.text_area(
    "Chinese Text to Translate",
    height=500,
)

if st.button("Run"):
    if not string_to_translate.strip():
        # Skip the model and the segmenter when there is nothing to
        # translate instead of displaying a translation of an empty string.
        st.warning("Please enter some Chinese text to translate.")
    else:
        # The first row covers the whole input; the remaining rows cover each
        # piece produced by the LAC segmenter, in order.
        texts = [string_to_translate, *lac.run(string_to_translate)]
        # Each row is [source text, translation, pinyin].
        response = [
            [text, make_request(text), generatepinyin(text)]
            for text in texts
        ]
        st.success(response)