# Source: Nice / pages / 3_🐧_分词.py — author: betterme, commit b966bc9 ("update"), 1.08 kB
from meutils.pipe import *
from appzoo.streamlit_app import Page
import streamlit as st
from LAC import LAC as _LAC
# Rebind the name `LAC` to the imported `_LAC` class wrapped by Streamlit's
# `experimental_singleton`, so every `LAC()` call below goes through that
# wrapper instead of invoking the class directly.
# NOTE(review): presumably this keeps a single loaded model across Streamlit
# reruns — confirm against the installed Streamlit version.
LAC = st.experimental_singleton(_LAC)
# Earlier `st.cache`-based attempt, left disabled:
# @st.cache(func=None, persist=False, hash_funcs={'LAC.lac.LAC': str})
# def tokenizer():
#     print('Loading tokenizer1...')
#     return LAC()
@ttl_cache(key=str)
@disk_cache()
def tokenizer(texts):
    """Run LAC over `texts` and return each result transposed into a list.

    `texts` is handed unchanged to `LAC().run`. Every element of the output
    is then transposed with `zip(*...)` and materialised as a list via the
    `xmap_` pipe helper.

    NOTE(review): presumably each LAC result is a (words, tags) pair, making
    each transposed item a list of (word, tag) tuples — confirm against LAC.
    NOTE(review): caching behaviour comes from `disk_cache` and
    `ttl_cache(key=str)`, both defined outside this file — verify their
    semantics there.
    """

    def _transpose(result):
        # Turn parallel sequences into a list of aligned tuples.
        return list(zip(*result))

    raw_results = LAC().run(texts)
    return raw_results | xmap_(_transpose)
class MyPage(Page):
    """Streamlit page: take text input and display the tokenizer output."""

    def main(self):
        """Render the input form; on submit, tokenize and annotate each line."""
        # Default content of the text area: the sample sentence three times,
        # joined with newlines.
        default_text = ["为什么东北证券那么牛?"] * 3 | xjoin('\n')

        with st.form("Coding"):
            entered = st.text_area("输入文本", default_text)
            # One input text per line of the text area.
            texts = entered.split("\n")

            submitted = st.form_submit_button('开始转换')
            if submitted:
                tagged_lines = tokenizer(texts)

                # Alternative renderers, kept for debugging:
                # st.json(tagged_lines)
                # st.text(tagged_lines)
                # st.dataframe(pd.DataFrame(tagged_lines))

                # Imported lazily so the dependency is only needed on submit.
                from annotated_text import annotated_text

                for tagged in tagged_lines:
                    # Each item of a tokenized line becomes one positional
                    # argument of annotated_text.
                    annotated_text(*tagged)
if __name__ == '__main__':
    # Page heading (markdown) and an empty info string, passed to the page
    # constructor.
    app_title, app_info = "# 切词", ""
    page = MyPage(app_title=app_title, app_info=app_info)
    page.main()