"""Streamlit demo app: generate text with a fine-tuned GPT-2 style model."""

import re

import streamlit as st
import tokenizers
import torch
import transformers


# The tokenizer internals and compiled regexes are mapped to a constant hash
# (None) so st.cache does not attempt to hash them itself.
@st.cache(
    hash_funcs={
        tokenizers.Tokenizer: lambda _: None,
        tokenizers.AddedToken: lambda _: None,
        re.Pattern: lambda _: None,
    },
    allow_output_mutation=True,
    suppress_st_warning=True,
)
def get_model(model_name: str, model_path: str):
    """Load the tokenizer and the fine-tuned language model.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the base ``GPT2LMHeadModel``.
        model_path: Path to a ``state_dict`` checkpoint that is loaded on top
            of the base model weights.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is in eval mode.
    """
    tokenizer = transformers.GPT2Tokenizer.from_pretrained(model_name)
    model = transformers.GPT2LMHeadModel.from_pretrained(model_name)
    # NOTE: torch.load unpickles the file, so model_path must point at a
    # trusted checkpoint. Tensors are mapped to CPU regardless of the device
    # they were saved from.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    return model, tokenizer


# predict is not cached: it samples (do_sample=True), so repeated calls with
# the same prompt are presumably meant to give different continuations.
def predict(text, model, tokenizer, n_beams=5, temperature=2.5, top_p=0.8,
            max_length=200):
    """Generate a continuation of *text* and return it as a decoded string.

    Args:
        text: Prompt typed by the user; a newline is appended before encoding.
        model: Model exposing ``generate`` (as returned by ``get_model``).
        tokenizer: Tokenizer exposing ``encode``/``decode``.
        n_beams: Forwarded to ``generate`` as ``num_beams``.
        temperature: Sampling temperature forwarded to ``generate``.
        top_p: Nucleus-sampling threshold forwarded to ``generate``.
        max_length: Forwarded to ``generate`` as ``max_length``.

    Returns:
        The decoded text of the first sequence returned by ``generate``.
    """
    # Use a local instead of rebinding the parameter.
    prompt = text + '\n'
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    with torch.no_grad():
        out = model.generate(
            input_ids,
            do_sample=True,
            num_beams=n_beams,
            temperature=temperature,
            top_p=top_p,
            max_length=max_length,
        )
    # Only the first sequence is used, so decode just that one rather than
    # decoding every returned sequence and discarding the rest.
    return tokenizer.decode(out[0])


model, tokenizer = get_model(
    'sberbank-ai/rugpt3medium_based_on_gpt2',
    'korzh-medium_30epochs_1bs.bin',
)

st.title("NeuroKorzh")
st.markdown("", unsafe_allow_html=True)
st.markdown("\n")

text = st.text_area(label='Starting point for text generation', height=100)
button = st.button('Go')

if button:
    result = predict(text, model, tokenizer)
    # st.write renders strings as Markdown, where a single newline does not
    # start a new paragraph; doubling newlines keeps each generated line
    # visually separate.
    lines = result.replace('\n', '\n\n')
    st.write(lines)