# The inference function
from transformers import FillMaskPipeline, TFAutoModelForMaskedLM, AutoTokenizer
from transformers import BertTokenizer
import gradio as gr

# Load the tokenizer trained on the arXiv corpus
tokenizer_path_1 = "./vocab.txt"
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)

# Load the pretrained model
model_path = "./bert_lm_10"
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)

# Build the unmasker pipeline using HF for inference
unmasker = FillMaskPipeline(model=model_1, tokenizer=tokenizer_1)

# Try it on a sample text; the expected answer is "reduction"
txt = "a polynomial [MASK] from 3-SAT."
# results = unmasker(txt, top_k=5)
# Show the results
# for res in results:
#     print(res["sequence"])
#     print(res["score"])

# Now the same pipeline for BERT pretrained on plain English
default_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)
unmasker_bert = FillMaskPipeline(model=model, tokenizer=tokenizer)

# Wrap both unmaskers in one function: it returns two dicts mapping each
# suggested token (with internal spaces stripped) to its score, which is
# the format gr.outputs.Label expects
def unmask_words(txt_with_mask, k_suggestions=5):
    results_cc = unmasker(txt_with_mask, top_k=k_suggestions)
    labels = {}
    for res in results_cc:
        labels["".join(res["token_str"].split(" "))] = res["score"]
    results_bert = unmasker_bert(txt_with_mask, top_k=k_suggestions)
    labels_bert = {}
    for res in results_bert:
        labels_bert["".join(res["token_str"].split(" "))] = res["score"]
    return labels, labels_bert

# Trying our function
# val = unmask_words(txt)

description = """This is a demo of a masked language model pretrained on data collected
from ~197k arXiv papers consisting of mathematical proofs and theorems. The aim of this
interface is to show the difference between plain-English and scientific-English pretraining.
For more information, visit the [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [mishra@di.ens.fr](mailto:mishra@di.ens.fr).
"""

examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
    ["we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data, and their combined [MASK]"],
]

input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)
interface = gr.Interface(
    fn=unmask_words,
    inputs=[input_box, gr.inputs.Slider(1, 10, 1, 5, label="No. of suggestions:")],
    outputs=[
        gr.outputs.Label(label="top words:"),
        gr.outputs.Label(label="top words eng-bert:"),
    ],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)
interface.launch()
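
# A minimal sketch of querying unmask_words directly, without the Gradio UI.
# It is left commented out because interface.launch() above blocks; run it
# separately or comment out the launch first. The tokens and scores shown
# in the comments are illustrative guesses, not actual model output.
#
# cc_scores, bert_scores = unmask_words("a polynomial [MASK] from 3-SAT.", k_suggestions=3)
# print(cc_scores)    # e.g. {"reduction": 0.9, ...} from the arXiv-pretrained model
# print(bert_scores)  # suggestions from plain bert-base-uncased, for comparison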