import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "AddieFoote0/language-100M-MaxEnt-distilled-relearned"

# Load the model in bfloat16 to roughly halve memory use relative to float32.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model.eval()

# torch.compile is only available in PyTorch >= 2.0; fall back gracefully on older versions.
if hasattr(torch, "compile"):
    model = torch.compile(model)
    print("compiled model")
else:
    print("no compile")


def generate_response(prompt: str) -> str:
    """Generate a short sampled continuation and return only the new tokens."""
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=16,
            do_sample=True,
            temperature=1.0,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt so only the newly generated tokens are decoded.
    input_length = inputs["input_ids"].shape[1]
    new_token_ids = outputs[0][input_length:]
    return tokenizer.decode(new_token_ids, skip_special_tokens=False)


iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Enter your prompt"),
    outputs=gr.Textbox(label="Model Response"),
    title="Lang Model Demo",
)

if __name__ == "__main__":
    iface.launch()
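
# Usage sketch (an assumption, not part of the original script): once launched,
# the demo can also be queried programmatically with the gradio_client package.
# The "/predict" endpoint name is the default that gr.Interface exposes; the
# localhost URL and port assume launch() ran with its defaults.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Once upon a time", api_name="/predict"))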