# Spaces: Sleeping
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Identifier handed to both from_pretrained() calls below.
model_name = "AddieFoote0/language-100M-MaxEnt-distilled-relearned"

# Load the causal LM with bfloat16 weights, then the tokenizer from the same id.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Only wrap the model when this torch build exposes `compile`; report which
# path was taken either way.
if not hasattr(torch, "compile"):
    print("no compile")
else:
    model = torch.compile(model)
    print("compiled model")
def generate_response(prompt):
    """Sample a short continuation of ``prompt`` from the module-level model.

    Args:
        prompt: Text to tokenize and feed to ``model.generate``.

    Returns:
        The decoded text of the tokens that follow the prompt in the first
        generated sequence. Special tokens are kept in the decoded string.
    """
    encoded = tokenizer(prompt, return_tensors="pt")

    # Sampling settings: at most 16 new tokens, sampling enabled, and the EOS
    # id reused as the padding id.
    sampling_options = dict(
        max_new_tokens=16,
        do_sample=True,
        temperature=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    generated = model.generate(**encoded, **sampling_options)

    # Skip the first `prompt_len` positions so only what comes after the
    # prompt is decoded.
    prompt_len = encoded["input_ids"].shape[1]
    continuation_ids = generated[0][prompt_len:]
    return tokenizer.decode(continuation_ids, skip_special_tokens=False)
# Build a one-textbox-in / one-textbox-out form around generate_response
# and start serving it.
iface = gr.Interface(
    generate_response,
    gr.Textbox(label="Enter your prompt"),
    gr.Textbox(label="Model Response"),
    title="Lang Model Demo",
)
iface.launch()