import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Yoxas/autotrain-tinyllama-statistical")
model = AutoModelForCausalLM.from_pretrained("Yoxas/autotrain-tinyllama-statistical")

# LLaMA-family tokenizers typically ship without a pad token; calling the
# tokenizer with padding=True would then raise a ValueError, so fall back
# to the EOS token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Run on the GPU when one is available, otherwise stay on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


@spaces.GPU(duration=120)
def chatbot_response(input_text, max_length, temperature):
    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(model.device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # Gradio sliders return floats; generate expects an integer length.
        max_length=int(max_length),
        temperature=temperature,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# Create the Gradio interface
inputs = [
    gr.Textbox(lines=2, placeholder="Enter your message here..."),
    gr.Slider(minimum=10, maximum=512, value=50, step=1, label="Max Length"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
]
outputs = gr.Textbox()
interface = gr.Interface(fn=chatbot_response, inputs=inputs, outputs=outputs, title="Simple Chatbot")

# Launch the interface
interface.launch()