import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForCausalLM # Load model and tokenizer model_name = "DAMO-NLP-SG/VideoLLaMA3-7B" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) model.eval() def generate_response(prompt, max_tokens=200, temperature=0.7): inputs = tokenizer(prompt, return_tensors="pt") with torch.no_grad(): output = model.generate( **inputs, max_new_tokens=max_tokens, temperature=temperature, do_sample=True, top_p=0.9, eos_token_id=tokenizer.eos_token_id ) response = tokenizer.decode(output[0], skip_special_tokens=True) return response[len(prompt):].strip() # Return only the generated part # Gradio UI iface = gr.Interface( fn=generate_response, inputs=[ gr.Textbox(label="Prompt", lines=5, placeholder="Enter your prompt here..."), gr.Slider(minimum=50, maximum=1000, value=200, label="Max Tokens"), gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"), ], outputs=gr.Textbox(label="Response"), title="VideoLLaMA3-7B Text Generation", description="Generate text using DAMO-NLP-SG/VideoLLaMA3-7B" ) if __name__ == "__main__": iface.launch()