from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
import torch

# Load the fine-tuned model and its tokenizer from the Hugging Face Hub
model_id = "AmiyendraOP/llama3-legal-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
)

# Set device: first GPU if available, otherwise CPU
device = 0 if torch.cuda.is_available() else -1

# Build a text-generation pipeline around the loaded model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Define a chat function; return_full_text=False strips the input prompt from
# the pipeline output so the UI shows only the model's answer
def chat(prompt):
    response = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]
    return response

# Launch the Gradio app
gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=4, placeholder="Enter legal question...", label="Your Question"),
    outputs=gr.Textbox(label="Response"),
    title="LLaMA 3 Legal Chatbot (Fine-tuned)",
).launch()
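
# Optional: a minimal sketch of a chat function that formats the prompt with the
# tokenizer's chat template before generation. Llama 3 instruct checkpoints are
# trained on this template, so raw-string prompts can degrade answer quality.
# This assumes the fine-tuned tokenizer ships a chat template; if it does not,
# apply_chat_template will raise an error and the plain chat() above applies.
def chat_with_template(prompt):
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # append the assistant header so the model replies
    )
    return pipe(
        formatted,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]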