# Spaces: Runtime error
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
import gradio as gr | |
import torch | |
# Identifier handed to from_pretrained() for both the tokenizer and the model.
model_id = "AmiyendraOP/llama3-legal-finetuned"

# Pipeline device index: 0 (first CUDA device) when CUDA is available, else -1 (CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Load the tokenizer, then the causal-LM weights in float32 with
# low_cpu_mem_usage enabled.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Wrap model + tokenizer in a text-generation pipeline on the chosen device.
pipe = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    device=device,
)
# Define a chat function
def chat(prompt):
    """Generate a reply to a single question with the text-generation pipeline.

    Args:
        prompt: Question text entered by the user.

    Returns:
        The newly generated text only (the prompt is not echoed back), or an
        empty string when ``prompt`` is empty or whitespace-only.
    """
    # Blank input: nothing to answer, so skip the expensive sampling call.
    if not prompt or not prompt.strip():
        return ""

    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        # The pipeline defaults to return_full_text=True, which prefixes the
        # prompt to "generated_text"; the response box should show only the
        # model's continuation.
        return_full_text=False,
    )
    return outputs[0]["generated_text"]
# Build the Gradio interface around chat() and start serving it.
question_box = gr.Textbox(
    lines=4,
    placeholder="Enter legal question...",
    label="Your Question",
)
response_box = gr.Textbox(label="Response")

demo = gr.Interface(
    fn=chat,
    inputs=question_box,
    outputs=response_box,
    title="LLaMA 3 Legal Chatbot (Fine-tuned)",
)
demo.launch()