Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from peft import PeftModel | |
import torch | |
# Hub repository that holds the fine-tuned PEFT adapter (replace with your own ID).
model_repo_id = "ubiodee/Plutuslearn-Llama-3.2-3B-Instruct"

# The tokenizer is loaded from the adapter repository.
tokenizer = AutoTokenizer.from_pretrained(model_repo_id)

# Use half precision only when a CUDA device is available. On CPU-only
# hardware float16 is slow or unsupported for some ops in many PyTorch
# builds, so fall back to float32 there.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the base model, then apply the PEFT adapter on top of it.
# NOTE(review): the meta-llama base repository is presumably gated on the
# Hub -- confirm an access token is configured for this Space.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=model_dtype,
    device_map="auto",  # place weights on whatever device(s) are available
)
model = PeftModel.from_pretrained(base_model, model_repo_id)
# Define the prediction function
def predict(text: str) -> str:
    """Generate a completion for *text* with the fine-tuned model.

    Args:
        text: Prompt entered by the user.

    Returns:
        The decoded output sequence (prompt plus generated continuation)
        with special tokens removed.
    """
    # Send the inputs to the device the model was actually placed on rather
    # than assuming "cuda": with device_map="auto" the weights may be on CPU.
    target_device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt").to(target_device)

    # Gradients are never needed for generation.
    with torch.no_grad():
        # max_new_tokens bounds only the generated part; max_length would
        # also count the prompt and leave no room for long inputs.
        outputs = model.generate(**inputs, max_new_tokens=100)  # Adjust parameters as needed

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the UI components first, then wire them to the prediction function.
prompt_box = gr.Textbox(label="Input Text")
result_box = gr.Textbox(label="Model Output")

# Single-input / single-output Gradio interface around `predict`.
demo = gr.Interface(
    fn=predict,
    inputs=prompt_box,
    outputs=result_box,
    title="My Model Demo",
    description="Test the fine-tuned model hosted on Hugging Face.",
)

# Start serving the app.
demo.launch()