# Cardano_7B / app.py
# Author: Remostart — "Update app.py" (commit edc81e3, verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from spaces import GPU
# Load base model and tokenizer.
# The slow (SentencePiece) tokenizer is forced with use_fast=False.
BASE_MODEL_NAME = "NousResearch/Meta-Llama-3-8B"
LORA_MODEL_NAME = "ubiodee/plutus_llm"
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, use_fast=False)
# fp16 weights with device_map="auto" lets accelerate place layers on the
# available GPU(s), spilling to CPU if needed.
base_model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL_NAME,
torch_dtype=torch.float16,
device_map="auto"
)
# Apply LoRA weights on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, LORA_MODEL_NAME)
# Set padding token: Llama tokenizers ship without one, so reuse EOS
# (required because generate_response tokenizes with padding=True).
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# Inference mode: disables dropout etc.
model.eval()
# Response function with ZeroGPU decorator: @GPU attaches a GPU to this call
# for the duration of the request on Hugging Face ZeroGPU Spaces.
@GPU
def generate_response(prompt, max_new_tokens=200, temperature=0.7, top_p=0.9):
    """Generate a sampled completion for *prompt* with the LoRA-adapted model.

    Args:
        prompt: User prompt text.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Softmax temperature for sampling.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The generated continuation only (prompt tokens removed), stripped.
    """
    # Follow the model's actual placement instead of hard-coding "cuda":
    # with device_map="auto" the embedding layer may not be on cuda:0.
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, truncation=True
    ).to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Drop the prompt by token count rather than string-prefix matching:
    # decode(encode(prompt)) need not reproduce the prompt byte-for-byte,
    # which made the old startswith() check silently leave the prompt in.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response.strip()
# --- Gradio front-end -------------------------------------------------------
# One prompt textbox plus three sampling controls, wired to generate_response.
prompt_box = gr.Textbox(label="Enter your prompt", lines=4, placeholder="Ask about Plutus...")
tokens_slider = gr.Slider(label="Max New Tokens", minimum=50, maximum=500, value=200, step=10)
temp_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.7, step=0.1)
top_p_slider = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.05)

demo = gr.Interface(
    fn=generate_response,
    inputs=[prompt_box, tokens_slider, temp_slider, top_p_slider],
    outputs=gr.Textbox(label="Model Response"),
    title="Cardano Plutus AI Assistant",
    description="Ask questions about Plutus smart contracts or Cardano blockchain using ubiodee/plutus_llm."
)

# Launch the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()