import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

import gradio as gr
import spaces

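# Llama 3.1 is a gated model on the Hugging Face Hub, so a read access token is required.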
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

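# Model id, device placement strategy, and weight precision used for loading.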
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct" |
|
device = "auto" |
|
dtype = torch.bfloat16 |
|
|
|
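# The tokenizer is lightweight, so it is loaded once at import time.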
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)


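# On ZeroGPU Spaces, @spaces.GPU attaches a GPU only for the duration of this call,
# so the model is loaded inside the function while the GPU is available.
# Note: this reloads the weights on every request; caching the model would avoid that.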
@spaces.GPU
def generate_text(prompt, system_message="You are a helpful assistant."):
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=dtype, device_map=device, token=huggingface_token
    )
    # The model already carries its dtype and device placement, so the pipeline only needs
    # the model and tokenizer.
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

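    # Assemble the chat as role/content messages; the pipeline applies the model's chat template.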
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]

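    # Sample up to 256 new tokens with moderate randomness.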
    result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)

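    # With chat-style input, "generated_text" holds the whole conversation, including the new assistant turn.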
    generated_output = result[0]["generated_text"]
    if isinstance(generated_output, list):
        for message in reversed(generated_output):
            if message.get("role") == "assistant":
                return message.get("content", "No content found.")
        return "No assistant response found."
    else:
        return "Unexpected output format."


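# Simple Gradio UI: a prompt and an optional system message in, generated text out.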
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=3, label="Input Prompt"),
        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Llama 3.1 8B Instruct Text Generation",
    description="Enter a prompt and optional system message to generate text using the Llama 3.1 8B Instruct model.",
)


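# Start the Gradio server when the script is run directly.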
if __name__ == "__main__":
    iface.launch()