import gradio as gr
from transformers import pipeline
from huggingface_hub import snapshot_download
import torch
import os
import subprocess
import gc

# The raw Llama 2 checkpoint (not in Hugging Face format, hence the conversion below)
model_id = "meta-llama/Llama-2-7b"
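
# meta-llama/Llama-2-7b is a gated repo, so the download below needs an
# authenticated session. A minimal sketch, assuming the access token is exposed
# via an HF_TOKEN environment variable (a naming choice for illustration):
from huggingface_hub import login

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)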
print("\n\nSaving model to Local....\n\n") | |
snapshot_download(repo_id=model_id, local_dir="llama") | |
print("\n\nConverting to suitable type...\n\n") | |
subprocess.run("python converter.py --input_dir llama --model_size 7B --output_dir model".split(" ")) | |
print("\n\nModel converted successfully!!\n\n") | |
print(os.listdir("model")) | |
gc.collect() | |
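
# Optional cleanup (not in the original script): the raw "llama" download is no
# longer needed once conversion succeeds, and deleting it frees several GB of
# disk on the Space. Uncomment if disk space is tight:
# import shutil
# shutil.rmtree("llama", ignore_errors=True)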
print("\n\nInitializing model...\n\n") | |
model_interface = pipeline( | |
"text-generation", | |
model="./model", | |
torch_dtype=torch.bfloat16, | |
device="cpu", | |
) | |
print("\n\nModel initialized successfully!!\n\n") | |

def generate_text(text: str) -> str:
    # Greedy decoding (do_sample=False) keeps output deterministic; max_new_tokens
    # bounds the response length so CPU generation does not run indefinitely.
    response = model_interface(text, do_sample=False, max_new_tokens=256)
    return response[0]["generated_text"]
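
# Optional smoke test before launching the UI (slow on CPU, so left commented out):
# print(generate_text("Once upon a time"))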

# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here"),
    outputs=gr.Textbox(lines=5),
    title="Llama 2 Text Generator",
    description="Generate text using the Llama 2 model.",
)

iface.launch()