import subprocess
import os

from llama_cpp import Llama
import gradio as gr

# Get the Hugging Face token from an environment variable
HF_TOKEN = os.getenv("HF_TOKEN")
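
# Optional guard (a sketch, not part of the original script): os.getenv returns
# None when the variable is unset, which would turn the download header below
# into the literal string "Authorization: Bearer None".
if HF_TOKEN is None:
    print("Warning: HF_TOKEN is not set; downloads from gated repos may fail.")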

# Model details
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"

# Ensure the models directory exists
os.makedirs("./models", exist_ok=True)

# Download the model if it is not already present
if not os.path.exists(MODEL_PATH):
    print("Downloading Mistral-7B Q4 GGUF model...")
    subprocess.run([
        "wget", "--header", f"Authorization: Bearer {HF_TOKEN}",
        MODEL_URL, "-O", MODEL_PATH,
    ], check=True)
    print("Download complete!")

# Load the model
print("Loading the model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
print("Model loaded successfully!")

# Define a function to interact with the model
def chat_with_mistral(prompt):
    response = model(prompt, max_tokens=512)
    return response["choices"][0]["text"]
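
# Variant (a sketch): llama-cpp-python also exposes an OpenAI-style chat API.
# create_chat_completion applies the model's chat template, which typically
# suits instruction-tuned checkpoints better than raw completion. The function
# name here is illustrative, not from the original script.
def chat_with_mistral_chat_api(prompt):
    response = model.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=512,
    )
    return response["choices"][0]["message"]["content"]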

# Create a Gradio UI
iface = gr.Interface(fn=chat_with_mistral, inputs="text", outputs="text", title="Mistral-7B Chatbot")

# Launch the app
iface.launch()
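
# Usage note (an assumption about deployment outside HF Spaces): to expose the
# server on all interfaces with Gradio's default port, the launch call can be
# widened, e.g. iface.launch(server_name="0.0.0.0", server_port=7860).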