import os
import subprocess

import gradio as gr
from llama_cpp import Llama
# Model download URL (modify if needed)
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"
# Create the models directory if it does not exist
os.makedirs("./models", exist_ok=True)

# Auto-download the model on first run
if not os.path.exists(MODEL_PATH):
    print("Downloading Mistral-7B Q4 GGUF model...")
    subprocess.run(["wget", MODEL_URL, "-O", MODEL_PATH], check=True)
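
# Alternative download path (a sketch, assuming the huggingface_hub package is
# installed; handy on images that ship without wget). Uncomment to replace the
# wget call above:
# from huggingface_hub import hf_hub_download
# MODEL_PATH = hf_hub_download(
#     repo_id="TheBloke/Mistral-7B-GGUF",
#     filename="mistral-7b.Q4_K_M.gguf",
#     local_dir="./models",
# )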
# Load the GGUF model (n_ctx = context window in tokens, n_threads = CPU threads used for inference)
print("Loading model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
# Chat function: run a single plain-text completion and return the generated text
def chat_with_ai(prompt):
    response = model(prompt, max_tokens=512, stop=["</s>"])
    return response["choices"][0]["text"]
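
# Instruction-formatted variant (a sketch): the base model above does raw
# completion, but if you point MODEL_URL at an Instruct build of Mistral-7B
# (a hypothetical swap, not part of the original script), it expects the
# [INST] ... [/INST] chat template shown here. Not wired into the UI; pass
# fn=chat_with_instruct to the Interface below to use it. The BOS token is
# added by the tokenizer, so it is not included in the string.
def chat_with_instruct(prompt):
    wrapped = f"[INST] {prompt} [/INST]"
    response = model(wrapped, max_tokens=512, stop=["</s>"])
    return response["choices"][0]["text"].strip()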
# Gradio UI: one text box in, generated text out
iface = gr.Interface(fn=chat_with_ai, inputs="text", outputs="text", title="Mistral-7B GGUF Chatbot")
iface.launch()
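
# Streaming variant (a sketch): llama-cpp-python can emit tokens incrementally
# with stream=True, and Gradio renders a generator function progressively. To
# try it, define this above the Interface and pass fn=chat_stream instead:
# def chat_stream(prompt):
#     partial = ""
#     for chunk in model(prompt, max_tokens=512, stop=["</s>"], stream=True):
#         partial += chunk["choices"][0]["text"]
#         yield partial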