import os
import gradio as gr
import subprocess
from llama_cpp import Llama

# Model download URL and local path (modify if needed)
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"

# Create the models directory if it doesn't exist
os.makedirs("./models", exist_ok=True)

# Auto-download the model on first run (requires wget on PATH)
if not os.path.exists(MODEL_PATH):
    print("Downloading Mistral-7B Q4 GGUF model...")
    subprocess.run(["wget", MODEL_URL, "-O", MODEL_PATH], check=True)
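# Alternatively, the file could be fetched with the huggingface_hub client
# instead of shelling out to wget (a sketch; assumes the huggingface_hub
# package is installed, which this script does not otherwise require):
#
#     from huggingface_hub import hf_hub_download
#     hf_hub_download(repo_id="TheBloke/Mistral-7B-GGUF",
#                     filename="mistral-7b.Q4_K_M.gguf",
#                     local_dir="./models")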

# Load GGUF model
print("Loading model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
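# n_ctx sets the context window (in tokens) and n_threads the CPU threads
# used for inference. If llama-cpp-python was built with GPU support, layers
# can be offloaded via the optional n_gpu_layers argument (a sketch; the
# layer count depends on available VRAM):
#
#     model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8,
#                   n_gpu_layers=35)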

# Single-turn chat: run the prompt through the model and return the completion
def chat_with_ai(prompt):
    response = model(prompt, max_tokens=512, stop=["</s>"])
    return response["choices"][0]["text"]
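# For token-by-token output, llama-cpp-python can stream completion chunks
# instead of returning the full text at once (a sketch; Gradio renders
# progressive output when fn is a generator that yields the text so far):
#
#     def chat_with_ai_stream(prompt):
#         text = ""
#         for chunk in model(prompt, max_tokens=512, stop=["</s>"], stream=True):
#             text += chunk["choices"][0]["text"]
#             yield text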

# Gradio UI
iface = gr.Interface(fn=chat_with_ai, inputs="text", outputs="text", title="Mistral-7B GGUF Chatbot")
iface.launch()
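
# launch() serves the app at http://127.0.0.1:7860 by default; passing
# share=True (a standard Gradio option, not used above) also creates a
# temporary public URL:
#
#     iface.launch(share=True)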