# ProfCool / app.py — Mistral-7B GGUF chatbot (Hugging Face Space)
import os
import gradio as gr
import subprocess
from llama_cpp import Llama
# Model download link (Modify if needed)
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"

# Create models directory if not exists
os.makedirs("./models", exist_ok=True)

# Auto-download model if not present.
# Download to a ".part" temp file and atomically rename on success: the
# previous `wget -O MODEL_PATH` created the destination immediately, so an
# interrupted download left a corrupt partial file that satisfied the
# existence check on the next run. urllib also avoids requiring an external
# `wget` binary on the host.
if not os.path.exists(MODEL_PATH):
    import urllib.request  # stdlib replacement for the `wget` subprocess

    print("Downloading Mistral-7B Q4 GGUF model...")
    tmp_path = MODEL_PATH + ".part"
    urllib.request.urlretrieve(MODEL_URL, tmp_path)
    os.replace(tmp_path, MODEL_PATH)  # atomic on the same filesystem

# Load GGUF model (4096-token context window, 8 CPU threads)
print("Loading model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
def chat_with_ai(prompt):
    """Generate a completion for *prompt* with the loaded GGUF model.

    Generation stops at the "</s>" token or after 512 new tokens; only the
    generated continuation text (not the prompt) is returned.
    """
    completion = model(prompt, max_tokens=512, stop=["</s>"])
    first_choice = completion["choices"][0]
    return first_choice["text"]
# Gradio UI: a single text box in, generated text out.
iface = gr.Interface(
    fn=chat_with_ai,
    inputs="text",
    outputs="text",
    title="Mistral-7B GGUF Chatbot",
)
iface.launch()