import os
import gradio as gr
import subprocess
from llama_cpp import Llama

# Model download URL and local path (modify if needed)
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"

# Create the models directory if it doesn't exist
os.makedirs("./models", exist_ok=True)

# Auto-download the model on first run (requires wget on PATH)
if not os.path.exists(MODEL_PATH):
    print("Downloading Mistral-7B Q4 GGUF model...")
    subprocess.run(["wget", MODEL_URL, "-O", MODEL_PATH], check=True)
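# Alternatively, the file could be fetched with the huggingface_hub client
# instead of shelling out to wget (a sketch; assumes the huggingface_hub
# package is installed, which this script does not otherwise require):
#
#     from huggingface_hub import hf_hub_download
#     hf_hub_download(repo_id="TheBloke/Mistral-7B-GGUF",
#                     filename="mistral-7b.Q4_K_M.gguf",
#                     local_dir="./models")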

# Load GGUF model
print("Loading model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
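# n_ctx sets the context window (in tokens) and n_threads the CPU threads
# used for inference. If llama-cpp-python was built with GPU support, layers
# can be offloaded via the optional n_gpu_layers argument (a sketch; the
# layer count depends on available VRAM):
#
#     model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8,
#                   n_gpu_layers=35)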

# Single-turn chat: run the prompt through the model and return the completion
def chat_with_ai(prompt):
    response = model(prompt, max_tokens=512, stop=["</s>"])
    return response["choices"][0]["text"]
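# For token-by-token output, llama-cpp-python can stream completion chunks
# instead of returning the full text at once (a sketch; Gradio renders
# progressive output when fn is a generator that yields the text so far):
#
#     def chat_with_ai_stream(prompt):
#         text = ""
#         for chunk in model(prompt, max_tokens=512, stop=["</s>"], stream=True):
#             text += chunk["choices"][0]["text"]
#             yield text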

# Gradio UI
iface = gr.Interface(fn=chat_with_ai, inputs="text", outputs="text", title="Mistral-7B GGUF Chatbot")
iface.launch()
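
# launch() serves the app at http://127.0.0.1:7860 by default; passing
# share=True (a standard Gradio option, not used above) also creates a
# temporary public URL:
#
#     iface.launch(share=True)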