# ProfCool / app.py
import os
import subprocess

import gradio as gr
from llama_cpp import Llama

# 🔹 Get Hugging Face token from environment variable
HF_TOKEN = os.getenv("HF_TOKEN")
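
# Added sketch (not in the original script): guard against a missing token so the
# wget Authorization header below does not silently send "Bearer None".
if not HF_TOKEN:
    print("⚠️ HF_TOKEN is not set; the download below may fail for gated or private repos.")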

# 🔹 Model details
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-GGUF/resolve/main/mistral-7b.Q4_K_M.gguf"
MODEL_PATH = "./models/mistral-7b.Q4_K_M.gguf"

# 🔹 Ensure the models directory exists
os.makedirs("./models", exist_ok=True)

# 🔹 Check if the model exists, else download it
if not os.path.exists(MODEL_PATH):
    print("🚀 Downloading Mistral-7B Q4 GGUF model...")
    subprocess.run([
        "wget", "--header", f"Authorization: Bearer {HF_TOKEN}",
        MODEL_URL, "-O", MODEL_PATH
    ], check=True)
    print("✅ Download complete!")

# 🔹 Load the model
print("📥 Loading the model...")
model = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=8)
print("βœ… Model loaded successfully!")

# 🔹 Define a function to interact with the model
def chat_with_mistral(prompt):
    response = model(prompt, max_tokens=512)
    return response["choices"][0]["text"]
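
# Optional sketch (parameter names taken from llama-cpp-python's sampling API,
# not from the original script): the same call accepts controls such as
# temperature and stop sequences for shorter, more deterministic replies.
def chat_with_mistral_tuned(prompt, temperature=0.7):
    response = model(prompt, max_tokens=512, temperature=temperature, stop=["</s>"])
    return response["choices"][0]["text"]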

# 🔹 Create a Gradio UI
iface = gr.Interface(fn=chat_with_mistral, inputs="text", outputs="text", title="Mistral-7B Chatbot")

# 🔹 Launch the app
iface.launch()
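
# Note (assumption, left as a comment so the call above stays unchanged): Spaces
# serves the default launch(); to bind explicitly you could instead call
# iface.launch(server_name="0.0.0.0", server_port=7860).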