import os

import gradio as gr
from llama_cpp import Llama
MODEL_URL = (
    "https://huggingface.co/fdtn-ai/Foundation-Sec-8B-Q4_K_M-GGUF/"
    "resolve/main/foundation-sec-8b-q4_k_m.gguf"
)
MODEL_PATH = "foundation-sec-8b-q4_k_m.gguf"
# Download the GGUF weights once, before the server starts. Writing to a
# temporary name and renaming at the end keeps an interrupted download from
# leaving a truncated file that the exists() check would wrongly accept.
if not os.path.exists(MODEL_PATH):
    import requests

    tmp_path = MODEL_PATH + ".part"
    with requests.get(MODEL_URL, stream=True) as r:
        r.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    os.replace(tmp_path, MODEL_PATH)
# Load the quantized model; create_chat_completion uses the chat template
# stored in the GGUF metadata when one is present
llm = Llama(model_path=MODEL_PATH, n_ctx=4096, verbose=False)
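
# Optional: if llama-cpp-python was built with GPU support (CUDA, Metal, ...),
# offloading layers speeds up generation; n_gpu_layers=-1 offloads every layer.
# A sketch only, not required for the CPU path above:
#   llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=-1, verbose=False)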
def chat_fn(message, history):
    # Replay prior turns (type="messages" dicts) so the model keeps context
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": message})
    out = llm.create_chat_completion(messages=messages, max_tokens=256, temperature=0.7)
    return out["choices"][0]["message"]["content"]
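
# A streaming variant, in case incremental output is preferred. This is a
# hedged sketch: it assumes llama-cpp-python's OpenAI-style stream chunks
# (deltas under choices[0]["delta"]) and relies on gr.ChatInterface rendering
# a generator's yields as a progressively updated reply.
def chat_fn_streaming(message, history):
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": message})
    partial = ""
    for chunk in llm.create_chat_completion(
        messages=messages, max_tokens=256, temperature=0.7, stream=True
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio replaces the pending reply with each yield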
# Swap in chat_fn_streaming here to stream tokens as they arrive
demo = gr.ChatInterface(chat_fn, type="messages", title="Foundation-Sec-8B")
# 7860 is Gradio's default port; server_name="0.0.0.0" (not the default
# 127.0.0.1) makes the app reachable from other hosts
demo.launch(server_name="0.0.0.0", server_port=7860)
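
# Quick smoke test from a second shell, assuming the gradio_client package is
# installed and that ChatInterface exposes its function under api_name="/chat".
# The example question is illustrative, not from the original:
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860/")
#   print(client.predict("Summarize CVE-2021-44228.", api_name="/chat"))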