Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from huggingface_hub import hf_hub_download
|
| 4 |
+
from llama_cpp import Llama
|
| 5 |
+
|
| 6 |
+
# --- Runtime configuration -------------------------------------------------
# Every setting is read from an environment variable and falls back to the
# default shown here when the variable is unset.

# Hub repository and the exact GGUF file name inside it.
REPO_ID = os.environ.get("REPO_ID", "TheBloke/airoboros-mistral-7B-GGUF")
MODEL_FILE = os.environ.get("MODEL_FILE", "airoboros-mistral-7b.Q4_K_M.gguf")

# System message placed at the start of every conversation.
SYSTEM_PROMPT = os.environ.get(
    "SYSTEM_PROMPT",
    "You are an uncensored assistant. Answer directly.",
)

# Optional password; set it in Settings → Variables. Empty means no password.
APP_PASSWORD = os.environ.get("APP_PASSWORD", "")

# Values handed to the Llama constructor; parsed as integers.
CTX = int(os.environ.get("CTX", "4096"))
THREADS = int(os.environ.get("THREADS", "4"))
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "0"))  # 0 on free CPU Space
|
| 15 |
+
|
| 16 |
+
# Fetch the configured GGUF file from the Hub into ./models; the returned
# value is the path that is handed to the loader below.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=MODEL_FILE,
    local_dir="models",
)

# Build the single shared model instance used by every chat request.
llm = Llama(
    model_path=model_path,
    n_ctx=CTX,
    n_threads=THREADS,
    n_gpu_layers=N_GPU_LAYERS,
    verbose=False,
)
|
| 27 |
+
|
| 28 |
+
def chat_fn(history, user_msg):
    """Run one chat turn against the loaded model.

    Args:
        history: List of ``(role, content)`` tuples from earlier turns, where
            ``role`` is ``"user"`` or ``"assistant"``.
        user_msg: Text the user just submitted.

    Returns:
        A ``(history, "")`` tuple: the history extended with the new user and
        assistant turns, and an empty string for the second output (wired to
        the message textbox by the caller). If ``user_msg`` is empty or only
        whitespace, the history is returned unchanged and the model is not
        called.
    """
    # Skip blank submissions: they would otherwise trigger a full inference
    # pass and add an empty user turn to the conversation.
    if not user_msg or not user_msg.strip():
        return history, ""

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Any role other than "user" is sent to the model as "assistant".
    messages.extend(
        {"role": "user" if role == "user" else "assistant", "content": content}
        for role, content in history
    )
    messages.append({"role": "user", "content": user_msg})

    out = llm.create_chat_completion(
        messages=messages,
        temperature=0.9,
        top_p=0.92,
        repeat_penalty=1.05,
        max_tokens=1024,
    )
    reply = out["choices"][0]["message"]["content"]

    # Build a new list instead of mutating the one passed in.
    history = history + [("user", user_msg), ("assistant", reply)]
    return history, ""
|
| 41 |
+
|
| 42 |
+
def _as_chat_pairs(turns):
    """Convert ``(role, content)`` turns into ``[user, assistant]`` pairs.

    The conversation state holds a flat list of role-tagged turns, while the
    Chatbot component (in its default pair format) displays a list of
    ``[user_message, assistant_message]`` entries.
    """
    pairs = []
    for role, content in turns:
        if role == "user":
            pairs.append([content, None])
        elif pairs:
            # Attach the reply to the most recent user message.
            pairs[-1][1] = content
        else:
            # Assistant turn with no preceding user turn.
            pairs.append([None, content])
    return pairs


with gr.Blocks(title="Airoboros Mistral 7B (Uncensored)") as demo:
    gr.Markdown("## Airoboros Mistral 7B (Uncensored)\nFree CPU Space is slow. Add a GPU in **Settings → Hardware** for speed.")

    # Simple password gate (optional).
    # NOTE(review): this gate only toggles widget visibility in the page;
    # consider Gradio's launch(auth=...) if real access control is needed.
    with gr.Row():
        user = gr.Textbox(label="User (any)")
        pwd = gr.Textbox(label="Password", type="password")
        enter = gr.Button("Enter")
    gate_info = gr.Markdown(visible=False, value="Access granted. Start chatting below.")

    # Chat widgets stay hidden until the gate is passed.
    chat = gr.Chatbot(height=460, visible=False)
    msg = gr.Textbox(label="Message", visible=False, lines=3, placeholder="Ask anything…")
    send = gr.Button("Send", visible=False)
    state = gr.State([])  # list of (role, content) tuples maintained by chat_fn

    def allow(u, p):
        """Reveal the chat widgets when the password matches.

        Args:
            u: Value of the user textbox (not checked).
            p: Value of the password textbox.

        Returns:
            Updates for ``gate_info``, ``user``, ``chat``, ``msg`` and
            ``send``, in that order.
        """
        if not APP_PASSWORD or p == APP_PASSWORD:
            return (
                # Reset the text explicitly: an earlier failed attempt may
                # have replaced it with the error message.
                gr.update(value="Access granted. Start chatting below.", visible=True),
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=True),
            )
        return (
            # gate_info starts hidden, so it must be made visible for the
            # error to reach the user.
            gr.update(value="Wrong password. Try again.", visible=True),
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
        )

    enter.click(allow, [user, pwd], [gate_info, user, chat, msg, send])

    # First update the stored history and clear the textbox, then render the
    # history in the Chatbot using the pair layout it expects.
    send.click(chat_fn, [state, msg], [state, msg]).then(_as_chat_pairs, state, chat)

demo.queue().launch()
|