HuatuoGPT-o1-7B-GGUF-Demo-Q4

Running

App Files Files Community

Lyte committed on Nov 28, 2024

Commit

fc46f2c

verified ·

1 Parent(s): 2fb59b7

Create app.py

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import os
+import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+model = Llama(
+    model_path=hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "bartowski/QwQ-32B-Preview-GGUF"),
+        filename=os.environ.get("MODEL_FILE", "QwQ-32B-Preview-Q3_K_L.gguf"),
+    )
+)
+# Markdown header for the page; passed to gr.Markdown() at the top of the UI.
+DESCRIPTION = '''
+# QwQ-32B-Preview
+Qwen/QwQ-32B-Preview: an experimental research model developed by the Qwen Team.
+Focused on advancing AI reasoning capabilities.
+**To start a new chat**, click "clear" and start a new dialog.
+'''
+# Markdown license footer; passed to gr.Markdown() at the bottom of the UI.
+LICENSE = """
+--- Apache 2.0 License ---
+"""
+def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
+    """Generate a response using the Llama model."""
+    temp = ""
+    response = model.create_chat_completion(
+        messages=[{"role": "system", "content": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."},
+                  {"role": "user", "content": message}],
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+        stream=True,
+    )
+    for streamed in response:
+        delta = streamed["choices"][0].get("delta", {})
+        text_chunk = delta.get("content", "")
+        temp += text_chunk
+        yield temp
+with gr.Blocks() as demo:
+    gr.Markdown(DESCRIPTION)
+    chatbot = gr.ChatInterface(
+        generate_text,
+        title="Qwen/QwQ-32B-Preview | GGUF Demo",
+        description=" settings below if needed.",
+        examples=[
+            ["How many r's are in the word strawberry?"],
+            ['What is the most optimal way to do Test-Time Scaling?'],
+            ['Explain to me how gravity works like I am 5!'],
+        ],
+        cache_examples=False,
+        fill_height=True
+    )
+    with gr.Accordion("Adjust Parameters", open=False):
+        gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+    gr.Markdown(LICENSE)
+if __name__ == "__main__":
+    demo.launch()