Lyte committed (verified)
Commit 372a5eb · 1 parent: a35d071

Update app.py

Files changed (1): app.py (+12, -10)
app.py CHANGED
@@ -1,18 +1,21 @@
 import os
 import gradio as gr
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download, login
+import os
+
+login(os.getenv("HF_TOKEN"))
 
 model = Llama(
     model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "bartowski/QwQ-32B-Preview-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "QwQ-32B-Preview-Q3_K_L.gguf"),
+        repo_id=os.environ.get("REPO_ID", "Lyte/HuatuoGPT-o1-7B-Q4_K_M-GGUF"),
+        filename=os.environ.get("MODEL_FILE", "huatuogpt-o1-7b-q4_k_m.gguf"),
     )
 )
 
 DESCRIPTION = '''
-# QwQ-32B-Preview | Duplicate the space and set it to private for faster & personal inference for free.
-Qwen/QwQ-32B-Preview: an experimental research model developed by the Qwen Team.
+# FreedomIntelligence/HuatuoGPT-o1-7B | Duplicate the space and set it to private for faster & personal inference for free.
+HuatuoGPT-o1-7B: an experimental research model developed by the Qwen Team.
 Focused on advancing AI reasoning capabilities.
 
 **To start a new chat**, click "clear" and start a new dialog.
@@ -26,8 +29,7 @@ def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95)
     """Generate a response using the Llama model."""
     temp = ""
     response = model.create_chat_completion(
-        messages=[{"role": "system", "content": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."},
-                  {"role": "user", "content": message}],
+        messages=[{"role": "user", "content": message}],
         temperature=temperature,
         max_tokens=max_tokens,
         top_p=top_p,
@@ -44,12 +46,12 @@ with gr.Blocks() as demo:
 
     chatbot = gr.ChatInterface(
         generate_text,
-        title="Qwen/QwQ-32B-Preview | GGUF Demo",
+        title="FreedomIntelligence/HuatuoGPT-o1-7B | GGUF Demo",
        description=" settings below if needed.",
         examples=[
             ["How many r's are in the word strawberry?"],
-            ['What is the most optimal way to do Test-Time Scaling?'],
-            ['Explain to me how gravity works like I am 5!'],
+            ['How to stop a cough?'],
+            ['How do I relieve feet pain?'],
         ],
         cache_examples=False,
         fill_height=True
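
For reference, the model-loading path after this commit reduces to the standalone sketch below: authenticate with the Hub, resolve the GGUF file, load it with llama-cpp-python, and run one chat completion the way generate_text() does. This is a minimal sketch rather than the Space's full app.py; the stream=False flag, the example prompt, and the printed reply are additions for illustration only, and HF_TOKEN (plus the optional REPO_ID / MODEL_FILE overrides) is expected as an environment variable or Space secret.

import os

from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama

# Authenticate so hf_hub_download can also reach private or gated repos;
# HF_TOKEN is read from the environment (a Space secret in the hosted demo).
login(os.getenv("HF_TOKEN"))

# REPO_ID / MODEL_FILE let a duplicated Space point at a different GGUF build;
# the defaults below are the ones introduced by this commit.
model = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "Lyte/HuatuoGPT-o1-7B-Q4_K_M-GGUF"),
        filename=os.environ.get("MODEL_FILE", "huatuogpt-o1-7b-q4_k_m.gguf"),
    )
)

# One single-turn completion, mirroring generate_text() after this commit
# (no system prompt); stream=False is assumed here to keep the sketch short.
response = model.create_chat_completion(
    messages=[{"role": "user", "content": "How to stop a cough?"}],
    temperature=0.9,
    max_tokens=512,
    top_p=0.95,
    stream=False,
)
print(response["choices"][0]["message"]["content"])

In the Space itself the same call sits inside generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95), which is handed to gr.ChatInterface so the UI supplies the user message and the sampling settings.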