Lyte committed
Commit fc46f2c · verified · 1 Parent(s): 2fb59b7

Create app.py

Files changed (1)
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
+import os
+import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+
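+# Download the GGUF weights from the Hub (repo and filename can be overridden via the
+# REPO_ID and MODEL_FILE environment variables) and load them with llama-cpp-python.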
+model = Llama(
+    model_path=hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "bartowski/QwQ-32B-Preview-GGUF"),
+        filename=os.environ.get("MODEL_FILE", "QwQ-32B-Preview-Q3_K_L.gguf"),
+    )
+)
+
+DESCRIPTION = '''
+# QwQ-32B-Preview
+Qwen/QwQ-32B-Preview is an experimental research model developed by the Qwen Team,
+focused on advancing AI reasoning capabilities.
+
+**To start a new chat**, click "clear" and start a new dialog.
+'''
+
+LICENSE = """
+--- Apache 2.0 License ---
+"""
+
+def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
+    """Stream a response from the Llama model for a single user message."""
+    temp = ""
+    response = model.create_chat_completion(
+        messages=[
+            {"role": "system", "content": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."},
+            {"role": "user", "content": message},
+        ],
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+        stream=True,
+    )
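+    # Accumulate streamed deltas and yield the running text so Gradio updates the chat incrementally.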
+    for streamed in response:
+        delta = streamed["choices"][0].get("delta", {})
+        text_chunk = delta.get("content", "")
+        temp += text_chunk
+        yield temp
+
+with gr.Blocks() as demo:
+    gr.Markdown(DESCRIPTION)
+
+    chatbot = gr.ChatInterface(
+        generate_text,
+        title="Qwen/QwQ-32B-Preview | GGUF Demo",
+        description="Adjust settings below if needed.",
+        examples=[
+            ["How many r's are in the word strawberry?"],
+            ["What is the most optimal way to do Test-Time Scaling?"],
+            ["Explain to me how gravity works like I am 5!"],
+        ],
+        cache_examples=False,
+        fill_height=True,
+    )
+
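+    # NOTE: these sliders are display-only in this version; they are not passed
+    # to generate_text, so generation uses the function's default parameters.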
+    with gr.Accordion("Adjust Parameters", open=False):
+        gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    gr.Markdown(LICENSE)
+
+if __name__ == "__main__":
+    demo.launch()
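
For a quick local check of the streaming generator outside the Gradio UI, a minimal sketch along these lines should work, assuming gradio, llama-cpp-python, and huggingface_hub are installed and the file above is saved as app.py in the working directory. REPO_ID and MODEL_FILE are the environment variables read by app.py, and the values shown are just its defaults.

# Minimal local sanity check (a sketch, not part of the commit): importing app.py
# triggers the hf_hub_download call, so the GGUF file is fetched or reused from cache.
import os

# Optional overrides, read by app.py before the model is constructed.
os.environ.setdefault("REPO_ID", "bartowski/QwQ-32B-Preview-GGUF")
os.environ.setdefault("MODEL_FILE", "QwQ-32B-Preview-Q3_K_L.gguf")

from app import generate_text  # building the Blocks UI at import time is harmless; launch() only runs under __main__

last = ""
for partial in generate_text("How many r's are in the word strawberry?", history=[]):
    last = partial  # each yield is the accumulated text so far
print(last)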