SkyNetWalker committed on
Commit
1be87ac
·
verified ·
1 Parent(s): 21a478e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -1
app.py CHANGED
@@ -1,3 +1,88 @@
 
 
1
  import gradio as gr
 
 
2
 
3
- gr.load("models/PowerInfer/SmallThinker-3B-Preview").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# refer llama recipes for more info https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
# huggingface-llama-recipes : https://github.com/huggingface/huggingface-llama-recipes/tree/main
import gradio as gr
from openai import OpenAI
import os

# Hugging Face access token, read from the "HF" environment variable
# (set as a secret on the Space). Required by the Inference API.
ACCESS_TOKEN = os.getenv("HF")

if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Fix: the original unconditionally printed "Access token loaded." even
    # when the variable was unset, hiding the cause of later 401 responses.
    print("WARNING: environment variable 'HF' is not set; API requests will fail with 401 Unauthorized.")

# OpenAI-compatible client pointed at the Hugging Face Inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

print("OpenAI client initialized.")
17
+
18
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, given the running *history*.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as provided by gr.ChatInterface.
    system_message : str
        System prompt placed first in the message list.
    max_tokens, temperature, top_p
        Sampling controls forwarded to the chat-completions endpoint.

    Yields the cumulative assistant response after each streamed token so
    Gradio can render the reply incrementally.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")

    messages = [{"role": "system", "content": system_message}]

    # Replay prior turns; skip empty slots (e.g. a user turn that has no
    # assistant reply yet).
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
            print(f"Added user message to context: {user_turn}")
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
            print(f"Added assistant message to context: {assistant_turn}")

    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")

    # Fix: the original loop variable shadowed the `message` parameter.
    for chunk in client.chat.completions.create(
        model="PowerInfer/SmallThinker-3B-Preview",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        # Fix: the final streamed chunk (and keep-alive chunks) can carry
        # delta.content == None; `response += None` raised TypeError in the
        # original. Guard both an empty choices list and a None/empty token.
        token = chunk.choices[0].delta.content if chunk.choices else None
        if token:
            print(f"Received token: {token}")
            response += token
            yield response

    print("Completed response generation.")
60
+
61
# Transcript panel; fixed 600px height so it fills the page nicely.
chatbot = gr.Chatbot(height=600)

print("Chatbot interface created.")

# Chat UI. The additional inputs render below the chat box and are passed to
# respond() after the (message, history) pair, in the order listed here.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()