xu song
committed on
Commit
·
3ac04fa
1
Parent(s):
41bb1cf
update
Browse files
- app.py +18 -20
- app_util.py +12 -18
- config.py +1 -1
- models/cpp_qwen2.py +1 -0
app.py
CHANGED
@@ -35,13 +35,22 @@ TODO: 使用说明
|
|
35 |
with gr.Blocks() as demo:
|
36 |
# Knowledge Distillation through Self Chatting
|
37 |
gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
|
46 |
with gr.Row():
|
47 |
with gr.Column(scale=4):
|
@@ -52,15 +61,6 @@ with gr.Blocks() as demo:
|
|
52 |
undo_btn = gr.Button("↩️ Undo")
|
53 |
clear_btn = gr.Button("🗑️ Clear") # 🧹 Clear History (清除历史)
|
54 |
stop_btn = gr.Button("停止生成", variant="primary")
|
55 |
-
with gr.Column(scale=1):
|
56 |
-
# generate_query_btn = gr.Button("Generate First Query")
|
57 |
-
gr.Dropdown(
|
58 |
-
["moss", "chatglm-2", "chatpdf"],
|
59 |
-
value="moss",
|
60 |
-
label="model",
|
61 |
-
interactive=True,
|
62 |
-
# info="Will add more animals later!"
|
63 |
-
)
|
64 |
|
65 |
slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
|
66 |
value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
|
@@ -82,12 +82,10 @@ with gr.Blocks() as demo:
|
|
82 |
|
83 |
generate_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
|
84 |
show_progress="full")
|
85 |
-
retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history]
|
86 |
-
show_progress="full")
|
87 |
retry_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
|
88 |
show_progress="full")
|
89 |
-
undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history]
|
90 |
-
show_progress="full")
|
91 |
|
92 |
slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
|
93 |
slider_top_p.change(set_top_p, inputs=[slider_top_p])
|
|
|
35 |
with gr.Blocks() as demo:
|
36 |
# Knowledge Distillation through Self Chatting
|
37 |
gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
|
38 |
+
with gr.Row():
|
39 |
+
system = gr.Dropdown(
|
40 |
+
choices=system_list,
|
41 |
+
value=system_list[0],
|
42 |
+
allow_custom_value=True,
|
43 |
+
interactive=True,
|
44 |
+
label="System message",
|
45 |
+
scale=4,
|
46 |
+
)
|
47 |
+
gr.Dropdown(
|
48 |
+
["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
|
49 |
+
value="Qwen2-0.5B-Instruct",
|
50 |
+
label="model",
|
51 |
+
interactive=True,
|
52 |
+
scale=1,
|
53 |
+
)
|
54 |
chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
|
55 |
with gr.Row():
|
56 |
with gr.Column(scale=4):
|
|
|
61 |
undo_btn = gr.Button("↩️ Undo")
|
62 |
clear_btn = gr.Button("🗑️ Clear") # 🧹 Clear History (清除历史)
|
63 |
stop_btn = gr.Button("停止生成", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
|
66 |
value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
|
|
|
82 |
|
83 |
generate_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
|
84 |
show_progress="full")
|
85 |
+
retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
|
|
|
86 |
retry_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
|
87 |
show_progress="full")
|
88 |
+
undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
|
|
|
89 |
|
90 |
slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
|
91 |
slider_top_p.change(set_top_p, inputs=[slider_top_p])
|
app_util.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
from utils.logging_util import logger
|
3 |
from models.cpp_qwen2 import bot
|
@@ -72,12 +73,17 @@ def generate(chatbot, history):
|
|
72 |
yield out
|
73 |
|
74 |
|
75 |
-
def
|
76 |
-
"""
|
77 |
-
|
78 |
-
|
79 |
-
"""
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
|
83 |
def reset_user_input():
|
@@ -98,15 +104,3 @@ def set_top_p(top_p):
|
|
98 |
|
99 |
def set_temperature(temperature):
|
100 |
bot.generation_kwargs["temperature"] = temperature
|
101 |
-
|
102 |
-
|
103 |
-
def undo_generate(chatbot, history):
|
104 |
-
if history[-1]["role"] == "user":
|
105 |
-
history = history[:-1]
|
106 |
-
chatbot = chatbot[:-1]
|
107 |
-
elif history[-1]["role"] == "assistant":
|
108 |
-
history = history[:-1]
|
109 |
-
chatbot[-1] = (chatbot[-1][0], None)
|
110 |
-
else:
|
111 |
-
pass
|
112 |
-
return "", chatbot, history
|
|
|
1 |
+
import json
|
2 |
import gradio as gr
|
3 |
from utils.logging_util import logger
|
4 |
from models.cpp_qwen2 import bot
|
|
|
73 |
yield out
|
74 |
|
75 |
|
76 |
+
def undo_generate(chatbot, history):
|
77 |
+
if history[-1]["role"] == "user":
|
78 |
+
history = history[:-1]
|
79 |
+
chatbot = chatbot[:-1]
|
80 |
+
elif history[-1]["role"] == "assistant":
|
81 |
+
history = history[:-1]
|
82 |
+
chatbot[-1] = (chatbot[-1][0], None)
|
83 |
+
else:
|
84 |
+
pass
|
85 |
+
logger.info(f"after undo, {json.dumps(chatbot, ensure_ascii=False)}, {json.dumps(history, ensure_ascii=False)}")
|
86 |
+
return "", chatbot, history
|
87 |
|
88 |
|
89 |
def reset_user_input():
|
|
|
104 |
|
105 |
def set_temperature(temperature):
|
106 |
bot.generation_kwargs["temperature"] = temperature
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
|
3 |
MAX_SEQUENCE_LENGTH = 2048 # max_seq_len
|
4 |
|
5 |
-
DEFAULT_MAX_TOKENS =
|
6 |
# DEFAULT_MAX_NEW_TOKENS = None
|
7 |
DEFAULT_TOP_K = 100
|
8 |
DEFAULT_TOP_P = 0.95
|
|
|
2 |
|
3 |
MAX_SEQUENCE_LENGTH = 2048 # max_seq_len
|
4 |
|
5 |
+
DEFAULT_MAX_TOKENS = 128
|
6 |
# DEFAULT_MAX_NEW_TOKENS = None
|
7 |
DEFAULT_TOP_K = 100
|
8 |
DEFAULT_TOP_P = 0.95
|
models/cpp_qwen2.py
CHANGED
@@ -104,6 +104,7 @@ class Qwen2Simulator(Simulator):
|
|
104 |
return output_text
|
105 |
|
106 |
def _stream_generate(self, inputs):
|
|
|
107 |
output = self.llm(
|
108 |
inputs,
|
109 |
stream=True,
|
|
|
104 |
return output_text
|
105 |
|
106 |
def _stream_generate(self, inputs):
|
107 |
+
logger.info(f"generation_kwargs {self.generation_kwargs}")
|
108 |
output = self.llm(
|
109 |
inputs,
|
110 |
stream=True,
|