xu song committed
Commit · 3ac04fa · 1 Parent(s): 41bb1cf
update

Files changed:
- app.py +18 -20
- app_util.py +12 -18
- config.py +1 -1
- models/cpp_qwen2.py +1 -0
app.py
CHANGED
@@ -35,13 +35,22 @@ TODO: 使用说明
 with gr.Blocks() as demo:
     # Knowledge Distillation through Self Chatting
     gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
-
-
-
-
-
-
-
+    with gr.Row():
+        system = gr.Dropdown(
+            choices=system_list,
+            value=system_list[0],
+            allow_custom_value=True,
+            interactive=True,
+            label="System message",
+            scale=4,
+        )
+        gr.Dropdown(
+            ["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
+            value="Qwen2-0.5B-Instruct",
+            label="model",
+            interactive=True,
+            scale=1,
+        )
     chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
     with gr.Row():
         with gr.Column(scale=4):
@@ -52,15 +61,6 @@ with gr.Blocks() as demo:
             undo_btn = gr.Button("↩️ Undo")
             clear_btn = gr.Button("🗑️ Clear")  # 🧹 Clear History (清除历史)
            stop_btn = gr.Button("停止生成", variant="primary")
-        with gr.Column(scale=1):
-            # generate_query_btn = gr.Button("Generate First Query")
-            gr.Dropdown(
-                ["moss", "chatglm-2", "chatpdf"],
-                value="moss",
-                label="model",
-                interactive=True,
-                # info="Will add more animals later!"
-            )
 
     slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
                                   value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
@@ -82,12 +82,10 @@ with gr.Blocks() as demo:
 
     generate_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
                        show_progress="full")
-    retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
-                    show_progress="full")
+    retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
     retry_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
                     show_progress="full")
-    undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
-                   show_progress="full")
+    undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
 
     slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
     slider_top_p.change(set_top_p, inputs=[slider_top_p])
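The retry flow here stacks two `.click` listeners on the same button: `undo_generate` strips the last answer, then `generate` produces a fresh one (the commit also drops `show_progress="full"` from the undo handlers). A minimal standalone sketch of that pattern, with illustrative component names that are not from this repo; if strict ordering between the two listeners matters, chaining with `.then()` is the explicit alternative:

```python
import gradio as gr

def undo(history):
    # Clear the last assistant answer but keep the user message.
    if history:
        history[-1] = (history[-1][0], None)
    return history

def regenerate(history):
    # Placeholder for a real model call.
    if history:
        history[-1] = (history[-1][0], "a freshly generated answer")
    return history

with gr.Blocks() as demo:
    chat = gr.Chatbot(value=[("hi", "stale answer")])
    retry = gr.Button("🔄 Regenerate last answer")
    # Two listeners on one event, mirroring retry_btn above; the explicit
    # sequenced form would be:
    #     retry.click(undo, chat, chat).then(regenerate, chat, chat)
    retry.click(undo, chat, chat)
    retry.click(regenerate, chat, chat)

demo.launch()
```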
app_util.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 import gradio as gr
 from utils.logging_util import logger
 from models.cpp_qwen2 import bot
@@ -72,12 +73,17 @@ def generate(chatbot, history):
         yield out
 
 
-def
-    """
-
-
-    """
-
+def undo_generate(chatbot, history):
+    if history[-1]["role"] == "user":
+        history = history[:-1]
+        chatbot = chatbot[:-1]
+    elif history[-1]["role"] == "assistant":
+        history = history[:-1]
+        chatbot[-1] = (chatbot[-1][0], None)
+    else:
+        pass
+    logger.info(f"after undo, {json.dumps(chatbot, ensure_ascii=False)}, {json.dumps(history, ensure_ascii=False)}")
+    return "", chatbot, history
 
 
 def reset_user_input():
@@ -98,15 +104,3 @@ def set_top_p(top_p):
 
 def set_temperature(temperature):
     bot.generation_kwargs["temperature"] = temperature
-
-
-def undo_generate(chatbot, history):
-    if history[-1]["role"] == "user":
-        history = history[:-1]
-        chatbot = chatbot[:-1]
-    elif history[-1]["role"] == "assistant":
-        history = history[:-1]
-        chatbot[-1] = (chatbot[-1][0], None)
-    else:
-        pass
-    return "", chatbot, history
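`undo_generate` keeps two parallel structures in sync: `history` as OpenAI-style role dicts and `chatbot` as Gradio's (user, assistant) tuples. A quick sanity check of its two branches (a sketch; it assumes `undo_generate` is importable from app_util.py, and note the function indexes `history[-1]` without an empty-list guard, so it presumes at least one turn exists):

```python
from app_util import undo_generate

history = [{"role": "user", "content": "hi"},
           {"role": "assistant", "content": "hello"}]
chatbot = [("hi", "hello")]

# Last entry is the assistant's: drop it from history, blank the answer.
_, chatbot, history = undo_generate(chatbot, history)
assert history == [{"role": "user", "content": "hi"}]
assert chatbot == [("hi", None)]

# Now the last entry is the user's: drop the turn from both sides.
_, chatbot, history = undo_generate(chatbot, history)
assert history == [] and chatbot == []
```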
config.py
CHANGED
@@ -2,7 +2,7 @@
 
 MAX_SEQUENCE_LENGTH = 2048  # max_seq_len
 
-DEFAULT_MAX_TOKENS =
+DEFAULT_MAX_TOKENS = 128
 # DEFAULT_MAX_NEW_TOKENS = None
 DEFAULT_TOP_K = 100
 DEFAULT_TOP_P = 0.95
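`DEFAULT_MAX_TOKENS` seeds the "Max tokens" slider in app.py (range 1 to `MAX_SEQUENCE_LENGTH`), and the slider's `change` event pushes the value into the generation kwargs. A sketch of that wiring, assuming `set_max_tokens` mirrors the `set_top_p`/`set_temperature` setters shown above (its body is not in this diff):

```python
import config
from models.cpp_qwen2 import bot

def set_max_tokens(max_tokens):
    # Presumed body, parallel to set_top_p / set_temperature.
    bot.generation_kwargs["max_tokens"] = max_tokens

set_max_tokens(config.DEFAULT_MAX_TOKENS)  # 128 after this commit
```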
models/cpp_qwen2.py
CHANGED
@@ -104,6 +104,7 @@ class Qwen2Simulator(Simulator):
         return output_text
 
     def _stream_generate(self, inputs):
+        logger.info(f"generation_kwargs {self.generation_kwargs}")
         output = self.llm(
             inputs,
             stream=True,