Commit: 微调对话裁剪 (fine-tune conversation clipping)

Files changed:
- request_llm/bridge_chatgpt.py (+1 -1)
- toolbox.py (+10 -7)
request_llm/bridge_chatgpt.py (CHANGED)

@@ -200,7 +200,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         if "reduce the length" in error_msg:
             if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
             history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
-                                   max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])
+                                   max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
             chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
             # history = [] # 清除历史
         elif "does not exist" in error_msg:
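This is the overflow-recovery branch of predict: when the API complains that the context is too long ("reduce the length"), the handler blanks the turn that overflowed (the inline comment notes that history[-2] is this request's input and history[-1] is its output), clips the remaining history against the model's max_token budget (the added comment: at least half of the history gets released), and tells the user that part of the cached history has been freed and a retry may succeed. A minimal, self-contained sketch of this flow; count_tokens, clip_history, and on_context_overflow here are simplified stand-ins, not the project's actual API:

# Sketch of the recovery flow above; every name is a simplified
# stand-in for the real gpt_academic structures, not its actual API.

def count_tokens(txt):
    return len(txt.split())  # stand-in tokenizer: roughly one token per word

def clip_history(inputs, history, max_token_limit):
    # Placeholder for toolbox.clip_history: drop the oldest entries
    # until the remaining history fits the token budget.
    while history and sum(count_tokens(h) for h in history) > max_token_limit:
        history = history[1:]
    return history

def on_context_overflow(error_msg, inputs, history, chatbot, max_token=4096):
    if "reduce the length" in error_msg:
        # Blank the turn that overflowed: history[-2] is this request's
        # input, history[-1] is its (partial) output.
        if len(history) >= 2:
            history[-1] = ""
            history[-2] = ""
        history = clip_history(inputs, history, max_token_limit=max_token)
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length and retry.")
    return history, chatbot

history = ["old question", "old answer " * 3000, "new question", ""]
chatbot = [("new question", "")]
history, chatbot = on_context_overflow("please reduce the length of the messages",
                                       "new question", history, chatbot)
print(history)  # the oversized entries are gone; a retry now fits the window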
toolbox.py (CHANGED)

@@ -555,23 +555,26 @@ def run_gradio_in_subpath(demo, auth, port, custom_path):
 
 def clip_history(inputs, history, tokenizer, max_token_limit):
     """
-    reduce the length of
+    reduce the length of history by clipping.
     this function search for the longest entries to clip, little by little,
-    until the number of token of
-
+    until the number of token of history is reduced under threshold.
+    通过裁剪来缩短历史记录的长度。
     此函数逐渐地搜索最长的条目进行剪辑,
-
+    直到历史记录的标记数量降低到阈值以下。
     """
     import numpy as np
     from request_llm.bridge_all import model_info
     def get_token_num(txt):
         return len(tokenizer.encode(txt, disallowed_special=()))
     input_token_num = get_token_num(inputs)
-    if input_token_num < max_token_limit * 3 / 4:
-        # 当输入部分的token占比小于限制的3/4
+    if input_token_num < max_token_limit * 3 / 4:
+        # 当输入部分的token占比小于限制的3/4时,裁剪时
+        # 1. 把input的余量留出来
         max_token_limit = max_token_limit - input_token_num
+        # 2. 把输出用的余量留出来
+        max_token_limit = max_token_limit - 128
+        # 3. 如果余量太小了,直接清除历史
     if max_token_limit < 128:
-        # 余量太小了,直接清除历史
         history = []
         return history
     else:
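The new comments make the token budget explicit: (1) reserve the input's share (max_token_limit -= input_token_num), (2) reserve 128 tokens for the model's answer (max_token_limit -= 128), and (3) if fewer than 128 tokens remain, clear the history outright. With a 4096-token model and a 1000-token input (below 3/4 of the limit), for example, the history budget works out to 4096 - 1000 - 128 = 2968 tokens. The else branch that performs the actual clipping lies outside this hunk; below is a sketch of the strategy the docstring describes (search out the longest entries and clip them little by little, until the history's token count falls under the threshold), with an illustrative whitespace tokenizer and step size rather than the real implementation:

# Illustrative sketch of the clipping strategy named in the docstring:
# repeatedly locate the longest history entry and trim tokens off its
# tail until the whole history fits the budget. The whitespace tokenizer
# and the fixed step size are assumptions, not the project's real code.

def clip_longest_entries(history, max_token_limit, step=16):
    def tokens(txt):
        return txt.split()  # stand-in for tokenizer.encode(...)

    def total(entries):
        return sum(len(tokens(e)) for e in entries)

    while history and total(history) > max_token_limit:
        # Find the longest entry and trim `step` tokens off its tail,
        # so no single entry is sacrificed all at once.
        longest = max(range(len(history)), key=lambda i: len(tokens(history[i])))
        history[longest] = " ".join(tokens(history[longest])[:-step])
    return history

print(clip_longest_entries(["short question", "word " * 500], max_token_limit=100))

Clipping the longest entry first keeps short turns intact for as long as possible, which preserves more of the conversation's structure than simply dropping the oldest messages.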