Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,8 @@ import ctypes
|
|
| 12 |
from wasmtime import Store, Module, Linker
|
| 13 |
import re
|
| 14 |
import transformers
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# -------------------------- 初始化 tokenizer --------------------------
|
| 17 |
chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
|
|
@@ -114,14 +116,13 @@ DEEPSEEK_COMPLETION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/completion"
|
|
| 114 |
|
| 115 |
BASE_HEADERS = {
|
| 116 |
'Host': "chat.deepseek.com",
|
| 117 |
-
'User-Agent': "DeepSeek/1.0.
|
| 118 |
'Accept': "application/json",
|
| 119 |
'Accept-Encoding': "gzip",
|
| 120 |
'Content-Type': "application/json",
|
| 121 |
'x-client-platform': "android",
|
| 122 |
-
'x-client-version': "1.0.
|
| 123 |
'x-client-locale': "zh_CN",
|
| 124 |
-
'x-rangers-id': "7883327620434123524",
|
| 125 |
'accept-charset': "UTF-8",
|
| 126 |
}
|
| 127 |
|
|
@@ -184,21 +185,36 @@ def login_deepseek_via_account(account):
|
|
| 184 |
return new_token
|
| 185 |
|
| 186 |
# ----------------------------------------------------------------------
|
| 187 |
-
#
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
return None
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
# ----------------------------------------------------------------------
|
| 203 |
# (5) 判断调用模式:配置模式 vs 用户自带 token
|
| 204 |
# ----------------------------------------------------------------------
|
|
@@ -218,10 +234,10 @@ def determine_mode_and_token():
|
|
| 218 |
if caller_key in config_keys:
|
| 219 |
g.use_config_token = True
|
| 220 |
g.tried_accounts = [] # 初始化已尝试账号
|
| 221 |
-
selected_account = choose_new_account(
|
| 222 |
if not selected_account:
|
| 223 |
-
return Response(json.dumps({"error": "No accounts configured."}),
|
| 224 |
-
status=
|
| 225 |
if not selected_account.get("token", "").strip():
|
| 226 |
try:
|
| 227 |
login_deepseek_via_account(selected_account)
|
|
@@ -229,15 +245,11 @@ def determine_mode_and_token():
|
|
| 229 |
app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
|
| 230 |
return Response(json.dumps({"error": "Account login failed."}),
|
| 231 |
status=500, mimetype="application/json")
|
| 232 |
-
else:
|
| 233 |
-
app.logger.info(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 已有 token,无需重新登录")
|
| 234 |
g.deepseek_token = selected_account.get("token")
|
| 235 |
g.account = selected_account
|
| 236 |
-
app.logger.info(f"[determine_mode_and_token] 配置模式:使用账号 {get_account_identifier(selected_account)} 的 token")
|
| 237 |
else:
|
| 238 |
g.use_config_token = False
|
| 239 |
g.deepseek_token = caller_key
|
| 240 |
-
app.logger.info("[determine_mode_and_token] 使用用户自带 DeepSeek token")
|
| 241 |
return None
|
| 242 |
|
| 243 |
def get_auth_headers():
|
|
@@ -299,7 +311,7 @@ def create_session(max_attempts=3):
|
|
| 299 |
g.tried_accounts = []
|
| 300 |
if current_id not in g.tried_accounts:
|
| 301 |
g.tried_accounts.append(current_id)
|
| 302 |
-
new_account = choose_new_account(
|
| 303 |
if new_account is None:
|
| 304 |
break
|
| 305 |
try:
|
|
@@ -464,7 +476,7 @@ def get_pow_response(max_attempts=3):
|
|
| 464 |
g.tried_accounts = []
|
| 465 |
if current_id not in g.tried_accounts:
|
| 466 |
g.tried_accounts.append(current_id)
|
| 467 |
-
new_account = choose_new_account(
|
| 468 |
if new_account is None:
|
| 469 |
break
|
| 470 |
try:
|
|
@@ -564,8 +576,8 @@ def messages_prepare(messages: list) -> str:
|
|
| 564 |
else:
|
| 565 |
parts.append(text)
|
| 566 |
final_prompt = "".join(parts)
|
| 567 |
-
#
|
| 568 |
-
final_prompt = re.sub(r"
|
| 569 |
return final_prompt
|
| 570 |
|
| 571 |
# ----------------------------------------------------------------------
|
|
@@ -577,22 +589,6 @@ def chat_completions():
|
|
| 577 |
if mode_resp:
|
| 578 |
return mode_resp
|
| 579 |
|
| 580 |
-
# 如果使用配置模式,检查账号是否正忙;如果忙则尝试切换账号
|
| 581 |
-
if g.use_config_token:
|
| 582 |
-
account_id = get_account_identifier(g.account)
|
| 583 |
-
if account_id in active_accounts:
|
| 584 |
-
g.tried_accounts.append(account_id)
|
| 585 |
-
new_account = choose_new_account(g.tried_accounts)
|
| 586 |
-
if new_account is None:
|
| 587 |
-
return jsonify({"error": "All accounts are busy."}), 503
|
| 588 |
-
try:
|
| 589 |
-
login_deepseek_via_account(new_account)
|
| 590 |
-
except Exception as e:
|
| 591 |
-
return jsonify({"error": "Account login failed."}), 500
|
| 592 |
-
g.account = new_account
|
| 593 |
-
g.deepseek_token = new_account.get("token")
|
| 594 |
-
account_id = get_account_identifier(new_account)
|
| 595 |
-
active_accounts.add(account_id)
|
| 596 |
try:
|
| 597 |
req_data = request.json or {}
|
| 598 |
app.logger.info(f"[chat_completions] 收到请求: {req_data}")
|
|
@@ -661,29 +657,81 @@ def chat_completions():
|
|
| 661 |
status=deepseek_resp.status_code,
|
| 662 |
mimetype="application/json")
|
| 663 |
|
|
|
|
|
|
|
|
|
|
| 664 |
def sse_stream():
|
| 665 |
try:
|
| 666 |
final_text = ""
|
| 667 |
final_thinking = ""
|
| 668 |
first_chunk_sent = False
|
|
|
|
|
|
|
| 669 |
citation_map = {} # 用于存储引用链接的字典
|
| 670 |
-
|
|
|
|
| 671 |
try:
|
| 672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
except Exception as e:
|
| 674 |
-
app.logger.warning(f"[sse_stream]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
continue
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
if data_str == "[DONE]":
|
| 681 |
prompt_tokens = len(tokenizer.encode(final_prompt))
|
| 682 |
completion_tokens = len(tokenizer.encode(final_text))
|
| 683 |
usage = {
|
| 684 |
"prompt_tokens": prompt_tokens,
|
| 685 |
"completion_tokens": completion_tokens,
|
| 686 |
-
"total_tokens": prompt_tokens + completion_tokens
|
| 687 |
}
|
| 688 |
finish_chunk = {
|
| 689 |
"id": completion_id,
|
|
@@ -691,30 +739,25 @@ def chat_completions():
|
|
| 691 |
"created": created_time,
|
| 692 |
"model": model,
|
| 693 |
"choices": [
|
| 694 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
],
|
| 696 |
-
"usage": usage
|
| 697 |
}
|
| 698 |
yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
|
| 699 |
yield "data: [DONE]\n\n"
|
|
|
|
| 700 |
break
|
| 701 |
-
try:
|
| 702 |
-
chunk = json.loads(data_str)
|
| 703 |
-
app.logger.debug(f"[sse_stream] 解析到 chunk: {chunk}")
|
| 704 |
-
# 处理搜索索引数据
|
| 705 |
-
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
| 706 |
-
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
| 707 |
-
for idx in search_indexes:
|
| 708 |
-
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
| 709 |
-
continue
|
| 710 |
-
except Exception as e:
|
| 711 |
-
app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
|
| 712 |
-
continue
|
| 713 |
new_choices = []
|
| 714 |
for choice in chunk.get("choices", []):
|
| 715 |
delta = choice.get("delta", {})
|
| 716 |
ctype = delta.get("type")
|
| 717 |
ctext = delta.get("content", "")
|
|
|
|
|
|
|
| 718 |
if search_enabled and ctext.startswith("[citation:"):
|
| 719 |
ctext = ""
|
| 720 |
if ctype == "thinking":
|
|
@@ -732,96 +775,141 @@ def chat_completions():
|
|
| 732 |
elif ctype == "text":
|
| 733 |
delta_obj["content"] = ctext
|
| 734 |
if delta_obj:
|
| 735 |
-
new_choices.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 736 |
if new_choices:
|
| 737 |
out_chunk = {
|
| 738 |
"id": completion_id,
|
| 739 |
"object": "chat.completion.chunk",
|
| 740 |
"created": created_time,
|
| 741 |
"model": model,
|
| 742 |
-
"choices": new_choices
|
| 743 |
}
|
| 744 |
yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
|
|
|
|
|
|
|
|
|
|
| 745 |
except Exception as e:
|
| 746 |
app.logger.error(f"[sse_stream] 异常: {e}")
|
| 747 |
finally:
|
| 748 |
deepseek_resp.close()
|
| 749 |
if g.use_config_token:
|
| 750 |
-
|
| 751 |
return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
|
| 752 |
else:
|
| 753 |
# 非流式响应处理
|
| 754 |
think_list = []
|
| 755 |
text_list = []
|
|
|
|
| 756 |
citation_map = {} # 用于存储引用链接的字典
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
if not line:
|
| 765 |
-
continue
|
| 766 |
-
if line.startswith("data:"):
|
| 767 |
-
data_str = line[5:].strip()
|
| 768 |
-
if data_str == "[DONE]":
|
| 769 |
-
break
|
| 770 |
try:
|
| 771 |
-
|
| 772 |
-
app.logger.debug(f"[chat_completions] 非流式 chunk: {chunk}")
|
| 773 |
-
# 处理搜索索引数据
|
| 774 |
-
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
| 775 |
-
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
| 776 |
-
for idx in search_indexes:
|
| 777 |
-
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
| 778 |
-
continue
|
| 779 |
except Exception as e:
|
| 780 |
-
app.logger.warning(f"[chat_completions]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
continue
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
text_list.append(ctext)
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
{
|
| 806 |
-
"
|
| 807 |
-
"
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
},
|
| 812 |
-
"finish_reason": "stop"
|
| 813 |
}
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
finally:
|
| 823 |
if g.use_config_token:
|
| 824 |
-
|
| 825 |
|
| 826 |
# ----------------------------------------------------------------------
|
| 827 |
# (11) 路由:/
|
|
|
|
| 12 |
from wasmtime import Store, Module, Linker
|
| 13 |
import re
|
| 14 |
import transformers
|
| 15 |
+
import queue
|
| 16 |
+
import threading
|
| 17 |
|
| 18 |
# -------------------------- 初始化 tokenizer --------------------------
|
| 19 |
chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
|
|
|
|
| 116 |
|
| 117 |
BASE_HEADERS = {
|
| 118 |
'Host': "chat.deepseek.com",
|
| 119 |
+
'User-Agent': "DeepSeek/1.0.13 Android/35",
|
| 120 |
'Accept': "application/json",
|
| 121 |
'Accept-Encoding': "gzip",
|
| 122 |
'Content-Type': "application/json",
|
| 123 |
'x-client-platform': "android",
|
| 124 |
+
'x-client-version': "1.0.13",
|
| 125 |
'x-client-locale': "zh_CN",
|
|
|
|
| 126 |
'accept-charset': "UTF-8",
|
| 127 |
}
|
| 128 |
|
|
|
|
| 185 |
return new_token
|
| 186 |
|
| 187 |
# ----------------------------------------------------------------------
|
| 188 |
+
# -------------------------- 全局账号队列 --------------------------
|
| 189 |
+
account_queue = [] # 维护所有可用账号
|
| 190 |
+
|
| 191 |
+
def init_account_queue():
    """Rebuild the global account queue from the configuration.

    Reads ``CONFIG["accounts"]``, copies the list and shuffles the copy so
    that the initial pick order is random.

    The copy is shallow: the queue is a new list, but it holds the same
    account objects as ``CONFIG``.
    """
    global account_queue
    # `or []` also covers an explicit `"accounts": null` in the config, which
    # would otherwise raise a TypeError when the value is copied.
    account_queue = list(CONFIG.get("accounts") or [])
    random.shuffle(account_queue)  # randomize the initial order
|
| 196 |
+
|
| 197 |
+
init_account_queue()
|
| 198 |
+
|
| 199 |
+
def choose_new_account(exclude_ids=None):
    """Take the next usable account out of the global queue.

    The queue is scanned front to back. The first account that has a
    non-empty identifier, and whose identifier is not listed in
    ``exclude_ids``, is removed from the queue and returned. The caller is
    expected to put it back with ``release_account`` once it is done.

    Args:
        exclude_ids: Optional collection of account identifiers to skip.
            Defaults to ``None``, which skips nothing.

    Returns:
        The selected account, or ``None`` when no account qualifies (the
        queue is empty, or every queued account is excluded or has no
        identifier).
    """
    excluded = exclude_ids or ()
    for position, account in enumerate(account_queue):
        account_id = get_account_identifier(account)
        if account_id and account_id not in excluded:
            # Return right after the pop so the list is never iterated
            # again after being modified.
            return account_queue.pop(position)
    app.logger.warning("[choose_new_account] 没有可用的账号或所有账号都在使用中")
    return None
|
| 213 |
|
| 214 |
+
def release_account(account):
    """Put an account back at the tail of the global queue.

    The call is a no-op when ``account`` is ``None`` or when that same
    account object is already queued, so releasing one account twice does
    not leave duplicate entries in the queue.

    Args:
        account: The account previously taken from the queue, or ``None``.
    """
    if account is None:
        return
    # Compare by identity rather than equality: two distinct accounts that
    # happen to compare equal must both be allowed in the queue.
    if any(queued is account for queued in account_queue):
        return
    account_queue.append(account)
|
| 217 |
+
|
| 218 |
# ----------------------------------------------------------------------
|
| 219 |
# (5) 判断调用模式:配置模式 vs 用户自带 token
|
| 220 |
# ----------------------------------------------------------------------
|
|
|
|
| 234 |
if caller_key in config_keys:
|
| 235 |
g.use_config_token = True
|
| 236 |
g.tried_accounts = [] # 初始化已尝试账号
|
| 237 |
+
selected_account = choose_new_account()
|
| 238 |
if not selected_account:
|
| 239 |
+
return Response(json.dumps({"error": "No accounts configured or all accounts are busy."}),
|
| 240 |
+
status=429, mimetype="application/json")
|
| 241 |
if not selected_account.get("token", "").strip():
|
| 242 |
try:
|
| 243 |
login_deepseek_via_account(selected_account)
|
|
|
|
| 245 |
app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
|
| 246 |
return Response(json.dumps({"error": "Account login failed."}),
|
| 247 |
status=500, mimetype="application/json")
|
|
|
|
|
|
|
| 248 |
g.deepseek_token = selected_account.get("token")
|
| 249 |
g.account = selected_account
|
|
|
|
| 250 |
else:
|
| 251 |
g.use_config_token = False
|
| 252 |
g.deepseek_token = caller_key
|
|
|
|
| 253 |
return None
|
| 254 |
|
| 255 |
def get_auth_headers():
|
|
|
|
| 311 |
g.tried_accounts = []
|
| 312 |
if current_id not in g.tried_accounts:
|
| 313 |
g.tried_accounts.append(current_id)
|
| 314 |
+
new_account = choose_new_account()
|
| 315 |
if new_account is None:
|
| 316 |
break
|
| 317 |
try:
|
|
|
|
| 476 |
g.tried_accounts = []
|
| 477 |
if current_id not in g.tried_accounts:
|
| 478 |
g.tried_accounts.append(current_id)
|
| 479 |
+
new_account = choose_new_account()
|
| 480 |
if new_account is None:
|
| 481 |
break
|
| 482 |
try:
|
|
|
|
| 576 |
else:
|
| 577 |
parts.append(text)
|
| 578 |
final_prompt = "".join(parts)
|
| 579 |
+
# 仅移除 markdown 图片格式(不全部移除 !)
|
| 580 |
+
final_prompt = re.sub(r"!\[(.*?)\]\((.*?)\)", r"[\1](\2)", final_prompt)
|
| 581 |
return final_prompt
|
| 582 |
|
| 583 |
# ----------------------------------------------------------------------
|
|
|
|
| 589 |
if mode_resp:
|
| 590 |
return mode_resp
|
| 591 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
try:
|
| 593 |
req_data = request.json or {}
|
| 594 |
app.logger.info(f"[chat_completions] 收到请求: {req_data}")
|
|
|
|
| 657 |
status=deepseek_resp.status_code,
|
| 658 |
mimetype="application/json")
|
| 659 |
|
| 660 |
+
# 添加保活超时配置(5秒)
|
| 661 |
+
KEEP_ALIVE_TIMEOUT = 5
|
| 662 |
+
|
| 663 |
def sse_stream():
|
| 664 |
try:
|
| 665 |
final_text = ""
|
| 666 |
final_thinking = ""
|
| 667 |
first_chunk_sent = False
|
| 668 |
+
result_queue = queue.Queue()
|
| 669 |
+
last_send_time = time.time()
|
| 670 |
citation_map = {} # 用于存储引用链接的字典
|
| 671 |
+
|
| 672 |
+
def process_data():
|
| 673 |
try:
|
| 674 |
+
for raw_line in deepseek_resp.iter_lines():
|
| 675 |
+
try:
|
| 676 |
+
line = raw_line.decode("utf-8")
|
| 677 |
+
except Exception as e:
|
| 678 |
+
app.logger.warning(f"[sse_stream] 解码失败: {e}")
|
| 679 |
+
busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
|
| 680 |
+
busy_content = json.loads(busy_content_str)
|
| 681 |
+
result_queue.put(busy_content)
|
| 682 |
+
result_queue.put(None)
|
| 683 |
+
break
|
| 684 |
+
if not line:
|
| 685 |
+
continue
|
| 686 |
+
if line.startswith("data:"):
|
| 687 |
+
data_str = line[5:].strip()
|
| 688 |
+
if data_str == "[DONE]":
|
| 689 |
+
result_queue.put(None) # 结束信号
|
| 690 |
+
break
|
| 691 |
+
try:
|
| 692 |
+
chunk = json.loads(data_str)
|
| 693 |
+
# 处理搜索索引数据
|
| 694 |
+
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
| 695 |
+
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
| 696 |
+
for idx in search_indexes:
|
| 697 |
+
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
| 698 |
+
continue
|
| 699 |
+
result_queue.put(chunk) # 将数据放入队列
|
| 700 |
+
except Exception as e:
|
| 701 |
+
app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
|
| 702 |
+
busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
|
| 703 |
+
busy_content = json.loads(busy_content_str)
|
| 704 |
+
result_queue.put(busy_content)
|
| 705 |
+
result_queue.put(None)
|
| 706 |
+
break
|
| 707 |
except Exception as e:
|
| 708 |
+
app.logger.warning(f"[sse_stream] 错误: {e}")
|
| 709 |
+
busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
|
| 710 |
+
busy_content = json.loads(busy_content_str)
|
| 711 |
+
result_queue.put(busy_content)
|
| 712 |
+
result_queue.put(None)
|
| 713 |
+
finally:
|
| 714 |
+
deepseek_resp.close()
|
| 715 |
+
|
| 716 |
+
process_thread = threading.Thread(target=process_data)
|
| 717 |
+
process_thread.start()
|
| 718 |
+
|
| 719 |
+
while True:
|
| 720 |
+
current_time = time.time()
|
| 721 |
+
if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
|
| 722 |
+
yield ": keep-alive\n\n"
|
| 723 |
+
last_send_time = current_time
|
| 724 |
continue
|
| 725 |
+
try:
|
| 726 |
+
chunk = result_queue.get(timeout=0.1)
|
| 727 |
+
if chunk is None:
|
| 728 |
+
# 发送最终统计信息
|
|
|
|
| 729 |
prompt_tokens = len(tokenizer.encode(final_prompt))
|
| 730 |
completion_tokens = len(tokenizer.encode(final_text))
|
| 731 |
usage = {
|
| 732 |
"prompt_tokens": prompt_tokens,
|
| 733 |
"completion_tokens": completion_tokens,
|
| 734 |
+
"total_tokens": prompt_tokens + completion_tokens,
|
| 735 |
}
|
| 736 |
finish_chunk = {
|
| 737 |
"id": completion_id,
|
|
|
|
| 739 |
"created": created_time,
|
| 740 |
"model": model,
|
| 741 |
"choices": [
|
| 742 |
+
{
|
| 743 |
+
"delta": {},
|
| 744 |
+
"index": 0,
|
| 745 |
+
"finish_reason": "stop",
|
| 746 |
+
}
|
| 747 |
],
|
| 748 |
+
"usage": usage,
|
| 749 |
}
|
| 750 |
yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
|
| 751 |
yield "data: [DONE]\n\n"
|
| 752 |
+
last_send_time = current_time
|
| 753 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
new_choices = []
|
| 755 |
for choice in chunk.get("choices", []):
|
| 756 |
delta = choice.get("delta", {})
|
| 757 |
ctype = delta.get("type")
|
| 758 |
ctext = delta.get("content", "")
|
| 759 |
+
if choice.get("finish_reason") == "backend_busy":
|
| 760 |
+
ctext = '服务器繁忙,请稍候再试'
|
| 761 |
if search_enabled and ctext.startswith("[citation:"):
|
| 762 |
ctext = ""
|
| 763 |
if ctype == "thinking":
|
|
|
|
| 775 |
elif ctype == "text":
|
| 776 |
delta_obj["content"] = ctext
|
| 777 |
if delta_obj:
|
| 778 |
+
new_choices.append(
|
| 779 |
+
{
|
| 780 |
+
"delta": delta_obj,
|
| 781 |
+
"index": choice.get("index", 0),
|
| 782 |
+
}
|
| 783 |
+
)
|
| 784 |
if new_choices:
|
| 785 |
out_chunk = {
|
| 786 |
"id": completion_id,
|
| 787 |
"object": "chat.completion.chunk",
|
| 788 |
"created": created_time,
|
| 789 |
"model": model,
|
| 790 |
+
"choices": new_choices,
|
| 791 |
}
|
| 792 |
yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
|
| 793 |
+
last_send_time = current_time
|
| 794 |
+
except queue.Empty:
|
| 795 |
+
continue
|
| 796 |
except Exception as e:
|
| 797 |
app.logger.error(f"[sse_stream] 异常: {e}")
|
| 798 |
finally:
|
| 799 |
deepseek_resp.close()
|
| 800 |
if g.use_config_token:
|
| 801 |
+
release_account(g.account)
|
| 802 |
return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
|
| 803 |
else:
|
| 804 |
# 非流式响应处理
|
| 805 |
think_list = []
|
| 806 |
text_list = []
|
| 807 |
+
result = None
|
| 808 |
citation_map = {} # 用于存储引用链接的字典
|
| 809 |
+
|
| 810 |
+
data_queue = queue.Queue()
|
| 811 |
+
|
| 812 |
+
def collect_data():
|
| 813 |
+
nonlocal result
|
| 814 |
+
try:
|
| 815 |
+
for raw_line in deepseek_resp.iter_lines():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
try:
|
| 817 |
+
line = raw_line.decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
except Exception as e:
|
| 819 |
+
app.logger.warning(f"[chat_completions] 解码失败: {e}")
|
| 820 |
+
ctext = '服务器繁忙,请稍候再试'
|
| 821 |
+
text_list.append(ctext)
|
| 822 |
+
data_queue.put(None)
|
| 823 |
+
break
|
| 824 |
+
if not line:
|
| 825 |
continue
|
| 826 |
+
if line.startswith("data:"):
|
| 827 |
+
data_str = line[5:].strip()
|
| 828 |
+
if data_str == "[DONE]":
|
| 829 |
+
data_queue.put(None)
|
| 830 |
+
break
|
| 831 |
+
try:
|
| 832 |
+
chunk = json.loads(data_str)
|
| 833 |
+
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
| 834 |
+
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
| 835 |
+
for idx in search_indexes:
|
| 836 |
+
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
| 837 |
+
continue
|
| 838 |
+
for choice in chunk.get("choices", []):
|
| 839 |
+
delta = choice.get("delta", {})
|
| 840 |
+
ctype = delta.get("type")
|
| 841 |
+
ctext = delta.get("content", "")
|
| 842 |
+
if choice.get("finish_reason") == "backend_busy":
|
| 843 |
+
ctext = '服务器繁忙,请稍候再试'
|
| 844 |
+
if search_enabled and ctext.startswith("[citation:"):
|
| 845 |
+
ctext = ""
|
| 846 |
+
if ctype == "thinking" and thinking_enabled:
|
| 847 |
+
think_list.append(ctext)
|
| 848 |
+
elif ctype == "text":
|
| 849 |
+
text_list.append(ctext)
|
| 850 |
+
except Exception as e:
|
| 851 |
+
app.logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}")
|
| 852 |
+
ctext = '服务器繁忙,请稍候再试'
|
| 853 |
text_list.append(ctext)
|
| 854 |
+
data_queue.put(None)
|
| 855 |
+
break
|
| 856 |
+
except Exception as e:
|
| 857 |
+
app.logger.warning(f"[collect_data] 错误: {e}")
|
| 858 |
+
ctext = '服务器繁忙,请稍候再试'
|
| 859 |
+
text_list.append(ctext)
|
| 860 |
+
data_queue.put(None)
|
| 861 |
+
finally:
|
| 862 |
+
deepseek_resp.close()
|
| 863 |
+
final_reasoning = "".join(think_list)
|
| 864 |
+
final_content = "".join(text_list)
|
| 865 |
+
prompt_tokens = len(tokenizer.encode(final_prompt))
|
| 866 |
+
completion_tokens = len(tokenizer.encode(final_content))
|
| 867 |
+
result = {
|
| 868 |
+
"id": completion_id,
|
| 869 |
+
"object": "chat.completion",
|
| 870 |
+
"created": created_time,
|
| 871 |
+
"model": model,
|
| 872 |
+
"choices": [
|
| 873 |
+
{
|
| 874 |
+
"index": 0,
|
| 875 |
+
"message": {
|
| 876 |
+
"role": "assistant",
|
| 877 |
+
"content": final_content,
|
| 878 |
+
"reasoning_content": final_reasoning,
|
| 879 |
+
},
|
| 880 |
+
"finish_reason": "stop",
|
| 881 |
+
}
|
| 882 |
+
],
|
| 883 |
+
"usage": {
|
| 884 |
+
"prompt_tokens": prompt_tokens,
|
| 885 |
+
"completion_tokens": completion_tokens,
|
| 886 |
+
"total_tokens": prompt_tokens + completion_tokens,
|
| 887 |
},
|
|
|
|
| 888 |
}
|
| 889 |
+
data_queue.put("DONE")
|
| 890 |
+
|
| 891 |
+
collect_thread = threading.Thread(target=collect_data)
|
| 892 |
+
collect_thread.start()
|
| 893 |
+
|
| 894 |
+
def generate():
|
| 895 |
+
last_send_time = time.time()
|
| 896 |
+
while True:
|
| 897 |
+
current_time = time.time()
|
| 898 |
+
if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
|
| 899 |
+
yield ""
|
| 900 |
+
last_send_time = current_time
|
| 901 |
+
if not collect_thread.is_alive() and result is not None:
|
| 902 |
+
yield json.dumps(result)
|
| 903 |
+
break
|
| 904 |
+
time.sleep(0.1)
|
| 905 |
+
|
| 906 |
+
return Response(generate(), mimetype="application/json")
|
| 907 |
+
except Exception as e:
|
| 908 |
+
app.logger.error(f"[chat_completions] 未知异常: {e}")
|
| 909 |
+
return jsonify({"error": "Internal Server Error"}), 500
|
| 910 |
finally:
|
| 911 |
if g.use_config_token:
|
| 912 |
+
release_account(g.account)
|
| 913 |
|
| 914 |
# ----------------------------------------------------------------------
|
| 915 |
# (11) 路由:/
|