hf-demo-linux committed on
Commit
d16010a
·
verified ·
1 Parent(s): 2e811b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1396 -0
app.py ADDED
@@ -0,0 +1,1396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os,time,logging,requests,json,uuid,concurrent.futures,threading,base64,io
2
+ from io import BytesIO
3
+ from itertools import chain
4
+ from PIL import Image
5
+ from datetime import datetime
6
+ from apscheduler.schedulers.background import BackgroundScheduler
7
+ from flask import Flask, request, jsonify, Response, stream_with_context, render_template # Import render_template
8
+ from werkzeug.middleware.proxy_fix import ProxyFix
9
+ from requests.adapters import HTTPAdapter
10
+ from requests.packages.urllib3.util.retry import Retry
11
# Pin the process timezone so that time-based output (including log
# timestamps) is rendered for Asia/Shanghai.
os.environ['TZ'] = 'Asia/Shanghai'
time.tzset()  # re-read TZ; NOTE(review): tzset is only available on Unix

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Upstream SiliconFlow endpoints used throughout this module.
API_ENDPOINT = "https://api-st.siliconflow.cn/v1/user/info"
TEST_MODEL_ENDPOINT = "https://api-st.siliconflow.cn/v1/chat/completions"
MODELS_ENDPOINT = "https://api-st.siliconflow.cn/v1/models"
EMBEDDINGS_ENDPOINT = "https://api-st.siliconflow.cn/v1/embeddings"
IMAGE_ENDPOINT = "https://api-st.siliconflow.cn/v1/images/generations"
20
def requests_session_with_retries(
    retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504)
):
    """Create a ``requests.Session`` with a retrying, pooled HTTP adapter.

    Args:
        retries: Used for the total, read and connect retry budgets.
        backoff_factor: Passed through to the ``Retry`` policy.
        status_forcelist: HTTP status codes handed to the ``Retry`` policy.

    Returns:
        A session with the same adapter mounted for ``http://`` and
        ``https://``.
    """
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    pooled_adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=1000,
        pool_maxsize=10000,
        pool_block=False,
    )
    http = requests.Session()
    for scheme in ("http://", "https://"):
        http.mount(scheme, pooled_adapter)
    return http
40
# Shared outbound HTTP session (retrying adapter, see requests_session_with_retries).
session = requests_session_with_retries()

app = Flask(__name__)
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)

# Model ids per category; the "free_*" lists hold the subset judged free.
models = {
    category: []
    for category in (
        "text",
        "free_text",
        "embedding",
        "free_embedding",
        "image",
        "free_image",
    )
}

# API keys grouped by the classification assigned in load_keys().
key_status = {
    status: [] for status in ("invalid", "free", "unverified", "valid")
}

executor = concurrent.futures.ThreadPoolExecutor(max_workers=10000)

# Per-model round-robin cursor used by select_key().
model_key_indices = {}

# Request timestamps and token counts; the lists are appended to in pairs
# under data_lock and pruned by the index view (60 s and 86400 s windows).
request_timestamps = []
token_counts = []
request_timestamps_day = []
token_counts_day = []
data_lock = threading.Lock()
64
def get_credit_summary(api_key):
    """Fetch the balance of ``api_key`` from the user-info endpoint.

    A timed-out request is retried, up to three attempts in total. Any
    other request failure, or a response whose payload cannot be
    interpreted as a balance, ends the lookup immediately.

    Args:
        api_key: The upstream API key to query.

    Returns:
        ``{"total_balance": <float>}`` on success, otherwise ``None``.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = session.get(API_ENDPOINT, headers=headers, timeout=2)
            response.raise_for_status()
            data = response.json().get("data", {})
            total_balance = data.get("totalBalance", 0)
            logging.info(f"获取额度,API Key:{api_key},当前额度: {total_balance}")
            return {"total_balance": float(total_balance)}
        except requests.exceptions.Timeout as e:
            logging.error(f"获取额度信息失败,API Key:{api_key},尝试次数:{attempt+1}/{max_retries},错误信息:{e} (Timeout)")
            if attempt >= max_retries - 1:
                logging.error(f"获取额度信息失败,API Key:{api_key},所有重试次数均已失败 (Timeout)")
        except requests.exceptions.RequestException as e:
            logging.error(f"获取额度信息失败,API Key:{api_key},错误信息:{e}")
            return None
        except (AttributeError, TypeError, ValueError) as e:
            # Malformed payload: body is not a JSON object, "data" is null,
            # or "totalBalance" is null / non-numeric. Treat it like any
            # other failed lookup instead of raising into the caller.
            logging.error(f"获取额度信息失败,API Key:{api_key},错误信息:{e}")
            return None
    # Every attempt timed out.
    return None
85
# Key used to list models and to probe which of them are free.
# NOTE(review): a credential is committed in source here. It can now be
# overridden through the FREE_MODEL_TEST_KEY environment variable; the
# literal is kept only as a backward-compatible fallback and should be
# rotated and removed.
FREE_MODEL_TEST_KEY = os.environ.get("FREE_MODEL_TEST_KEY") or (
    "sk-bmjbjzleaqfgtqfzmcnsbagxrlohriadnxqrzfocbizaxukw"
)

# Image models treated as free without probing the upstream API
# (see test_model_availability with model_type="image").
FREE_IMAGE_LIST = [
    "stabilityai/stable-diffusion-3-5-large",
    "black-forest-labs/FLUX.1-schnell",
    "stabilityai/stable-diffusion-3-medium",
    "stabilityai/stable-diffusion-xl-base-1.0",
    "stabilityai/stable-diffusion-2-1",
]
95
def test_model_availability(api_key, model_name, model_type="chat"):
    """Report whether ``model_name`` is usable with ``api_key``.

    Args:
        api_key: Key sent as the bearer token of the probe request.
        model_name: Model id to check.
        model_type: ``"image"``, ``"embedding"``, or anything else for a
            chat-completion probe.

    Returns:
        For ``"image"``: whether the model is in ``FREE_IMAGE_LIST`` (no
        request is made). Otherwise ``True`` when the probe answers with
        HTTP 200 or 429, ``False`` on any other status or request error.
    """
    # Image models are decided from the static list alone.
    if model_type == "image":
        return model_name in FREE_IMAGE_LIST

    if model_type == "embedding":
        endpoint = EMBEDDINGS_ENDPOINT
        timeout = 10
        payload = {"model": model_name, "input": ["hi"]}
    else:
        endpoint = TEST_MODEL_ENDPOINT
        timeout = 5
        payload = {
            "model": model_name,
            "messages": [{"role": "user", "content": "hi"}],
            "max_tokens": 5,
            "stream": False,
        }

    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    try:
        probe = session.post(
            endpoint, headers=auth_headers, json=payload, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        logging.error(
            f"测试{model_type}模型 {model_name} 可用性失败,"
            f"API Key:{api_key},错误信息:{e}"
        )
        return False
    return probe.status_code in [200, 429]
123
def process_image_url(image_url, response_format=None):
    """Convert an upstream image URL into an OpenAI-style image entry.

    Args:
        image_url: URL of the generated image; may be empty or ``None``.
        response_format: ``"b64_json"`` to download the image and return
            it as base64-encoded PNG; anything else returns the URL.

    Returns:
        ``{"url": ""}`` when ``image_url`` is falsy, ``{"b64_json": ...}``
        when conversion was requested and succeeded, otherwise
        ``{"url": image_url}`` (also the fallback when conversion fails).
    """
    if not image_url:
        return {"url": ""}
    if response_format != "b64_json":
        return {"url": image_url}
    try:
        # The response is used as a context manager so the streamed
        # connection is always released, even when decoding fails.
        with session.get(image_url, stream=True) as response:
            response.raise_for_status()
            # Read through .content (as create_base64_markdown_image does)
            # rather than .raw, which skips transfer/content decoding.
            image = Image.open(io.BytesIO(response.content))
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        return {"b64_json": img_str}
    except Exception as e:
        # Best effort: fall back to handing out the plain URL.
        logging.error(f"图片转base64失败: {e}")
        return {"url": image_url}
139
def create_base64_markdown_image(image_url):
    """Download an image and return it as an inline markdown image.

    The image is resized to a quarter of each dimension, re-encoded as
    PNG and embedded as a ``data:image/png;base64`` URI.

    Args:
        image_url: URL of the image to download.

    Returns:
        The markdown image string, or ``None`` if any step fails (the
        failure is logged).
    """
    try:
        download = session.get(image_url, stream=True)
        download.raise_for_status()
        picture = Image.open(BytesIO(download.content))

        quarter_size = tuple(side // 4 for side in picture.size)
        thumbnail = picture.resize(quarter_size, Image.LANCZOS)

        png_buffer = BytesIO()
        thumbnail.save(png_buffer, format="PNG")
        encoded_png = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

        markdown = f"![](data:image/png;base64,{encoded_png})"
        logging.info("Created base64 markdown image link.")
        return markdown
    except Exception as e:
        logging.error(f"Error creating markdown image: {e}")
        return None
155
def extract_user_content(messages):
    """Collect the text of all ``user`` messages into one string.

    String contents are taken as-is; list contents contribute the
    ``text`` of every dict item whose ``type`` is ``"text"``. Pieces are
    separated by a single space and the result is stripped.

    Args:
        messages: Iterable of chat messages (dicts with ``role`` and
            ``content``).

    Returns:
        The concatenated user text.
    """
    fragments = []
    for message in messages:
        if message["role"] != "user":
            continue
        content = message["content"]
        if isinstance(content, str):
            fragments.append(content)
        elif isinstance(content, list):
            fragments.extend(
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
    return " ".join(fragments).strip()
166
def get_siliconflow_data(model_name, data):
    """Translate an incoming image request into the upstream payload.

    Args:
        model_name: Target image model id.
        data: Parsed request body (dict).

    Returns:
        A dict with ``model`` and ``prompt`` plus model-specific options:
        for ``black-forest-labs/FLUX.1-pro`` clamped width/height/steps/
        guidance/etc.; for every other model ``image_size`` (reset to
        ``1024x1024`` when not in the accepted list) and
        ``prompt_enhancement``, and, unless the model is a FLUX.1-schnell
        variant, batch size, step count, guidance scale and negative
        prompt. ``seed`` is forwarded only when it is an int strictly
        between 0 and 9999999999.
    """
    def clamp(lower, upper, value):
        return max(lower, min(upper, value))

    def snap_to_32(pixels):
        # Round down to a multiple of 32.
        return (pixels // 32) * 32

    accepted_sizes = (
        "1024x1024", "512x1024", "768x512", "768x1024", "1024x576",
        "576x1024", "960x1280", "720x1440", "720x1280",
    )
    schnell_variants = (
        "black-forest-labs/FLUX.1-schnell",
        "Pro/black-forest-labs/FLUX.1-schnell",
    )

    payload = {"model": model_name, "prompt": data.get("prompt") or ""}
    seed = data.get("seed")
    seed_is_usable = isinstance(seed, int) and 0 < seed < 9999999999

    if model_name == "black-forest-labs/FLUX.1-pro":
        payload["width"] = clamp(256, 1440, snap_to_32(data.get("width", 1024)))
        payload["height"] = clamp(256, 1440, snap_to_32(data.get("height", 768)))
        payload["prompt_upsampling"] = data.get("prompt_upsampling", False)
        payload["image_prompt"] = data.get("image_prompt")
        payload["steps"] = clamp(1, 50, data.get("steps", 20))
        payload["guidance"] = clamp(1.5, 5, data.get("guidance", 3))
        payload["safety_tolerance"] = clamp(0, 6, data.get("safety_tolerance", 2))
        payload["interval"] = clamp(1, 4, data.get("interval", 2))
        payload["output_format"] = data.get("output_format", "png")
        if seed_is_usable:
            payload["seed"] = seed
        return payload

    requested_size = data.get("image_size", "1024x1024")
    if requested_size not in accepted_sizes:
        requested_size = "1024x1024"
    payload["image_size"] = requested_size
    payload["prompt_enhancement"] = data.get("prompt_enhancement", False)
    if seed_is_usable:
        payload["seed"] = seed

    if model_name not in schnell_variants:
        payload["batch_size"] = clamp(1, 4, data.get("n", 1))
        payload["num_inference_steps"] = clamp(1, 50, data.get("steps", 20))
        payload["guidance_scale"] = clamp(0, 100, data.get("guidance_scale", 7.5))
        payload["negative_prompt"] = data.get("negative_prompt")
    return payload
205
def refresh_models():
    """Rebuild the global ``models`` registry from the upstream API.

    Lists chat, embedding and text-to-image models with
    ``FREE_MODEL_TEST_KEY``, drops every id named in the ``BAN_MODELS``
    environment variable (a JSON array), then probes each remaining model
    with ``test_model_availability`` to fill the ``free_*`` lists.

    All results are assembled in a local dict and written to ``models``
    only once probing has finished, so requests served while a refresh is
    running keep seeing the previous, complete lists instead of empty
    ``free_*`` lists.
    """
    global models

    ban_models = []
    ban_models_str = os.environ.get("BAN_MODELS")
    if ban_models_str:
        try:
            ban_models = json.loads(ban_models_str)
            if not isinstance(ban_models, list):
                logging.warning("环境变量 BAN_MODELS 格式不正确,应为 JSON 数组。")
                ban_models = []
        except json.JSONDecodeError:
            logging.warning("环境变量 BAN_MODELS JSON 解析失败,请检查格式。")

    # (registry category, upstream sub_type for listing, probe model_type)
    model_types = [
        ("text", "chat", "chat"),
        ("embedding", "embedding", "embedding"),
        ("image", "text-to-image", "image"),
    ]

    fresh = {}
    for model_type, sub_type, _ in model_types:
        listed = get_all_models(FREE_MODEL_TEST_KEY, sub_type)
        fresh[model_type] = [model for model in listed if model not in ban_models]
        fresh[f"free_{model_type}"] = []

    for model_type, _, test_type in model_types:
        with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as executor:
            future_to_model = {
                executor.submit(
                    test_model_availability,
                    FREE_MODEL_TEST_KEY,
                    model,
                    test_type
                ): model for model in fresh[model_type]
            }
            for future in concurrent.futures.as_completed(future_to_model):
                model = future_to_model[future]
                try:
                    is_free = future.result()
                    if is_free:
                        fresh[f"free_{model_type}"].append(model)
                except Exception as exc:
                    logging.error(f"{model_type}模型 {model} 测试生成异常: {exc}")

    # Publish the finished lists in one step.
    models.update(fresh)

    for model_type in ["text", "embedding", "image"]:
        logging.info(f"所有{model_type}模型列表:{models[model_type]}")
        logging.info(f"免费{model_type}模型列表:{models[f'free_{model_type}']}")
252
def load_keys():
    """Classify the API keys from the ``KEYS`` environment variable.

    ``KEYS`` is a comma-separated list. Keys are stripped, de-duplicated
    (the de-duplicated list is written back to ``KEYS``) and classified
    concurrently with ``process_key``; a key whose classification raises
    is recorded as ``"invalid"``. Empty entries (e.g. from a trailing
    comma) are ignored.

    The results replace the lists in ``key_status`` and are mirrored into
    the ``*_keys_global`` module variables. Those variables are always
    (re)assigned, including when ``KEYS`` is unset, so readers such as
    ``select_key`` never hit an undefined or stale name.
    """
    global key_status
    global invalid_keys_global, free_keys_global, unverified_keys_global, valid_keys_global

    # Collect into fresh lists; key_status is only touched once done.
    fresh_status = {status: [] for status in key_status}

    keys_str = os.environ.get("KEYS")
    if not keys_str:
        logging.warning("环境变量 KEYS 未设置。")
    else:
        test_model = os.environ.get("TEST_MODEL", "Pro/google/gemma-2-9b-it")
        unique_keys = list({key.strip() for key in keys_str.split(',')} - {""})
        os.environ["KEYS"] = ','.join(unique_keys)
        logging.info(f"加载的 keys:{unique_keys}")

        def process_key_with_logging(key):
            try:
                key_type = process_key(key, test_model)
            except Exception as exc:
                logging.error(f"处理 KEY {key} 生成异常: {exc}")
                key_type = "invalid"
            if key_type in fresh_status:
                fresh_status[key_type].append(key)
            return key_type

        with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as executor:
            futures = [executor.submit(process_key_with_logging, key) for key in unique_keys]
            concurrent.futures.wait(futures)

        for status, keys in fresh_status.items():
            logging.info(f"{status.capitalize()} KEYS: {keys}")

    for status, keys in fresh_status.items():
        key_status[status] = keys
    invalid_keys_global = key_status["invalid"]
    free_keys_global = key_status["free"]
    unverified_keys_global = key_status["unverified"]
    valid_keys_global = key_status["valid"]
283
def process_key(key, test_model):
    """Classify a single API key.

    Args:
        key: The API key to classify.
        test_model: Model id used to probe keys that have a balance.

    Returns:
        ``"invalid"`` when no balance could be fetched, ``"free"`` when
        the balance is at most 0.03, ``"valid"`` when the probe against
        ``test_model`` succeeds, otherwise ``"unverified"``.
    """
    summary = get_credit_summary(key)
    if summary is None:
        return "invalid"
    if summary.get("total_balance", 0) <= 0.03:
        return "free"
    return "valid" if test_model_availability(key, test_model) else "unverified"
296
def get_all_models(api_key, sub_type):
    """List the upstream model ids of one sub-type.

    Args:
        api_key: Key sent as the bearer token.
        sub_type: Value of the ``sub_type`` query parameter.

    Returns:
        The ``id`` of every dict entry in the response's ``data`` list;
        an empty list when the request fails or the response does not
        have the expected shape (the failure is logged).
    """
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    try:
        listing = session.get(
            MODELS_ENDPOINT,
            headers=auth_headers,
            params={"sub_type": sub_type},
        )
        listing.raise_for_status()
        payload = listing.json()

        entries = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(entries, list):
            logging.error("获取模型列表失败:响应数据格式不正确")
            return []
        return [
            entry.get("id")
            for entry in entries
            if isinstance(entry, dict) and "id" in entry
        ]
    except requests.exceptions.RequestException as e:
        logging.error(
            f"获取模型列表失败,"
            f"API Key:{api_key},错误信息:{e}"
        )
        return []
    except (KeyError, TypeError) as e:
        logging.error(
            f"解析模型列表失败,"
            f"API Key:{api_key},错误信息:{e}"
        )
        return []
333
def determine_request_type(model_name, model_list, free_model_list):
    """Classify a model as ``"free"``, ``"paid"`` or ``"unknown"``.

    Membership in ``free_model_list`` takes precedence over membership
    in ``model_list``.
    """
    if model_name in free_model_list:
        return "free"
    return "paid" if model_name in model_list else "unknown"
340
def select_key(request_type, model_name):
    """Pick the next usable API key for ``model_name``, round-robin.

    ``"paid"`` requests draw from the unverified and valid keys; every
    other request type draws from free, unverified and valid keys.
    Starting at the cursor stored for the model, each candidate is
    checked with ``key_is_valid`` until one passes.

    Returns:
        The selected key (the cursor then points past it), or ``None``
        when there are no candidates or none passes (the cursor is then
        reset to 0).
    """
    if request_type == "paid":
        candidates = unverified_keys_global + valid_keys_global
    else:
        candidates = (
            free_keys_global + unverified_keys_global + valid_keys_global
        )
    if not candidates:
        return None

    pool_size = len(candidates)
    start = model_key_indices.get(model_name, 0)
    for position in range(start, start + pool_size):
        key = candidates[position % pool_size]
        if key_is_valid(key, request_type):
            model_key_indices[model_name] = position + 1
            return key
        logging.warning(
            f"KEY {key} 无效或达到限制,尝试下一个 KEY"
        )

    model_key_indices[model_name] = 0
    return None
370
def key_is_valid(key, request_type):
    """Check whether ``key`` may serve a request of ``request_type``.

    Args:
        key: The API key to check.
        request_type: ``"free"``, ``"paid"`` or ``"unverified"``; any
            other value is rejected.

    Returns:
        ``False`` for an unrecognised request type or when the balance
        cannot be fetched; ``True`` for ``"free"`` requests once the
        balance lookup succeeds; for ``"paid"``/``"unverified"`` whether
        the balance is greater than zero.
    """
    # Unrecognised types can never be accepted, so reject them before
    # spending a balance lookup (an HTTP round trip) on the key.
    if request_type not in ("free", "paid", "unverified"):
        return False
    credit_summary = get_credit_summary(key)
    if credit_summary is None:
        return False
    if request_type == "free":
        return True
    return credit_summary.get("total_balance", 0) > 0
383
def check_authorization(request):
    """Validate the ``Authorization`` header of an incoming request.

    The expected value is ``Bearer <AUTHORIZATION_KEY>``, where
    ``AUTHORIZATION_KEY`` comes from the environment. When that variable
    is unset, every request is accepted (a warning is logged).

    Args:
        request: Object exposing a ``headers`` mapping.

    Returns:
        ``True`` when the request is authorized, otherwise ``False``.
    """
    import hmac  # local import: only this function needs it

    authorization_key = os.environ.get("AUTHORIZATION_KEY")
    if not authorization_key:
        logging.warning("环境变量 AUTHORIZATION_KEY 未设置,此时无需鉴权即可使用,建议进行设置后再使用。")
        return True
    auth_header = request.headers.get('Authorization')
    if not auth_header:
        logging.warning("请求头中缺少 Authorization 字段。")
        return False
    expected_header = f"Bearer {authorization_key}"
    # Constant-time comparison so response timing does not reveal how
    # much of the secret matched. Compared as bytes because
    # compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(
        auth_header.encode("utf-8"), expected_header.encode("utf-8")
    ):
        logging.warning(f"无效的 Authorization 密钥:{auth_header}")
        return False
    return True
396
+
397
def obfuscate_key(key):
    """Mask an API key for display.

    Keeps the first 6 and last 4 characters and replaces everything in
    between with ``*``. Empty keys and keys of 10 characters or fewer
    are returned as ``"****"``.
    """
    visible_head, visible_tail = 6, 4
    if not key or len(key) <= visible_head + visible_tail:
        return "****"
    hidden_count = len(key) - visible_head - visible_tail
    return key[:visible_head] + "*" * hidden_count + key[-visible_tail:]
408
+
409
# Background job scheduler for the periodic refresh tasks.
scheduler = BackgroundScheduler()
scheduler.add_job(load_keys, 'interval', hours=1)
# NOTE(review): remove_all_jobs() discards the load_keys job registered on
# the line above, so as written only refresh_models remains scheduled.
# Confirm whether the hourly key reload was meant to be disabled.
scheduler.remove_all_jobs()
scheduler.add_job(refresh_models, 'interval', hours=1)
413
+
414
def _drop_expired(timestamps, counts, cutoff):
    """Pop leading entries of both lists while the timestamp is older than ``cutoff``."""
    while timestamps and timestamps[0] < cutoff:
        timestamps.pop(0)
        counts.pop(0)


@app.route('/')
def index():
    """Render the dashboard: request/token rates and per-key balances.

    Prunes the per-minute and per-day counters, computes requests and
    tokens per minute/day, fetches the balance of every known key
    concurrently, and renders ``index.html`` with masked keys.
    """
    now = time.time()

    with data_lock:
        _drop_expired(request_timestamps, token_counts, now - 60)
        rpm = len(request_timestamps)
        tpm = sum(token_counts)
    with data_lock:
        _drop_expired(request_timestamps_day, token_counts_day, now - 86400)
        rpd = len(request_timestamps_day)
        tpd = sum(token_counts_day)

    # Every key from every status bucket.
    all_keys = list(chain.from_iterable(key_status.values()))
    key_balances = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as pool:
        pending = {pool.submit(get_credit_summary, key): key for key in all_keys}
        for finished in concurrent.futures.as_completed(pending):
            masked_key = obfuscate_key(pending[finished])
            try:
                summary = finished.result()
                balance = summary.get("total_balance") if summary else "获取失败"
            except Exception as exc:
                logging.error(f"获取 KEY {masked_key} 余额信息失败: {exc}")
                balance = "获取失败"
            key_balances.append({"key": masked_key, "balance": balance})

    return render_template(
        'index.html',
        rpm=rpm,
        tpm=tpm,
        rpd=rpd,
        tpd=tpd,
        key_balances=key_balances,
    )
448
+
449
@app.route('/handsome/v1/models', methods=['GET'])
def list_models():
    """Return all known text, embedding and image models.

    Every model whose id contains ``DeepSeek-R1`` is followed by two
    extra entries with the ``-thinking`` and ``-openwebui`` suffixes.
    Responds with 401 when the request is not authorized.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    def describe(model_id):
        # One model entry of the listing.
        return {
            "id": model_id,
            "object": "model",
            "created": 1678888888,
            "owned_by": "openai",
            "permission": [],
            "root": model_id,
            "parent": None,
        }

    detailed_models = []
    for model in chain(models["text"], models["embedding"], models["image"]):
        detailed_models.append(describe(model))
        if "DeepSeek-R1" in model:
            detailed_models.extend(
                describe(model + suffix)
                for suffix in ("-thinking", "-openwebui")
            )

    return jsonify({"success": True, "data": detailed_models})
493
@app.route('/handsome/v1/dashboard/billing/usage', methods=['GET'])
def billing_usage():
    """Return a usage report that is always empty (total usage 0).

    Responds with 401 when the request is not authorized.
    """
    if check_authorization(request):
        return jsonify({"object": "list", "data": [], "total_usage": 0})
    return jsonify({"error": "Unauthorized"}), 401
503
@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
def billing_subscription():
    """Report the combined balance of valid and unverified keys.

    Balances are fetched concurrently; keys whose lookup fails are
    skipped. The sum is returned as the hard limits of a
    ``billing_subscription`` object. Responds with 401 when the request
    is not authorized.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    keys = valid_keys_global + unverified_keys_global
    total_balance = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=10000) as pool:
        pending = [pool.submit(get_credit_summary, key) for key in keys]
        for finished in concurrent.futures.as_completed(pending):
            try:
                summary = finished.result()
                if summary:
                    total_balance += summary.get("total_balance", 0)
            except Exception as exc:
                logging.error(f"获取额度信息生成异常: {exc}")

    access_until = int(datetime(9999, 12, 31).timestamp())
    return jsonify({
        "object": "billing_subscription",
        "access_until": access_until,
        "soft_limit": 0,
        "hard_limit": total_balance,
        "system_hard_limit": total_balance,
        "soft_limit_usd": 0,
        "hard_limit_usd": total_balance,
        "system_hard_limit_usd": total_balance,
    })
532
@app.route('/handsome/v1/embeddings', methods=['POST'])
def handsome_embeddings():
    """Proxy an embeddings request to the upstream API.

    Validates authorization, body and model, selects an API key, forwards
    the body unchanged, records the request in the usage counters and
    returns the embeddings with the prompt-token usage.

    Responses: 401 unauthorized, 400 invalid body/model, 429 when no key
    is available or upstream answers 429, 500 on a request error.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    data = request.get_json()
    if not data or 'model' not in data:
        return jsonify({"error": "Invalid request data"}), 400

    model_name = data['model']
    if model_name not in models["embedding"]:
        return jsonify({"error": "Invalid model"}), 400

    request_type = determine_request_type(
        model_name, models["embedding"], models["free_embedding"]
    )
    api_key = select_key(request_type, model_name)
    if not api_key:
        return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429

    upstream_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    try:
        start_time = time.time()
        upstream = requests.post(
            EMBEDDINGS_ENDPOINT,
            headers=upstream_headers,
            json=data,
            timeout=120,
        )
        # Pass upstream rate limiting straight through to the client.
        if upstream.status_code == 429:
            return jsonify(upstream.json()), 429
        upstream.raise_for_status()
        total_time = time.time() - start_time
        response_json = upstream.json()

        try:
            prompt_tokens = response_json["usage"]["prompt_tokens"]
            embedding_data = response_json["data"]
        except (KeyError, ValueError, IndexError) as e:
            logging.error(
                f"解析响应 JSON 失败: {e}, "
                f"完整内容: {response_json}"
            )
            prompt_tokens = 0
            embedding_data = []

        logging.info(
            f"使用的key: {api_key}, "
            f"提示token: {prompt_tokens}, "
            f"总共用时: {total_time:.4f}秒, "
            f"使用的模型: {model_name}"
        )

        with data_lock:
            request_timestamps.append(time.time())
            token_counts.append(prompt_tokens)
            request_timestamps_day.append(time.time())
            token_counts_day.append(prompt_tokens)

        usage = {"prompt_tokens": prompt_tokens, "total_tokens": prompt_tokens}
        return jsonify({
            "object": "list",
            "data": embedding_data,
            "model": model_name,
            "usage": usage,
        })
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500
600
@app.route('/handsome/v1/images/generations', methods=['POST'])
def handsome_images_generations():
    """Proxy an image-generation request to the upstream API.

    Validates authorization, body and model, selects an API key, builds
    the upstream payload with ``get_siliconflow_data`` and converts the
    returned images into OpenAI-style entries via ``process_image_url``
    (base64 when ``response_format`` is ``"b64_json"``, URL otherwise).
    The request is recorded in the usage counters with a token count of 0.

    Responses: 401 unauthorized, 400 invalid body/model or unsupported
    model, 429 when no key is available or upstream answers 429, 500 on
    a request error.
    """
    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401
    data = request.get_json()
    if not data or 'model' not in data:
        return jsonify({"error": "Invalid request data"}), 400
    if data['model'] not in models["image"]:
        return jsonify({"error": "Invalid model"}), 400
    model_name = data.get('model')
    request_type = determine_request_type(
        model_name,
        models["image"],
        models["free_image"]
    )
    api_key = select_key(request_type, model_name)
    if not api_key:
        return jsonify({"error": ("No available API key for this request type or all keys have reached their limits")}), 429

    is_supported = "stable-diffusion" in model_name or model_name in [
        "black-forest-labs/FLUX.1-schnell",
        "Pro/black-forest-labs/FLUX.1-schnell",
        "black-forest-labs/FLUX.1-dev",
        "black-forest-labs/FLUX.1-pro",
    ]
    if not is_supported:
        return jsonify({"error": "Unsupported model"}), 400

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    response_format = data.get("response_format")
    siliconflow_data = get_siliconflow_data(model_name, data)
    try:
        start_time = time.time()
        response = requests.post(
            IMAGE_ENDPOINT,
            headers=headers,
            json=siliconflow_data,
            timeout=120
        )
        # Pass upstream rate limiting straight through to the client.
        if response.status_code == 429:
            return jsonify(response.json()), 429
        response.raise_for_status()
        end_time = time.time()
        response_json = response.json()
        total_time = end_time - start_time
        try:
            images = response_json.get("images", [])
            openai_images = []
            for item in images:
                if isinstance(item, dict) and "url" in item:
                    image_url = item["url"]
                    logging.info(f"image_url: {image_url}")
                    # The shared helper handles the optional base64
                    # conversion and falls back to the URL on failure.
                    openai_images.append(
                        process_image_url(image_url, response_format)
                    )
                else:
                    logging.error(f"无效的图片数据: {item}")
                    openai_images.append({"url": item})
            response_data = {
                "created": int(time.time()),
                "data": openai_images
            }
        except (KeyError, ValueError, IndexError) as e:
            logging.error(
                f"解析响应 JSON 失败: {e}, "
                f"完整内容: {response_json}"
            )
            response_data = {
                "created": int(time.time()),
                "data": []
            }
        logging.info(
            f"使用的key: {api_key}, "
            f"总共用时: {total_time:.4f}秒, "
            f"使用的模型: {model_name}"
        )
        with data_lock:
            request_timestamps.append(time.time())
            token_counts.append(0)
            request_timestamps_day.append(time.time())
            token_counts_day.append(0)
        return jsonify(response_data)
    except requests.exceptions.RequestException as e:
        logging.error(f"请求转发异常: {e}")
        return jsonify({"error": str(e)}), 500
691
+ @app.route('/handsome/v1/chat/completions', methods=['POST'])
692
+ def handsome_chat_completions():
693
+ if not check_authorization(request):
694
+ return jsonify({"error": "Unauthorized"}), 401
695
+ data = request.get_json()
696
+ if not data or 'model' not in data:
697
+ return jsonify({"error": "Invalid request data"}), 400
698
+ model_name = data['model']
699
+ if model_name not in models["text"] and model_name not in models["image"]:
700
+ if "DeepSeek-R1" in model_name and (model_name.endswith("-openwebui") or model_name.endswith("-thinking")):
701
+ pass
702
+ else:
703
+ return jsonify({"error": "Invalid model"}), 400
704
+ model_realname = model_name.replace("-thinking", "").replace("-openwebui", "")
705
+ request_type = determine_request_type(
706
+ model_realname,
707
+ models["text"] + models["image"],
708
+ models["free_text"] + models["free_image"]
709
+ )
710
+ api_key = select_key(request_type, model_name)
711
+ if not api_key:
712
+ return jsonify(
713
+ {
714
+ "error": (
715
+ "No available API key for this "
716
+ "request type or all keys have "
717
+ "reached their limits"
718
+ )
719
+ }
720
+ ), 429
721
+ headers = {
722
+ "Authorization": f"Bearer {api_key}",
723
+ "Content-Type": "application/json"
724
+ }
725
+ if "DeepSeek-R1" in model_name and ("thinking" in model_name or "openwebui" in model_name):
726
+ data['model'] = model_realname
727
+ start_time = time.time()
728
+ response = requests.post(
729
+ TEST_MODEL_ENDPOINT,
730
+ headers=headers,
731
+ json=data,
732
+ stream=data.get("stream", False),
733
+ timeout=120
734
+ )
735
+ if response.status_code == 429:
736
+ return jsonify(response.json()), 429
737
+ if data.get("stream", False):
738
def generate():
    """Relay the upstream SSE stream with reasoning folded into ``content``.

    Closure over the enclosing request handler: it reads ``response``,
    ``model_name``, ``start_time``, ``data`` and ``api_key`` and appends to
    ``request_timestamps`` / ``token_counts`` while holding ``data_lock``.

    For each upstream ``data:`` event the first choice's delta is inspected:

    * ``reasoning_content`` is re-emitted as ordinary content. For model
      names ending in ``-openwebui`` it is wrapped in ``<think>`` ...
      ``</think>``; for any other model it is rendered as a Markdown block
      quote (``> `` prefix on every line).
    * ``content`` is re-emitted unchanged, preceded by the closing marker
      when it follows reasoning output.

    Once the upstream stream ends, one summary line is logged and the usage
    counters are updated.

    Yields:
        str: Server-sent-event frames, terminated by ``data: [DONE]``.
    """
    wrap_in_think_tags = model_name.endswith("-openwebui")

    def sse(text):
        # Minimal OpenAI-style chunk that carries only a content delta.
        frame = {'choices': [{'delta': {'content': text}, 'index': 0}]}
        return f"data: {json.dumps(frame)}\n\n"

    first_chunk_time = None
    prompt_tokens = 0
    completion_tokens = 0
    reasoning_parts = []
    content_parts = []
    reasoning_open = False  # True while reasoning output is being emitted

    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        if first_chunk_time is None:
            first_chunk_time = time.time()

        # iter_lines() already yields one line at a time; splitting again
        # with str.splitlines() could break JSON that contains characters
        # such as U+2028.
        line = raw_line.decode("utf-8")
        if not line.startswith("data:"):
            continue
        # Slice the prefix off; str.lstrip("data: ") would strip a character
        # set rather than the literal prefix.
        payload = line[len("data:"):].strip()
        if payload == "[DONE]":
            continue
        try:
            event = json.loads(payload)
        except ValueError:
            continue
        if not isinstance(event, dict):
            continue

        # Usage is read per event. The previous implementation glued all
        # lines together without separators and re-parsed them afterwards,
        # which never produced valid JSON, so the counts stayed at 0.
        # NOTE(review): the last reported value wins, as in the plain-chat
        # streaming path of this file - confirm upstream sends running totals.
        usage = event.get("usage")
        if isinstance(usage, dict):
            if isinstance(usage.get("prompt_tokens"), int):
                prompt_tokens = usage["prompt_tokens"]
            if isinstance(usage.get("completion_tokens"), int):
                completion_tokens = usage["completion_tokens"]

        choices = event.get("choices")
        if not isinstance(choices, list) or not choices:
            continue
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            continue
        delta = first_choice.get("delta") or {}

        reasoning = delta.get("reasoning_content")
        if reasoning is not None:
            reasoning_parts.append(reasoning)
            if wrap_in_think_tags:
                if not reasoning_open:
                    # The opening tag is deliberately sent as three frames,
                    # exactly as before.
                    for piece in ("<", "think", ">\n"):
                        yield sse(piece)
                yield sse(reasoning)
            else:
                quoted = reasoning.replace('\n', '\n> ')
                if not reasoning_open:
                    quoted = "> " + quoted
                yield sse(quoted)
            reasoning_open = True

        content = delta.get("content")
        if content is not None:
            if reasoning_open:
                yield sse("\n</think>\n" if wrap_in_think_tags else "\n\n")
                reasoning_open = False
            content_parts.append(content)
            yield sse(content)

    end_time = time.time()
    first_token_time = first_chunk_time - start_time if first_chunk_time else 0
    total_time = end_time - start_time

    # Collect the text of every user message for the log line.
    user_parts = []
    for message in data.get("messages", []):
        if message["role"] != "user":
            continue
        body = message["content"]
        if isinstance(body, str):
            user_parts.append(body)
        elif isinstance(body, list):
            user_parts.extend(
                item.get("text", "")
                for item in body
                if isinstance(item, dict) and item.get("type") == "text"
            )
    user_content = " ".join(user_parts).strip()

    reasoning_text = "".join(reasoning_parts)
    response_text = (
        f"```Thinking\n{reasoning_text}\n```\n" if reasoning_text else ""
    ) + "".join(content_parts)

    # Keep the log entry on a single line.
    user_content_replaced = user_content.replace('\n', '\\n').replace('\r', '\\n')
    response_content_replaced = response_text.replace('\n', '\\n').replace('\r', '\\n')
    logging.info(
        f"使用的key: {api_key}, "
        f"提示token: {prompt_tokens}, "
        f"输出token: {completion_tokens}, "
        f"首字用时: {first_token_time:.4f}秒, "
        f"总共用时: {total_time:.4f}秒, "
        f"使用的模型: {model_name}, "
        f"用户的内容: {user_content_replaced}, "
        f"输出的内容: {response_content_replaced}"
    )
    with data_lock:
        request_timestamps.append(time.time())
        token_counts.append(prompt_tokens + completion_tokens)
    yield "data: [DONE]\n\n"
947
+ return Response(
948
+ stream_with_context(generate()),
949
+ content_type="text/event-stream"
950
+ )
951
+ else:
952
+ response.raise_for_status()
953
+ end_time = time.time()
954
+ response_json = response.json()
955
+ total_time = end_time - start_time
956
+ try:
957
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
958
+ completion_tokens = response_json["usage"]["completion_tokens"]
959
+ response_content = ""
960
+ if model_name.endswith("-thinking") and "choices" in response_json and len(response_json["choices"]) > 0:
961
+ choice = response_json["choices"][0]
962
+ if "message" in choice:
963
+ if "reasoning_content" in choice["message"]:
964
+ reasoning_content = choice["message"]["reasoning_content"]
965
+ reasoning_content = reasoning_content.replace('\n', '\n> ')
966
+ reasoning_content = '> ' + reasoning_content
967
+ formatted_reasoning = f"{reasoning_content}\n"
968
+ response_content += formatted_reasoning + "\n"
969
+ if "content" in choice["message"]:
970
+ response_content += choice["message"]["content"]
971
+ elif model_name.endswith("-openwebui") and "choices" in response_json and len(response_json["choices"]) > 0:
972
+ choice = response_json["choices"][0]
973
+ if "message" in choice:
974
+ if "reasoning_content" in choice["message"]:
975
+ reasoning_content = choice["message"]["reasoning_content"]
976
+ response_content += f"<think>\n{reasoning_content}\n</think>\n"
977
+ if "content" in choice["message"]:
978
+ response_content += choice["message"]["content"]
979
+ except (KeyError, ValueError, IndexError) as e:
980
+ logging.error(
981
+ f"解析非流式响应 JSON 失败: {e}, "
982
+ f"完整内容: {response_json}"
983
+ )
984
+ prompt_tokens = 0
985
+ completion_tokens = 0
986
+ response_content = ""
987
+ user_content = ""
988
+ messages = data.get("messages", [])
989
+ for message in messages:
990
+ if message["role"] == "user":
991
+ if isinstance(message["content"], str):
992
+ user_content += message["content"] + " "
993
+ elif isinstance(message["content"], list):
994
+ for item in message["content"]:
995
+ if (
996
+ isinstance(item, dict) and
997
+ item.get("type") == "text"
998
+ ):
999
+ user_content += (
1000
+ item.get("text", "") +
1001
+ " "
1002
+ )
1003
+ user_content = user_content.strip()
1004
+ user_content_replaced = user_content.replace(
1005
+ '\n', '\\n'
1006
+ ).replace('\r', '\\n')
1007
+ response_content_replaced = response_content.replace(
1008
+ '\n', '\\n'
1009
+ ).replace('\r', '\\n')
1010
+ logging.info(
1011
+ f"使用的key: {api_key}, "
1012
+ f"提示token: {prompt_tokens}, "
1013
+ f"输出token: {completion_tokens}, "
1014
+ f"首字用时: 0, "
1015
+ f"总共用时: {total_time:.4f}秒, "
1016
+ f"使用的模型: {model_name}, "
1017
+ f"用户的内容: {user_content_replaced}, "
1018
+ f"输出的内容: {response_content_replaced}"
1019
+ )
1020
+ with data_lock:
1021
+ request_timestamps.append(time.time())
1022
+ token_counts.append(prompt_tokens + completion_tokens)
1023
+ formatted_response = {
1024
+ "id": response_json.get("id", ""),
1025
+ "object": "chat.completion",
1026
+ "created": response_json.get("created", int(time.time())),
1027
+ "model": model_name,
1028
+ "choices": [
1029
+ {
1030
+ "index": 0,
1031
+ "message": {
1032
+ "role": "assistant",
1033
+ "content": response_content
1034
+ },
1035
+ "finish_reason": "stop"
1036
+ }
1037
+ ],
1038
+ "usage": {
1039
+ "prompt_tokens": prompt_tokens,
1040
+ "completion_tokens": completion_tokens,
1041
+ "total_tokens": prompt_tokens + completion_tokens
1042
+ }
1043
+ }
1044
+ return jsonify(formatted_response)
1045
+ if model_name in models["image"]:
1046
+ if isinstance(data.get("messages"), list):
1047
+ data = data.copy()
1048
+ data["prompt"] = extract_user_content(data["messages"])
1049
+ siliconflow_data = get_siliconflow_data(model_name, data)
1050
+ try:
1051
+ start_time = time.time()
1052
+ response = requests.post(
1053
+ IMAGE_ENDPOINT,
1054
+ headers=headers,
1055
+ json=siliconflow_data,
1056
+ stream=data.get("stream", False)
1057
+ )
1058
+ if response.status_code == 429:
1059
+ return jsonify(response.json()), 429
1060
+ if data.get("stream", False):
1061
def generate():
    """Stream an image-generation result as OpenAI-style chat chunks.

    Closure over the enclosing request handler: it reads ``response``,
    ``model_name`` and ``api_key`` and appends to the shared request/token
    counters while holding ``data_lock``.

    The upstream JSON body is expected to carry an ``images`` list whose
    first entry is either a ``{"url": ...}`` dict or a plain URL string. The
    Markdown produced by ``create_base64_markdown_image`` is sent in
    8192-character content chunks, followed by a ``finish_reason: "stop"``
    chunk. A ``requests`` failure is reported to the client as an
    ``Error: ...`` content chunk instead of aborting the stream.

    Yields:
        bytes: UTF-8 encoded server-sent-event frames, ending with
        ``data: [DONE]``.
    """
    def chunk_frame(delta, finish_reason=None):
        # One ``chat.completion.chunk`` envelope, serialised as an SSE frame.
        frame = {
            "id": f"chatcmpl-{uuid.uuid4()}",
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model_name,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason
                }
            ]
        }
        return f"data: {json.dumps(frame)}\n\n".encode('utf-8')

    try:
        response.raise_for_status()
        response_json = response.json()
        images = response_json.get("images", [])

        image_url = ""
        if images:
            first_image = images[0]
            if isinstance(first_image, dict) and "url" in first_image:
                image_url = first_image["url"]
            elif isinstance(first_image, str):
                image_url = first_image

        if image_url:
            logging.info(f"Extracted image URL: {image_url}")
            # Only build the Markdown once a URL is known; previously the
            # helper was also invoked with an empty string.
            # NOTE(review): assumes the helper returns a str - confirm.
            markdown_image_link = create_base64_markdown_image(image_url)
            chunk_size = 8192
            for start in range(0, len(markdown_image_link), chunk_size):
                yield chunk_frame({
                    "role": "assistant",
                    "content": markdown_image_link[start:start + chunk_size]
                })
        else:
            yield chunk_frame({
                "role": "assistant",
                "content": "Failed to generate image"
            })
        yield chunk_frame({}, "stop")

        # Image requests are counted with zero tokens.
        with data_lock:
            request_timestamps.append(time.time())
            token_counts.append(0)
            request_timestamps_day.append(time.time())
            token_counts_day.append(0)
    except requests.exceptions.RequestException as e:
        logging.error(f"请求转发异常: {e}")
        yield chunk_frame({
            "role": "assistant",
            "content": f"Error: {str(e)}"
        })
        yield chunk_frame({}, "stop")

    logging.info(
        f"使用的key: {api_key}, "
        f"使用的模型: {model_name}"
    )
    yield "data: [DONE]\n\n".encode('utf-8')
1170
+ return Response(stream_with_context(generate()), content_type='text/event-stream')
1171
+ else:
1172
+ response.raise_for_status()
1173
+ end_time = time.time()
1174
+ response_json = response.json()
1175
+ total_time = end_time - start_time
1176
+ try:
1177
+ images = response_json.get("images", [])
1178
+ image_url = ""
1179
+ if images and isinstance(images[0], dict) and "url" in images[0]:
1180
+ image_url = images[0]["url"]
1181
+ logging.info(f"Extracted image URL: {image_url}")
1182
+ elif images and isinstance(images[0], str):
1183
+ image_url = images[0]
1184
+ logging.info(f"Extracted image URL: {image_url}")
1185
+ markdown_image_link = f"![image]({image_url})"
1186
+ response_data = {
1187
+ "id": f"chatcmpl-{uuid.uuid4()}",
1188
+ "object": "chat.completion",
1189
+ "created": int(time.time()),
1190
+ "model": model_name,
1191
+ "choices": [
1192
+ {
1193
+ "index": 0,
1194
+ "message": {
1195
+ "role": "assistant",
1196
+ "content": markdown_image_link if image_url else "Failed to generate image",
1197
+ },
1198
+ "finish_reason": "stop",
1199
+ }
1200
+ ],
1201
+ }
1202
+ except (KeyError, ValueError, IndexError) as e:
1203
+ logging.error(
1204
+ f"解析响应 JSON 失败: {e}, "
1205
+ f"完整内容: {response_json}"
1206
+ )
1207
+ response_data = {
1208
+ "id": f"chatcmpl-{uuid.uuid4()}",
1209
+ "object": "chat.completion",
1210
+ "created": int(time.time()),
1211
+ "model": model_name,
1212
+ "choices": [
1213
+ {
1214
+ "index": 0,
1215
+ "message": {
1216
+ "role": "assistant",
1217
+ "content": "Failed to process image data",
1218
+ },
1219
+ "finish_reason": "stop",
1220
+ }
1221
+ ],
1222
+ }
1223
+ logging.info(
1224
+ f"使用的key: {api_key}, "
1225
+ f"总共用时: {total_time:.4f}秒, "
1226
+ f"使用的模型: {model_name}"
1227
+ )
1228
+ with data_lock:
1229
+ request_timestamps.append(time.time())
1230
+ token_counts.append(0)
1231
+ request_timestamps_day.append(time.time())
1232
+ token_counts_day.append(0)
1233
+ return jsonify(response_data)
1234
+ except requests.exceptions.RequestException as e:
1235
+ logging.error(f"请求转发异常: {e}")
1236
+ return jsonify({"error": str(e)}), 500
1237
+ else:
1238
+ try:
1239
+ start_time = time.time()
1240
+ response = requests.post(
1241
+ TEST_MODEL_ENDPOINT,
1242
+ headers=headers,
1243
+ json=data,
1244
+ stream=data.get("stream", False)
1245
+ )
1246
+ if response.status_code == 429:
1247
+ return jsonify(response.json()), 429
1248
+ if data.get("stream", False):
1249
def generate():
    """Forward the upstream streaming body unchanged, then log usage.

    Closure over the enclosing request handler: it reads ``response``,
    ``start_time``, ``data``, ``api_key`` and ``model_name`` and appends to
    the shared request/token counters while holding ``data_lock``.

    Chunks are passed through byte-for-byte. After the stream ends, the
    collected body is parsed as SSE ``data:`` lines to recover token usage
    and the generated text for the summary log line.

    Yields:
        bytes: Upstream chunks exactly as received (read 2048 bytes at a
        time).
    """
    first_chunk_time = None
    raw_chunks = []
    for chunk in response.iter_content(chunk_size=2048):
        if not chunk:
            continue
        if first_chunk_time is None:
            first_chunk_time = time.time()
        raw_chunks.append(chunk)
        yield chunk

    end_time = time.time()
    first_token_time = first_chunk_time - start_time if first_chunk_time else 0
    total_time = end_time - start_time

    # Decode once, after joining. A fixed-size chunk can end in the middle
    # of a multi-byte UTF-8 character, so decoding chunk by chunk could
    # raise UnicodeDecodeError and kill the stream.
    full_response_content = b"".join(raw_chunks).decode("utf-8", errors="replace")

    prompt_tokens = 0
    completion_tokens = 0
    response_parts = []
    for line in full_response_content.splitlines():
        if not line.startswith("data:"):
            continue
        line = line[5:].strip()
        if line == "[DONE]":
            continue
        try:
            response_json = json.loads(line)
            usage = response_json.get("usage") or {}
            if usage.get("completion_tokens") is not None:
                completion_tokens = usage["completion_tokens"]
            if usage.get("prompt_tokens") is not None:
                prompt_tokens = usage["prompt_tokens"]
            choices = response_json.get("choices") or []
            if choices:
                delta = choices[0].get("delta") or {}
                # ``content`` may be present but null; skip it instead of
                # concatenating None onto the text.
                piece = delta.get("content")
                if piece:
                    response_parts.append(piece)
        except (
            KeyError,
            ValueError,
            IndexError,
            TypeError,
            AttributeError
        ) as e:
            # A single malformed line must not abort the accounting below.
            logging.error(
                f"解析流式响应单行 JSON 失败: {e}, "
                f"行内容: {line}"
            )
    response_content = "".join(response_parts)

    user_content = extract_user_content(data.get("messages", []))
    # Keep the log entry on a single line.
    user_content_replaced = user_content.replace(
        '\n', '\\n'
    ).replace('\r', '\\n')
    response_content_replaced = response_content.replace(
        '\n', '\\n'
    ).replace('\r', '\\n')
    logging.info(
        f"使用的key: {api_key}, "
        f"提示token: {prompt_tokens}, "
        f"输出token: {completion_tokens}, "
        f"首字用时: {first_token_time:.4f}秒, "
        f"总共用时: {total_time:.4f}秒, "
        f"使用的模型: {model_name}, "
        f"用户的内容: {user_content_replaced}, "
        f"输出的内容: {response_content_replaced}"
    )
    with data_lock:
        request_timestamps.append(time.time())
        token_counts.append(prompt_tokens+completion_tokens)
        request_timestamps_day.append(time.time())
        token_counts_day.append(prompt_tokens+completion_tokens)
1330
+ return Response(
1331
+ stream_with_context(generate()),
1332
+ content_type=response.headers['Content-Type']
1333
+ )
1334
+ else:
1335
+ response.raise_for_status()
1336
+ end_time = time.time()
1337
+ response_json = response.json()
1338
+ total_time = end_time - start_time
1339
+ try:
1340
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
1341
+ completion_tokens = response_json[
1342
+ "usage"
1343
+ ]["completion_tokens"]
1344
+ response_content = response_json[
1345
+ "choices"
1346
+ ][0]["message"]["content"]
1347
+ except (KeyError, ValueError, IndexError) as e:
1348
+ logging.error(
1349
+ f"解析非流式响应 JSON 失败: {e}, "
1350
+ f"完整内容: {response_json}"
1351
+ )
1352
+ prompt_tokens = 0
1353
+ completion_tokens = 0
1354
+ response_content = ""
1355
+ user_content = extract_user_content(data.get("messages", []))
1356
+ user_content_replaced = user_content.replace(
1357
+ '\n', '\\n'
1358
+ ).replace('\r', '\\n')
1359
+ response_content_replaced = response_content.replace(
1360
+ '\n', '\\n'
1361
+ ).replace('\r', '\\n')
1362
+ logging.info(
1363
+ f"使用的key: {api_key}, "
1364
+ f"提示token: {prompt_tokens}, "
1365
+ f"输出token: {completion_tokens}, "
1366
+ f"首字用时: 0, "
1367
+ f"总共用时: {total_time:.4f}秒, "
1368
+ f"使用的模型: {model_name}, "
1369
+ f"用户的内容: {user_content_replaced}, "
1370
+ f"输出的内容: {response_content_replaced}"
1371
+ )
1372
+ with data_lock:
1373
+ request_timestamps.append(time.time())
1374
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
1375
+ token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
1376
+ else:
1377
+ token_counts.append(0)
1378
+ request_timestamps_day.append(time.time())
1379
+ if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
1380
+ token_counts_day.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
1381
+ else:
1382
+ token_counts_day.append(0)
1383
+ return jsonify(response_json)
1384
+ except requests.exceptions.RequestException as e:
1385
+ logging.error(f"请求转发异常: {e}")
1386
+ return jsonify({"error": str(e)}), 500
1387
if __name__ == '__main__':
    # Script entry point: load keys, start the scheduler, refresh the model
    # list once, then serve on all interfaces (port from $PORT, default 7860).

    # Log variable names only. Dumping os.environ in full would write every
    # value, including API keys and tokens, into the log.
    logging.info(f"环境变量:{sorted(os.environ)}")
    load_keys()
    logging.info("程序启动时首次加载 keys 已执行")
    scheduler.start()
    logging.info("首次加载 keys 已手动触发执行")
    refresh_models()
    logging.info("首次刷新模型列表已手动触发执行")
    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
1396
+