Spaces:
Running
Running
add fix exceed quota
Browse files
db/__pycache__/mongodb.cpython-310.pyc
CHANGED
Binary files a/db/__pycache__/mongodb.cpython-310.pyc and b/db/__pycache__/mongodb.cpython-310.pyc differ
|
|
excel/__pycache__/excel_translate.cpython-310.pyc
CHANGED
Binary files a/excel/__pycache__/excel_translate.cpython-310.pyc and b/excel/__pycache__/excel_translate.cpython-310.pyc differ
|
|
excel/__pycache__/xlsx.cpython-310.pyc
CHANGED
Binary files a/excel/__pycache__/xlsx.cpython-310.pyc and b/excel/__pycache__/xlsx.cpython-310.pyc differ
|
|
utils/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/utils.cpython-310.pyc and b/utils/__pycache__/utils.cpython-310.pyc differ
|
|
utils/utils.py
CHANGED
@@ -6,6 +6,7 @@ import io
|
|
6 |
import json
|
7 |
import time
|
8 |
from google.api_core.exceptions import ResourceExhausted
|
|
|
9 |
|
10 |
genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
|
11 |
|
@@ -84,56 +85,46 @@ def preprocess_text(text_list):
|
|
84 |
text_dict = {index: text for index, text in enumerate(text_list)}
|
85 |
return text_dict
|
86 |
|
87 |
-
def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
|
88 |
-
"""
|
89 |
-
Translates the values of a dictionary {index: text} using an LLM.
|
90 |
-
It uses an intermediate JSON string format for reliable LLM interaction.
|
91 |
-
Returns a dictionary {index: translated_text} with the same keys.
|
92 |
-
"""
|
93 |
-
if not isinstance(text_dict, dict):
|
94 |
-
print("Warning: translate_text_dict expected a dict, received:", type(text_dict))
|
95 |
-
return {}
|
96 |
-
if not text_dict:
|
97 |
-
return {}
|
98 |
|
99 |
-
|
100 |
def _dict_to_json_string(d):
|
101 |
json_compatible = {str(k): v for k, v in d.items()}
|
102 |
try:
|
103 |
-
return json.dumps(json_compatible, ensure_ascii=False, separators=(',',':'))
|
104 |
except Exception as e:
|
105 |
print(f"Internal Error (_dict_to_json_string): {e}")
|
106 |
return "{}"
|
107 |
|
108 |
-
# --- Internal Helper: Convert LLM's JSON String Response to Dictionary ---
|
109 |
def _json_string_to_dict(s):
|
110 |
res_dict = {}
|
111 |
if not s or not isinstance(s, str): return {}
|
112 |
try:
|
113 |
raw = json.loads(s)
|
114 |
if not isinstance(raw, dict):
|
115 |
-
|
116 |
-
|
117 |
for k_str, v in raw.items():
|
118 |
try:
|
119 |
res_dict[int(k_str)] = v
|
120 |
except ValueError:
|
121 |
-
print(f"
|
122 |
except json.JSONDecodeError as e:
|
123 |
-
print(f"
|
124 |
except Exception as e:
|
125 |
-
|
126 |
return res_dict
|
127 |
-
# --- End Internal Helpers ---
|
128 |
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
130 |
json_input_string = _dict_to_json_string(text_dict)
|
131 |
-
print(f"Input JSON String: {json_input_string}") # Debugging output
|
132 |
if json_input_string == "{}":
|
133 |
-
print("
|
134 |
-
return {key: "" for key in text_dict}
|
135 |
|
136 |
-
|
137 |
system_prompt = f"""Translate the string values within the following JSON object .
|
138 |
Follow these instructions carefully:
|
139 |
1. Analyze the entire JSON object to understand the context.
|
@@ -143,72 +134,64 @@ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
|
|
143 |
5. Preserve the original JSON structure perfectly.
|
144 |
6. Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
|
145 |
"""
|
146 |
-
# 3. Construct User Prompt
|
147 |
-
user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string} \n\n Translated JSON Output:"
|
148 |
|
149 |
-
|
150 |
-
raw_translated_json_string = "{}" # Default to empty JSON string
|
151 |
-
try:
|
152 |
-
model = genai.GenerativeModel('gemini-2.0-flash')
|
153 |
-
full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
|
154 |
-
|
155 |
-
response = model.generate_content(
|
156 |
-
contents=full_prompt,
|
157 |
-
generation_config={
|
158 |
-
'temperature': 0.3, # Low temp for adherence
|
159 |
-
'top_p': 1,
|
160 |
-
'top_k': 1,
|
161 |
-
}
|
162 |
-
# safety_settings=[...]
|
163 |
-
)
|
164 |
-
|
165 |
-
# Extract text safely and clean
|
166 |
-
if response and response.parts:
|
167 |
-
if hasattr(response.parts[0], 'text'):
|
168 |
-
raw_translated_json_string = response.parts[0].text.strip()
|
169 |
-
else:
|
170 |
-
print(f"Warning: Received response part without text attribute: {response.parts[0]}")
|
171 |
-
try: raw_translated_json_string = str(response.parts[0])
|
172 |
-
except Exception as str_e: print(f"Could not convert response part to string: {str_e}")
|
173 |
-
elif response and hasattr(response, 'text'):
|
174 |
-
raw_translated_json_string = response.text.strip()
|
175 |
-
else:
|
176 |
-
print(f"Warning: Received unexpected or empty response format from API: {response}")
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
if not raw_translated_json_string: raw_translated_json_string = "{}"
|
185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
print(raw_translated_json_string)
|
192 |
-
# 5. Convert the LLM's JSON string response back to a dictionary
|
193 |
translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
|
194 |
|
195 |
-
# 6. Validation: Ensure output dict has same keys as input dict
|
196 |
final_translated_dict = {}
|
197 |
missing_keys = []
|
198 |
-
for key in text_dict
|
199 |
if key in translated_intermediate_dict:
|
200 |
final_translated_dict[key] = translated_intermediate_dict[key]
|
201 |
else:
|
202 |
-
final_translated_dict[key] = ""
|
203 |
missing_keys.append(key)
|
204 |
|
205 |
if missing_keys:
|
206 |
-
print(f"
|
207 |
|
208 |
extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
|
209 |
if extra_keys:
|
210 |
-
print(f"
|
211 |
-
|
212 |
|
213 |
return final_translated_dict
|
214 |
|
|
|
6 |
import json
|
7 |
import time
|
8 |
from google.api_core.exceptions import ResourceExhausted
|
9 |
+
import re
|
10 |
|
11 |
genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
|
12 |
|
|
|
85 |
text_dict = {index: text for index, text in enumerate(text_list)}
|
86 |
return text_dict
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
+
def translate_text(text_dict, source_lang='English', target_lang="Vietnamese", max_retries=5, base_delay: float = 5.0):
|
90 |
def _dict_to_json_string(d):
|
91 |
json_compatible = {str(k): v for k, v in d.items()}
|
92 |
try:
|
93 |
+
return json.dumps(json_compatible, ensure_ascii=False, separators=(',', ':'))
|
94 |
except Exception as e:
|
95 |
print(f"Internal Error (_dict_to_json_string): {e}")
|
96 |
return "{}"
|
97 |
|
|
|
98 |
def _json_string_to_dict(s):
|
99 |
res_dict = {}
|
100 |
if not s or not isinstance(s, str): return {}
|
101 |
try:
|
102 |
raw = json.loads(s)
|
103 |
if not isinstance(raw, dict):
|
104 |
+
print(f"LLM response is not a JSON object: {s}")
|
105 |
+
return {}
|
106 |
for k_str, v in raw.items():
|
107 |
try:
|
108 |
res_dict[int(k_str)] = v
|
109 |
except ValueError:
|
110 |
+
print(f"Non-integer key '{k_str}' in LLM response.")
|
111 |
except json.JSONDecodeError as e:
|
112 |
+
print(f"JSON decode error: {e}")
|
113 |
except Exception as e:
|
114 |
+
print(f"General error: {e}")
|
115 |
return res_dict
|
|
|
116 |
|
117 |
+
if not isinstance(text_dict, dict):
|
118 |
+
print("translate_text_dict expected a dict, got:", type(text_dict))
|
119 |
+
return {}
|
120 |
+
if not text_dict:
|
121 |
+
return {}
|
122 |
+
|
123 |
json_input_string = _dict_to_json_string(text_dict)
|
|
|
124 |
if json_input_string == "{}":
|
125 |
+
print("Empty or invalid dictionary input.")
|
126 |
+
return {key: "" for key in text_dict}
|
127 |
|
|
|
128 |
system_prompt = f"""Translate the string values within the following JSON object .
|
129 |
Follow these instructions carefully:
|
130 |
1. Analyze the entire JSON object to understand the context.
|
|
|
134 |
5. Preserve the original JSON structure perfectly.
|
135 |
6. Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
|
136 |
"""
|
|
|
|
|
137 |
|
138 |
+
user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string}\n\nTranslated JSON Output:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
+
# Ask the model for the translation, retrying with exponential backoff.
# Every unsuccessful attempt -- an exception OR an empty reply -- consumes one
# of the ``max_retries`` attempts, so the loop is always bounded.
raw_translated_json_string = "{}"
# The prompt is identical for every attempt, so build it once.
full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"

for retry_count in range(max_retries):
    try:
        model = genai.GenerativeModel('gemini-2.0-flash')

        response = model.generate_content(
            contents=full_prompt,
            generation_config={
                'temperature': 0.3,
                'top_p': 1,
                'top_k': 1,
            },
        )

        # Start from an empty candidate on each attempt so that a reply
        # without usable text is never mistaken for a successful one.
        candidate = ""
        if response and response.parts and hasattr(response.parts[0], 'text'):
            candidate = response.parts[0].text.strip()
        elif hasattr(response, 'text'):
            candidate = response.text.strip()

        # Clean markdown wrappers if present
        candidate = re.sub(r"^```(?:json)?|```$", "", candidate).strip()

        if not candidate:
            # Route empty replies through the same backoff path as API errors.
            raise ValueError("empty response from model")

        raw_translated_json_string = candidate
        break  # Success, exit retry loop

    except Exception as e:
        if retry_count + 1 >= max_retries:
            # Last attempt: report the error but do not sleep, since no
            # further attempt follows.
            print(f"[Retry {retry_count+1}] Lỗi gọi API: {e}.")
        else:
            wait_time = base_delay * (2 ** retry_count)
            print(f"[Retry {retry_count+1}] Lỗi gọi API: {e}. Thử lại sau {wait_time:.2f} giây.")
            time.sleep(wait_time)
else:
    # Runs only when no attempt succeeded (the loop never hit ``break``).
    print("❌ Hết số lần thử lại. Trả về JSON rỗng.")
    raw_translated_json_string = "{}"
|
176 |
+
|
177 |
print(raw_translated_json_string)
|
|
|
178 |
translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
|
179 |
|
|
|
180 |
final_translated_dict = {}
|
181 |
missing_keys = []
|
182 |
+
for key in text_dict:
|
183 |
if key in translated_intermediate_dict:
|
184 |
final_translated_dict[key] = translated_intermediate_dict[key]
|
185 |
else:
|
186 |
+
final_translated_dict[key] = ""
|
187 |
missing_keys.append(key)
|
188 |
|
189 |
if missing_keys:
|
190 |
+
print(f"Cảnh báo: Thiếu keys: {sorted(missing_keys)}.")
|
191 |
|
192 |
extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
|
193 |
if extra_keys:
|
194 |
+
print(f"Cảnh báo: Có keys không mong đợi: {sorted(extra_keys)}.")
|
|
|
195 |
|
196 |
return final_translated_dict
|
197 |
|