mintlee committed on
Commit
f9becec
·
1 Parent(s): 314e765

Add fix for exceeded API quota (retry with exponential backoff)

Browse files
db/__pycache__/mongodb.cpython-310.pyc CHANGED
Binary files a/db/__pycache__/mongodb.cpython-310.pyc and b/db/__pycache__/mongodb.cpython-310.pyc differ
 
excel/__pycache__/excel_translate.cpython-310.pyc CHANGED
Binary files a/excel/__pycache__/excel_translate.cpython-310.pyc and b/excel/__pycache__/excel_translate.cpython-310.pyc differ
 
excel/__pycache__/xlsx.cpython-310.pyc CHANGED
Binary files a/excel/__pycache__/xlsx.cpython-310.pyc and b/excel/__pycache__/xlsx.cpython-310.pyc differ
 
utils/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/utils.cpython-310.pyc and b/utils/__pycache__/utils.cpython-310.pyc differ
 
utils/utils.py CHANGED
@@ -6,6 +6,7 @@ import io
6
  import json
7
  import time
8
  from google.api_core.exceptions import ResourceExhausted
 
9
 
10
  genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
11
 
@@ -84,56 +85,46 @@ def preprocess_text(text_list):
84
  text_dict = {index: text for index, text in enumerate(text_list)}
85
  return text_dict
86
 
87
- def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
88
- """
89
- Translates the values of a dictionary {index: text} using an LLM.
90
- It uses an intermediate JSON string format for reliable LLM interaction.
91
- Returns a dictionary {index: translated_text} with the same keys.
92
- """
93
- if not isinstance(text_dict, dict):
94
- print("Warning: translate_text_dict expected a dict, received:", type(text_dict))
95
- return {}
96
- if not text_dict:
97
- return {}
98
 
99
- # --- Internal Helper: Convert Dictionary to JSON String for LLM ---
100
  def _dict_to_json_string(d):
101
  json_compatible = {str(k): v for k, v in d.items()}
102
  try:
103
- return json.dumps(json_compatible, ensure_ascii=False, separators=(',',':'))
104
  except Exception as e:
105
  print(f"Internal Error (_dict_to_json_string): {e}")
106
  return "{}"
107
 
108
- # --- Internal Helper: Convert LLM's JSON String Response to Dictionary ---
109
  def _json_string_to_dict(s):
110
  res_dict = {}
111
  if not s or not isinstance(s, str): return {}
112
  try:
113
  raw = json.loads(s)
114
  if not isinstance(raw, dict):
115
- print(f"Internal Warning (_json_string_to_dict): LLM response is not a JSON object: {s}")
116
- return {}
117
  for k_str, v in raw.items():
118
  try:
119
  res_dict[int(k_str)] = v
120
  except ValueError:
121
- print(f"Internal Warning (_json_string_to_dict): Non-integer key '{k_str}' in LLM response.")
122
  except json.JSONDecodeError as e:
123
- print(f"Internal Error (_json_string_to_dict): Failed decoding JSON '{s}'. Error: {e}")
124
  except Exception as e:
125
- print(f"Internal Error (_json_string_to_dict): {e}")
126
  return res_dict
127
- # --- End Internal Helpers ---
128
 
129
- # 1. Convert input dictionary to JSON string
 
 
 
 
 
130
  json_input_string = _dict_to_json_string(text_dict)
131
- print(f"Input JSON String: {json_input_string}") # Debugging output
132
  if json_input_string == "{}":
133
- print("Skipping translation due to empty input dictionary or conversion error.")
134
- return {key: "" for key in text_dict} # Return original structure with empty values
135
 
136
-
137
  system_prompt = f"""Translate the string values within the following JSON object .
138
  Follow these instructions carefully:
139
  1. Analyze the entire JSON object to understand the context.
@@ -143,72 +134,64 @@ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
143
  5. Preserve the original JSON structure perfectly.
144
  6. Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
145
  """
146
- # 3. Construct User Prompt
147
- user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string} \n\n Translated JSON Output:"
148
 
149
- # 4. Call the LLM API
150
- raw_translated_json_string = "{}" # Default to empty JSON string
151
- try:
152
- model = genai.GenerativeModel('gemini-2.0-flash')
153
- full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
154
-
155
- response = model.generate_content(
156
- contents=full_prompt,
157
- generation_config={
158
- 'temperature': 0.3, # Low temp for adherence
159
- 'top_p': 1,
160
- 'top_k': 1,
161
- }
162
- # safety_settings=[...]
163
- )
164
-
165
- # Extract text safely and clean
166
- if response and response.parts:
167
- if hasattr(response.parts[0], 'text'):
168
- raw_translated_json_string = response.parts[0].text.strip()
169
- else:
170
- print(f"Warning: Received response part without text attribute: {response.parts[0]}")
171
- try: raw_translated_json_string = str(response.parts[0])
172
- except Exception as str_e: print(f"Could not convert response part to string: {str_e}")
173
- elif response and hasattr(response, 'text'):
174
- raw_translated_json_string = response.text.strip()
175
- else:
176
- print(f"Warning: Received unexpected or empty response format from API: {response}")
177
 
178
- # Clean potential markdown backticks
179
- if raw_translated_json_string.startswith("```json"): raw_translated_json_string = raw_translated_json_string[7:]
180
- if raw_translated_json_string.startswith("```"): raw_translated_json_string = raw_translated_json_string[3:]
181
- if raw_translated_json_string.endswith("```"): raw_translated_json_string = raw_translated_json_string[:-3]
182
- raw_translated_json_string = raw_translated_json_string.strip()
183
- # Ensure it's at least plausible JSON before parsing
184
- if not raw_translated_json_string: raw_translated_json_string = "{}"
185
 
 
 
 
 
 
 
 
 
186
 
187
- except Exception as e:
188
- print(f"Lỗi trong quá trình gọi API dịch: {e}")
189
- raw_translated_json_string = "{}" # Ensure empty JSON on error
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  print(raw_translated_json_string)
192
- # 5. Convert the LLM's JSON string response back to a dictionary
193
  translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
194
 
195
- # 6. Validation: Ensure output dict has same keys as input dict
196
  final_translated_dict = {}
197
  missing_keys = []
198
- for key in text_dict.keys(): # Iterate using ORIGINAL keys
199
  if key in translated_intermediate_dict:
200
  final_translated_dict[key] = translated_intermediate_dict[key]
201
  else:
202
- final_translated_dict[key] = "" # Preserve key, use empty string if missing
203
  missing_keys.append(key)
204
 
205
  if missing_keys:
206
- print(f"Warning: LLM response was missing keys: {sorted(missing_keys)}. Filled with empty strings.")
207
 
208
  extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
209
  if extra_keys:
210
- print(f"Warning: LLM response contained unexpected extra keys: {sorted(list(extra_keys))}. These were ignored.")
211
-
212
 
213
  return final_translated_dict
214
 
 
6
  import json
7
  import time
8
  from google.api_core.exceptions import ResourceExhausted
9
+ import re
10
 
11
  genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
12
 
 
85
  text_dict = {index: text for index, text in enumerate(text_list)}
86
  return text_dict
87
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese", max_retries=5, base_delay: float = 5.0):
90
  def _dict_to_json_string(d):
91
  json_compatible = {str(k): v for k, v in d.items()}
92
  try:
93
+ return json.dumps(json_compatible, ensure_ascii=False, separators=(',', ':'))
94
  except Exception as e:
95
  print(f"Internal Error (_dict_to_json_string): {e}")
96
  return "{}"
97
 
 
98
  def _json_string_to_dict(s):
99
  res_dict = {}
100
  if not s or not isinstance(s, str): return {}
101
  try:
102
  raw = json.loads(s)
103
  if not isinstance(raw, dict):
104
+ print(f"LLM response is not a JSON object: {s}")
105
+ return {}
106
  for k_str, v in raw.items():
107
  try:
108
  res_dict[int(k_str)] = v
109
  except ValueError:
110
+ print(f"Non-integer key '{k_str}' in LLM response.")
111
  except json.JSONDecodeError as e:
112
+ print(f"JSON decode error: {e}")
113
  except Exception as e:
114
+ print(f"General error: {e}")
115
  return res_dict
 
116
 
117
+ if not isinstance(text_dict, dict):
118
+ print("translate_text_dict expected a dict, got:", type(text_dict))
119
+ return {}
120
+ if not text_dict:
121
+ return {}
122
+
123
  json_input_string = _dict_to_json_string(text_dict)
 
124
  if json_input_string == "{}":
125
+ print("Empty or invalid dictionary input.")
126
+ return {key: "" for key in text_dict}
127
 
 
128
  system_prompt = f"""Translate the string values within the following JSON object .
129
  Follow these instructions carefully:
130
  1. Analyze the entire JSON object to understand the context.
 
134
  5. Preserve the original JSON structure perfectly.
135
  6. Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
136
  """
 
 
137
 
138
+ user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string}\n\nTranslated JSON Output:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
+ raw_translated_json_string = "{}"
141
+ retry_count = 0
142
+ while retry_count < max_retries:
143
+ try:
144
+ model = genai.GenerativeModel('gemini-2.0-flash')
145
+ full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
 
146
 
147
+ response = model.generate_content(
148
+ contents=full_prompt,
149
+ generation_config={
150
+ 'temperature': 0.3,
151
+ 'top_p': 1,
152
+ 'top_k': 1,
153
+ }
154
+ )
155
 
156
+ if response and response.parts and hasattr(response.parts[0], 'text'):
157
+ raw_translated_json_string = response.parts[0].text.strip()
158
+ elif hasattr(response, 'text'):
159
+ raw_translated_json_string = response.text.strip()
160
 
161
+ # Clean markdown wrappers if present
162
+ raw_translated_json_string = re.sub(r"^```(?:json)?|```$", "", raw_translated_json_string).strip()
163
+
164
+ if raw_translated_json_string:
165
+ break # Success, exit retry loop
166
+
167
+ except Exception as e:
168
+ wait_time = base_delay * (2 ** retry_count)
169
+ print(f"[Retry {retry_count+1}] Lỗi gọi API: {e}. Thử lại sau {wait_time:.2f} giây.")
170
+ time.sleep(wait_time)
171
+ retry_count += 1
172
+
173
+ if retry_count == max_retries:
174
+ print("❌ Hết số lần thử lại. Trả về JSON rỗng.")
175
+ raw_translated_json_string = "{}"
176
+
177
  print(raw_translated_json_string)
 
178
  translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
179
 
 
180
  final_translated_dict = {}
181
  missing_keys = []
182
+ for key in text_dict:
183
  if key in translated_intermediate_dict:
184
  final_translated_dict[key] = translated_intermediate_dict[key]
185
  else:
186
+ final_translated_dict[key] = ""
187
  missing_keys.append(key)
188
 
189
  if missing_keys:
190
+ print(f"Cảnh báo: Thiếu keys: {sorted(missing_keys)}.")
191
 
192
  extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
193
  if extra_keys:
194
+ print(f"Cảnh báo: keys không mong đợi: {sorted(extra_keys)}.")
 
195
 
196
  return final_translated_dict
197