Spaces:

mintlee
/

MT_deploy

Running

App Files Files Community

mintlee committed on Jun 3

Commit

f9becec

1 Parent(s): 314e765

add fix exceed quota

Browse files

Files changed (5) hide show

db/__pycache__/mongodb.cpython-310.pyc +0 -0
excel/__pycache__/excel_translate.cpython-310.pyc +0 -0
excel/__pycache__/xlsx.cpython-310.pyc +0 -0
utils/__pycache__/utils.cpython-310.pyc +0 -0
utils/utils.py +55 -72

db/__pycache__/mongodb.cpython-310.pyc CHANGED Viewed

Binary files a/db/__pycache__/mongodb.cpython-310.pyc and b/db/__pycache__/mongodb.cpython-310.pyc differ

excel/__pycache__/excel_translate.cpython-310.pyc CHANGED Viewed

Binary files a/excel/__pycache__/excel_translate.cpython-310.pyc and b/excel/__pycache__/excel_translate.cpython-310.pyc differ

excel/__pycache__/xlsx.cpython-310.pyc CHANGED Viewed

Binary files a/excel/__pycache__/xlsx.cpython-310.pyc and b/excel/__pycache__/xlsx.cpython-310.pyc differ

utils/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/utils.cpython-310.pyc and b/utils/__pycache__/utils.cpython-310.pyc differ

utils/utils.py CHANGED Viewed

@@ -6,6 +6,7 @@ import io
 import json
 import time
 from google.api_core.exceptions import ResourceExhausted
 genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
@@ -84,56 +85,46 @@ def preprocess_text(text_list):
     text_dict = {index: text for index, text in enumerate(text_list)}
     return text_dict
-def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
-    """
-    Translates the values of a dictionary {index: text} using an LLM.
-    It uses an intermediate JSON string format for reliable LLM interaction.
-    Returns a dictionary {index: translated_text} with the same keys.
-    """
-    if not isinstance(text_dict, dict):
-        print("Warning: translate_text_dict expected a dict, received:", type(text_dict))
-        return {}
-    if not text_dict:
-        return {}
-    # --- Internal Helper: Convert Dictionary to JSON String for LLM ---
     def _dict_to_json_string(d):
         json_compatible = {str(k): v for k, v in d.items()}
         try:
-            return json.dumps(json_compatible, ensure_ascii=False, separators=(',',':'))
         except Exception as e:
             print(f"Internal Error (_dict_to_json_string): {e}")
             return "{}"
-    # --- Internal Helper: Convert LLM's JSON String Response to Dictionary ---
     def _json_string_to_dict(s):
         res_dict = {}
         if not s or not isinstance(s, str): return {}
         try:
             raw = json.loads(s)
             if not isinstance(raw, dict):
-                 print(f"Internal Warning (_json_string_to_dict): LLM response is not a JSON object: {s}")
-                 return {}
             for k_str, v in raw.items():
                 try:
                     res_dict[int(k_str)] = v
                 except ValueError:
-                    print(f"Internal Warning (_json_string_to_dict): Non-integer key '{k_str}' in LLM response.")
         except json.JSONDecodeError as e:
-            print(f"Internal Error (_json_string_to_dict): Failed decoding JSON '{s}'. Error: {e}")
         except Exception as e:
-             print(f"Internal Error (_json_string_to_dict): {e}")
         return res_dict
-    # --- End Internal Helpers ---
-    # 1. Convert input dictionary to JSON string
     json_input_string = _dict_to_json_string(text_dict)
-    print(f"Input JSON String: {json_input_string}") # Debugging output
     if json_input_string == "{}":
-        print("Skipping translation due to empty input dictionary or conversion error.")
-        return {key: "" for key in text_dict} # Return original structure with empty values
     system_prompt = f"""Translate the string values within the following JSON object .
         Follow these instructions carefully:
         1.  Analyze the entire JSON object to understand the context.
@@ -143,72 +134,64 @@ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
         5.  Preserve the original JSON structure perfectly.
         6.  Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
     """
-    # 3. Construct User Prompt
-    user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string} \n\n Translated JSON Output:"
-    # 4. Call the LLM API
-    raw_translated_json_string = "{}" # Default to empty JSON string
-    try:
-        model = genai.GenerativeModel('gemini-2.0-flash')
-        full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
-        response = model.generate_content(
-            contents=full_prompt,
-            generation_config={
-                'temperature': 0.3, # Low temp for adherence
-                'top_p': 1,
-                'top_k': 1,
-            }
-            # safety_settings=[...]
-        )
-        # Extract text safely and clean
-        if response and response.parts:
-             if hasattr(response.parts[0], 'text'):
-                 raw_translated_json_string = response.parts[0].text.strip()
-             else:
-                 print(f"Warning: Received response part without text attribute: {response.parts[0]}")
-                 try: raw_translated_json_string = str(response.parts[0])
-                 except Exception as str_e: print(f"Could not convert response part to string: {str_e}")
-        elif response and hasattr(response, 'text'):
-             raw_translated_json_string = response.text.strip()
-        else:
-             print(f"Warning: Received unexpected or empty response format from API: {response}")
-        # Clean potential markdown backticks
-        if raw_translated_json_string.startswith("```json"): raw_translated_json_string = raw_translated_json_string[7:]
-        if raw_translated_json_string.startswith("```"): raw_translated_json_string = raw_translated_json_string[3:]
-        if raw_translated_json_string.endswith("```"): raw_translated_json_string = raw_translated_json_string[:-3]
-        raw_translated_json_string = raw_translated_json_string.strip()
-        # Ensure it's at least plausible JSON before parsing
-        if not raw_translated_json_string: raw_translated_json_string = "{}"
-    except Exception as e:
-        print(f"Lỗi trong quá trình gọi API dịch: {e}")
-        raw_translated_json_string = "{}" # Ensure empty JSON on error
     print(raw_translated_json_string)
-    # 5. Convert the LLM's JSON string response back to a dictionary
     translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
-    # 6. Validation: Ensure output dict has same keys as input dict
     final_translated_dict = {}
     missing_keys = []
-    for key in text_dict.keys(): # Iterate using ORIGINAL keys
         if key in translated_intermediate_dict:
             final_translated_dict[key] = translated_intermediate_dict[key]
         else:
-            final_translated_dict[key] = "" # Preserve key, use empty string if missing
             missing_keys.append(key)
     if missing_keys:
-        print(f"Warning: LLM response was missing keys: {sorted(missing_keys)}. Filled with empty strings.")
     extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
     if extra_keys:
-        print(f"Warning: LLM response contained unexpected extra keys: {sorted(list(extra_keys))}. These were ignored.")
     return final_translated_dict

 import json
 import time
 from google.api_core.exceptions import ResourceExhausted
+import re
 genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
     text_dict = {index: text for index, text in enumerate(text_list)}
     return text_dict
+def translate_text(text_dict, source_lang='English', target_lang="Vietnamese", max_retries=5, base_delay: float = 5.0):
     def _dict_to_json_string(d):
         json_compatible = {str(k): v for k, v in d.items()}
         try:
+            return json.dumps(json_compatible, ensure_ascii=False, separators=(',', ':'))
         except Exception as e:
             print(f"Internal Error (_dict_to_json_string): {e}")
             return "{}"
     def _json_string_to_dict(s):
         res_dict = {}
         if not s or not isinstance(s, str): return {}
         try:
             raw = json.loads(s)
             if not isinstance(raw, dict):
+                print(f"LLM response is not a JSON object: {s}")
+                return {}
             for k_str, v in raw.items():
                 try:
                     res_dict[int(k_str)] = v
                 except ValueError:
+                    print(f"Non-integer key '{k_str}' in LLM response.")
         except json.JSONDecodeError as e:
+            print(f"JSON decode error: {e}")
         except Exception as e:
+            print(f"General error: {e}")
         return res_dict
+    if not isinstance(text_dict, dict):
+        print("translate_text_dict expected a dict, got:", type(text_dict))
+        return {}
+    if not text_dict:
+        return {}
     json_input_string = _dict_to_json_string(text_dict)
     if json_input_string == "{}":
+        print("Empty or invalid dictionary input.")
+        return {key: "" for key in text_dict}
     system_prompt = f"""Translate the string values within the following JSON object .
         Follow these instructions carefully:
         1.  Analyze the entire JSON object to understand the context.
         5.  Preserve the original JSON structure perfectly.
         6.  Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
     """
+    user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string}\n\nTranslated JSON Output:"
+    raw_translated_json_string = "{}"
+    retry_count = 0
+    while retry_count < max_retries:
+        try:
+            model = genai.GenerativeModel('gemini-2.0-flash')
+            full_prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
+            response = model.generate_content(
+                contents=full_prompt,
+                generation_config={
+                    'temperature': 0.3,
+                    'top_p': 1,
+                    'top_k': 1,
+                }
+            )
+            if response and response.parts and hasattr(response.parts[0], 'text'):
+                raw_translated_json_string = response.parts[0].text.strip()
+            elif hasattr(response, 'text'):
+                raw_translated_json_string = response.text.strip()
+            # Clean markdown wrappers if present
+            raw_translated_json_string = re.sub(r"^```(?:json)?|```$", "", raw_translated_json_string).strip()
+            if raw_translated_json_string:
+                break  # Success, exit retry loop
+        except Exception as e:
+            wait_time = base_delay * (2 ** retry_count)
+            print(f"[Retry {retry_count+1}] Lỗi gọi API: {e}. Thử lại sau {wait_time:.2f} giây.")
+            time.sleep(wait_time)
+            retry_count += 1
+    if retry_count == max_retries:
+        print("❌ Hết số lần thử lại. Trả về JSON rỗng.")
+        raw_translated_json_string = "{}"
     print(raw_translated_json_string)
     translated_intermediate_dict = _json_string_to_dict(raw_translated_json_string)
     final_translated_dict = {}
     missing_keys = []
+    for key in text_dict:
         if key in translated_intermediate_dict:
             final_translated_dict[key] = translated_intermediate_dict[key]
         else:
+            final_translated_dict[key] = ""
             missing_keys.append(key)
     if missing_keys:
+        print(f"Cảnh báo: Thiếu keys: {sorted(missing_keys)}.")
     extra_keys = set(translated_intermediate_dict.keys()) - set(text_dict.keys())
     if extra_keys:
+        print(f"Cảnh báo: Có keys không mong đợi: {sorted(extra_keys)}.")
     return final_translated_dict