Spaces:

mintlee
/

MT_deploy

Running

App Files Community

mintlee committed on Jun 3

Commit

314e765

1 Parent(s): 58fa02f

add fix exceed quota

Browse files

Files changed (1) hide show

utils/utils.py +21 -29

utils/utils.py CHANGED Viewed

@@ -7,7 +7,7 @@ import json
 import time
 from google.api_core.exceptions import ResourceExhausted
-genai.configure(api_key="AIzaSyBH8O5IfqYrJ5wtWnmUC21IfMjzJCrTm3I")
 def unzip_office_file(pptx_file: io.BytesIO):
@@ -26,57 +26,51 @@ def unzip_office_file(pptx_file: io.BytesIO):
 def translate_single_text(text: str, source_lang: str = 'English', target_lang: str = "Vietnamese",
-                          max_retries: int = 5, retry_delay_seconds: int = 3) -> str:
     if not text or not text.strip():
         return ""  # Bỏ qua nếu chuỗi rỗng hoặc chỉ chứa khoảng trắng
     retries = 0
     while retries <= max_retries:
         try:
-            model = genai.GenerativeModel('gemini-2.0-flash') # Hoặc 'gemini-1.0-pro', 'gemini-1.5-flash' tùy bạn chọn
-            system_prompt_simple = f"""You are a translation engine.
-            Translate the following text accurately from {source_lang} to {target_lang}.
-            Provide *only* the translated text as a single string.
-            Do NOT add any extra formatting, delimiters like '#', introductory phrases, or explanations."""
             user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. Text to translate: {text}"
-            full_prompt = system_prompt_simple.strip() + "\n\n" + user_prompt.strip()
             response = model.generate_content(
                 contents=full_prompt,
                 generation_config={
-                    'temperature': 0.2,
-                    'top_p': 1.0,
-                    'top_k': 1,
-                    # 'max_output_tokens': 2048,
                 }
             )
-            # Kiểm tra xem response có text không và có ứng viên không
             if response.candidates and response.candidates[0].content.parts:
                 translated_text = "".join(part.text for part in response.candidates[0].content.parts if hasattr(part, 'text')).strip()
                 return translated_text
             else:
-                print(f"Không nhận được nội dung hợp lệ từ API cho văn bản: '{text[:50]}...'")
-                # Không thử lại với lỗi này, trả về rỗng
                 return ""
-        except ResourceExhausted as e: # Bắt lỗi 429 (Too Many Requests / Quota Exceeded)
-            print(f"Lỗi quota (429) khi dịch '{text[:50]}...': {e}. Đang thử lại sau {retry_delay_seconds} giây. Lần thử {retries + 1}/{max_retries +1 }.")
-            if retries < max_retries:
-                time.sleep(retry_delay_seconds)
-                retries += 1
-            else:
-                print(f"Đã vượt quá số lần thử lại tối đa ({max_retries + 1}) cho '{text[:50]}...'. Bỏ qua.")
-                return "" # Trả về rỗng sau khi đã thử lại tối đa số lần
         except Exception as e:
-            print(f"Lỗi không mong muốn trong quá trình dịch (translate_single_text) cho '{text[:50]}...': {e}")
             return ""
-    return "" # Trường hợp không bao giờ nên xảy ra nếu logic vòng lặp đúng
 def preprocess_text(text_list):
     """
@@ -141,7 +135,6 @@ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
     system_prompt = f"""Translate the string values within the following JSON object .
         Follow these instructions carefully:
         1.  Analyze the entire JSON object to understand the context.
         2.  Translate *only* the string values.
@@ -149,7 +142,6 @@ def translate_text(text_dict, source_lang='English', target_lang="Vietnamese"):
         4.  Do *not* translate non-string values (like hex color codes, numbers, or potentially proper nouns like 'CALISTOGA', 'DM SANS', 'Pexels', 'Pixabay' unless they have a common translation). Use your best judgment for proper nouns.
         5.  Preserve the original JSON structure perfectly.
         6.  Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
     """
     # 3. Construct User Prompt
     user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string} \n\n Translated JSON Output:"

 import time
 from google.api_core.exceptions import ResourceExhausted
+genai.configure(api_key="AIzaSyDInJcxzqBvsh1avs4Zkxb4ZGBooNzOyEM")
 def unzip_office_file(pptx_file: io.BytesIO):
 def translate_single_text(text: str, source_lang: str = 'English', target_lang: str = "Vietnamese",
+                          max_retries: int = 5, base_delay: float = 5.0) -> str:
     if not text or not text.strip():
         return ""  # Bỏ qua nếu chuỗi rỗng hoặc chỉ chứa khoảng trắng
     retries = 0
     while retries <= max_retries:
         try:
+            model = genai.GenerativeModel('gemini-2.0-flash')  # hoặc 'gemini-1.5-flash'
+            system_prompt = f"""You are a translation engine.
+Translate the following text accurately from {source_lang} to {target_lang}.
+Provide *only* the translated text as a single string.
+Do NOT add any extra formatting, delimiters like '#', introductory phrases, or explanations."""
             user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. Text to translate: {text}"
+            full_prompt = system_prompt.strip() + "\n\n" + user_prompt.strip()
             response = model.generate_content(
                 contents=full_prompt,
                 generation_config={
+                    'temperature': 0.2,
+                    'top_p': 1.0,
+                    'top_k': 1,
                 }
             )
             if response.candidates and response.candidates[0].content.parts:
                 translated_text = "".join(part.text for part in response.candidates[0].content.parts if hasattr(part, 'text')).strip()
                 return translated_text
             else:
+                print(f"[!] Không nhận được nội dung hợp lệ từ API cho văn bản: '{text[:50]}...'")
                 return ""
+        except ResourceExhausted as e:
+            wait_time = base_delay * (2 ** retries)
+            print(f"[429] Quota exceeded khi dịch '{text[:50]}...'. Thử lại sau {wait_time:.1f}s (lần {retries + 1}/{max_retries + 1}).")
+            time.sleep(wait_time)
+            retries += 1
         except Exception as e:
+            print(f"[!] Lỗi không mong muốn khi dịch '{text[:50]}...': {e}")
             return ""
+    print(f"[x] Bỏ qua sau {max_retries + 1} lần thử không thành công cho '{text[:50]}...'.")
+    return ""
 def preprocess_text(text_list):
     """
     system_prompt = f"""Translate the string values within the following JSON object .
         Follow these instructions carefully:
         1.  Analyze the entire JSON object to understand the context.
         2.  Translate *only* the string values.
         4.  Do *not* translate non-string values (like hex color codes, numbers, or potentially proper nouns like 'CALISTOGA', 'DM SANS', 'Pexels', 'Pixabay' unless they have a common translation). Use your best judgment for proper nouns.
         5.  Preserve the original JSON structure perfectly.
         6.  Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
     """
     # 3. Construct User Prompt
     user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON String: {json_input_string} \n\n Translated JSON Output:"