# MT_deploy/translate/translator.py
import json
from typing import Dict, List
import google.generativeai as genai
import dotenv
import os
dotenv.load_dotenv(".env")
def translate_text_dict(text_dict: Dict[str, List[str]], source_lang: str = "vi", target_lang: str = "vi") -> Dict[str, List[str]]:
    """Translates the values of text_dict to target_lang in batches, preserving keys.

    Note: source_lang is accepted for API symmetry but is not currently used in the prompt.
    """

    def translate_batch(batch_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Translates a single batch of text."""
        prompt = f"""The following python dictionary contains pieces of text that form a whole document: {json.dumps(batch_dict)}.
        Read through the entire dictionary, then translate the texts to {target_lang} so that the meaning is as close to the intended context as possible.
        Specialized jargon for which there are no direct translations, or names, titles, etc. should be kept whole if possible.
        Look at the entire dictionary as a whole for context so that the translation is as accurate as possible, and to determine if each text should be translated or not.
        Aim for brevity if possible so that the length of the translations matches the length of the original texts, but prioritize accuracy above all.
        Return the translated texts formatted like the original dictionary. Do NOT say anything else. Return it as a JSON block."""
        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
        model = genai.GenerativeModel(os.getenv("MODEL_VERSION"))  # Model name is read from the MODEL_VERSION environment variable; use one appropriate for your needs and API key.
        response = model.generate_content(prompt)
        # Handle potential errors in the response, including rate limits and invalid JSON.
        try:
            # More robust JSON parsing: handle code blocks, markdown, and other variations.
            response_text = response.text
            start = response_text.find('{')
            end = response_text.rfind('}') + 1
            if start == -1 or end == 0:  # rfind returns -1 when no '}' is found, so end would be 0.
                raise ValueError("Invalid JSON response from Gemini API: No object found.")
            json_string = response_text[start:end]
            trans_dict = json.loads(json_string)
            return trans_dict
        except (ValueError, json.JSONDecodeError) as e:
            print(f"Error processing Gemini API response: {e}")
            print(f"Raw response text: {response.text}")  # Print the raw response for debugging.
            return {}  # Return an empty dict on error (or raise, depending on your needs).
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return {}
    batch_size = 30  # Adjust as needed, based on testing and Gemini's context window limits.
    translated_dict = {}
    keys = list(text_dict.keys())

    # Process in batches
    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i + batch_size]
        batch_dict = {key: text_dict[key] for key in batch_keys}
        translated_batch = translate_batch(batch_dict)

        # Merge results
        if translated_batch:  # Only merge if the translation was successful.
            translated_dict.update(translated_batch)
        else:
            print(f"Skipping batch {i // batch_size} due to translation error.")

    return translated_dict
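
# Example usage: a minimal sketch for local testing, not part of the deployed
# module. The sample dictionary and the "en" target language are illustrative
# assumptions; GEMINI_API_KEY and MODEL_VERSION must be set in .env for the
# API call to succeed.
if __name__ == "__main__":
    sample = {
        "title": ["Báo cáo thường niên 2023"],
        "body": ["Doanh thu tăng 12% so với năm trước."],
    }
    result = translate_text_dict(sample, source_lang="vi", target_lang="en")
    print(json.dumps(result, ensure_ascii=False, indent=2))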