import json
import os
from typing import Dict, List

import dotenv
import google.generativeai as genai

dotenv.load_dotenv(".env")
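# The script expects a .env file next to it supplying the API key and model name.
# Example contents (the values below are placeholders; the variable names are the ones
# os.getenv reads further down):
#   GEMINI_API_KEY=your-api-key-here
#   MODEL_VERSION=gemini-1.5-pro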
def translate_text_dict(text_dict: Dict[str, List[str]], source_lang: str = "vi", target_lang: str = "vi") -> Dict[str, List[str]]:
    """Translates the values of text_dict into target_lang, batch by batch.

    Note: source_lang is accepted but not currently used in the prompt.
    """

    def translate_batch(batch_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Translates a single batch of text."""
        prompt = f"""The following Python dictionary contains pieces of text that form a whole document: {json.dumps(batch_dict)}.
Read through the entire dictionary, then translate the texts to {target_lang} so that the meaning is as close to the intended context as possible.
Specialized jargon with no direct translation, as well as names, titles, etc., should be kept whole where possible.
Consider the dictionary as a whole for context so that the translation is as accurate as possible, and to decide whether each text should be translated at all.
Aim for brevity so that the length of each translation matches the length of the original text, but prioritize accuracy above all.
Return the translated texts formatted like the original dictionary. Do NOT say anything else. Return it as a JSON block."""
        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
        model = genai.GenerativeModel(os.getenv("MODEL_VERSION"))
        # Use a model appropriate for your needs and API key; gemini-1.5-pro is a good general-purpose choice.
        response = model.generate_content(prompt)
        # Handle potential errors in the response, including rate limits and invalid JSON.
        try:
            # More robust JSON parsing: handle code fences, markdown, and other variations
            # by extracting the outermost {...} object from the response text.
            response_text = response.text
            start = response_text.find('{')
            end = response_text.rfind('}')
            if start == -1 or end == -1:
                raise ValueError("Invalid JSON response from Gemini API: no object found.")
            json_string = response_text[start:end + 1]
            trans_dict = json.loads(json_string)
            return trans_dict
        except (ValueError, json.JSONDecodeError) as e:
            print(f"Error processing Gemini API response: {e}")
            print(f"Raw response text: {response.text}")  # Print the raw response for debugging
            return {}  # Return an empty dict on error (or raise, depending on your needs)
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return {}
    batch_size = 30  # Adjust as needed, based on testing and Gemini's context window limits
    translated_dict = {}
    keys = list(text_dict.keys())

    # Process in batches
    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i + batch_size]
        batch_dict = {key: text_dict[key] for key in batch_keys}
        translated_batch = translate_batch(batch_dict)

        # Merge results
        if translated_batch:  # Only merge if the translation was successful
            translated_dict.update(translated_batch)
        else:
            print(f"Skipping batch {i // batch_size} due to translation error.")

    return translated_dict
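
# A minimal usage sketch, not part of the original module: the sample dictionary and
# language choices below are made up for illustration, and it assumes the .env file
# described above is in place.
if __name__ == "__main__":
    sample = {
        "title": ["Annual report"],
        "body": ["Revenue grew this quarter.", "Costs were reduced."],
    }
    translated = translate_text_dict(sample, source_lang="en", target_lang="vi")
    print(json.dumps(translated, ensure_ascii=False, indent=2))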