File size: 3,313 Bytes
0e9ff78
 
3f22460
 
0e9ff78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import json
from typing import Dict, List
import google.generativeai as genai


def translate_text_dict(
    text_dict: Dict[str, List[str]],
    source_lang: str,
    target_lang: str = "vi",
    gemini_api: str = "",
    batch_size: int = 30,
    model_name: str = "gemini-2.0-flash",
) -> Dict[str, List[str]]:
    """Translate the text fragments in ``text_dict`` with the Gemini API.

    The keys of ``text_dict`` are processed in batches of ``batch_size``. Each
    batch is serialized to JSON, embedded in a prompt and sent to the model;
    the JSON object found in the reply is merged into the result. A batch
    whose request fails, or whose reply contains no parseable JSON object, is
    reported on stdout and skipped, so the result can hold fewer keys than
    ``text_dict``.

    Args:
        text_dict: Mapping of key to the list of text fragments to translate.
        source_lang: Language of the input text, interpolated into the prompt.
        target_lang: Language to translate into, interpolated into the prompt.
        gemini_api: API key handed to the Gemini SDK.
        batch_size: Number of keys sent per request; tune it to the model's
            context window.
        model_name: Name of the Gemini model to query.

    Returns:
        The merged translations of every batch that succeeded. The content is
        whatever JSON object the model returned; it is not validated against
        the keys of ``text_dict``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if not text_dict:
        # Nothing to translate: skip SDK setup and network traffic entirely.
        return {}

    # This file imports the ``google.generativeai`` SDK, whose entry points
    # are ``configure`` and ``GenerativeModel``; ``genai.Client`` belongs to
    # the separate ``google-genai`` package and is not available here.
    # The model handle is built once and reused for every batch.
    genai.configure(api_key=gemini_api)
    model = genai.GenerativeModel(model_name)

    def extract_json_object(response_text: str) -> Dict[str, List[str]]:
        """Parse the outermost ``{...}`` span of ``response_text`` as JSON.

        Slicing from the first ``{`` to the last ``}`` tolerates replies that
        wrap the object in a Markdown code fence or surrounding prose.
        """
        start = response_text.find('{')
        end = response_text.rfind('}')
        # ``end < start`` also covers a missing ``}`` (rfind returns -1).
        if start == -1 or end < start:
            raise ValueError("Invalid JSON response from Gemini API: No object found.")
        return json.loads(response_text[start:end + 1])

    def translate_batch(batch_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Translate a single batch; return ``{}`` if the batch failed."""
        # ensure_ascii=False keeps non-ASCII source text readable in the
        # prompt instead of expanding every character to a \uXXXX escape.
        payload = json.dumps(batch_dict, ensure_ascii=False)
        prompt = f"""The following python dictionary contains pieces of text that form a whole document: {payload}
        The text is in {source_lang}, with a chance of there being phrases in other languages as well.

        Read through the entire dictionary, then translate the texts into {target_lang} so that the meaning is as close to the intended context as possible.

        Specialized jargon for which there are no direct translations, or names, titles, etc. should be kept whole if possible.
        Look at the entire dictionary as a whole for context so that the translation is as accurate as possible, and to determine if each text should be translated or not.

        Aim for brevity if possible so that the length of the translations match the length of the original texts, but prioritize accuracy above all .
        Return the translated texts formatted like the original dictionary. Do NOT say anthing else. Return it as a JSON block."""

        # Captured separately so the error handler never has to touch
        # ``response.text`` again (the accessor itself may raise).
        response_text = None
        try:
            # The request lives inside the try block so that a failed call
            # (rate limit, network error, ...) only skips this batch instead
            # of discarding the batches that were already translated.
            response = model.generate_content(prompt)
            response_text = response.text
            return extract_json_object(response_text)
        except ValueError as e:
            # json.JSONDecodeError is a ValueError subclass, so this covers
            # both "no object found" and malformed JSON.
            print(f"Error processing Gemini API response: {e}")
            print(f"Raw response text: {response_text}")
            return {}
        except Exception as e:
            # Best-effort boundary: report the failure and let the caller
            # continue with the remaining batches.
            print(f"An unexpected error occur: {e}")
            return {}

    translated_dict: Dict[str, List[str]] = {}
    keys = list(text_dict)

    for batch_index, offset in enumerate(range(0, len(keys), batch_size)):
        batch_dict = {key: text_dict[key] for key in keys[offset:offset + batch_size]}
        translated_batch = translate_batch(batch_dict)

        # An empty result marks a failed batch; merge successful ones only.
        if translated_batch:
            translated_dict.update(translated_batch)
        else:
            print(f"Skipping batch {batch_index} due to translation error.")

    return translated_dict