import json
from typing import Dict, List
import google.generativeai as genai
import dotenv 
import os

dotenv.load_dotenv(".env")
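# Assumption based on the os.getenv calls below: the .env file loaded above is
# expected to define GEMINI_API_KEY and MODEL_VERSION.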


def translate_text_dict(text_dict: Dict[str, List[str]], source_lang: str = "vi", target_lang: str = "vi") -> Dict[str, List[str]]:
    def translate_batch(batch_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Translates a single batch of text."""
        prompt = f"""The following python dictionary contains pieces of text that form a whole document: {json.dumps(batch_dict)}.

        Read through the entire dictionary, then translate the texts from {source_lang} to {target_lang} so that the meaning is as close to the intended context as possible.

        Specialized jargon for which there are no direct translations, or names, titles, etc. should be kept whole if possible.
        Look at the entire dictionary as a whole for context so that the translation is as accurate as possible, and to determine if each text should be translated or not.

        Aim for brevity if possible so that the length of the translations matches the length of the original texts, but prioritize accuracy above all.
        Return the translated texts formatted like the original dictionary. Do NOT say anything else. Return it as a JSON block."""

        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
        model = genai.GenerativeModel(os.getenv("MODEL_VERSION"))

        response = model.generate_content(prompt)  # The model name comes from the MODEL_VERSION environment variable; pick one supported by your API key.

        # Handle potential errors in the response, including rate limits and invalid JSON.
        try:
            # More robust JSON parsing:  Handle code blocks, markdown, and other variations.
            response_text = response.text
            start = response_text.find('{')
            end = response_text.rfind('}') + 1

            if start == -1 or end == 0:  # rfind returns -1 when '}' is missing, so end would be 0
                raise ValueError("Invalid JSON response from Gemini API: No object found.")

            json_string = response_text[start:end]
            trans_dict = json.loads(json_string)
            return trans_dict
        except (ValueError, json.JSONDecodeError) as e:
            print(f"Error processing Gemini API response: {e}")
            print(f"Raw response text: {response.text}") # Print the raw response for debugging
            return {}  # Return an empty dict on error (or raise, depending on your needs)
        except Exception as e:
            print(f"An unexpected error occur: {e}")
            return {}
        

    batch_size = 30  # Adjust as needed, based on testing and Gemini's context window limits
    translated_dict = {}
    keys = list(text_dict.keys())

    # Process in batches
    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i + batch_size]
        batch_dict = {key: text_dict[key] for key in batch_keys}
        translated_batch = translate_batch(batch_dict)

        # Merge results
        if translated_batch: # Only merge if the translation was successful
            translated_dict.update(translated_batch)
        else:
            print(f"Skipping batch {i // batch_size} due to translation error.")
    
    return translated_dict
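

# Minimal usage sketch. Assumption: the sample dictionary below is purely
# illustrative, and GEMINI_API_KEY / MODEL_VERSION must be set in .env for
# the call to succeed.
if __name__ == "__main__":
    sample = {
        "title": ["Getting started"],
        "body": ["Hello world.", "This is a short example paragraph."],
    }
    translated = translate_text_dict(sample, source_lang="en", target_lang="vi")
    print(json.dumps(translated, ensure_ascii=False, indent=2))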