Spaces:
Running
Running
add fix translate word
Browse files
word/__pycache__/word_helper.cpython-310.pyc
CHANGED
|
Binary files a/word/__pycache__/word_helper.cpython-310.pyc and b/word/__pycache__/word_helper.cpython-310.pyc differ
|
|
|
word/word_helper.py
CHANGED
|
@@ -26,65 +26,21 @@ def batch_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
|
|
| 26 |
if not texts:
|
| 27 |
return texts # Skip if empty
|
| 28 |
|
| 29 |
-
system_prompt = """
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
5. Preserve spaces before or after strings. Do not remove, merge, split, or omit any strings.
|
| 38 |
-
6. Translate paragraphs and ensure the translation makes sense when text is put together.
|
| 39 |
-
7. Translate split words so that the word is not split in the translation.
|
| 40 |
-
8. Return a JSON object that is a Python dictionary containing as many items as the original JSON file, with keys and order preserved.
|
| 41 |
-
9. The output must be a syntactically correct Python dictionary.
|
| 42 |
-
|
| 43 |
-
Additional Examples:
|
| 44 |
-
**Input 1**:
|
| 45 |
-
- Source language: English
|
| 46 |
-
- Target language: Vietnamese
|
| 47 |
-
- JSON file:
|
| 48 |
-
```json
|
| 49 |
-
{"0": "My name is ", "1": "Huy", "2": ".", "3": " Today is ", "4": "a ", "5": "good day", "6": ".", "7": ""}
|
| 50 |
-
```
|
| 51 |
-
**Output 1**:
|
| 52 |
-
```json
|
| 53 |
-
{"0": "Tên tôi là ", "1": "Huy", "2": ".", "3": " Hôm nay là ", "4": "một ", "5": "ngày đẹp", "6": ".", "7": ""}
|
| 54 |
-
```
|
| 55 |
-
|
| 56 |
-
**Input 2**:
|
| 57 |
-
- Source language: English
|
| 58 |
-
- Target language: Spanish
|
| 59 |
-
- JSON file:
|
| 60 |
-
```json
|
| 61 |
-
{"0": "The sky is ", "1": "blue", "2": ".", "3": " Water is ", "4": "essential", "5": " for ", "6": "life", "7": "."}
|
| 62 |
-
```
|
| 63 |
-
**Output 2**:
|
| 64 |
-
```json
|
| 65 |
-
{"0": "El cielo es ", "1": "azul", "2": ".", "3": " El agua es ", "4": "esencial", "5": " para ", "6": "la vida", "7": "."}
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
**Input 3**:
|
| 69 |
-
- Source language: English
|
| 70 |
-
- Target language: French
|
| 71 |
-
- JSON file:
|
| 72 |
-
```json
|
| 73 |
-
{"0": "The quick brown ", "1": "fox ", "2": "jumps ", "3": "over ", "4": "the ", "5": "lazy ", "6": "dog", "7": "."}
|
| 74 |
-
```
|
| 75 |
-
**Output 3**:
|
| 76 |
-
```json
|
| 77 |
-
{"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux", "7": "."}
|
| 78 |
-
```
|
| 79 |
-
|
| 80 |
-
Perform the translation and return the result as specified above. Do not include any additional text other than the translated JSON object.
|
| 81 |
"""
|
| 82 |
json_data = json.dumps({i: t for i, t in enumerate(texts)})
|
| 83 |
user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON file: {json_data}"
|
| 84 |
|
| 85 |
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 86 |
response = model.generate_content(contents = system_prompt.strip() + "\n" + user_prompt.strip(), generation_config={
|
| 87 |
-
'temperature':
|
| 88 |
'top_p': 1,
|
| 89 |
'top_k': 1,})
|
| 90 |
# response_dict = ast.literal_eval(response.text.strip().strip("json```").strip("```").strip().strip("\""))
|
|
|
|
| 26 |
if not texts:
|
| 27 |
return texts # Skip if empty
|
| 28 |
|
| 29 |
+
system_prompt = f"""Translate the string values within the following JSON object .
|
| 30 |
+
Follow these instructions carefully:
|
| 31 |
+
1. Analyze the entire JSON object to understand the context.
|
| 32 |
+
2. Translate *only* the string values.
|
| 33 |
+
3. Keep the original keys *exactly* as they are.
|
| 34 |
+
4. Do *not* translate non-string values (like hex color codes, numbers, or potentially proper nouns like 'CALISTOGA', 'DM SANS', 'Pexels', 'Pixabay' unless they have a common translation). Use your best judgment for proper nouns.
|
| 35 |
+
5. Preserve the original JSON structure perfectly.
|
| 36 |
+
6. Your output *must* be only the translated JSON object, without any introductory text, explanations, or markdown formatting like ```json ... ```.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
json_data = json.dumps({i: t for i, t in enumerate(texts)})
|
| 39 |
user_prompt = f"Source language: {source_lang}. Target language: {target_lang}. JSON file: {json_data}"
|
| 40 |
|
| 41 |
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 42 |
response = model.generate_content(contents = system_prompt.strip() + "\n" + user_prompt.strip(), generation_config={
|
| 43 |
+
'temperature': 0.3, # Adjust temperature for desired creativity
|
| 44 |
'top_p': 1,
|
| 45 |
'top_k': 1,})
|
| 46 |
# response_dict = ast.literal_eval(response.text.strip().strip("json```").strip("```").strip().strip("\""))
|