Spaces:
Running
Running
'fix'
Browse files
word/__pycache__/word_helper.cpython-310.pyc
CHANGED
Binary files a/word/__pycache__/word_helper.cpython-310.pyc and b/word/__pycache__/word_helper.cpython-310.pyc differ
|
|
word/word_helper.py
CHANGED
@@ -96,102 +96,6 @@ def batch_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
|
|
96 |
def response_to_dict(response):
    """Parse a model reply that holds one dictionary and return its values.

    The reply text may wrap the dictionary in a Markdown code fence
    (```json ... ```), in quotes, or in extra commentary; only the span from
    the first "{" to the last "}" is parsed.

    Args:
        response: Any object exposing the reply as a ``text`` string attribute.

    Returns:
        list: The dictionary's values, in the order they appear in the reply.

    Raises:
        ValueError: If the text contains no ``{...}`` span, or the span is
            malformed.
        SyntaxError: If the span is neither valid JSON nor a Python literal.
        AttributeError: If the parsed object is not a dictionary.
    """
    import json  # local import: the file's import block is not visible here

    raw = response.text
    # Slice out the dictionary instead of chaining str.strip(chars): strip()
    # removes a *set of characters* from both ends, so it cannot skip text
    # placed before the fence and only works by accident on "```json".
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("No dictionary found in the response text.")
    payload = raw[start:end + 1]

    try:
        # Real JSON first: handles null/true/false and \uXXXX surrogate pairs,
        # which ast.literal_eval rejects or decodes differently.
        parsed = json.loads(payload)
    except ValueError:
        # json.JSONDecodeError is a ValueError. Replies are sometimes Python
        # dict literals (single quotes, unquoted integer keys), so keep the
        # original literal_eval path as the fallback.
        parsed = ast.literal_eval(payload)
    return list(parsed.values())
|
98 |
|
99 |
-
def fix_translate(texts, translated_text):
|
100 |
-
""" Translates multiple text segments in a single API call. """
|
101 |
-
if not texts:
|
102 |
-
return texts # Skip if empty
|
103 |
-
|
104 |
-
system_prompt = """
|
105 |
-
You are given the original JSON dictionary and the translated response text. Your task is to ensure that the translated text is in the correct format and has the same number of items as the original JSON dictionary.
|
106 |
-
|
107 |
-
Steps to follow:
|
108 |
-
1. Parse the original and translated JSON dictionaries.
|
109 |
-
2. Ensure that the keys in both dictionaries are strings (i.e., "1" instead of 1).
|
110 |
-
3. Compare the number of items in both dictionaries.
|
111 |
-
4. If the number of items in the translated dictionary is not equal to the number of items in the original dictionary, adjust the translated dictionary by:
|
112 |
-
a. Adding missing items with empty strings if there are fewer items.
|
113 |
-
b. Merging or splitting items to ensure correspondence with the original items if there are more items.
|
114 |
-
5. Ensure that each item in the translated dictionary is in the correct order, with the same key as the original item.
|
115 |
-
6. Preserve any leading or trailing spaces in the original strings.
|
116 |
-
7. Ensure the output is a syntactically correct Python dictionary, with proper opening and closing braces.
|
117 |
-
8. If the translated dictionary is already correct, return it as is.
|
118 |
-
9. Return the corrected JSON dictionary in proper Python dictionary format.
|
119 |
-
|
120 |
-
Example Inputs and Outputs:
|
121 |
-
|
122 |
-
**Input:**
|
123 |
-
- Original JSON dictionary:
|
124 |
-
```json
|
125 |
-
{"0": "My name is ", "1": "Huy", "2": ".", "3": " Today is ", "4": "a ", "5": "good day", "6": ".", "7": ""}
|
126 |
-
```
|
127 |
-
- Translated response text with fewer items:
|
128 |
-
```json
|
129 |
-
{"0": "Tên tôi là ", "1": "Huy", "2": ".", "3": "Hôm nay ", "4": "là một ", "5": "ngày đẹp", "6": "."}
|
130 |
-
```
|
131 |
-
|
132 |
-
**Output:**
|
133 |
-
```json
|
134 |
-
{"0": "Tên tôi là ", "1": "Huy", "2": ".", "3": "Hôm nay ", "4": "là một ", "5": "ngày đẹp", "6": ".", "7": ""}
|
135 |
-
```
|
136 |
-
|
137 |
-
**Input:**
|
138 |
-
- Original JSON dictionary:
|
139 |
-
```json
|
140 |
-
{"0": "The sky is ", "1": "blue", "2": ".", "3": " Water is ", "4": "essential", "5": " for ", "6": "life", "7": "."}
|
141 |
-
```
|
142 |
-
- Translated response text with more items:
|
143 |
-
```json
|
144 |
-
{"0": "El cielo es ", "1": "azul", "2": ".", "3": " El agua es ", "4": "esencial", "5": " para ", "6": "la", "7": " vida", "8": "."}
|
145 |
-
```
|
146 |
-
|
147 |
-
**Output:**
|
148 |
-
```json
|
149 |
-
{"0": "El cielo es ", "1": "azul", "2": ".", "3": " El agua es ", "4": "esencial", "5": " para ", "6": "la vida", "7": "."}
|
150 |
-
```
|
151 |
-
|
152 |
-
**Input:**
|
153 |
-
- Original JSON dictionary:
|
154 |
-
```json
|
155 |
-
{"0": "The quick brown ", "1": "fox ", "2": "jumps ", "3": "over ", "4": "the ", "5": "lazy ", "6": "dog", "7": "."}
|
156 |
-
```
|
157 |
-
- Translated response text with issues:
|
158 |
-
```json
|
159 |
-
{"0": "Le renard ", "1": "brun ", 2: "rapide ", 3: "saute ", 4: "par-dessus ", "5": "le ", "6": "chien ", "7": "paresseux", 8: "."}
|
160 |
-
```
|
161 |
-
|
162 |
-
**Output:**
|
163 |
-
```json
|
164 |
-
{"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux", "7": "."}
|
165 |
-
```
|
166 |
-
|
167 |
-
**Input:**
|
168 |
-
- Original JSON dictionary:
|
169 |
-
```json
|
170 |
-
{"0": "The quick brown ", "1": "fox ", "2": "jumps ", "3": "over ", "4": "the ", "5": "lazy ", "6": "dog."}
|
171 |
-
```
|
172 |
-
- Translated response text with wrong formatting:
|
173 |
-
```json
|
174 |
-
{"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux".}
|
175 |
-
```
|
176 |
-
|
177 |
-
**Output:**
|
178 |
-
```json
|
179 |
-
{"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux."}
|
180 |
-
```
|
181 |
-
|
182 |
-
Perform the corrections and return the result as a properly formatted Python dictionary.
|
183 |
-
"""
|
184 |
-
json_data = json.dumps({i: t for i, t in enumerate(texts)})
|
185 |
-
user_prompt = f"Original JSON dictionary: {json_data}. Translated response text: {translated_text}"
|
186 |
-
|
187 |
-
model = genai.GenerativeModel('gemini-2.0-flash')
|
188 |
-
response = model.generate_content(contents = system_prompt.strip() + "\n" + user_prompt.strip(), generation_config={
|
189 |
-
'temperature': 1, # Adjust temperature for desired creativity
|
190 |
-
'top_p': 1,
|
191 |
-
'top_k': 1,})
|
192 |
-
return response_to_dict(response)
|
193 |
-
# return response
|
194 |
-
|
195 |
def brute_force_fix(batch, translated_batch):
|
196 |
if len(batch) > len(translated_batch):
|
197 |
translated_batch += [""] * (len(batch) - len(translated_batch))
|
@@ -211,14 +115,15 @@ def batch_translate_loop(batch, source_lang, target_lang):
|
|
211 |
for i in range(10):
|
212 |
print(f'I am ChatGPT and I am retarded, retrying translation time {i}:')
|
213 |
try:
|
214 |
-
|
|
|
215 |
assert(len(translated_batch) == len(batch))
|
216 |
-
break
|
217 |
except:
|
218 |
pass
|
219 |
|
220 |
try:
|
221 |
-
|
222 |
except:
|
223 |
raise ValueError("The translated batch is not a list.")
|
224 |
|
@@ -244,6 +149,7 @@ def get_batches(texts, limit = 1000):
|
|
244 |
word_count += len(string)
|
245 |
|
246 |
batches.append(batch)
|
|
|
247 |
return batches
|
248 |
|
249 |
def full_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
|
|
|
96 |
def response_to_dict(response):
    """Parse a model reply that holds one dictionary and return its values.

    The reply text may wrap the dictionary in a Markdown code fence
    (```json ... ```), in quotes, or in extra commentary; only the span from
    the first "{" to the last "}" is parsed.

    Args:
        response: Any object exposing the reply as a ``text`` string attribute.

    Returns:
        list: The dictionary's values, in the order they appear in the reply.

    Raises:
        ValueError: If the text contains no ``{...}`` span, or the span is
            malformed.
        SyntaxError: If the span is neither valid JSON nor a Python literal.
        AttributeError: If the parsed object is not a dictionary.
    """
    import json  # local import: the file's import block is not visible here

    raw = response.text
    # Slice out the dictionary instead of chaining str.strip(chars): strip()
    # removes a *set of characters* from both ends, so it cannot skip text
    # placed before the fence and only works by accident on "```json".
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("No dictionary found in the response text.")
    payload = raw[start:end + 1]

    try:
        # Real JSON first: handles null/true/false and \uXXXX surrogate pairs,
        # which ast.literal_eval rejects or decodes differently.
        parsed = json.loads(payload)
    except ValueError:
        # json.JSONDecodeError is a ValueError. Replies are sometimes Python
        # dict literals (single quotes, unquoted integer keys), so keep the
        # original literal_eval path as the fallback.
        parsed = ast.literal_eval(payload)
    return list(parsed.values())
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
def brute_force_fix(batch, translated_batch):
|
100 |
if len(batch) > len(translated_batch):
|
101 |
translated_batch += [""] * (len(batch) - len(translated_batch))
|
|
|
115 |
for i in range(10):
|
116 |
print(f'I am ChatGPT and I am retarded, retrying translation time {i}:')
|
117 |
try:
|
118 |
+
translated_batch_response = batch_translate(batch, source_lang, target_lang)
|
119 |
+
translated_batch = response_to_dict(translated_batch_response)
|
120 |
assert(len(translated_batch) == len(batch))
|
121 |
+
break
|
122 |
except:
|
123 |
pass
|
124 |
|
125 |
try:
|
126 |
+
assert(isinstance(response_to_dict(translated_batch_response), list))
|
127 |
except:
|
128 |
raise ValueError("The translated batch is not a list.")
|
129 |
|
|
|
149 |
word_count += len(string)
|
150 |
|
151 |
batches.append(batch)
|
152 |
+
|
153 |
return batches
|
154 |
|
155 |
def full_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
|