mintlee committed on
Commit
571ba8a
·
1 Parent(s): bdcb5e5
word/__pycache__/word_helper.cpython-310.pyc CHANGED
Binary files a/word/__pycache__/word_helper.cpython-310.pyc and b/word/__pycache__/word_helper.cpython-310.pyc differ
 
word/word_helper.py CHANGED
@@ -96,102 +96,6 @@ def batch_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
96
  def response_to_dict(response):
97
  return list(ast.literal_eval(response.text.strip().strip("json```").strip("```").strip().strip("\"")).values())
98
 
99
- def fix_translate(texts, translated_text):
100
- """ Translates multiple text segments in a single API call. """
101
- if not texts:
102
- return texts # Skip if empty
103
-
104
- system_prompt = """
105
- You are given the original JSON dictionary and the translated response text. Your task is to ensure that the translated text is in the correct format and has the same number of items as the original JSON dictionary.
106
-
107
- Steps to follow:
108
- 1. Parse the original and translated JSON dictionaries.
109
- 2. Ensure that the keys in both dictionaries are strings (i.e., "1" instead of 1).
110
- 3. Compare the number of items in both dictionaries.
111
- 4. If the number of items in the translated dictionary is not equal to the number of items in the original dictionary, adjust the translated dictionary by:
112
- a. Adding missing items with empty strings if there are fewer items.
113
- b. Merging or splitting items to ensure correspondence with the original items if there are more items.
114
- 5. Ensure that each item in the translated dictionary is in the correct order, with the same key as the original item.
115
- 6. Preserve any leading or trailing spaces in the original strings.
116
- 7. Ensure the output is a syntactically correct Python dictionary, with proper opening and closing braces.
117
- 8. If the translated dictionary is already correct, return it as is.
118
- 9. Return the corrected JSON dictionary in proper Python dictionary format.
119
-
120
- Example Inputs and Outputs:
121
-
122
- **Input:**
123
- - Original JSON dictionary:
124
- ```json
125
- {"0": "My name is ", "1": "Huy", "2": ".", "3": " Today is ", "4": "a ", "5": "good day", "6": ".", "7": ""}
126
- ```
127
- - Translated response text with fewer items:
128
- ```json
129
- {"0": "Tên tôi là ", "1": "Huy", "2": ".", "3": "Hôm nay ", "4": "là một ", "5": "ngày đẹp", "6": "."}
130
- ```
131
-
132
- **Output:**
133
- ```json
134
- {"0": "Tên tôi là ", "1": "Huy", "2": ".", "3": "Hôm nay ", "4": "là một ", "5": "ngày đẹp", "6": ".", "7": ""}
135
- ```
136
-
137
- **Input:**
138
- - Original JSON dictionary:
139
- ```json
140
- {"0": "The sky is ", "1": "blue", "2": ".", "3": " Water is ", "4": "essential", "5": " for ", "6": "life", "7": "."}
141
- ```
142
- - Translated response text with more items:
143
- ```json
144
- {"0": "El cielo es ", "1": "azul", "2": ".", "3": " El agua es ", "4": "esencial", "5": " para ", "6": "la", "7": " vida", "8": "."}
145
- ```
146
-
147
- **Output:**
148
- ```json
149
- {"0": "El cielo es ", "1": "azul", "2": ".", "3": " El agua es ", "4": "esencial", "5": " para ", "6": "la vida", "7": "."}
150
- ```
151
-
152
- **Input:**
153
- - Original JSON dictionary:
154
- ```json
155
- {"0": "The quick brown ", "1": "fox ", "2": "jumps ", "3": "over ", "4": "the ", "5": "lazy ", "6": "dog", "7": "."}
156
- ```
157
- - Translated response text with issues:
158
- ```json
159
- {"0": "Le renard ", "1": "brun ", 2: "rapide ", 3: "saute ", 4: "par-dessus ", "5": "le ", "6": "chien ", "7": "paresseux", 8: "."}
160
- ```
161
-
162
- **Output:**
163
- ```json
164
- {"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux", "7": "."}
165
- ```
166
-
167
- **Input:**
168
- - Original JSON dictionary:
169
- ```json
170
- {"0": "The quick brown ", "1": "fox ", "2": "jumps ", "3": "over ", "4": "the ", "5": "lazy ", "6": "dog."}
171
- ```
172
- - Translated response text with wrong formatting:
173
- ```json
174
- {"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux".}
175
- ```
176
-
177
- **Output:**
178
- ```json
179
- {"0": "Le renard brun ", "1": "rapide ", "2": "saute ", "3": "par-dessus ", "4": "le ", "5": "chien ", "6": "paresseux."}
180
- ```
181
-
182
- Perform the corrections and return the result as a properly formatted Python dictionary.
183
- """
184
- json_data = json.dumps({i: t for i, t in enumerate(texts)})
185
- user_prompt = f"Original JSON dictionary: {json_data}. Translated response text: {translated_text}"
186
-
187
- model = genai.GenerativeModel('gemini-2.0-flash')
188
- response = model.generate_content(contents = system_prompt.strip() + "\n" + user_prompt.strip(), generation_config={
189
- 'temperature': 1, # Adjust temperature for desired creativity
190
- 'top_p': 1,
191
- 'top_k': 1,})
192
- return response_to_dict(response)
193
- # return response
194
-
195
  def brute_force_fix(batch, translated_batch):
196
  if len(batch) > len(translated_batch):
197
  translated_batch += [""] * (len(batch) - len(translated_batch))
@@ -211,14 +115,15 @@ def batch_translate_loop(batch, source_lang, target_lang):
211
  for i in range(10):
212
  print(f'I am ChatGPT and I am retarded, retrying translation time {i}:')
213
  try:
214
- translated_batch = fix_translate(batch, translated_batch_response.text.strip().strip("json```").strip("```").strip().strip("\""))
 
215
  assert(len(translated_batch) == len(batch))
216
- break
217
  except:
218
  pass
219
 
220
  try:
221
- translated_batch = response_to_dict(translated_batch_response)
222
  except:
223
  raise ValueError("The translated batch is not a list.")
224
 
@@ -244,6 +149,7 @@ def get_batches(texts, limit = 1000):
244
  word_count += len(string)
245
 
246
  batches.append(batch)
 
247
  return batches
248
 
249
  def full_translate(texts, source_lang = 'English', target_lang="Vietnamese"):
 
96
  def response_to_dict(response):
97
  return list(ast.literal_eval(response.text.strip().strip("json```").strip("```").strip().strip("\"")).values())
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def brute_force_fix(batch, translated_batch):
100
  if len(batch) > len(translated_batch):
101
  translated_batch += [""] * (len(batch) - len(translated_batch))
 
115
  for i in range(10):
116
  print(f'I am ChatGPT and I am retarded, retrying translation time {i}:')
117
  try:
118
+ translated_batch_response = batch_translate(batch, source_lang, target_lang)
119
+ translated_batch = response_to_dict(translated_batch_response)
120
  assert(len(translated_batch) == len(batch))
121
+ break
122
  except:
123
  pass
124
 
125
  try:
126
+ assert(isinstance(response_to_dict(translated_batch_response), list))
127
  except:
128
  raise ValueError("The translated batch is not a list.")
129
 
 
149
  word_count += len(string)
150
 
151
  batches.append(batch)
152
+
153
  return batches
154
 
155
  def full_translate(texts, source_lang = 'English', target_lang="Vietnamese"):