# MT_deploy/translate/translator.py
import json
from typing import Dict, List
import google.generativeai as genai
import dotenv
import os
dotenv.load_dotenv(".env")
def translate_text_dict(text_dict: Dict[str, List[str]], source_lang: str = "vi", target_lang: str = "vi") -> Dict[str, List[str]]:
    """Translates the values of text_dict to target_lang in batches, preserving keys.

    Note: source_lang is accepted for API symmetry but is not currently used in the prompt.
    """

    def translate_batch(batch_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Translates a single batch of text."""
        prompt = f"""The following python dictionary contains pieces of text that form a whole document: {json.dumps(batch_dict)}.
        Read through the entire dictionary, then translate the texts to {target_lang} so that the meaning is as close to the intended context as possible.
        Specialized jargon for which there are no direct translations, or names, titles, etc. should be kept whole if possible.
        Look at the entire dictionary as a whole for context so that the translation is as accurate as possible, and to determine if each text should be translated or not.
        Aim for brevity if possible so that the length of the translations matches the length of the original texts, but prioritize accuracy above all.
        Return the translated texts formatted like the original dictionary. Do NOT say anything else. Return it as a JSON block."""
        genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
        model = genai.GenerativeModel(os.getenv("MODEL_VERSION"))  # Model name is read from the MODEL_VERSION environment variable; use one appropriate for your needs and API key.
        response = model.generate_content(prompt)
        # Handle potential errors in the response, including rate limits and invalid JSON.
        try:
            # More robust JSON parsing: handle code blocks, markdown, and other variations.
            response_text = response.text
            start = response_text.find('{')
            end = response_text.rfind('}') + 1
            if start == -1 or end == 0:  # rfind returns -1 when no '}' is found, so end would be 0.
                raise ValueError("Invalid JSON response from Gemini API: No object found.")
            json_string = response_text[start:end]
            trans_dict = json.loads(json_string)
            return trans_dict
        except (ValueError, json.JSONDecodeError) as e:
            print(f"Error processing Gemini API response: {e}")
            print(f"Raw response text: {response.text}")  # Print the raw response for debugging.
            return {}  # Return an empty dict on error (or raise, depending on your needs).
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return {}
    batch_size = 30  # Adjust as needed, based on testing and Gemini's context window limits.
    translated_dict = {}
    keys = list(text_dict.keys())

    # Process in batches
    for i in range(0, len(keys), batch_size):
        batch_keys = keys[i:i + batch_size]
        batch_dict = {key: text_dict[key] for key in batch_keys}
        translated_batch = translate_batch(batch_dict)

        # Merge results
        if translated_batch:  # Only merge if the translation was successful.
            translated_dict.update(translated_batch)
        else:
            print(f"Skipping batch {i // batch_size} due to translation error.")

    return translated_dict
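
# Example usage: a minimal sketch for local testing, not part of the deployed
# module. The sample dictionary and the "en" target language are illustrative
# assumptions; GEMINI_API_KEY and MODEL_VERSION must be set in .env for the
# API call to succeed.
if __name__ == "__main__":
    sample = {
        "title": ["Báo cáo thường niên 2023"],
        "body": ["Doanh thu tăng 12% so với năm trước."],
    }
    result = translate_text_dict(sample, source_lang="vi", target_lang="en")
    print(json.dumps(result, ensure_ascii=False, indent=2))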