Update app.py
app.py CHANGED
@@ -89,8 +89,26 @@ class Translators:
             do_sample=True
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(response)
         return response.split("Translation:")[-1].strip()
 
+    def flan(self):
+        tokenizer = T5Tokenizer.from_pretrained(self.model_name, legacy=False)
+        model = T5ForConditionalGeneration.from_pretrained(self.model_name)
+        prompt = f"translate {self.sl} to {self.tl}: {self.input_text}"
+        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+        outputs = model.generate(input_ids)
+        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+
+    def tfive(self):
+        tokenizer = T5Tokenizer.from_pretrained(self.model_name)
+        model = T5ForConditionalGeneration.from_pretrained(self.model_name, device_map="auto")
+        prompt = f"translate {self.sl} to {self.tl}: {self.input_text}"
+        input_ids = tokenizer.encode(prompt, return_tensors="pt")
+        output_ids = model.generate(input_ids, max_length=512)
+        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
+        return translated_text
+
 def mtom(model_name, sl, tl, input_text):
     from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
     model = M2M100ForConditionalGeneration.from_pretrained(model_name)
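The new flan and tfive methods read self.model_name, self.sl, self.tl, and self.input_text, but the Translators constructor itself is not part of this diff. Judging from the call sites added further down (Translators(model_name, sselected_language, tselected_language, input_text).tfive()), it presumably looks something like the minimal sketch below; the attribute names are inferred from how the methods use self, not confirmed by the commit.

class Translators:
    # Hypothetical constructor inferred from the call sites in translate_text;
    # the real __init__ lives elsewhere in app.py and is not shown in this diff.
    def __init__(self, model_name, sl, tl, input_text):
        self.model_name = model_name  # Hub model ID, e.g. "t5-small"
        self.sl = sl                  # source language name
        self.tl = tl                  # target language name
        self.input_text = input_text  # text to translate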
@@ -143,23 +161,6 @@ def HelsinkiNLP(sl, tl, input_text):
     except KeyError as error:
         return f"Error: Translation direction {sl} to {tl} is not supported by Helsinki Translation Models", error
 
-def flan(model_name, sl, tl, input_text):
-    tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
-    model = T5ForConditionalGeneration.from_pretrained(model_name)
-    input_text = f"translate {sl} to {tl}: {input_text}"
-    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
-    outputs = model.generate(input_ids)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-
-def tfive(model_name, sl, tl, input_text):
-    tokenizer = T5Tokenizer.from_pretrained(model_name)
-    model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
-    prompt = f"translate {sl} to {tl}: {input_text}"
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    output_ids = model.generate(input_ids, max_length=512)
-    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return translated_text
-
 def teuken(model_name, sl, tl, input_text):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = AutoModelForCausalLM.from_pretrained(
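Both the removed module-level functions and their new method counterparts build the same T5-style text-to-text prompt, f"translate {sl} to {tl}: {input_text}". For reference, a self-contained run of that pattern with the standard transformers API (the checkpoint "t5-small" and the example sentence are only illustrative):

from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")
# T5 takes the task as a plain-text prefix in front of the input.
input_ids = tokenizer("translate English to German: Hello world", return_tensors="pt").input_ids
output_ids = model.generate(input_ids, max_length=512)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))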
@@ -336,9 +337,12 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
 
     elif model_name == "utter-project/EuroLLM-1.7B":
         translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
-
+
+    elif model_name.startswith('t5'):
+        translated_text = Translators(model_name, sselected_language, tselected_language, input_text).tfive()
+
     elif 'flan' in model_name.lower():
-        translated_text = flan(model_name, sselected_language, tselected_language, input_text)
+        translated_text = Translators(model_name, sselected_language, tselected_language, input_text).flan()
 
     elif 'teuken' in model_name.lower():
         translated_text = teuken(model_name, sselected_language, tselected_language, input_text)
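Branch order matters in the new elif chain: model_name.startswith('t5') is tested before 'flan' in model_name.lower(), and a FLAN checkpoint such as "google/flan-t5-base" does not start with "t5", so the two model families are routed apart. A quick standalone check of that logic (the model IDs here are illustrative):

# Mirrors the new dispatch in translate_text, reduced to the two branches.
for model_name in ("t5-small", "google/flan-t5-base"):
    if model_name.startswith('t5'):
        branch = "tfive"
    elif 'flan' in model_name.lower():
        branch = "flan"
    print(f"{model_name} -> {branch}")
# t5-small -> tfive
# google/flan-t5-base -> flan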
@@ -364,9 +368,6 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
 
     elif 'Unbabel' in model_name:
         translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
-
-    elif model_name.startswith('t5'):
-        translated_text = tfive(model_name, sselected_language, tselected_language, input_text)
 
     elif model_name == "HuggingFaceTB/SmolLM3-3B":
         translated_text = Translators(model_name, sselected_language, tselected_language, input_text).smollm()