Update app.py
Browse files
app.py
CHANGED
@@ -101,10 +101,18 @@ def calculate_linguistic_metrics(text):
|
|
101 |
'unique_words': len(unique_words)
|
102 |
}
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
def translate_text(text, direction):
|
105 |
"""Main translation function with linguistic analysis"""
|
106 |
if not text.strip():
|
107 |
-
return "Please enter text to translate.", "",
|
108 |
|
109 |
start_time = time.time()
|
110 |
|
@@ -115,7 +123,7 @@ def translate_text(text, direction):
|
|
115 |
# Perform translation
|
116 |
if direction == "English → Siswati":
|
117 |
if en_ss_translator is None:
|
118 |
-
return "Translation model not loaded. Please try again.", "",
|
119 |
|
120 |
result = en_ss_translator(text, max_length=512)
|
121 |
translated_text = result[0]['translation_text']
|
@@ -127,7 +135,7 @@ def translate_text(text, direction):
|
|
127 |
|
128 |
else: # Siswati → English
|
129 |
if ss_en_translator is None:
|
130 |
-
return "Translation model not loaded. Please try again.", "",
|
131 |
|
132 |
result = ss_en_translator(text, max_length=512)
|
133 |
translated_text = result[0]['translation_text']
|
@@ -151,7 +159,7 @@ def translate_text(text, direction):
|
|
151 |
return translated_text, analysis_report, metrics_table
|
152 |
|
153 |
except Exception as e:
|
154 |
-
return f"Translation error: {str(e)}", "",
|
155 |
|
156 |
def create_analysis_report(source_metrics, target_metrics, siswati_features, processing_time, direction):
|
157 |
"""Create a comprehensive linguistic analysis report"""
|
@@ -209,7 +217,7 @@ def create_metrics_table(source_metrics, target_metrics, processing_time):
|
|
209 |
def secure_file_processing(file_obj, direction):
|
210 |
"""Securely process uploaded files with proper cleanup"""
|
211 |
if file_obj is None:
|
212 |
-
return "Please upload a file.",
|
213 |
|
214 |
# Create a unique temporary directory for this processing session
|
215 |
session_id = str(uuid.uuid4())
|
@@ -222,7 +230,7 @@ def secure_file_processing(file_obj, direction):
|
|
222 |
# Get file extension and validate
|
223 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
224 |
if file_ext not in ['.txt', '.csv']:
|
225 |
-
return "Only .txt and .csv files are supported.",
|
226 |
|
227 |
# Create secure temporary file path
|
228 |
temp_file_path = os.path.join(temp_dir, f"upload_{session_id}{file_ext}")
|
@@ -237,21 +245,21 @@ def secure_file_processing(file_obj, direction):
|
|
237 |
try:
|
238 |
df = pd.read_csv(temp_file_path)
|
239 |
if df.empty:
|
240 |
-
return "The uploaded CSV file is empty.",
|
241 |
# Assume first column contains text to translate
|
242 |
texts = df.iloc[:, 0].dropna().astype(str).tolist()
|
243 |
except Exception as e:
|
244 |
-
return f"Error reading CSV file: {str(e)}",
|
245 |
else: # .txt file
|
246 |
try:
|
247 |
with open(temp_file_path, 'r', encoding='utf-8') as f:
|
248 |
content = f.read()
|
249 |
texts = [line.strip() for line in content.split('\n') if line.strip()]
|
250 |
except Exception as e:
|
251 |
-
return f"Error reading text file: {str(e)}",
|
252 |
|
253 |
if not texts:
|
254 |
-
return "No text found in the uploaded file.",
|
255 |
|
256 |
# Limit batch size for performance and security
|
257 |
max_batch_size = 10
|
@@ -298,7 +306,7 @@ def secure_file_processing(file_obj, direction):
|
|
298 |
})
|
299 |
|
300 |
if not results:
|
301 |
-
return "No valid text entries found to translate.",
|
302 |
|
303 |
results_df = pd.DataFrame(results)
|
304 |
summary = f"Successfully processed {len(results)} text entries."
|
@@ -308,12 +316,13 @@ def secure_file_processing(file_obj, direction):
|
|
308 |
return summary, results_df
|
309 |
|
310 |
except Exception as e:
|
311 |
-
return f"Error processing file: {str(e)}",
|
312 |
|
313 |
finally:
|
314 |
# Clean up temporary files and directory
|
315 |
if temp_dir and os.path.exists(temp_dir):
|
316 |
try:
|
|
|
317 |
shutil.rmtree(temp_dir)
|
318 |
except Exception as e:
|
319 |
print(f"Warning: Could not clean up temporary directory: {e}")
|
@@ -389,14 +398,6 @@ def create_gradio_interface():
|
|
389 |
lines=4,
|
390 |
interactive=False
|
391 |
)
|
392 |
-
|
393 |
-
# Quick metrics display
|
394 |
-
with gr.Row():
|
395 |
-
processing_info = gr.Textbox(
|
396 |
-
label="Processing Info",
|
397 |
-
lines=1,
|
398 |
-
interactive=False
|
399 |
-
)
|
400 |
|
401 |
# Examples Section
|
402 |
gr.Markdown("### 📚 Example Translations")
|
|
|
101 |
'unique_words': len(unique_words)
|
102 |
}
|
103 |
|
104 |
+
def create_empty_metrics_table():
|
105 |
+
"""Create an empty metrics table for error cases"""
|
106 |
+
return pd.DataFrame({
|
107 |
+
'Metric': ['Words', 'Characters', 'Sentences', 'Unique Words', 'Avg Word Length', 'Lexical Diversity'],
|
108 |
+
'Source Text': [0, 0, 0, 0, '0.0', '0.000'],
|
109 |
+
'Target Text': [0, 0, 0, 0, '0.0', '0.000']
|
110 |
+
})
|
111 |
+
|
112 |
def translate_text(text, direction):
|
113 |
"""Main translation function with linguistic analysis"""
|
114 |
if not text.strip():
|
115 |
+
return "Please enter text to translate.", "No analysis available.", create_empty_metrics_table()
|
116 |
|
117 |
start_time = time.time()
|
118 |
|
|
|
123 |
# Perform translation
|
124 |
if direction == "English → Siswati":
|
125 |
if en_ss_translator is None:
|
126 |
+
return "Translation model not loaded. Please try again.", "Model loading failed.", create_empty_metrics_table()
|
127 |
|
128 |
result = en_ss_translator(text, max_length=512)
|
129 |
translated_text = result[0]['translation_text']
|
|
|
135 |
|
136 |
else: # Siswati → English
|
137 |
if ss_en_translator is None:
|
138 |
+
return "Translation model not loaded. Please try again.", "Model loading failed.", create_empty_metrics_table()
|
139 |
|
140 |
result = ss_en_translator(text, max_length=512)
|
141 |
translated_text = result[0]['translation_text']
|
|
|
159 |
return translated_text, analysis_report, metrics_table
|
160 |
|
161 |
except Exception as e:
|
162 |
+
return f"Translation error: {str(e)}", f"Analysis failed: {str(e)}", create_empty_metrics_table()
|
163 |
|
164 |
def create_analysis_report(source_metrics, target_metrics, siswati_features, processing_time, direction):
|
165 |
"""Create a comprehensive linguistic analysis report"""
|
|
|
217 |
def secure_file_processing(file_obj, direction):
|
218 |
"""Securely process uploaded files with proper cleanup"""
|
219 |
if file_obj is None:
|
220 |
+
return "Please upload a file.", pd.DataFrame()
|
221 |
|
222 |
# Create a unique temporary directory for this processing session
|
223 |
session_id = str(uuid.uuid4())
|
|
|
230 |
# Get file extension and validate
|
231 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
232 |
if file_ext not in ['.txt', '.csv']:
|
233 |
+
return "Only .txt and .csv files are supported.", pd.DataFrame()
|
234 |
|
235 |
# Create secure temporary file path
|
236 |
temp_file_path = os.path.join(temp_dir, f"upload_{session_id}{file_ext}")
|
|
|
245 |
try:
|
246 |
df = pd.read_csv(temp_file_path)
|
247 |
if df.empty:
|
248 |
+
return "The uploaded CSV file is empty.", pd.DataFrame()
|
249 |
# Assume first column contains text to translate
|
250 |
texts = df.iloc[:, 0].dropna().astype(str).tolist()
|
251 |
except Exception as e:
|
252 |
+
return f"Error reading CSV file: {str(e)}", pd.DataFrame()
|
253 |
else: # .txt file
|
254 |
try:
|
255 |
with open(temp_file_path, 'r', encoding='utf-8') as f:
|
256 |
content = f.read()
|
257 |
texts = [line.strip() for line in content.split('\n') if line.strip()]
|
258 |
except Exception as e:
|
259 |
+
return f"Error reading text file: {str(e)}", pd.DataFrame()
|
260 |
|
261 |
if not texts:
|
262 |
+
return "No text found in the uploaded file.", pd.DataFrame()
|
263 |
|
264 |
# Limit batch size for performance and security
|
265 |
max_batch_size = 10
|
|
|
306 |
})
|
307 |
|
308 |
if not results:
|
309 |
+
return "No valid text entries found to translate.", pd.DataFrame()
|
310 |
|
311 |
results_df = pd.DataFrame(results)
|
312 |
summary = f"Successfully processed {len(results)} text entries."
|
|
|
316 |
return summary, results_df
|
317 |
|
318 |
except Exception as e:
|
319 |
+
return f"Error processing file: {str(e)}", pd.DataFrame()
|
320 |
|
321 |
finally:
|
322 |
# Clean up temporary files and directory
|
323 |
if temp_dir and os.path.exists(temp_dir):
|
324 |
try:
|
325 |
+
import shutil
|
326 |
shutil.rmtree(temp_dir)
|
327 |
except Exception as e:
|
328 |
print(f"Warning: Could not clean up temporary directory: {e}")
|
|
|
398 |
lines=4,
|
399 |
interactive=False
|
400 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
|
402 |
# Examples Section
|
403 |
gr.Markdown("### 📚 Example Translations")
|