Update app.py
Browse files
app.py
CHANGED
@@ -11,9 +11,12 @@ BN_TO_EN_MODEL = "csebuetnlp/banglat5_nmt_bn_en"
|
|
11 |
NORMALIZER_REPO = "https://github.com/csebuetnlp/normalizer.git"
|
12 |
|
13 |
# --- Helper function to install/import normalizer ---
|
14 |
-
# This ensures the normalizer is available.
|
15 |
-
# In HF Spaces, requirements.txt is the primary method.
|
16 |
normalizer_module = None
|
|
|
|
|
|
|
|
|
|
|
17 |
try:
|
18 |
from normalizer import normalize as normalize_fn_imported
|
19 |
normalizer_module = normalize_fn_imported
|
@@ -21,8 +24,6 @@ try:
|
|
21 |
except ImportError:
|
22 |
print(f"Normalizer library not found. Attempting to install from {NORMALIZER_REPO}...")
|
23 |
try:
|
24 |
-
# This command installs the package directly from git.
|
25 |
-
# The #egg=normalizer part helps pip identify the package name.
|
26 |
subprocess.check_call([sys.executable, "-m", "pip", "install", f"git+{NORMALIZER_REPO}#egg=normalizer"])
|
27 |
from normalizer import normalize as normalize_fn_imported_after_install
|
28 |
normalizer_module = normalize_fn_imported_after_install
|
@@ -30,10 +31,9 @@ except ImportError:
|
|
30 |
except Exception as e:
|
31 |
print(f"Failed to install or import normalizer: {e}")
|
32 |
print("Please ensure 'git+https://github.com/csebuetnlp/normalizer.git#egg=normalizer' is in your requirements.txt for Hugging Face Spaces.")
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
normalizer_module = dummy_normalize
|
37 |
|
38 |
# --- Model Loading (Globally, when the script starts) ---
|
39 |
sylheti_to_bn_pipe = None
|
@@ -63,8 +63,6 @@ try:
|
|
63 |
|
64 |
except Exception as e:
|
65 |
print(f"FATAL: Error loading one or more models: {e}")
|
66 |
-
# To prevent the app from crashing entirely if models don't load,
|
67 |
-
# but it will show errors during translation.
|
68 |
sylheti_to_bn_pipe = None
|
69 |
bn_to_en_model = None
|
70 |
bn_to_en_tokenizer = None
|
@@ -78,7 +76,9 @@ def translate_sylheti_to_english_gradio(sylheti_text_input):
|
|
78 |
return "Error: Sylheti-to-Bengali model not loaded. Check logs.", ""
|
79 |
if not bn_to_en_model or not bn_to_en_tokenizer:
|
80 |
return "Error: Bengali-to-English model not loaded. Check logs.", ""
|
81 |
-
|
|
|
|
|
82 |
return "Error: Bengali normalizer library not available. Check logs.", ""
|
83 |
|
84 |
|
@@ -99,18 +99,24 @@ def translate_sylheti_to_english_gradio(sylheti_text_input):
|
|
99 |
except Exception as e:
|
100 |
print(f"Error during Sylheti to Bengali translation: {e}")
|
101 |
bengali_text_intermediate = f"Sylheti->Bengali Error: {str(e)}"
|
102 |
-
return bengali_text_intermediate, english_text_final
|
103 |
|
104 |
# Step 2: Bengali → English
|
105 |
try:
|
106 |
print(f"Normalizing and translating Bengali to English: '{bengali_text_intermediate}'")
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
print(f"Normalized Bengali: '{normalized_bn_text}'")
|
109 |
|
110 |
input_ids = bn_to_en_tokenizer(
|
111 |
normalized_bn_text,
|
112 |
return_tensors="pt"
|
113 |
-
).input_ids.to(model_device)
|
114 |
|
115 |
generated_tokens = bn_to_en_model.generate(
|
116 |
input_ids,
|
@@ -152,11 +158,10 @@ iface = gr.Interface(
|
|
152 |
["আফনে ভালা আছনি?"]
|
153 |
],
|
154 |
allow_flagging="never",
|
155 |
-
|
|
|
156 |
)
|
157 |
|
158 |
# --- Launch the Gradio app ---
|
159 |
if __name__ == "__main__":
|
160 |
-
# When running locally, this launches the server.
|
161 |
-
# In Hugging Face Spaces, the `app.py` is typically run by their infrastructure.
|
162 |
iface.launch()
|
|
|
11 |
NORMALIZER_REPO = "https://github.com/csebuetnlp/normalizer.git"
|
12 |
|
13 |
# --- Helper function to install/import normalizer ---
|
|
|
|
|
14 |
normalizer_module = None
|
15 |
+
dummy_normalizer_flag = False # Flag to indicate if dummy is used
|
16 |
+
|
17 |
+
def dummy_normalize_func(text): # Define the dummy function clearly
|
18 |
+
raise RuntimeError("Normalizer library could not be loaded. Please check installation and logs.")
|
19 |
+
|
20 |
try:
|
21 |
from normalizer import normalize as normalize_fn_imported
|
22 |
normalizer_module = normalize_fn_imported
|
|
|
24 |
except ImportError:
|
25 |
print(f"Normalizer library not found. Attempting to install from {NORMALIZER_REPO}...")
|
26 |
try:
|
|
|
|
|
27 |
subprocess.check_call([sys.executable, "-m", "pip", "install", f"git+{NORMALIZER_REPO}#egg=normalizer"])
|
28 |
from normalizer import normalize as normalize_fn_imported_after_install
|
29 |
normalizer_module = normalize_fn_imported_after_install
|
|
|
31 |
except Exception as e:
|
32 |
print(f"Failed to install or import normalizer: {e}")
|
33 |
print("Please ensure 'git+https://github.com/csebuetnlp/normalizer.git#egg=normalizer' is in your requirements.txt for Hugging Face Spaces.")
|
34 |
+
normalizer_module = dummy_normalize_func # Assign the actual dummy function
|
35 |
+
dummy_normalizer_flag = True
|
36 |
+
|
|
|
37 |
|
38 |
# --- Model Loading (Globally, when the script starts) ---
|
39 |
sylheti_to_bn_pipe = None
|
|
|
63 |
|
64 |
except Exception as e:
|
65 |
print(f"FATAL: Error loading one or more models: {e}")
|
|
|
|
|
66 |
sylheti_to_bn_pipe = None
|
67 |
bn_to_en_model = None
|
68 |
bn_to_en_tokenizer = None
|
|
|
76 |
return "Error: Sylheti-to-Bengali model not loaded. Check logs.", ""
|
77 |
if not bn_to_en_model or not bn_to_en_tokenizer:
|
78 |
return "Error: Bengali-to-English model not loaded. Check logs.", ""
|
79 |
+
|
80 |
+
# Check if the normalizer is the dummy function
|
81 |
+
if dummy_normalizer_flag or normalizer_module is None:
|
82 |
return "Error: Bengali normalizer library not available. Check logs.", ""
|
83 |
|
84 |
|
|
|
99 |
except Exception as e:
|
100 |
print(f"Error during Sylheti to Bengali translation: {e}")
|
101 |
bengali_text_intermediate = f"Sylheti->Bengali Error: {str(e)}"
|
102 |
+
return bengali_text_intermediate, english_text_final
|
103 |
|
104 |
# Step 2: Bengali → English
|
105 |
try:
|
106 |
print(f"Normalizing and translating Bengali to English: '{bengali_text_intermediate}'")
|
107 |
+
# Ensure normalizer_module is callable before calling
|
108 |
+
if callable(normalizer_module):
|
109 |
+
normalized_bn_text = normalizer_module(bengali_text_intermediate)
|
110 |
+
else:
|
111 |
+
# This case should ideally be caught by the check above, but as a safeguard:
|
112 |
+
raise RuntimeError("Normalizer function is not callable.")
|
113 |
+
|
114 |
print(f"Normalized Bengali: '{normalized_bn_text}'")
|
115 |
|
116 |
input_ids = bn_to_en_tokenizer(
|
117 |
normalized_bn_text,
|
118 |
return_tensors="pt"
|
119 |
+
).input_ids.to(model_device)
|
120 |
|
121 |
generated_tokens = bn_to_en_model.generate(
|
122 |
input_ids,
|
|
|
158 |
["আফনে ভালা আছনি?"]
|
159 |
],
|
160 |
allow_flagging="never",
|
161 |
+
cache_examples=False, # Explicitly disable example caching
|
162 |
+
theme=gr.themes.Soft()
|
163 |
)
|
164 |
|
165 |
# --- Launch the Gradio app ---
|
166 |
if __name__ == "__main__":
|
|
|
|
|
167 |
iface.launch()
|