Charif El Belghiti committed on
Commit
abea1d0
·
1 Parent(s): d70c1cd

Add Streamlit app

Browse files
Files changed (2) hide show
  1. app.py +58 -0
  2. requirements.txt +193 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end for translating Moroccan Darija text with a fine-tuned mBART model.

The sidebar exposes the generation parameters (beam size, maximum output
length, source and target language codes); the main area holds the input text
on the left and the translation, with a download button, on the right.
"""

import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50Tokenizer

# Hugging Face Hub repository of the fine-tuned model; make sure this path is correct.
model_path = "echarif/mBART_for_darija_transaltion"

# Interface setup. This stays ahead of every other Streamlit call, including the
# cached loader below, because set_page_config must be the first Streamlit command.
st.set_page_config(page_title="Darija to English Translator", page_icon="🌐", layout="wide")


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Load the mBART model and its tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-reading the weights on each rerun.

    Args:
        path: Model identifier or local directory passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        MBartForConditionalGeneration.from_pretrained(path),
        MBart50Tokenizer.from_pretrained(path),
    )


# Download (first run only) and reuse the model and tokenizer.
model, tokenizer = _load_model_and_tokenizer(model_path)

#st.image("logo.png", width=200)

st.title("Darija to English Translation App 🌐")
st.markdown(
    """
### Welcome to the Darija to English Translation App! 🌍
This app uses **mBART** for translating Moroccan Darija to English with advanced NLP capabilities.
"""
)

# Parameters setting sidebar
st.sidebar.header("Model Parameters")
beam_size = st.sidebar.slider("Beam Size", 1, 10, 5)  # Default: 5
max_length = st.sidebar.slider("Max Length", 10, 200, 50)  # Default: 50
src_lang = st.sidebar.selectbox("Source Language", ["ar_AR", "fr_XX"], index=0)
tgt_lang = st.sidebar.selectbox("Target Language", ["en_XX", "es_XX"], index=0)

# Enter and translate text
col1, col2 = st.columns(2)

with col1:
    st.header("Input Text")
    input_text = st.text_area("Darija Text:", "")

with col2:
    st.header("Translation")
    if st.button("Translate"):
        if input_text.strip():
            try:
                # Preparing text for translation
                tokenizer.src_lang = src_lang
                inputs = tokenizer(input_text, return_tensors="pt", padding=True)
                outputs = model.generate(
                    **inputs,
                    max_length=max_length,
                    num_beams=beam_size,
                    # Force the decoder to start with the target-language token.
                    forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
                )
                # Keep the result in session state: clicking the download button
                # triggers a rerun in which the Translate button reads False, so a
                # plain local variable would be gone by then.
                st.session_state["translation"] = tokenizer.decode(
                    outputs[0], skip_special_tokens=True
                )
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the
                # app, and drop any earlier result so it is not shown next to the error.
                st.session_state.pop("translation", None)
                st.error(f"An error occurred: {e}")
        else:
            st.session_state.pop("translation", None)
            st.warning("Please enter text to translate!")

    # Show the latest translation and the button to download it
    translation = st.session_state.get("translation")
    if translation is not None:
        st.success("Translation:")
        st.write(translation)
        st.download_button("Download Translation", data=translation, file_name="translation.txt")
requirements.txt ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ aiohappyeyeballs==2.4.0
3
+ aiohttp==3.10.5
4
+ aiosignal==1.3.1
5
+ altair==5.5.0
6
+ annotated-types==0.7.0
7
+ anyio==4.4.0
8
+ argon2-cffi==23.1.0
9
+ argon2-cffi-bindings==21.2.0
10
+ arrow==1.3.0
11
+ astroid==3.3.5
12
+ asttokens==2.4.1
13
+ astunparse==1.6.3
14
+ async-lru==2.0.4
15
+ attrs==24.2.0
16
+ babel==2.16.0
17
+ beautifulsoup4==4.12.3
18
+ bleach==6.2.0
19
+ blinker==1.9.0
20
+ cachetools==5.5.1
21
+ certifi==2024.8.30
22
+ cffi==1.17.1
23
+ charset-normalizer==3.3.2
24
+ click==8.1.8
25
+ colorama==0.4.6
26
+ comm==0.2.2
27
+ contourpy==1.3.0
28
+ cycler==0.12.1
29
+ debugpy==1.8.5
30
+ decorator==5.1.1
31
+ defusedxml==0.7.1
32
+ dill==0.3.9
33
+ distro==1.9.0
34
+ et-xmlfile==1.1.0
35
+ executing==2.0.1
36
+ fastjsonschema==2.21.1
37
+ filelock==3.17.0
38
+ flatbuffers==24.3.25
39
+ fonttools==4.53.1
40
+ fqdn==1.5.1
41
+ frozenlist==1.4.1
42
+ fsspec==2025.2.0
43
+ gast==0.6.0
44
+ gitdb==4.0.12
45
+ GitPython==3.1.44
46
+ google-pasta==0.2.0
47
+ greenlet==3.1.1
48
+ grpcio==1.66.1
49
+ h11==0.14.0
50
+ h5py==3.11.0
51
+ httpcore==1.0.5
52
+ httpx==0.27.2
53
+ huggingface-hub==0.28.1
54
+ idna==3.8
55
+ ipykernel==6.29.5
56
+ ipython==8.26.0
57
+ isoduration==20.11.0
58
+ isort==5.13.2
59
+ jedi==0.19.1
60
+ Jinja2==3.1.5
61
+ jiter==0.5.0
62
+ json5==0.10.0
63
+ jsonpointer==3.0.0
64
+ jsonschema==4.23.0
65
+ jsonschema-specifications==2024.10.1
66
+ jupyter-events==0.11.0
67
+ jupyter-lsp==2.2.5
68
+ jupyter_client==8.6.2
69
+ jupyter_core==5.7.2
70
+ jupyter_server==2.15.0
71
+ jupyter_server_terminals==0.5.3
72
+ jupyterlab==4.3.4
73
+ jupyterlab_pygments==0.3.0
74
+ jupyterlab_server==2.27.3
75
+ keras==3.5.0
76
+ keyboard==0.13.5
77
+ kiwisolver==1.4.7
78
+ libclang==18.1.1
79
+ Markdown==3.7
80
+ markdown-it-py==3.0.0
81
+ MarkupSafe==2.1.5
82
+ matplotlib==3.9.2
83
+ matplotlib-inline==0.1.7
84
+ mccabe==0.7.0
85
+ mdurl==0.1.2
86
+ mistune==3.1.0
87
+ ml-dtypes==0.4.0
88
+ mpmath==1.3.0
89
+ multidict==6.0.5
90
+ namex==0.0.8
91
+ narwhals==1.25.0
92
+ nbclient==0.10.2
93
+ nbconvert==7.16.5
94
+ nbformat==5.10.4
95
+ nest-asyncio==1.6.0
96
+ networkx==3.4.2
97
+ notebook==7.3.2
98
+ notebook_shim==0.2.4
99
+ numpy==1.26.4
100
+ openai==0.28.0
101
+ opencv-python==4.10.0.84
102
+ openpyxl==3.1.5
103
+ opt-einsum==3.3.0
104
+ optree==0.12.1
105
+ outcome==1.3.0.post0
106
+ overrides==7.7.0
107
+ packaging==24.1
108
+ pandas==2.2.2
109
+ pandocfilters==1.5.1
110
+ parso==0.8.4
111
+ pillow==10.4.0
112
+ platformdirs==4.2.2
113
+ playwright==1.49.0
114
+ prometheus_client==0.21.1
115
+ prompt_toolkit==3.0.47
116
+ protobuf==4.25.4
117
+ psutil==6.0.0
118
+ pure_eval==0.2.3
119
+ pyarrow==19.0.0
120
+ pycparser==2.22
121
+ pydantic==2.9.1
122
+ pydantic_core==2.23.3
123
+ pydeck==0.9.1
124
+ pyee==12.0.0
125
+ pygame==2.6.0
126
+ Pygments==2.18.0
127
+ pylint==3.3.1
128
+ pyparsing==3.1.4
129
+ PySocks==1.7.1
130
+ python-dateutil==2.9.0.post0
131
+ python-json-logger==3.2.1
132
+ pytz==2024.1
133
+ pywin32==306
134
+ pywinpty==2.0.14
135
+ PyYAML==6.0.2
136
+ pyzbar==0.1.9
137
+ pyzmq==26.1.1
138
+ referencing==0.36.1
139
+ regex==2024.11.6
140
+ requests==2.32.3
141
+ rfc3339-validator==0.1.4
142
+ rfc3986-validator==0.1.1
143
+ rich==13.8.1
144
+ rpds-py==0.22.3
145
+ safetensors==0.5.2
146
+ seaborn==0.13.2
147
+ selenium==4.27.1
148
+ Send2Trash==1.8.3
149
+ sentencepiece==0.2.0
150
+ setuptools==74.1.2
151
+ six==1.16.0
152
+ smmap==5.0.2
153
+ sniffio==1.3.1
154
+ sortedcontainers==2.4.0
155
+ soupsieve==2.6
156
+ stack-data==0.6.3
157
+ streamlit==1.41.1
158
+ sympy==1.13.1
159
+ tenacity==9.0.0
160
+ tensorboard==2.17.1
161
+ tensorboard-data-server==0.7.2
162
+ tensorflow==2.17.0
163
+ tensorflow-intel==2.17.0
164
+ termcolor==2.4.0
165
+ terminado==0.18.1
166
+ tinycss2==1.4.0
167
+ tokenizers==0.21.0
168
+ toml==0.10.2
169
+ tomlkit==0.13.2
170
+ torch==2.6.0
171
+ torchaudio==2.6.0
172
+ torchvision==0.21.0
173
+ tornado==6.4.1
174
+ tqdm==4.66.5
175
+ traitlets==5.14.3
176
+ transformers==4.48.2
177
+ trio==0.27.0
178
+ trio-websocket==0.11.1
179
+ types-python-dateutil==2.9.0.20241206
180
+ typing_extensions==4.12.2
181
+ tzdata==2024.1
182
+ uri-template==1.3.0
183
+ urllib3==2.2.2
184
+ watchdog==6.0.0
185
+ wcwidth==0.2.13
186
+ webcolors==24.11.1
187
+ webencodings==0.5.1
188
+ websocket-client==1.8.0
189
+ Werkzeug==3.0.4
190
+ wheel==0.44.0
191
+ wrapt==1.16.0
192
+ wsproto==1.2.0
193
+ yarl==1.11.0