Update app.py
Browse files
app.py
CHANGED
@@ -161,8 +161,9 @@ def tokenize_text(text, chosen_model, better_tokenization=False):
|
|
161 |
|
162 |
return gr.HighlightedText(output, color_map)
|
163 |
|
164 |
-
leaderboard_description = """The
|
165 |
-
dataset
|
|
|
166 |
"""
|
167 |
|
168 |
with gr.Blocks() as demo:
|
|
|
161 |
|
162 |
return gr.HighlightedText(output, color_map)
|
163 |
|
164 |
+
leaderboard_description = """The `Total Number of Tokens` in this leaderboard is based on the total number of tokens summed over the Arabic section of the [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations) dataset.
|
165 |
+
This dataset was chosen because it represents Arabic Fusha text in a small and concentrated manner.
|
166 |
+
A tokenizer that scores high in this leaderboard will be efficient in parsing Arabic in its different dialects and forms.
|
167 |
"""
|
168 |
|
169 |
with gr.Blocks() as demo:
|