MohamedRashad committed on
Commit
e4cac44
·
verified ·
1 Parent(s): a9aadc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -161,8 +161,9 @@ def tokenize_text(text, chosen_model, better_tokenization=False):
161
 
162
  return gr.HighlightedText(output, color_map)
163
 
164
- leaderboard_description = """The numbers in this leaderboard are based on the total number of tokens in the Arabic
165
- dataset [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations).
 
166
  """
167
 
168
  with gr.Blocks() as demo:
 
161
 
162
  return gr.HighlightedText(output, color_map)
163
 
164
+ leaderboard_description = """The `Total Number of Tokens` in this leaderboard is based on the total number of tokens summed over the Arabic section of the [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations) dataset.
165
+ This dataset was chosen because it represents Arabic Fusha text in a small and concentrated manner.
166
+ A tokenizer that scores high on this leaderboard will be efficient at parsing Arabic in its different dialects and forms.
167
  """
168
 
169
  with gr.Blocks() as demo: