Xenova
/

wav2vec2-large-xlsr-53-english

Automatic Speech Recognition

Transformers.js

Model card Files Files and versions Community

Xenova HF staff committed on Jul 29, 2023

Commit

1814452

·

1 Parent(s): 110e7f8

Update tokenizer.json

Files changed (1) hide show

tokenizer.json +53 -3

tokenizer.json CHANGED Viewed

@@ -2,9 +2,59 @@
     "version": "1.0",
     "truncation": null,
     "padding": null,
-    "added_tokens": [],
-    "normalizer": null,
-    "pre_tokenizer": null,
     "post_processor": null,
     "decoder": {
         "type": "CTC",

     "version": "1.0",
     "truncation": null,
     "padding": null,
+    "added_tokens": [
+        {
+            "id": 0,
+            "content": "<pad>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false,
+            "special": true
+        },
+        {
+            "id": 1,
+            "content": "<s>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false,
+            "special": true
+        },
+        {
+            "id": 2,
+            "content": "</s>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false,
+            "special": true
+        },
+        {
+            "id": 3,
+            "content": "<unk>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false,
+            "special": true
+        }
+    ],
+    "normalizer": {
+        "type": "Replace",
+        "pattern": {
+            "String": " "
+        },
+        "content": "|"
+    },
+    "pre_tokenizer": {
+        "type": "Split",
+        "pattern": {
+            "Regex": ""
+        },
+        "behavior": "Isolated",
+        "invert": false
+    },
     "post_processor": null,
     "decoder": {
         "type": "CTC",