ayymen committed on May 13

Commit

4d539cf

verified ·

1 Parent(s): f449bc7

Upload folder using huggingface_hub

Browse files

Files changed (43) hide show

README.md +142 -0
adapter.hau.safetensors +3 -0
added_tokens.json +4 -0
all_results.json +16 -0
checkpoint-35500/added_tokens.json +4 -0
checkpoint-35500/config.json +108 -0
checkpoint-35500/model.safetensors +3 -0
checkpoint-35500/optimizer.pt +3 -0
checkpoint-35500/preprocessor_config.json +10 -0
checkpoint-35500/rng_state_0.pth +3 -0
checkpoint-35500/rng_state_1.pth +3 -0
checkpoint-35500/scheduler.pt +3 -0
checkpoint-35500/special_tokens_map.json +30 -0
checkpoint-35500/tokenizer_config.json +49 -0
checkpoint-35500/trainer_state.json +0 -0
checkpoint-35500/training_args.bin +3 -0
checkpoint-35500/vocab.json +78 -0
checkpoint-35800/added_tokens.json +4 -0
checkpoint-35800/config.json +108 -0
checkpoint-35800/model.safetensors +3 -0
checkpoint-35800/optimizer.pt +3 -0
checkpoint-35800/preprocessor_config.json +10 -0
checkpoint-35800/rng_state_0.pth +3 -0
checkpoint-35800/rng_state_1.pth +3 -0
checkpoint-35800/scheduler.pt +3 -0
checkpoint-35800/special_tokens_map.json +30 -0
checkpoint-35800/tokenizer_config.json +49 -0
checkpoint-35800/trainer_state.json +0 -0
checkpoint-35800/training_args.bin +3 -0
checkpoint-35800/vocab.json +78 -0
cmd.sh +27 -0
config.json +108 -0
eval_results.json +10 -0
model.safetensors +3 -0
preprocessor_config.json +10 -0
runs/Mar18_00-52-03_gmy/events.out.tfevents.1742251942.gmy.34197.0 +3 -0
runs/Mar18_00-52-03_gmy/events.out.tfevents.1742306969.gmy.34197.1 +3 -0
special_tokens_map.json +30 -0
tokenizer_config.json +49 -0
train_results.json +9 -0
trainer_state.json +0 -0
training_args.bin +3 -0
vocab.json +78 -0

README.md ADDED Viewed

	@@ -0,0 +1,142 @@

+---
+library_name: transformers
+license: cc-by-nc-4.0
+base_model: facebook/mms-1b-all
+tags:
+- automatic-speech-recognition
+- naijavoices_50h
+- mms
+- generated_from_trainer
+metrics:
+- wer
+model-index:
+- name: mms-1b-naijavoices_50h-hau-ft
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mms-1b-naijavoices_50h-hau-ft
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the NaijaVoices 50h dataset (loaded from the local path `/mnt/md0/synvoices/data/naijavoices_50h`) for Hausa (`hau`) speech recognition.
+It achieves the following results on the evaluation set:
+- Loss: 0.3184
+- WER (word error rate): 0.3325
+- CER (character error rate): 0.0842
+## Model description
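+A Hausa (`hau`) speech recognition model: `facebook/mms-1b-all` (`Wav2Vec2ForCTC`) fine-tuned with `run_speech_recognition_ctc_adapter.py`, using a 76-token Hausa CTC vocabulary.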
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
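+Trained on the `train` split (57,274 samples) and evaluated on the `validation` split (4,538 samples) of the dataset loaded from `/mnt/md0/synvoices/data/naijavoices_50h`.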
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.001
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 16
+- optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 100
+- num_epochs: 20.0
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch   | Step  | Validation Loss | Wer    | Cer    |
+|:-------------:|:-------:|:-----:|:---------------:|:------:|:------:|
+| 0.4365        | 0.2793  | 500   | 0.3954          | 0.4125 | 0.1067 |
+| 0.5478        | 0.5587  | 1000  | 0.3881          | 0.4025 | 0.1031 |
+| 0.5289        | 0.8380  | 1500  | 0.3787          | 0.3857 | 0.0984 |
+| 0.5134        | 1.1173  | 2000  | 0.3760          | 0.4047 | 0.1008 |
+| 0.429         | 1.3966  | 2500  | 0.3699          | 0.3851 | 0.0977 |
+| 0.6624        | 1.6760  | 3000  | 0.3696          | 0.3918 | 0.0995 |
+| 0.4997        | 1.9553  | 3500  | 0.3674          | 0.3797 | 0.0969 |
+| 0.3572        | 2.2346  | 4000  | 0.3659          | 0.3815 | 0.0972 |
+| 0.4281        | 2.5140  | 4500  | 0.3614          | 0.3761 | 0.0962 |
+| 0.4535        | 2.7933  | 5000  | 0.3592          | 0.3739 | 0.0950 |
+| 0.5143        | 3.0726  | 5500  | 0.3574          | 0.3709 | 0.0938 |
+| 0.3061        | 3.3520  | 6000  | 0.3541          | 0.3714 | 0.0941 |
+| 0.4699        | 3.6313  | 6500  | 0.3576          | 0.3711 | 0.0941 |
+| 0.4345        | 3.9106  | 7000  | 0.3578          | 0.3765 | 0.0947 |
+| 0.5514        | 4.1899  | 7500  | 0.3549          | 0.3738 | 0.0944 |
+| 0.4208        | 4.4693  | 8000  | 0.3547          | 0.3665 | 0.0929 |
+| 0.4893        | 4.7486  | 8500  | 0.3532          | 0.3626 | 0.0919 |
+| 0.3894        | 5.0279  | 9000  | 0.3561          | 0.3741 | 0.0949 |
+| 0.4344        | 5.3073  | 9500  | 0.3531          | 0.3661 | 0.0939 |
+| 0.5008        | 5.5866  | 10000 | 0.3484          | 0.3695 | 0.0932 |
+| 0.4309        | 5.8659  | 10500 | 0.3469          | 0.3620 | 0.0916 |
+| 0.5366        | 6.1453  | 11000 | 0.3471          | 0.3613 | 0.0917 |
+| 0.5179        | 6.4246  | 11500 | 0.3529          | 0.3804 | 0.0949 |
+| 0.7649        | 6.7039  | 12000 | 0.3450          | 0.3647 | 0.0916 |
+| 0.3632        | 6.9832  | 12500 | 0.3444          | 0.3577 | 0.0907 |
+| 0.4515        | 7.2626  | 13000 | 0.3405          | 0.3603 | 0.0914 |
+| 0.5776        | 7.5419  | 13500 | 0.3409          | 0.3519 | 0.0893 |
+| 0.4488        | 7.8212  | 14000 | 0.3415          | 0.3652 | 0.0916 |
+| 0.4377        | 8.1006  | 14500 | 0.3466          | 0.3717 | 0.0922 |
+| 0.4987        | 8.3799  | 15000 | 0.3384          | 0.3586 | 0.0902 |
+| 0.4934        | 8.6592  | 15500 | 0.3394          | 0.3705 | 0.0926 |
+| 0.3812        | 8.9385  | 16000 | 0.3387          | 0.3548 | 0.0899 |
+| 0.5597        | 9.2179  | 16500 | 0.3365          | 0.3492 | 0.0883 |
+| 0.3932        | 9.4972  | 17000 | 0.3347          | 0.3496 | 0.0886 |
+| 0.4425        | 9.7765  | 17500 | 0.3368          | 0.3538 | 0.0891 |
+| 0.3765        | 10.0559 | 18000 | 0.3334          | 0.3524 | 0.0887 |
+| 0.4459        | 10.3352 | 18500 | 0.3355          | 0.3544 | 0.0891 |
+| 0.417         | 10.6145 | 19000 | 0.3346          | 0.3530 | 0.0893 |
+| 0.3967        | 10.8939 | 19500 | 0.3288          | 0.3509 | 0.0881 |
+| 0.3371        | 11.1732 | 20000 | 0.3355          | 0.3480 | 0.0880 |
+| 0.3387        | 11.4525 | 20500 | 0.3321          | 0.3454 | 0.0874 |
+| 0.4536        | 11.7318 | 21000 | 0.3311          | 0.3475 | 0.0879 |
+| 0.3799        | 12.0112 | 21500 | 0.3309          | 0.3437 | 0.0874 |
+| 0.3194        | 12.2905 | 22000 | 0.3336          | 0.3434 | 0.0873 |
+| 0.3527        | 12.5698 | 22500 | 0.3323          | 0.3423 | 0.0870 |
+| 0.4748        | 12.8492 | 23000 | 0.3306          | 0.3402 | 0.0865 |
+| 0.462         | 13.1285 | 23500 | 0.3285          | 0.3444 | 0.0870 |
+| 0.5346        | 13.4078 | 24000 | 0.3315          | 0.3398 | 0.0864 |
+| 0.3255        | 13.6872 | 24500 | 0.3273          | 0.3434 | 0.0867 |
+| 0.3471        | 13.9665 | 25000 | 0.3287          | 0.3399 | 0.0863 |
+| 0.3338        | 14.2458 | 25500 | 0.3270          | 0.3405 | 0.0862 |
+| 0.4103        | 14.5251 | 26000 | 0.3259          | 0.3396 | 0.0857 |
+| 0.5109        | 14.8045 | 26500 | 0.3254          | 0.3407 | 0.0862 |
+| 0.473         | 15.0838 | 27000 | 0.3247          | 0.3409 | 0.0860 |
+| 0.3641        | 15.3631 | 27500 | 0.3251          | 0.3391 | 0.0860 |
+| 0.4245        | 15.6425 | 28000 | 0.3225          | 0.3384 | 0.0856 |
+| 0.3648        | 15.9218 | 28500 | 0.3235          | 0.3416 | 0.0861 |
+| 0.339         | 16.2011 | 29000 | 0.3217          | 0.3395 | 0.0856 |
+| 0.3687        | 16.4804 | 29500 | 0.3221          | 0.3368 | 0.0853 |
+| 0.4329        | 16.7598 | 30000 | 0.3233          | 0.3411 | 0.0861 |
+| 0.5613        | 17.0391 | 30500 | 0.3211          | 0.3347 | 0.0848 |
+| 0.4392        | 17.3184 | 31000 | 0.3217          | 0.3330 | 0.0845 |
+| 0.4039        | 17.5978 | 31500 | 0.3200          | 0.3386 | 0.0853 |
+| 0.4111        | 17.8771 | 32000 | 0.3201          | 0.3333 | 0.0843 |
+| 0.3852        | 18.1564 | 32500 | 0.3206          | 0.3336 | 0.0843 |
+| 0.6218        | 18.4358 | 33000 | 0.3204          | 0.3347 | 0.0848 |
+| 0.4763        | 18.7151 | 33500 | 0.3193          | 0.3354 | 0.0848 |
+| 0.3489        | 18.9944 | 34000 | 0.3186          | 0.3347 | 0.0844 |
+| 0.3661        | 19.2737 | 34500 | 0.3195          | 0.3343 | 0.0844 |
+| 0.3567        | 19.5531 | 35000 | 0.3191          | 0.3339 | 0.0843 |
+| 0.3628        | 19.8324 | 35500 | 0.3184          | 0.3325 | 0.0842 |
+### Framework versions
+- Transformers 4.48.1
+- Pytorch 2.5.1+cu121
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter.hau.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8649f3386507cb6259d76e75d9f6e405f0c2d73393f8aca34c2ded036d38c7fa
+size 9029128

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 75,
+  "<s>": 74
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 20.0,
+    "eval_cer": 0.08417073090897188,
+    "eval_loss": 0.3183983564376831,
+    "eval_runtime": 55.1605,
+    "eval_samples": 4538,
+    "eval_samples_per_second": 82.269,
+    "eval_steps_per_second": 5.149,
+    "eval_wer": 0.3324767464595961,
+    "total_flos": 5.794046604000698e+20,
+    "train_loss": 0.4541601099669101,
+    "train_runtime": 54968.513,
+    "train_samples": 57274,
+    "train_samples_per_second": 20.839,
+    "train_steps_per_second": 0.651
+}

checkpoint-35500/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 75,
+  "<s>": 74
+}

checkpoint-35500/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 73,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 76,
+  "xvector_output_dim": 512
+}

checkpoint-35500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b98222b2fb8bf76e482cab0d37a75fb5d8751039c5fe05cb317376368a67c58
+size 3859121504

checkpoint-35500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d43b5577a751b16b6b6c628223b5461fc23fd9e59eea23c633766e0880fe38c9
+size 18229498

checkpoint-35500/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-35500/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76f79ad9eb632d78cbe68ece09ed896dadd5ba973a9c9b00de233717d3851f93
+size 14576

checkpoint-35500/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd0e3d77f68bf8d28be38b6fc0a8803cc79faae0d4de86891f0a86e52b4ca368
+size 14576

checkpoint-35500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aceb8470f08db25e791dadb975ad2446187fefd8da338a65c859764d521ca4b4
+size 1064

checkpoint-35500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

checkpoint-35500/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "72": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-35500/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-35500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a44ba986b57f123ad8db3e83d0fdf39b1eec05f295109b684a766bb7281f959
+size 5368

checkpoint-35500/vocab.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "hau": {
+    "%": 1,
+    "&": 2,
+    "'": 3,
+    "*": 4,
+    "+": 5,
+    "-": 6,
+    "/": 7,
+    "0": 8,
+    "1": 9,
+    "2": 10,
+    "3": 11,
+    "4": 12,
+    "5": 13,
+    "6": 14,
+    "7": 15,
+    "8": 16,
+    "9": 17,
+    "=": 18,
+    "[PAD]": 73,
+    "[UNK]": 72,
+    "\\": 19,
+    "_": 20,
+    "`": 21,
+    "a": 22,
+    "b": 23,
+    "c": 24,
+    "d": 25,
+    "e": 26,
+    "f": 27,
+    "g": 28,
+    "h": 29,
+    "i": 30,
+    "j": 31,
+    "k": 32,
+    "l": 33,
+    "m": 34,
+    "n": 35,
+    "o": 36,
+    "p": 37,
+    "q": 38,
+    "r": 39,
+    "s": 40,
+    "t": 41,
+    "u": 42,
+    "v": 43,
+    "w": 44,
+    "x": 45,
+    "y": 46,
+    "z": 47,
+    "|": 0,
+    "à": 48,
+    "á": 49,
+    "ã": 50,
+    "é": 51,
+    "ì": 52,
+    "í": 53,
+    "ò": 54,
+    "ù": 55,
+    "ú": 56,
+    "ā": 57,
+    "ı": 58,
+    "ō": 59,
+    "ū": 60,
+    "ƙ": 61,
+    "ƴ": 62,
+    "ɓ": 63,
+    "ɗ": 64,
+    "ʼ": 65,
+    "̀": 66,
+    "ṣ": 67,
+    "ẹ": 68,
+    "ọ": 69,
+    "‘": 70,
+    "’": 71
+  }
+}

checkpoint-35800/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 75,
+  "<s>": 74
+}

checkpoint-35800/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 73,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 76,
+  "xvector_output_dim": 512
+}

checkpoint-35800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db7c67f4a34d2d80b4576d0cd2500bcdca1ddf617d8778eb19dab8376efd6112
+size 3859121504

checkpoint-35800/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b8e7f34e5a4e54ac226fc5846cecf598bc162ececef5ccfdec7edec33921e43
+size 18229498

checkpoint-35800/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-35800/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ad2c63590735a7041911e12fc7a9cf6b1a0e5addb5d1baae2dbb21be7870890
+size 14512

checkpoint-35800/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3194c7277ef65496d029351c6b10a2bf2965fab3a77887bf21072e5effc07a
+size 14512

checkpoint-35800/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41546fba5e9dd3bc657341f1410e3803ed8f299eee5c0204c92b12a855766fa1
+size 1064

checkpoint-35800/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

checkpoint-35800/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "72": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-35800/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-35800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a44ba986b57f123ad8db3e83d0fdf39b1eec05f295109b684a766bb7281f959
+size 5368

checkpoint-35800/vocab.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "hau": {
+    "%": 1,
+    "&": 2,
+    "'": 3,
+    "*": 4,
+    "+": 5,
+    "-": 6,
+    "/": 7,
+    "0": 8,
+    "1": 9,
+    "2": 10,
+    "3": 11,
+    "4": 12,
+    "5": 13,
+    "6": 14,
+    "7": 15,
+    "8": 16,
+    "9": 17,
+    "=": 18,
+    "[PAD]": 73,
+    "[UNK]": 72,
+    "\\": 19,
+    "_": 20,
+    "`": 21,
+    "a": 22,
+    "b": 23,
+    "c": 24,
+    "d": 25,
+    "e": 26,
+    "f": 27,
+    "g": 28,
+    "h": 29,
+    "i": 30,
+    "j": 31,
+    "k": 32,
+    "l": 33,
+    "m": 34,
+    "n": 35,
+    "o": 36,
+    "p": 37,
+    "q": 38,
+    "r": 39,
+    "s": 40,
+    "t": 41,
+    "u": 42,
+    "v": 43,
+    "w": 44,
+    "x": 45,
+    "y": 46,
+    "z": 47,
+    "|": 0,
+    "à": 48,
+    "á": 49,
+    "ã": 50,
+    "é": 51,
+    "ì": 52,
+    "í": 53,
+    "ò": 54,
+    "ù": 55,
+    "ú": 56,
+    "ā": 57,
+    "ı": 58,
+    "ō": 59,
+    "ū": 60,
+    "ƙ": 61,
+    "ƴ": 62,
+    "ɓ": 63,
+    "ɗ": 64,
+    "ʼ": 65,
+    "̀": 66,
+    "ṣ": 67,
+    "ẹ": 68,
+    "ọ": 69,
+    "‘": 70,
+    "’": 71
+  }
+}

cmd.sh ADDED Viewed

	@@ -0,0 +1,27 @@

+torchrun \
+	--nproc_per_node 2 run_speech_recognition_ctc_adapter.py \
+	--model_name_or_path='facebook/mms-1b-all' \
+	--dataset_name='/mnt/md0/synvoices/data/naijavoices_50h' \
+	--train_split_name='train' \
+	--eval_split_name='validation' \
+	--output_dir='./training_runs/mms-1b-naijavoices_50h-hau-ft' \
+	--num_train_epochs='20' \
+	--per_device_train_batch_size='8' \
+	--per_device_eval_batch_size='8' \
+	--gradient_accumulation_steps='2' \
+	--learning_rate='1e-3' \
+	--warmup_steps='100' \
+	--eval_strategy='steps' \
+	--save_steps='500' \
+	--eval_steps='500' \
+	--logging_steps='1' \
+	--eval_metrics wer cer \
+	--save_total_limit='2' \
+	--max_duration_in_seconds='30' \
+	--target_language='hau' \
+	--overwrite_lang_vocab \
+	--gradient_checkpointing \
+	--fp16 \
+	--do_train --do_eval \
+	--preprocessing_num_workers='12' \
+	--dataloader_num_workers='12'

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 73,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 76,
+  "xvector_output_dim": 512
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 20.0,
+    "eval_cer": 0.08417073090897188,
+    "eval_loss": 0.3183983564376831,
+    "eval_runtime": 55.1605,
+    "eval_samples": 4538,
+    "eval_samples_per_second": 82.269,
+    "eval_steps_per_second": 5.149,
+    "eval_wer": 0.3324767464595961
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db7c67f4a34d2d80b4576d0cd2500bcdca1ddf617d8778eb19dab8376efd6112
+size 3859121504

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

runs/Mar18_00-52-03_gmy/events.out.tfevents.1742251942.gmy.34197.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e178316430bf6f7121ca55743d2aee96c203230e71176959a326221c65340639
+size 7664030

runs/Mar18_00-52-03_gmy/events.out.tfevents.1742306969.gmy.34197.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adfb53624452c5c767397ee763c755fc6988a9761bfe124a8c52da3a0aab3bf6
+size 460

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "72": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 20.0,
+    "total_flos": 5.794046604000698e+20,
+    "train_loss": 0.4541601099669101,
+    "train_runtime": 54968.513,
+    "train_samples": 57274,
+    "train_samples_per_second": 20.839,
+    "train_steps_per_second": 0.651
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a44ba986b57f123ad8db3e83d0fdf39b1eec05f295109b684a766bb7281f959
+size 5368

vocab.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "hau": {
+    "%": 1,
+    "&": 2,
+    "'": 3,
+    "*": 4,
+    "+": 5,
+    "-": 6,
+    "/": 7,
+    "0": 8,
+    "1": 9,
+    "2": 10,
+    "3": 11,
+    "4": 12,
+    "5": 13,
+    "6": 14,
+    "7": 15,
+    "8": 16,
+    "9": 17,
+    "=": 18,
+    "[PAD]": 73,
+    "[UNK]": 72,
+    "\\": 19,
+    "_": 20,
+    "`": 21,
+    "a": 22,
+    "b": 23,
+    "c": 24,
+    "d": 25,
+    "e": 26,
+    "f": 27,
+    "g": 28,
+    "h": 29,
+    "i": 30,
+    "j": 31,
+    "k": 32,
+    "l": 33,
+    "m": 34,
+    "n": 35,
+    "o": 36,
+    "p": 37,
+    "q": 38,
+    "r": 39,
+    "s": 40,
+    "t": 41,
+    "u": 42,
+    "v": 43,
+    "w": 44,
+    "x": 45,
+    "y": 46,
+    "z": 47,
+    "|": 0,
+    "à": 48,
+    "á": 49,
+    "ã": 50,
+    "é": 51,
+    "ì": 52,
+    "í": 53,
+    "ò": 54,
+    "ù": 55,
+    "ú": 56,
+    "ā": 57,
+    "ı": 58,
+    "ō": 59,
+    "ū": 60,
+    "ƙ": 61,
+    "ƴ": 62,
+    "ɓ": 63,
+    "ɗ": 64,
+    "ʼ": 65,
+    "̀": 66,
+    "ṣ": 67,
+    "ẹ": 68,
+    "ọ": 69,
+    "‘": 70,
+    "’": 71
+  }
+}