ayymen committed on May 13

Commit

1fda542

verified ·

1 Parent(s): 2899e8c

Upload folder using huggingface_hub

Browse files

Files changed (44) hide show

README.md +141 -0
adapter.hau.safetensors +3 -0
added_tokens.json +4 -0
all_results.json +16 -0
checkpoint-35500/added_tokens.json +4 -0
checkpoint-35500/config.json +108 -0
checkpoint-35500/model.safetensors +3 -0
checkpoint-35500/optimizer.pt +3 -0
checkpoint-35500/preprocessor_config.json +10 -0
checkpoint-35500/rng_state_0.pth +3 -0
checkpoint-35500/rng_state_1.pth +3 -0
checkpoint-35500/scheduler.pt +3 -0
checkpoint-35500/special_tokens_map.json +30 -0
checkpoint-35500/tokenizer_config.json +49 -0
checkpoint-35500/trainer_state.json +0 -0
checkpoint-35500/training_args.bin +3 -0
checkpoint-35500/vocab.json +103 -0
checkpoint-35796/added_tokens.json +4 -0
checkpoint-35796/config.json +108 -0
checkpoint-35796/model.safetensors +3 -0
checkpoint-35796/optimizer.pt +3 -0
checkpoint-35796/preprocessor_config.json +10 -0
checkpoint-35796/rng_state_0.pth +3 -0
checkpoint-35796/rng_state_1.pth +3 -0
checkpoint-35796/scheduler.pt +3 -0
checkpoint-35796/special_tokens_map.json +30 -0
checkpoint-35796/tokenizer_config.json +49 -0
checkpoint-35796/trainer_state.json +0 -0
checkpoint-35796/training_args.bin +3 -0
checkpoint-35796/vocab.json +103 -0
cmd.sh +27 -0
config.json +108 -0
eval_results.json +10 -0
model.safetensors +3 -0
preprocessor_config.json +10 -0
runs/Mar13_14-46-02_gmy/events.out.tfevents.1741871146.gmy.21769.0 +3 -0
runs/Mar13_20-53-06_gmy/events.out.tfevents.1741893192.gmy.53002.0 +3 -0
runs/Mar13_20-53-06_gmy/events.out.tfevents.1741941775.gmy.53002.1 +3 -0
special_tokens_map.json +30 -0
tokenizer_config.json +49 -0
train_results.json +9 -0
trainer_state.json +0 -0
training_args.bin +3 -0
vocab.json +103 -0

README.md ADDED Viewed

	@@ -0,0 +1,141 @@

+---
+library_name: transformers
+base_model: facebook/mms-1b-all
+tags:
+- automatic-speech-recognition
+- /mnt/md0/synvoices/data/naijavoices_500h
+- mms
+- generated_from_trainer
+metrics:
+- wer
+model-index:
+- name: mms-1b-naijavoices_500h-hau-ft
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mms-1b-naijavoices_500h-hau-ft
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all), with training resumed from the local checkpoint `./training_runs/mms-1b-naijavoices_500h-hau-ft/checkpoint-5000`, on the `naijavoices_500h` dataset (loaded from the local path `/mnt/md0/synvoices/data/naijavoices_500h`) with target language `hau`.
+It achieves the following results on the evaluation set:
+- Loss: 0.3121
+- Wer: 0.3303
+- Cer: 0.0830
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.001
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 16
+- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 100
+- num_epochs: 2.0
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step  | Cer    | Validation Loss | Wer    |
+|:-------------:|:------:|:-----:|:------:|:---------------:|:------:|
+| 0.5183        | 0.0279 | 500   | 0.1051 | 0.3932          | 0.4076 |
+| 0.4379        | 0.0559 | 1000  | 0.1009 | 0.3776          | 0.3964 |
+| 0.5709        | 0.0838 | 1500  | 0.1001 | 0.3829          | 0.3935 |
+| 0.6257        | 0.1117 | 2000  | 0.0994 | 0.3710          | 0.3930 |
+| 0.5065        | 0.1397 | 2500  | 0.1008 | 0.3796          | 0.3937 |
+| 0.4613        | 0.1676 | 3000  | 0.0977 | 0.3637          | 0.3849 |
+| 0.5305        | 0.1955 | 3500  | 0.0971 | 0.3674          | 0.3801 |
+| 0.4303        | 0.2235 | 4000  | 0.0980 | 0.3663          | 0.3875 |
+| 0.6497        | 0.2514 | 4500  | 0.0959 | 0.3617          | 0.3741 |
+| 0.3887        | 0.2794 | 5000  | 0.0985 | 0.3620          | 0.3925 |
+| 0.4604        | 0.3073 | 5500  | 0.0947 | 0.3570          | 0.3727 |
+| 0.4349        | 0.3352 | 6000  | 0.0940 | 0.3529          | 0.3686 |
+| 0.5403        | 0.3632 | 6500  | 0.0941 | 0.3518          | 0.3722 |
+| 0.4455        | 0.3911 | 7000  | 0.0937 | 0.3522          | 0.3695 |
+| 0.4454        | 0.4190 | 7500  | 0.0941 | 0.3527          | 0.3722 |
+| 0.3582        | 0.4470 | 8000  | 0.0932 | 0.3479          | 0.3696 |
+| 0.6661        | 0.4749 | 8500  | 0.0911 | 0.3453          | 0.3597 |
+| 0.4702        | 0.5028 | 9000  | 0.0929 | 0.3472          | 0.3672 |
+| 0.3877        | 0.5308 | 9500  | 0.0952 | 0.3467          | 0.3772 |
+| 0.5848        | 0.5587 | 10000 | 0.0920 | 0.3422          | 0.3652 |
+| 0.4943        | 0.5866 | 10500 | 0.0926 | 0.3444          | 0.3701 |
+| 0.5451        | 0.6146 | 11000 | 0.0908 | 0.3441          | 0.3583 |
+| 0.4033        | 0.6425 | 11500 | 0.0907 | 0.3424          | 0.3572 |
+| 0.4437        | 0.6704 | 12000 | 0.0906 | 0.3427          | 0.3576 |
+| 0.4541        | 0.6984 | 12500 | 0.0901 | 0.3375          | 0.3574 |
+| 0.3769        | 0.7263 | 13000 | 0.0901 | 0.3381          | 0.3605 |
+| 0.3915        | 0.7543 | 13500 | 0.0892 | 0.3357          | 0.3538 |
+| 0.5068        | 0.7822 | 14000 | 0.0892 | 0.3373          | 0.3518 |
+| 0.3922        | 0.8101 | 14500 | 0.0895 | 0.3362          | 0.3556 |
+| 0.3928        | 0.8381 | 15000 | 0.0887 | 0.3337          | 0.3489 |
+| 0.487         | 0.8660 | 15500 | 0.0897 | 0.3350          | 0.3580 |
+| 0.3974        | 0.8939 | 16000 | 0.0892 | 0.3330          | 0.3545 |
+| 0.3988        | 0.9219 | 16500 | 0.0880 | 0.3339          | 0.3468 |
+| 0.5077        | 0.9498 | 17000 | 0.0884 | 0.3322          | 0.3529 |
+| 0.4159        | 0.9777 | 17500 | 0.0885 | 0.3320          | 0.3509 |
+| 0.46          | 1.0057 | 18000 | 0.0881 | 0.3313          | 0.3469 |
+| 0.4727        | 1.0336 | 18500 | 0.0893 | 0.3320          | 0.3588 |
+| 0.5366        | 1.0616 | 19000 | 0.0891 | 0.3292          | 0.3519 |
+| 0.7299        | 1.0895 | 19500 | 0.0882 | 0.3292          | 0.3518 |
+| 0.4359        | 1.1174 | 20000 | 0.0872 | 0.3292          | 0.3470 |
+| 0.4212        | 1.1454 | 20500 | 0.0873 | 0.3266          | 0.3449 |
+| 0.4532        | 1.1733 | 21000 | 0.0868 | 0.3264          | 0.3443 |
+| 0.5725        | 1.2012 | 21500 | 0.0857 | 0.3264          | 0.3393 |
+| 0.4016        | 1.2292 | 22000 | 0.0861 | 0.3249          | 0.3398 |
+| 0.4479        | 1.2571 | 22500 | 0.0875 | 0.3243          | 0.3519 |
+| 0.3502        | 1.2851 | 23000 | 0.0867 | 0.3253          | 0.3463 |
+| 0.4566        | 1.3130 | 23500 | 0.0854 | 0.3207          | 0.3387 |
+| 0.4414        | 1.3409 | 24000 | 0.0858 | 0.3218          | 0.3431 |
+| 0.4479        | 1.3689 | 24500 | 0.0864 | 0.3243          | 0.3445 |
+| 0.4601        | 1.3968 | 25000 | 0.0858 | 0.3197          | 0.3405 |
+| 0.4091        | 1.4247 | 25500 | 0.0851 | 0.3219          | 0.3371 |
+| 0.3548        | 1.4527 | 26000 | 0.0856 | 0.3207          | 0.3417 |
+| 0.4587        | 1.4806 | 26500 | 0.0851 | 0.3183          | 0.3360 |
+| 0.51          | 1.5085 | 27000 | 0.0853 | 0.3196          | 0.3393 |
+| 0.4705        | 1.5365 | 27500 | 0.0853 | 0.3187          | 0.3382 |
+| 0.5046        | 1.5644 | 28000 | 0.0843 | 0.3194          | 0.3337 |
+| 0.4924        | 1.5923 | 28500 | 0.0840 | 0.3149          | 0.3327 |
+| 0.3216        | 1.6203 | 29000 | 0.0850 | 0.3173          | 0.3395 |
+| 0.4593        | 1.6482 | 29500 | 0.0833 | 0.3156          | 0.3297 |
+| 0.3163        | 1.6761 | 30000 | 0.0843 | 0.3141          | 0.3355 |
+| 0.3492        | 1.7041 | 30500 | 0.0840 | 0.3147          | 0.3337 |
+| 0.4529        | 1.7320 | 31000 | 0.0843 | 0.3150          | 0.3363 |
+| 0.2976        | 1.7600 | 31500 | 0.0838 | 0.3149          | 0.3320 |
+| 0.3691        | 1.7879 | 32000 | 0.0836 | 0.3141          | 0.3313 |
+| 0.3154        | 1.8158 | 32500 | 0.0831 | 0.3128          | 0.3296 |
+| 0.429         | 1.8438 | 33000 | 0.0841 | 0.3134          | 0.3346 |
+| 0.3474        | 1.8717 | 33500 | 0.0836 | 0.3136          | 0.3311 |
+| 0.4074        | 1.8996 | 34000 | 0.0832 | 0.3130          | 0.3318 |
+| 0.3559        | 1.9276 | 34500 | 0.0835 | 0.3123          | 0.3334 |
+| 0.3606        | 1.9555 | 35000 | 0.0834 | 0.3119          | 0.3318 |
+| 0.4125        | 1.9834 | 35500 | 0.0830 | 0.3123          | 0.3301 |
+### Framework versions
+- Transformers 4.48.1
+- Pytorch 2.5.1+cu121
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter.hau.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f873b6a456a3041f425e4bd9eebb8c6dba6d3eea13dc4f728988ba665baf22aa
+size 9157244

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 100,
+  "<s>": 99
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 1.999972064698159,
+    "eval_cer": 0.08301965020239381,
+    "eval_loss": 0.3121410012245178,
+    "eval_runtime": 55.4824,
+    "eval_samples": 4538,
+    "eval_samples_per_second": 81.792,
+    "eval_steps_per_second": 5.119,
+    "eval_wer": 0.3302701041870339,
+    "total_flos": 5.8041710762050716e+20,
+    "train_loss": 0.3785943881215057,
+    "train_runtime": 48523.5238,
+    "train_samples": 572742,
+    "train_samples_per_second": 23.607,
+    "train_steps_per_second": 0.738
+}

checkpoint-35500/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 100,
+  "<s>": 99
+}

checkpoint-35500/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "./training_runs/mms-1b-naijavoices_500h-hau-ft/checkpoint-5000",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 98,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 101,
+  "xvector_output_dim": 512
+}

checkpoint-35500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c40c05cbe67d8be8e6bf5d34235315504c3f43af1543f5e1d9d545c1d9335f1
+size 3859249604

checkpoint-35500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69a3ea2b0e62437b70de749071a4917ea7a955064e2a3e2c90790a2137569131
+size 18489786

checkpoint-35500/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-35500/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61308a4e1f44d2dfd65d2a470ac42f4492cdc5d39fbeff96782a78ecdcf3173b
+size 14512

checkpoint-35500/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5af4e84bf1ab4f7c9ca3300d123be6f0d1a79d0ab71cf5fc9627bf859b60855b
+size 14576

checkpoint-35500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a812b6694a9c1ed3776f81d6c14c6c060cbc4942eabe19623b81b2f94031d00e
+size 1064

checkpoint-35500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

checkpoint-35500/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "97": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-35500/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-35500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c651b72a39453bdafd381a6f2e7b5741e38afd8690278b50637e37501127794
+size 5432

checkpoint-35500/vocab.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "hau": {
+    "$": 1,
+    "%": 2,
+    "&": 3,
+    "'": 4,
+    "*": 5,
+    "+": 6,
+    "-": 7,
+    "/": 8,
+    "0": 9,
+    "1": 10,
+    "2": 11,
+    "3": 12,
+    "4": 13,
+    "5": 14,
+    "6": 15,
+    "7": 16,
+    "8": 17,
+    "9": 18,
+    "=": 19,
+    ">": 20,
+    "@": 21,
+    "[PAD]": 98,
+    "[UNK]": 97,
+    "\\": 22,
+    "_": 23,
+    "`": 24,
+    "a": 25,
+    "b": 26,
+    "c": 27,
+    "d": 28,
+    "e": 29,
+    "f": 30,
+    "g": 31,
+    "h": 32,
+    "i": 33,
+    "j": 34,
+    "k": 35,
+    "l": 36,
+    "m": 37,
+    "n": 38,
+    "o": 39,
+    "p": 40,
+    "q": 41,
+    "r": 42,
+    "s": 43,
+    "t": 44,
+    "u": 45,
+    "v": 46,
+    "w": 47,
+    "x": 48,
+    "y": 49,
+    "z": 50,
+    "|": 0,
+    "à": 51,
+    "á": 52,
+    "â": 53,
+    "ã": 54,
+    "è": 55,
+    "é": 56,
+    "ì": 57,
+    "í": 58,
+    "ñ": 59,
+    "ò": 60,
+    "ó": 61,
+    "ô": 62,
+    "õ": 63,
+    "ù": 64,
+    "ú": 65,
+    "ü": 66,
+    "ý": 67,
+    "ā": 68,
+    "ď": 69,
+    "ē": 70,
+    "ğ": 71,
+    "ı": 72,
+    "ķ": 73,
+    "ń": 74,
+    "ō": 75,
+    "ş": 76,
+    "š": 77,
+    "ũ": 78,
+    "ū": 79,
+    "ŭ": 80,
+    "ž": 81,
+    "ƙ": 82,
+    "ƴ": 83,
+    "ǹ": 84,
+    "ɓ": 85,
+    "ɗ": 86,
+    "ʼ": 87,
+    "̀": 88,
+    "ṣ": 89,
+    "ẹ": 90,
+    "ọ": 91,
+    "—": 92,
+    "‘": 93,
+    "’": 94,
+    "…": 95,
+    "₦": 96
+  }
+}

checkpoint-35796/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 100,
+  "<s>": 99
+}

checkpoint-35796/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "./training_runs/mms-1b-naijavoices_500h-hau-ft/checkpoint-5000",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 98,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 101,
+  "xvector_output_dim": 512
+}

checkpoint-35796/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b762c4e1f2a680ac3d27b66063703a3b4b22717e9298c9f0cbb52a411a9f92
+size 3859249604

checkpoint-35796/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db2b98cb3060f3520ff69b273e6dcd6897f95f2b7f2fd55cc2c4e7792d329b16
+size 18489786

checkpoint-35796/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-35796/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf602b9e97a3af0f2ccb3ef4e577eaf113f499ff88a23fff00416e7e40293534
+size 14512

checkpoint-35796/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79219cf9f7e7918222038207daa4441107166ba33f7f8b167aaeed38c7c74917
+size 14512

checkpoint-35796/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30c813c37ff4a83862abc6fc7426fbb2f1027db556b854d2356f023c0b0496b7
+size 1064

checkpoint-35796/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

checkpoint-35796/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "97": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-35796/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-35796/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c651b72a39453bdafd381a6f2e7b5741e38afd8690278b50637e37501127794
+size 5432

checkpoint-35796/vocab.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "hau": {
+    "$": 1,
+    "%": 2,
+    "&": 3,
+    "'": 4,
+    "*": 5,
+    "+": 6,
+    "-": 7,
+    "/": 8,
+    "0": 9,
+    "1": 10,
+    "2": 11,
+    "3": 12,
+    "4": 13,
+    "5": 14,
+    "6": 15,
+    "7": 16,
+    "8": 17,
+    "9": 18,
+    "=": 19,
+    ">": 20,
+    "@": 21,
+    "[PAD]": 98,
+    "[UNK]": 97,
+    "\\": 22,
+    "_": 23,
+    "`": 24,
+    "a": 25,
+    "b": 26,
+    "c": 27,
+    "d": 28,
+    "e": 29,
+    "f": 30,
+    "g": 31,
+    "h": 32,
+    "i": 33,
+    "j": 34,
+    "k": 35,
+    "l": 36,
+    "m": 37,
+    "n": 38,
+    "o": 39,
+    "p": 40,
+    "q": 41,
+    "r": 42,
+    "s": 43,
+    "t": 44,
+    "u": 45,
+    "v": 46,
+    "w": 47,
+    "x": 48,
+    "y": 49,
+    "z": 50,
+    "|": 0,
+    "à": 51,
+    "á": 52,
+    "â": 53,
+    "ã": 54,
+    "è": 55,
+    "é": 56,
+    "ì": 57,
+    "í": 58,
+    "ñ": 59,
+    "ò": 60,
+    "ó": 61,
+    "ô": 62,
+    "õ": 63,
+    "ù": 64,
+    "ú": 65,
+    "ü": 66,
+    "ý": 67,
+    "ā": 68,
+    "ď": 69,
+    "ē": 70,
+    "ğ": 71,
+    "ı": 72,
+    "ķ": 73,
+    "ń": 74,
+    "ō": 75,
+    "ş": 76,
+    "š": 77,
+    "ũ": 78,
+    "ū": 79,
+    "ŭ": 80,
+    "ž": 81,
+    "ƙ": 82,
+    "ƴ": 83,
+    "ǹ": 84,
+    "ɓ": 85,
+    "ɗ": 86,
+    "ʼ": 87,
+    "̀": 88,
+    "ṣ": 89,
+    "ẹ": 90,
+    "ọ": 91,
+    "—": 92,
+    "‘": 93,
+    "’": 94,
+    "…": 95,
+    "₦": 96
+  }
+}

cmd.sh ADDED Viewed

	@@ -0,0 +1,27 @@

+torchrun \
+	--nproc_per_node 2 run_speech_recognition_ctc_adapter.py \
+	--model_name_or_path="facebook/mms-1b-all" \
+	--dataset_name="/mnt/md0/synvoices/data/naijavoices_500h" \
+	--train_split_name="train" \
+	--eval_split_name="validation" \
+	--output_dir="./training_runs/mms-1b-naijavoices_500h-hau-ft" \
+	--num_train_epochs="2" \
+	--per_device_train_batch_size="8" \
+	--per_device_eval_batch_size="8" \
+    --gradient_accumulation_steps="2" \
+	--learning_rate="1e-3" \
+	--warmup_steps="100" \
+	--eval_strategy="steps" \
+	--save_steps="500" \
+	--eval_steps="500" \
+	--logging_steps="1" \
+	--eval_metrics wer cer \
+	--save_total_limit="2" \
+    --max_duration_in_seconds="30" \
+	--target_language="hau" \
+	--overwrite_lang_vocab \
+	--gradient_checkpointing \
+	--fp16 \
+	--do_train --do_eval \
+	--preprocessing_num_workers="12" \
+	--dataloader_num_workers="12"

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 98,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 101,
+  "xvector_output_dim": 512
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 1.999972064698159,
+    "eval_cer": 0.08301965020239381,
+    "eval_loss": 0.3121410012245178,
+    "eval_runtime": 55.4824,
+    "eval_samples": 4538,
+    "eval_samples_per_second": 81.792,
+    "eval_steps_per_second": 5.119,
+    "eval_wer": 0.3302701041870339
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b762c4e1f2a680ac3d27b66063703a3b4b22717e9298c9f0cbb52a411a9f92
+size 3859249604

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

runs/Mar13_14-46-02_gmy/events.out.tfevents.1741871146.gmy.21769.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25b41411f63c0f6201ec87617bb5bfea2d6c78b9138a68d90085529900daa76a
+size 1154975

runs/Mar13_20-53-06_gmy/events.out.tfevents.1741893192.gmy.53002.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5fac722c3cef71df95ece3cd30cc87aa7993eceeee4cac6134eafb0a078f155
+size 6605134

runs/Mar13_20-53-06_gmy/events.out.tfevents.1741941775.gmy.53002.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3cc91f27baa1956244c9a87e3c9a0fa8d53b958fe796e1fffaf47e7405fcbae
+size 460

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "added_tokens_decoder": {
+    "97": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "hau",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.999972064698159,
+    "total_flos": 5.8041710762050716e+20,
+    "train_loss": 0.3785943881215057,
+    "train_runtime": 48523.5238,
+    "train_samples": 572742,
+    "train_samples_per_second": 23.607,
+    "train_steps_per_second": 0.738
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c651b72a39453bdafd381a6f2e7b5741e38afd8690278b50637e37501127794
+size 5432

vocab.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "hau": {
+    "$": 1,
+    "%": 2,
+    "&": 3,
+    "'": 4,
+    "*": 5,
+    "+": 6,
+    "-": 7,
+    "/": 8,
+    "0": 9,
+    "1": 10,
+    "2": 11,
+    "3": 12,
+    "4": 13,
+    "5": 14,
+    "6": 15,
+    "7": 16,
+    "8": 17,
+    "9": 18,
+    "=": 19,
+    ">": 20,
+    "@": 21,
+    "[PAD]": 98,
+    "[UNK]": 97,
+    "\\": 22,
+    "_": 23,
+    "`": 24,
+    "a": 25,
+    "b": 26,
+    "c": 27,
+    "d": 28,
+    "e": 29,
+    "f": 30,
+    "g": 31,
+    "h": 32,
+    "i": 33,
+    "j": 34,
+    "k": 35,
+    "l": 36,
+    "m": 37,
+    "n": 38,
+    "o": 39,
+    "p": 40,
+    "q": 41,
+    "r": 42,
+    "s": 43,
+    "t": 44,
+    "u": 45,
+    "v": 46,
+    "w": 47,
+    "x": 48,
+    "y": 49,
+    "z": 50,
+    "|": 0,
+    "à": 51,
+    "á": 52,
+    "â": 53,
+    "ã": 54,
+    "è": 55,
+    "é": 56,
+    "ì": 57,
+    "í": 58,
+    "ñ": 59,
+    "ò": 60,
+    "ó": 61,
+    "ô": 62,
+    "õ": 63,
+    "ù": 64,
+    "ú": 65,
+    "ü": 66,
+    "ý": 67,
+    "ā": 68,
+    "ď": 69,
+    "ē": 70,
+    "ğ": 71,
+    "ı": 72,
+    "ķ": 73,
+    "ń": 74,
+    "ō": 75,
+    "ş": 76,
+    "š": 77,
+    "ũ": 78,
+    "ū": 79,
+    "ŭ": 80,
+    "ž": 81,
+    "ƙ": 82,
+    "ƴ": 83,
+    "ǹ": 84,
+    "ɓ": 85,
+    "ɗ": 86,
+    "ʼ": 87,
+    "̀": 88,
+    "ṣ": 89,
+    "ẹ": 90,
+    "ọ": 91,
+    "—": 92,
+    "‘": 93,
+    "’": 94,
+    "…": 95,
+    "₦": 96
+  }
+}