Roxysun committed on
Commit
7b19a82
·
1 Parent(s): be8e3c4

wav2vec2-large-xls-r-300m-hungarian-colab

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/wav2vec2-xls-r-300m
4
  tags:
5
  - generated_from_trainer
6
  datasets:
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-large-xls-r-300m-hungarian-colab
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the voxpopuli dataset.
19
 
20
  ## Model description
21
 
@@ -35,15 +35,15 @@ More information needed
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 0.0003
38
- - train_batch_size: 16
39
  - eval_batch_size: 8
40
  - seed: 42
41
  - gradient_accumulation_steps: 2
42
- - total_train_batch_size: 32
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
  - lr_scheduler_warmup_steps: 500
46
- - num_epochs: 30
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
 
1
  ---
2
  license: apache-2.0
3
+ base_model: facebook/wav2vec2-lv-60-espeak-cv-ft
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
 
16
  # wav2vec2-large-xls-r-300m-hungarian-colab
17
 
18
+ This model is a fine-tuned version of [facebook/wav2vec2-lv-60-espeak-cv-ft](https://huggingface.co/facebook/wav2vec2-lv-60-espeak-cv-ft) on the voxpopuli dataset.
19
 
20
  ## Model description
21
 
 
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 0.0003
38
+ - train_batch_size: 8
39
  - eval_batch_size: 8
40
  - seed: 42
41
  - gradient_accumulation_steps: 2
42
+ - total_train_batch_size: 16
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
  - lr_scheduler_warmup_steps: 500
46
+ - num_epochs: 50
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
@@ -9,7 +9,7 @@
9
  "architectures": [
10
  "Wav2Vec2ForCTC"
11
  ],
12
- "attention_dropout": 0.0,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 256,
15
  "codevector_dim": 768,
@@ -42,7 +42,7 @@
42
  2,
43
  2
44
  ],
45
- "ctc_loss_reduction": "mean",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
  "do_stable_layer_norm": true,
@@ -50,23 +50,31 @@
50
  "feat_extract_activation": "gelu",
51
  "feat_extract_dropout": 0.0,
52
  "feat_extract_norm": "layer",
53
- "feat_proj_dropout": 0.0,
54
  "feat_quantizer_dropout": 0.0,
55
  "final_dropout": 0.0,
56
  "gradient_checkpointing": false,
57
  "hidden_act": "gelu",
58
- "hidden_dropout": 0.0,
59
  "hidden_size": 1024,
60
  "initializer_range": 0.02,
61
  "intermediate_size": 4096,
62
  "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.0,
 
 
 
 
 
64
  "mask_feature_length": 10,
65
  "mask_feature_min_masks": 0,
66
  "mask_feature_prob": 0.0,
67
  "mask_time_length": 10,
68
  "mask_time_min_masks": 2,
69
- "mask_time_prob": 0.05,
 
 
 
70
  "model_type": "wav2vec2",
71
  "num_adapter_layers": 3,
72
  "num_attention_heads": 16,
@@ -78,7 +86,7 @@
78
  "num_hidden_layers": 24,
79
  "num_negatives": 100,
80
  "output_hidden_size": 1024,
81
- "pad_token_id": 41,
82
  "proj_codevector_dim": 768,
83
  "tdnn_dilation": [
84
  1,
@@ -104,6 +112,6 @@
104
  "torch_dtype": "float32",
105
  "transformers_version": "4.35.2",
106
  "use_weighted_layer_sum": false,
107
- "vocab_size": 44,
108
  "xvector_output_dim": 512
109
  }
 
1
  {
2
+ "_name_or_path": "facebook/wav2vec2-lv-60-espeak-cv-ft",
3
  "activation_dropout": 0.0,
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
 
9
  "architectures": [
10
  "Wav2Vec2ForCTC"
11
  ],
12
+ "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
  "classifier_proj_size": 256,
15
  "codevector_dim": 768,
 
42
  2,
43
  2
44
  ],
45
+ "ctc_loss_reduction": "sum",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
  "do_stable_layer_norm": true,
 
50
  "feat_extract_activation": "gelu",
51
  "feat_extract_dropout": 0.0,
52
  "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.1,
54
  "feat_quantizer_dropout": 0.0,
55
  "final_dropout": 0.0,
56
  "gradient_checkpointing": false,
57
  "hidden_act": "gelu",
58
+ "hidden_dropout": 0.1,
59
  "hidden_size": 1024,
60
  "initializer_range": 0.02,
61
  "intermediate_size": 4096,
62
  "layer_norm_eps": 1e-05,
63
+ "layerdrop": 0.1,
64
+ "mask_channel_length": 10,
65
+ "mask_channel_min_space": 1,
66
+ "mask_channel_other": 0.0,
67
+ "mask_channel_prob": 0.0,
68
+ "mask_channel_selection": "static",
69
  "mask_feature_length": 10,
70
  "mask_feature_min_masks": 0,
71
  "mask_feature_prob": 0.0,
72
  "mask_time_length": 10,
73
  "mask_time_min_masks": 2,
74
+ "mask_time_min_space": 1,
75
+ "mask_time_other": 0.0,
76
+ "mask_time_prob": 0.075,
77
+ "mask_time_selection": "static",
78
  "model_type": "wav2vec2",
79
  "num_adapter_layers": 3,
80
  "num_attention_heads": 16,
 
86
  "num_hidden_layers": 24,
87
  "num_negatives": 100,
88
  "output_hidden_size": 1024,
89
+ "pad_token_id": 0,
90
  "proj_codevector_dim": 768,
91
  "tdnn_dilation": [
92
  1,
 
112
  "torch_dtype": "float32",
113
  "transformers_version": "4.35.2",
114
  "use_weighted_layer_sum": false,
115
+ "vocab_size": 392,
116
  "xvector_output_dim": 512
117
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb7a49a25129d13c6765a153cbf37d66478ab25315057c487ab07ed2767bb256
3
- size 1261987880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3dfac983bf67770f10ee2dd4b43d55b5037feb8e1166577b7fcb8e0d71efac
3
+ size 1263414696
runs/Nov24_19-43-06_ac4520cd39c0/events.out.tfevents.1700855129.ac4520cd39c0.422.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aab0ed53ed3d4271510336bdff1a6aa2bff93791652651d928bafe52d56c3de
3
+ size 6585
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e40e29aa39353bb449257b458c8bd60036226d296e2815f19711043b3e666db
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea3358a91e8443cd7cde2a0a90a81fdd3ec4b8ccfc82b36fdd6d7282efe28a9f
3
  size 4600