juancopi81 committed on
Commit ed699bc · 1 Parent(s): ce8b4b9

Training in progress, step 1000

.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -226,7 +226,7 @@
226
  "name": "stderr",
227
  "output_type": "stream",
228
  "text": [
229
- "Reading metadata...: 230467it [00:04, 56208.39it/s]\n"
230
  ]
231
  },
232
  {
@@ -289,7 +289,7 @@
289
  },
290
  {
291
  "cell_type": "code",
292
- "execution_count": 14,
293
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
294
  "metadata": {},
295
  "outputs": [],
@@ -333,14 +333,113 @@
333
  },
334
  {
335
  "cell_type": "code",
336
- "execution_count": 15,
337
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
338
  "metadata": {},
339
- "outputs": [],
340
  "source": [
341
  "from transformers import WhisperProcessor\n",
342
  "\n",
343
- "processor = WhisperProcessor.from_pretrained(\"juancopi81/whisper-medium-es\", language=\"Spanish\", task=\"transcribe\")"
344
  ]
345
  },
346
  {
@@ -361,7 +460,7 @@
361
  },
362
  {
363
  "cell_type": "code",
364
- "execution_count": 16,
365
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
366
  "metadata": {},
367
  "outputs": [
@@ -372,7 +471,7 @@
372
  " 'sentence': Value(dtype='string', id=None)}"
373
  ]
374
  },
375
- "execution_count": 16,
376
  "metadata": {},
377
  "output_type": "execute_result"
378
  }
@@ -398,7 +497,7 @@
398
  },
399
  {
400
  "cell_type": "code",
401
- "execution_count": 17,
402
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
403
  "metadata": {},
404
  "outputs": [],
@@ -418,7 +517,7 @@
418
  },
419
  {
420
  "cell_type": "code",
421
- "execution_count": 18,
422
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
423
  "metadata": {},
424
  "outputs": [],
@@ -445,7 +544,7 @@
445
  },
446
  {
447
  "cell_type": "code",
448
- "execution_count": 19,
449
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
450
  "metadata": {},
451
  "outputs": [],
@@ -481,7 +580,7 @@
481
  },
482
  {
483
  "cell_type": "code",
484
- "execution_count": 25,
485
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
486
  "metadata": {},
487
  "outputs": [],
@@ -499,7 +598,7 @@
499
  },
500
  {
501
  "cell_type": "code",
502
- "execution_count": 26,
503
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
504
  "metadata": {},
505
  "outputs": [],
@@ -520,7 +619,7 @@
520
  },
521
  {
522
  "cell_type": "code",
523
- "execution_count": 27,
524
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
525
  "metadata": {},
526
  "outputs": [],
@@ -541,7 +640,7 @@
541
  },
542
  {
543
  "cell_type": "code",
544
- "execution_count": 28,
545
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
546
  "metadata": {},
547
  "outputs": [],
@@ -611,7 +710,7 @@
611
  },
612
  {
613
  "cell_type": "code",
614
- "execution_count": 29,
615
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
616
  "metadata": {},
617
  "outputs": [],
@@ -659,7 +758,7 @@
659
  },
660
  {
661
  "cell_type": "code",
662
- "execution_count": 30,
663
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
664
  "metadata": {},
665
  "outputs": [],
@@ -686,7 +785,7 @@
686
  },
687
  {
688
  "cell_type": "code",
689
- "execution_count": 31,
690
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
691
  "metadata": {},
692
  "outputs": [],
@@ -715,7 +814,7 @@
715
  },
716
  {
717
  "cell_type": "code",
718
- "execution_count": 32,
719
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
720
  "metadata": {},
721
  "outputs": [],
@@ -765,14 +864,70 @@
765
  },
766
  {
767
  "cell_type": "code",
768
- "execution_count": 33,
769
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
770
  "metadata": {},
771
- "outputs": [],
772
  "source": [
773
  "from transformers import WhisperForConditionalGeneration\n",
774
  "\n",
775
- "model = WhisperForConditionalGeneration.from_pretrained(\"juancopi81/whisper-medium-es\")"
776
  ]
777
  },
778
  {
@@ -785,7 +940,7 @@
785
  },
786
  {
787
  "cell_type": "code",
788
- "execution_count": 34,
789
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
790
  "metadata": {},
791
  "outputs": [],
@@ -814,10 +969,18 @@
814
  },
815
  {
816
  "cell_type": "code",
817
- "execution_count": 35,
818
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
819
  "metadata": {},
820
- "outputs": [],
821
  "source": [
822
  "from transformers import Seq2SeqTrainingArguments\n",
823
  "\n",
@@ -825,7 +988,7 @@
825
  " output_dir=\"./\",\n",
826
  " per_device_train_batch_size=32,\n",
827
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
828
- " learning_rate=1e-5,\n",
829
  " warmup_steps=500,\n",
830
  " max_steps=5000,\n",
831
  " gradient_checkpointing=True,\n",
@@ -864,7 +1027,7 @@
864
  },
865
  {
866
  "cell_type": "code",
867
- "execution_count": 36,
868
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
869
  "metadata": {},
870
  "outputs": [],
@@ -893,7 +1056,7 @@
893
  },
894
  {
895
  "cell_type": "code",
896
- "execution_count": 37,
897
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
898
  "metadata": {},
899
  "outputs": [
@@ -901,7 +1064,7 @@
901
  "name": "stderr",
902
  "output_type": "stream",
903
  "text": [
904
- "/home/ubuntu/whisper-small-es-common-fleurs/./ is already a clone of https://huggingface.co/juancopi81/whisper-small-es-common-fleurs. Make sure you pull the latest changes with `repo.git_pull()`.\n",
905
  "max_steps is given, it will override any value given in num_train_epochs\n",
906
  "Using cuda_amp half precision backend\n"
907
  ]
@@ -932,7 +1095,7 @@
932
  },
933
  {
934
  "cell_type": "code",
935
- "execution_count": 38,
936
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
937
  "metadata": {},
938
  "outputs": [
@@ -978,14 +1141,14 @@
978
  },
979
  {
980
  "cell_type": "code",
981
- "execution_count": 39,
982
  "id": "ced90915-84df-4538-9034-f6c8c85de2df",
983
  "metadata": {},
984
  "outputs": [
985
  {
986
  "data": {
987
  "application/vnd.jupyter.widget-view+json": {
988
- "model_id": "2e4f6ccd07d344d08259008b7485b7db",
989
  "version_major": 2,
990
  "version_minor": 0
991
  },
@@ -1005,7 +1168,7 @@
1005
  },
1006
  {
1007
  "cell_type": "code",
1008
- "execution_count": null,
1009
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1010
  "metadata": {},
1011
  "outputs": [
@@ -1023,7 +1186,7 @@
1023
  " Gradient Accumulation steps = 1\n",
1024
  " Total optimization steps = 5000\n",
1025
  " Number of trainable parameters = 763857920\n",
1026
- "Reading metadata...: 230467it [00:04, 49083.73it/s]\n",
1027
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1028
  ]
1029
  },
@@ -1033,8 +1196,8 @@
1033
  "\n",
1034
  " <div>\n",
1035
  " \n",
1036
- " <progress value='3001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1037
- " [3001/5000 10:24:07 < 6:56:01, 0.08 it/s, Epoch 0.60/9223372036854775807]\n",
1038
  " </div>\n",
1039
  " <table border=\"1\" class=\"dataframe\">\n",
1040
  " <thead>\n",
@@ -1048,15 +1211,9 @@
1048
  " <tbody>\n",
1049
  " <tr>\n",
1050
  " <td>1000</td>\n",
1051
- " <td>0.069400</td>\n",
1052
- " <td>0.219434</td>\n",
1053
- " <td>6.819422</td>\n",
1054
- " </tr>\n",
1055
- " <tr>\n",
1056
- " <td>2000</td>\n",
1057
- " <td>0.033600</td>\n",
1058
- " <td>0.209724</td>\n",
1059
- " <td>6.755756</td>\n",
1060
  " </tr>\n",
1061
  " </tbody>\n",
1062
  "</table><p>"
@@ -1075,8 +1232,8 @@
1075
  "***** Running Evaluation *****\n",
1076
  " Num examples: Unknown\n",
1077
  " Batch size = 16\n",
1078
- "Reading metadata...: 15520it [00:00, 42402.78it/s]\n",
1079
- "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender. If input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1080
  "Saving model checkpoint to ./checkpoint-1000\n",
1081
  "Configuration saved in ./checkpoint-1000/config.json\n",
1082
  "Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
@@ -1087,28 +1244,33 @@
1087
  "Feature extractor saved in ./preprocessor_config.json\n",
1088
  "tokenizer config file saved in ./tokenizer_config.json\n",
1089
  "Special tokens file saved in ./special_tokens_map.json\n",
1090
- "added tokens file saved in ./added_tokens.json\n",
1091
- "***** Running Evaluation *****\n",
1092
- " Num examples: Unknown\n",
1093
- " Batch size = 16\n",
1094
- "Reading metadata...: 15520it [00:00, 27981.68it/s]\n",
1095
- "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender. If input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1096
- "Saving model checkpoint to ./checkpoint-2000\n",
1097
- "Configuration saved in ./checkpoint-2000/config.json\n",
1098
- "Model weights saved in ./checkpoint-2000/pytorch_model.bin\n",
1099
- "Feature extractor saved in ./checkpoint-2000/preprocessor_config.json\n",
1100
- "tokenizer config file saved in ./checkpoint-2000/tokenizer_config.json\n",
1101
- "Special tokens file saved in ./checkpoint-2000/special_tokens_map.json\n",
1102
- "added tokens file saved in ./checkpoint-2000/added_tokens.json\n",
1103
- "Feature extractor saved in ./preprocessor_config.json\n",
1104
- "tokenizer config file saved in ./tokenizer_config.json\n",
1105
- "Special tokens file saved in ./special_tokens_map.json\n",
1106
- "added tokens file saved in ./added_tokens.json\n",
1107
- "***** Running Evaluation *****\n",
1108
- " Num examples: Unknown\n",
1109
- " Batch size = 16\n",
1110
- "Reading metadata...: 15520it [00:00, 72511.74it/s]\n",
1111
- "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender. If input_length, segment, down_votes, age, up_votes, accent, locale, path, client_id, gender are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1112
  ]
1113
  }
1114
  ],
@@ -1147,7 +1309,7 @@
1147
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1148
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1149
  " \"language\": \"es\",\n",
1150
- " \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
1151
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1152
  " \"tasks\": \"automatic-speech-recognition\",\n",
1153
  " \"tags\": \"whisper-event\",\n",
 
226
  "name": "stderr",
227
  "output_type": "stream",
228
  "text": [
229
+ "Reading metadata...: 230467it [00:05, 42062.14it/s]\n"
230
  ]
231
  },
232
  {
 
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 8,
293
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
294
  "metadata": {},
295
  "outputs": [],
 
333
  },
334
  {
335
  "cell_type": "code",
336
+ "execution_count": 9,
337
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
338
  "metadata": {},
339
+ "outputs": [
340
+ {
341
+ "data": {
342
+ "application/vnd.jupyter.widget-view+json": {
343
+ "model_id": "9769d7a9ab1148b8af2bd69abf74d5d6",
344
+ "version_major": 2,
345
+ "version_minor": 0
346
+ },
347
+ "text/plain": [
348
+ "Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
349
+ ]
350
+ },
351
+ "metadata": {},
352
+ "output_type": "display_data"
353
+ },
354
+ {
355
+ "data": {
356
+ "application/vnd.jupyter.widget-view+json": {
357
+ "model_id": "a2b4d68d48d8439096430441c976bd21",
358
+ "version_major": 2,
359
+ "version_minor": 0
360
+ },
361
+ "text/plain": [
362
+ "Downloading: 0%| | 0.00/837 [00:00<?, ?B/s]"
363
+ ]
364
+ },
365
+ "metadata": {},
366
+ "output_type": "display_data"
367
+ },
368
+ {
369
+ "data": {
370
+ "application/vnd.jupyter.widget-view+json": {
371
+ "model_id": "ceae9b86f1674939b330c81cb34c625a",
372
+ "version_major": 2,
373
+ "version_minor": 0
374
+ },
375
+ "text/plain": [
376
+ "Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
377
+ ]
378
+ },
379
+ "metadata": {},
380
+ "output_type": "display_data"
381
+ },
382
+ {
383
+ "data": {
384
+ "application/vnd.jupyter.widget-view+json": {
385
+ "model_id": "715ade22144945178519b742a88828d7",
386
+ "version_major": 2,
387
+ "version_minor": 0
388
+ },
389
+ "text/plain": [
390
+ "Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
391
+ ]
392
+ },
393
+ "metadata": {},
394
+ "output_type": "display_data"
395
+ },
396
+ {
397
+ "data": {
398
+ "application/vnd.jupyter.widget-view+json": {
399
+ "model_id": "381fff2e1ffa4331923ca1b4b3dc965d",
400
+ "version_major": 2,
401
+ "version_minor": 0
402
+ },
403
+ "text/plain": [
404
+ "Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
405
+ ]
406
+ },
407
+ "metadata": {},
408
+ "output_type": "display_data"
409
+ },
410
+ {
411
+ "data": {
412
+ "application/vnd.jupyter.widget-view+json": {
413
+ "model_id": "7cf108c742b8431187e1e3494610df3c",
414
+ "version_major": 2,
415
+ "version_minor": 0
416
+ },
417
+ "text/plain": [
418
+ "Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
419
+ ]
420
+ },
421
+ "metadata": {},
422
+ "output_type": "display_data"
423
+ },
424
+ {
425
+ "data": {
426
+ "application/vnd.jupyter.widget-view+json": {
427
+ "model_id": "31a51dd942054666b52dce912df102a3",
428
+ "version_major": 2,
429
+ "version_minor": 0
430
+ },
431
+ "text/plain": [
432
+ "Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
433
+ ]
434
+ },
435
+ "metadata": {},
436
+ "output_type": "display_data"
437
+ }
438
+ ],
439
  "source": [
440
  "from transformers import WhisperProcessor\n",
441
  "\n",
442
+ "processor = WhisperProcessor.from_pretrained(\"juancopi81/whisper-medium-es-common-fleurs\", language=\"Spanish\", task=\"transcribe\")"
443
  ]
444
  },
445
  {
 
460
  },
461
  {
462
  "cell_type": "code",
463
+ "execution_count": 10,
464
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
465
  "metadata": {},
466
  "outputs": [
 
471
  " 'sentence': Value(dtype='string', id=None)}"
472
  ]
473
  },
474
+ "execution_count": 10,
475
  "metadata": {},
476
  "output_type": "execute_result"
477
  }
 
497
  },
498
  {
499
  "cell_type": "code",
500
+ "execution_count": 11,
501
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
502
  "metadata": {},
503
  "outputs": [],
 
517
  },
518
  {
519
  "cell_type": "code",
520
+ "execution_count": 12,
521
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
522
  "metadata": {},
523
  "outputs": [],
 
544
  },
545
  {
546
  "cell_type": "code",
547
+ "execution_count": 13,
548
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
549
  "metadata": {},
550
  "outputs": [],
 
580
  },
581
  {
582
  "cell_type": "code",
583
+ "execution_count": 14,
584
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
585
  "metadata": {},
586
  "outputs": [],
 
598
  },
599
  {
600
  "cell_type": "code",
601
+ "execution_count": 15,
602
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
603
  "metadata": {},
604
  "outputs": [],
 
619
  },
620
  {
621
  "cell_type": "code",
622
+ "execution_count": 16,
623
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
624
  "metadata": {},
625
  "outputs": [],
 
640
  },
641
  {
642
  "cell_type": "code",
643
+ "execution_count": 17,
644
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
645
  "metadata": {},
646
  "outputs": [],
 
710
  },
711
  {
712
  "cell_type": "code",
713
+ "execution_count": 18,
714
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
715
  "metadata": {},
716
  "outputs": [],
 
758
  },
759
  {
760
  "cell_type": "code",
761
+ "execution_count": 19,
762
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
763
  "metadata": {},
764
  "outputs": [],
 
785
  },
786
  {
787
  "cell_type": "code",
788
+ "execution_count": 20,
789
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
790
  "metadata": {},
791
  "outputs": [],
 
814
  },
815
  {
816
  "cell_type": "code",
817
+ "execution_count": 21,
818
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
819
  "metadata": {},
820
  "outputs": [],
 
864
  },
865
  {
866
  "cell_type": "code",
867
+ "execution_count": 30,
868
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
869
  "metadata": {},
870
+ "outputs": [
871
+ {
872
+ "name": "stderr",
873
+ "output_type": "stream",
874
+ "text": [
875
+ "loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--juancopi81--whisper-medium-es-common-fleurs/snapshots/ceeaee568ae1c40f6c1eb6bb1de818ae909f60fd/config.json\n",
876
+ "Model config WhisperConfig {\n",
877
+ " \"_name_or_path\": \"juancopi81/whisper-medium-es\",\n",
878
+ " \"activation_dropout\": 0.0,\n",
879
+ " \"activation_function\": \"gelu\",\n",
880
+ " \"architectures\": [\n",
881
+ " \"WhisperForConditionalGeneration\"\n",
882
+ " ],\n",
883
+ " \"attention_dropout\": 0.0,\n",
884
+ " \"begin_suppress_tokens\": [\n",
885
+ " 220,\n",
886
+ " 50257\n",
887
+ " ],\n",
888
+ " \"bos_token_id\": 50257,\n",
889
+ " \"d_model\": 1024,\n",
890
+ " \"decoder_attention_heads\": 16,\n",
891
+ " \"decoder_ffn_dim\": 4096,\n",
892
+ " \"decoder_layerdrop\": 0.0,\n",
893
+ " \"decoder_layers\": 24,\n",
894
+ " \"decoder_start_token_id\": 50258,\n",
895
+ " \"dropout\": 0.1,\n",
896
+ " \"encoder_attention_heads\": 16,\n",
897
+ " \"encoder_ffn_dim\": 4096,\n",
898
+ " \"encoder_layerdrop\": 0.0,\n",
899
+ " \"encoder_layers\": 24,\n",
900
+ " \"eos_token_id\": 50257,\n",
901
+ " \"forced_decoder_ids\": null,\n",
902
+ " \"init_std\": 0.02,\n",
903
+ " \"is_encoder_decoder\": true,\n",
904
+ " \"max_length\": 448,\n",
905
+ " \"max_source_positions\": 1500,\n",
906
+ " \"max_target_positions\": 448,\n",
907
+ " \"model_type\": \"whisper\",\n",
908
+ " \"num_hidden_layers\": 24,\n",
909
+ " \"num_mel_bins\": 80,\n",
910
+ " \"pad_token_id\": 50257,\n",
911
+ " \"scale_embedding\": false,\n",
912
+ " \"suppress_tokens\": [],\n",
913
+ " \"torch_dtype\": \"float32\",\n",
914
+ " \"transformers_version\": \"4.26.0.dev0\",\n",
915
+ " \"use_cache\": false,\n",
916
+ " \"vocab_size\": 51865\n",
917
+ "}\n",
918
+ "\n",
919
+ "loading weights file pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/hub/models--juancopi81--whisper-medium-es-common-fleurs/snapshots/ceeaee568ae1c40f6c1eb6bb1de818ae909f60fd/pytorch_model.bin\n",
920
+ "All model checkpoint weights were used when initializing WhisperForConditionalGeneration.\n",
921
+ "\n",
922
+ "All the weights of WhisperForConditionalGeneration were initialized from the model checkpoint at juancopi81/whisper-medium-es-common-fleurs.\n",
923
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use WhisperForConditionalGeneration for predictions without further training.\n"
924
+ ]
925
+ }
926
+ ],
927
  "source": [
928
  "from transformers import WhisperForConditionalGeneration\n",
929
  "\n",
930
+ "model = WhisperForConditionalGeneration.from_pretrained(\"juancopi81/whisper-medium-es-common-fleurs\")"
931
  ]
932
  },
933
  {
 
940
  },
941
  {
942
  "cell_type": "code",
943
+ "execution_count": 31,
944
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
945
  "metadata": {},
946
  "outputs": [],
 
969
  },
970
  {
971
  "cell_type": "code",
972
+ "execution_count": 32,
973
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
974
  "metadata": {},
975
+ "outputs": [
976
+ {
977
+ "name": "stderr",
978
+ "output_type": "stream",
979
+ "text": [
980
+ "PyTorch: setting up devices\n"
981
+ ]
982
+ }
983
+ ],
984
  "source": [
985
  "from transformers import Seq2SeqTrainingArguments\n",
986
  "\n",
 
988
  " output_dir=\"./\",\n",
989
  " per_device_train_batch_size=32,\n",
990
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
991
+ " learning_rate=3e-6,\n",
992
  " warmup_steps=500,\n",
993
  " max_steps=5000,\n",
994
  " gradient_checkpointing=True,\n",
 
1027
  },
1028
  {
1029
  "cell_type": "code",
1030
+ "execution_count": 33,
1031
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
1032
  "metadata": {},
1033
  "outputs": [],
 
1056
  },
1057
  {
1058
  "cell_type": "code",
1059
+ "execution_count": 34,
1060
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
1061
  "metadata": {},
1062
  "outputs": [
 
1064
  "name": "stderr",
1065
  "output_type": "stream",
1066
  "text": [
1067
+ "/home/ubuntu/whisper-medium-es-common-fleurs-5k-10k/./ is already a clone of https://huggingface.co/juancopi81/whisper-medium-es-common-fleurs-5k-10k. Make sure you pull the latest changes with `repo.git_pull()`.\n",
1068
  "max_steps is given, it will override any value given in num_train_epochs\n",
1069
  "Using cuda_amp half precision backend\n"
1070
  ]
 
1095
  },
1096
  {
1097
  "cell_type": "code",
1098
+ "execution_count": 35,
1099
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
1100
  "metadata": {},
1101
  "outputs": [
 
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
+ "execution_count": 28,
1145
  "id": "ced90915-84df-4538-9034-f6c8c85de2df",
1146
  "metadata": {},
1147
  "outputs": [
1148
  {
1149
  "data": {
1150
  "application/vnd.jupyter.widget-view+json": {
1151
+ "model_id": "386d02833fb0467980c51f82505ce44a",
1152
  "version_major": 2,
1153
  "version_minor": 0
1154
  },
 
1168
  },
1169
  {
1170
  "cell_type": "code",
1171
+ "execution_count": 29,
1172
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1173
  "metadata": {},
1174
  "outputs": [
 
1186
  " Gradient Accumulation steps = 1\n",
1187
  " Total optimization steps = 5000\n",
1188
  " Number of trainable parameters = 763857920\n",
1189
+ "Reading metadata...: 230467it [00:05, 39424.34it/s]\n",
1190
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1191
  ]
1192
  },
 
1196
  "\n",
1197
  " <div>\n",
1198
  " \n",
1199
+ " <progress value='1038' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1200
+ " [1038/5000 4:24:08 < 16:50:09, 0.07 it/s, Epoch 0.21/9223372036854775807]\n",
1201
  " </div>\n",
1202
  " <table border=\"1\" class=\"dataframe\">\n",
1203
  " <thead>\n",
 
1211
  " <tbody>\n",
1212
  " <tr>\n",
1213
  " <td>1000</td>\n",
1214
+ " <td>0.096600</td>\n",
1215
+ " <td>0.234865</td>\n",
1216
+ " <td>7.640585</td>\n",
1217
  " </tr>\n",
1218
  " </tbody>\n",
1219
  "</table><p>"
 
1232
  "***** Running Evaluation *****\n",
1233
  " Num examples: Unknown\n",
1234
  " Batch size = 16\n",
1235
+ "Reading metadata...: 15520it [00:00, 83747.62it/s]\n",
1236
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: up_votes, client_id, down_votes, gender, accent, segment, path, locale, input_length, age. If up_votes, client_id, down_votes, gender, accent, segment, path, locale, input_length, age are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1237
  "Saving model checkpoint to ./checkpoint-1000\n",
1238
  "Configuration saved in ./checkpoint-1000/config.json\n",
1239
  "Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
 
1244
  "Feature extractor saved in ./preprocessor_config.json\n",
1245
  "tokenizer config file saved in ./tokenizer_config.json\n",
1246
  "Special tokens file saved in ./special_tokens_map.json\n",
1247
+ "added tokens file saved in ./added_tokens.json\n"
1248
+ ]
1249
+ },
1250
+ {
1251
+ "ename": "KeyboardInterrupt",
1252
+ "evalue": "",
1253
+ "output_type": "error",
1254
+ "traceback": [
1255
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1256
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1257
+ "Cell \u001b[0;32mIn[29], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
1258
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/transformers/trainer.py:1534\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1529\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1531\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1532\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1533\u001b[0m )\n\u001b[0;32m-> 1534\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1535\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1536\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1259
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/transformers/trainer.py:1756\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1753\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_load_rng_state(resume_from_checkpoint)\n\u001b[1;32m 1755\u001b[0m step \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[0;32m-> 1756\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m step, inputs \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(epoch_iterator):\n\u001b[1;32m 1757\u001b[0m \n\u001b[1;32m 1758\u001b[0m \u001b[38;5;66;03m# Skip past any already trained steps if resuming training\u001b[39;00m\n\u001b[1;32m 1759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m steps_trained_in_current_epoch \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1760\u001b[0m steps_trained_in_current_epoch \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
1260
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/torch/utils/data/dataloader.py:628\u001b[0m, in \u001b[0;36m_BaseDataLoaderIter.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 625\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sampler_iter \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 626\u001b[0m \u001b[38;5;66;03m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001b[39;00m\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset() \u001b[38;5;66;03m# type: ignore[call-arg]\u001b[39;00m\n\u001b[0;32m--> 628\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_next_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 629\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_yielded \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_kind \u001b[38;5;241m==\u001b[39m _DatasetKind\u001b[38;5;241m.\u001b[39mIterable \u001b[38;5;129;01mand\u001b[39;00m \\\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_IterableDataset_len_called \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \\\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_yielded \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_IterableDataset_len_called:\n",
1261
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/torch/utils/data/dataloader.py:671\u001b[0m, in \u001b[0;36m_SingleProcessDataLoaderIter._next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 669\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_next_data\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 670\u001b[0m index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_next_index() \u001b[38;5;66;03m# may raise StopIteration\u001b[39;00m\n\u001b[0;32m--> 671\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset_fetcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfetch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# may raise StopIteration\u001b[39;00m\n\u001b[1;32m 672\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pin_memory:\n\u001b[1;32m 673\u001b[0m data \u001b[38;5;241m=\u001b[39m _utils\u001b[38;5;241m.\u001b[39mpin_memory\u001b[38;5;241m.\u001b[39mpin_memory(data, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pin_memory_device)\n",
1262
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py:34\u001b[0m, in \u001b[0;36m_IterableDatasetFetcher.fetch\u001b[0;34m(self, possibly_batched_index)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m possibly_batched_index:\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 34\u001b[0m data\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataset_iter\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mended \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
1263
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/formatting/dataset_wrappers/torch_iterable_dataset.py:35\u001b[0m, in \u001b[0;36mTorchIterableDataset.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 33\u001b[0m worker_info \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mget_worker_info()\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m worker_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m: \u001b[38;5;66;03m# single-process data loading, return the full iterator\u001b[39;00m\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m IterableDataset\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__iter__\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# in a worker process\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;66;03m# check if there aren't too many workers\u001b[39;00m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m worker_info\u001b[38;5;241m.\u001b[39mid \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_shards \u001b[38;5;241m<\u001b[39m worker_info\u001b[38;5;241m.\u001b[39mnum_workers:\n",
1264
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/iterable_dataset.py:758\u001b[0m, in \u001b[0;36mIterableDataset.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 757\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__iter__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 758\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, example \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_iter():\n\u001b[1;32m 759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfeatures:\n\u001b[1;32m 760\u001b[0m \u001b[38;5;66;03m# `IterableDataset` automatically fills missing columns with None.\u001b[39;00m\n\u001b[1;32m 761\u001b[0m \u001b[38;5;66;03m# This is done with `_apply_feature_types`.\u001b[39;00m\n\u001b[1;32m 762\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m _apply_feature_types(example, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfeatures, token_per_repo_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_token_per_repo_id)\n",
1265
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/iterable_dataset.py:748\u001b[0m, in \u001b[0;36mIterableDataset._iter\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 746\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 747\u001b[0m ex_iterable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ex_iterable\n\u001b[0;32m--> 748\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m ex_iterable\n",
1266
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/iterable_dataset.py:515\u001b[0m, in \u001b[0;36mFilteredExamplesIterable.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 513\u001b[0m current_idx \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m batch_idx \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 515\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, example \u001b[38;5;129;01min\u001b[39;00m iterator:\n\u001b[1;32m 516\u001b[0m \u001b[38;5;66;03m# If not batched, we can apply the filtering function direcly\u001b[39;00m\n\u001b[1;32m 517\u001b[0m inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(example)\n\u001b[1;32m 518\u001b[0m function_args \u001b[38;5;241m=\u001b[39m [inputs] \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_columns \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m [inputs[col] \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_columns]\n",
1267
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/iterable_dataset.py:570\u001b[0m, in \u001b[0;36mBufferShuffledExamplesIterable.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;66;03m# this is the shuffle buffer that we keep in memory\u001b[39;00m\n\u001b[1;32m 569\u001b[0m mem_buffer \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 570\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mex_iterable:\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mem_buffer) \u001b[38;5;241m==\u001b[39m buffer_size: \u001b[38;5;66;03m# if the buffer is full, pick and example from it\u001b[39;00m\n\u001b[1;32m 572\u001b[0m i \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(indices_iterator)\n",
1268
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/datasets/iterable_dataset.py:433\u001b[0m, in \u001b[0;36mMappedExamplesIterable.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 431\u001b[0m function_args\u001b[38;5;241m.\u001b[39mappend(current_idx)\n\u001b[1;32m 432\u001b[0m transformed_example \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(example) \u001b[38;5;66;03m# this will be updated with the function output\u001b[39;00m\n\u001b[0;32m--> 433\u001b[0m transformed_example\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunction_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn_kwargs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 434\u001b[0m \u001b[38;5;66;03m# then we remove the unwanted columns\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mremove_columns:\n",
1269
+ "Cell \u001b[0;32mIn[13], line 6\u001b[0m, in \u001b[0;36mprepare_dataset\u001b[0;34m(batch)\u001b[0m\n\u001b[1;32m 3\u001b[0m audio \u001b[38;5;241m=\u001b[39m batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maudio\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# compute log-Mel input features from input audio array \u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_features\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfeature_extractor\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marray\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_rate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudio\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_rate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39minput_features[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# compute input length of audio sample in seconds\u001b[39;00m\n\u001b[1;32m 8\u001b[0m batch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_length\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(audio[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marray\u001b[39m\u001b[38;5;124m\"\u001b[39m]) \u001b[38;5;241m/\u001b[39m audio[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msampling_rate\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
1270
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/transformers/models/whisper/feature_extraction_whisper.py:314\u001b[0m, in \u001b[0;36mWhisperFeatureExtractor.__call__\u001b[0;34m(self, raw_speech, truncation, pad_to_multiple_of, return_tensors, return_attention_mask, padding, max_length, sampling_rate, **kwargs)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;66;03m# make sure list is in array format\u001b[39;00m\n\u001b[1;32m 312\u001b[0m input_features \u001b[38;5;241m=\u001b[39m padded_inputs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_features\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 314\u001b[0m input_features \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_np_extract_fbank_features(waveform) \u001b[38;5;28;01mfor\u001b[39;00m waveform \u001b[38;5;129;01min\u001b[39;00m input_features[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(input_features[\u001b[38;5;241m0\u001b[39m], List):\n\u001b[1;32m 317\u001b[0m padded_inputs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_features\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [np\u001b[38;5;241m.\u001b[39masarray(feature, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat32) \u001b[38;5;28;01mfor\u001b[39;00m feature \u001b[38;5;129;01min\u001b[39;00m input_features]\n",
1271
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/transformers/models/whisper/feature_extraction_whisper.py:314\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;66;03m# make sure list is in array format\u001b[39;00m\n\u001b[1;32m 312\u001b[0m input_features \u001b[38;5;241m=\u001b[39m padded_inputs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_features\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 314\u001b[0m input_features \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_np_extract_fbank_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwaveform\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m waveform \u001b[38;5;129;01min\u001b[39;00m input_features[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(input_features[\u001b[38;5;241m0\u001b[39m], List):\n\u001b[1;32m 317\u001b[0m padded_inputs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_features\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m [np\u001b[38;5;241m.\u001b[39masarray(feature, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat32) \u001b[38;5;28;01mfor\u001b[39;00m feature \u001b[38;5;129;01min\u001b[39;00m input_features]\n",
1272
+ "File \u001b[0;32m~/hf_env/lib/python3.8/site-packages/transformers/models/whisper/feature_extraction_whisper.py:207\u001b[0m, in \u001b[0;36mWhisperFeatureExtractor._np_extract_fbank_features\u001b[0;34m(self, waveform)\u001b[0m\n\u001b[1;32m 205\u001b[0m frames \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfram_wave(waveform)\n\u001b[1;32m 206\u001b[0m stft \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstft(frames, window\u001b[38;5;241m=\u001b[39mwindow)\n\u001b[0;32m--> 207\u001b[0m magnitudes \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mabs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstft\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[1;32m 209\u001b[0m filters \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmel_filters\n\u001b[1;32m 210\u001b[0m mel_spec \u001b[38;5;241m=\u001b[39m filters \u001b[38;5;241m@\u001b[39m magnitudes\n",
1273
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
1274
  ]
1275
  }
1276
  ],
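The traceback above (cell In[29]) is dense with ANSI escape codes; the user frame it bottoms out in is the notebook's prepare_dataset (cell In[13]), interrupted by hand during log-Mel feature extraction. Reconstructed below as a sketch from the visible frames only; the rest of the cell is not shown in the traceback:

    def prepare_dataset(batch):
        # load the (already resampled) audio column
        audio = batch["audio"]

        # compute log-Mel input features from input audio array
        batch["input_features"] = processor.feature_extractor(
            audio["array"], sampling_rate=audio["sampling_rate"]
        ).input_features[0]

        # compute input length of audio sample in seconds
        batch["input_length"] = len(audio["array"]) / audio["sampling_rate"]
        ...  # remainder of the cell not visible in the traceback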
 
1309
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1310
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1311
  " \"language\": \"es\",\n",
1312
+ " \"model_name\": \"Whisper Mediuem Es - Juan Pineros\", # a 'pretty' name for your model\n",
1313
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1314
  " \"tasks\": \"automatic-speech-recognition\",\n",
1315
  " \"tags\": \"whisper-event\",\n",
fine-tune-whisper-streaming.ipynb CHANGED
@@ -226,7 +226,7 @@
226
  "name": "stderr",
227
  "output_type": "stream",
228
  "text": [
229
- "Reading metadata...: 230467it [00:05, 42062.14it/s]\n"
230
  ]
231
  },
232
  {
@@ -336,106 +336,7 @@
336
  "execution_count": 9,
337
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
338
  "metadata": {},
339
- "outputs": [
340
- {
341
- "data": {
342
- "application/vnd.jupyter.widget-view+json": {
343
- "model_id": "9769d7a9ab1148b8af2bd69abf74d5d6",
344
- "version_major": 2,
345
- "version_minor": 0
346
- },
347
- "text/plain": [
348
- "Downloading: 0%| | 0.00/185k [00:00<?, ?B/s]"
349
- ]
350
- },
351
- "metadata": {},
352
- "output_type": "display_data"
353
- },
354
- {
355
- "data": {
356
- "application/vnd.jupyter.widget-view+json": {
357
- "model_id": "a2b4d68d48d8439096430441c976bd21",
358
- "version_major": 2,
359
- "version_minor": 0
360
- },
361
- "text/plain": [
362
- "Downloading: 0%| | 0.00/837 [00:00<?, ?B/s]"
363
- ]
364
- },
365
- "metadata": {},
366
- "output_type": "display_data"
367
- },
368
- {
369
- "data": {
370
- "application/vnd.jupyter.widget-view+json": {
371
- "model_id": "ceae9b86f1674939b330c81cb34c625a",
372
- "version_major": 2,
373
- "version_minor": 0
374
- },
375
- "text/plain": [
376
- "Downloading: 0%| | 0.00/1.04M [00:00<?, ?B/s]"
377
- ]
378
- },
379
- "metadata": {},
380
- "output_type": "display_data"
381
- },
382
- {
383
- "data": {
384
- "application/vnd.jupyter.widget-view+json": {
385
- "model_id": "715ade22144945178519b742a88828d7",
386
- "version_major": 2,
387
- "version_minor": 0
388
- },
389
- "text/plain": [
390
- "Downloading: 0%| | 0.00/494k [00:00<?, ?B/s]"
391
- ]
392
- },
393
- "metadata": {},
394
- "output_type": "display_data"
395
- },
396
- {
397
- "data": {
398
- "application/vnd.jupyter.widget-view+json": {
399
- "model_id": "381fff2e1ffa4331923ca1b4b3dc965d",
400
- "version_major": 2,
401
- "version_minor": 0
402
- },
403
- "text/plain": [
404
- "Downloading: 0%| | 0.00/52.7k [00:00<?, ?B/s]"
405
- ]
406
- },
407
- "metadata": {},
408
- "output_type": "display_data"
409
- },
410
- {
411
- "data": {
412
- "application/vnd.jupyter.widget-view+json": {
413
- "model_id": "7cf108c742b8431187e1e3494610df3c",
414
- "version_major": 2,
415
- "version_minor": 0
416
- },
417
- "text/plain": [
418
- "Downloading: 0%| | 0.00/2.11k [00:00<?, ?B/s]"
419
- ]
420
- },
421
- "metadata": {},
422
- "output_type": "display_data"
423
- },
424
- {
425
- "data": {
426
- "application/vnd.jupyter.widget-view+json": {
427
- "model_id": "31a51dd942054666b52dce912df102a3",
428
- "version_major": 2,
429
- "version_minor": 0
430
- },
431
- "text/plain": [
432
- "Downloading: 0%| | 0.00/2.06k [00:00<?, ?B/s]"
433
- ]
434
- },
435
- "metadata": {},
436
- "output_type": "display_data"
437
- }
438
- ],
439
  "source": [
440
  "from transformers import WhisperProcessor\n",
441
  "\n",
@@ -867,36 +768,7 @@
867
  "execution_count": 22,
868
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
869
  "metadata": {},
870
- "outputs": [
871
- {
872
- "data": {
873
- "application/vnd.jupyter.widget-view+json": {
874
- "model_id": "3b21514a2fff4878a2f569d2cc28b925",
875
- "version_major": 2,
876
- "version_minor": 0
877
- },
878
- "text/plain": [
879
- "Downloading: 0%| | 0.00/1.04k [00:00<?, ?B/s]"
880
- ]
881
- },
882
- "metadata": {},
883
- "output_type": "display_data"
884
- },
885
- {
886
- "data": {
887
- "application/vnd.jupyter.widget-view+json": {
888
- "model_id": "28d70b74dbd844328ad9d325c9babfe1",
889
- "version_major": 2,
890
- "version_minor": 0
891
- },
892
- "text/plain": [
893
- "Downloading: 0%| | 0.00/3.06G [00:00<?, ?B/s]"
894
- ]
895
- },
896
- "metadata": {},
897
- "output_type": "display_data"
898
- }
899
- ],
900
  "source": [
901
  "from transformers import WhisperForConditionalGeneration\n",
902
  "\n",
@@ -920,7 +792,8 @@
920
  "source": [
921
  "model.config.forced_decoder_ids = None\n",
922
  "model.config.suppress_tokens = []\n",
923
- "model.config.use_cache = False"
 
924
  ]
925
  },
926
  {
@@ -952,7 +825,7 @@
952
  " output_dir=\"./\",\n",
953
  " per_device_train_batch_size=32,\n",
954
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
955
- " learning_rate=1e-5,\n",
956
  " warmup_steps=500,\n",
957
  " max_steps=5000,\n",
958
  " gradient_checkpointing=True,\n",
@@ -1112,7 +985,7 @@
1112
  {
1113
  "data": {
1114
  "application/vnd.jupyter.widget-view+json": {
1115
- "model_id": "386d02833fb0467980c51f82505ce44a",
1116
  "version_major": 2,
1117
  "version_minor": 0
1118
  },
@@ -1150,7 +1023,7 @@
1150
  " Gradient Accumulation steps = 1\n",
1151
  " Total optimization steps = 5000\n",
1152
  " Number of trainable parameters = 763857920\n",
1153
- "Reading metadata...: 230467it [00:05, 39424.34it/s]\n",
1154
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1155
  ]
1156
  },
@@ -1161,7 +1034,7 @@
1161
  " <div>\n",
1162
  " \n",
1163
  " <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1164
- " [1001/5000 1:45:40 < 7:03:01, 0.16 it/s, Epoch 0.20/9223372036854775807]\n",
1165
  " </div>\n",
1166
  " <table border=\"1\" class=\"dataframe\">\n",
1167
  " <thead>\n",
@@ -1189,8 +1062,8 @@
1189
  "***** Running Evaluation *****\n",
1190
  " Num examples: Unknown\n",
1191
  " Batch size = 16\n",
1192
- "Reading metadata...: 15520it [00:00, 83747.62it/s]\n",
1193
- "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: up_votes, client_id, down_votes, gender, accent, segment, path, locale, input_length, age. If up_votes, client_id, down_votes, gender, accent, segment, path, locale, input_length, age are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1194
  ]
1195
  }
1196
  ],
 
226
  "name": "stderr",
227
  "output_type": "stream",
228
  "text": [
229
+ "Reading metadata...: 230467it [00:05, 45342.52it/s]\n"
230
  ]
231
  },
232
  {
 
336
  "execution_count": 9,
337
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
338
  "metadata": {},
339
+ "outputs": [],
340
  "source": [
341
  "from transformers import WhisperProcessor\n",
342
  "\n",
 
768
  "execution_count": 22,
769
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
770
  "metadata": {},
771
+ "outputs": [],
772
  "source": [
773
  "from transformers import WhisperForConditionalGeneration\n",
774
  "\n",
 
792
  "source": [
793
  "model.config.forced_decoder_ids = None\n",
794
  "model.config.suppress_tokens = []\n",
795
+ "model.config.use_cache = False\n",
796
+ "model.config.dropout = 0.1"
797
  ]
798
  },
799
  {
 
825
  " output_dir=\"./\",\n",
826
  " per_device_train_batch_size=32,\n",
827
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
828
+ " learning_rate=3e-6,\n",
829
  " warmup_steps=500,\n",
830
  " max_steps=5000,\n",
831
  " gradient_checkpointing=True,\n",
 
985
  {
986
  "data": {
987
  "application/vnd.jupyter.widget-view+json": {
988
+ "model_id": "dca83cda148e49d9ba1b129e3b58fc2f",
989
  "version_major": 2,
990
  "version_minor": 0
991
  },
 
1023
  " Gradient Accumulation steps = 1\n",
1024
  " Total optimization steps = 5000\n",
1025
  " Number of trainable parameters = 763857920\n",
1026
+ "Reading metadata...: 230467it [00:02, 96908.84it/s] \n",
1027
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1028
  ]
1029
  },
 
1034
  " <div>\n",
1035
  " \n",
1036
  " <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1037
+ " [1001/5000 1:45:29 < 7:02:15, 0.16 it/s, Epoch 0.20/9223372036854775807]\n",
1038
  " </div>\n",
1039
  " <table border=\"1\" class=\"dataframe\">\n",
1040
  " <thead>\n",
 
1062
  "***** Running Evaluation *****\n",
1063
  " Num examples: Unknown\n",
1064
  " Batch size = 16\n",
1065
+ "Reading metadata...: 15520it [00:00, 92814.18it/s]\n",
1066
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: accent, up_votes, locale, age, input_length, path, client_id, segment, gender, down_votes. If accent, up_votes, locale, age, input_length, path, client_id, segment, gender, down_votes are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1067
  ]
1068
  }
1069
  ],
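Consolidating the model-side edits that appear across both notebooks: the checkpoint is switched to juancopi81/whisper-medium-es-common-fleurs, and a dropout override joins the existing config tweaks. A sketch assembled from the hunks above (every line appears somewhere in the diff):

    from transformers import WhisperForConditionalGeneration, WhisperProcessor

    processor = WhisperProcessor.from_pretrained(
        "juancopi81/whisper-medium-es-common-fleurs", language="Spanish", task="transcribe"
    )
    model = WhisperForConditionalGeneration.from_pretrained(
        "juancopi81/whisper-medium-es-common-fleurs"
    )

    model.config.forced_decoder_ids = None  # no forced language/task tokens during training
    model.config.suppress_tokens = []
    model.config.use_cache = False          # caching conflicts with gradient checkpointing
    model.config.dropout = 0.1              # new in this commit: regularizes the medium model

The dropout value matches the "dropout": 0.1 already present in the checkpoint's config.json dumped above.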
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a3249e6e15f570c9d0efc91ec99b7441c886cb12122c58a274ac6a1822c3b08
3
  size 3055754841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05aa841841d192dfcf9039758a9124e7e20b2ab8da5125aa82332fa0c718563
3
  size 3055754841
runs/Dec14_14-23-12_132-145-140-45/events.out.tfevents.1671027857.132-145-140-45.618344.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfcadc9139e00668085bff5587670f600edc27ed2cc2099a36aa9ace07a80d2
3
- size 10894
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3b7c997ea40e8c65277496ae69ac65cd1c79706f4a1c18a8f918fee2054fa7
3
+ size 11051
runs/Dec14_18-54-17_132-145-140-45/1671044156.1678598/events.out.tfevents.1671044156.132-145-140-45.618344.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96cb454b363871ef431445ee594bfa3a4fa9edb374fc5234bb77e2b891d7a8c
3
+ size 5864
runs/Dec14_18-54-17_132-145-140-45/events.out.tfevents.1671044156.132-145-140-45.618344.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ef8b9cb3665f837bea393f7fcc3a3b0684bd35babe5d5ff95e9699a630214c
3
+ size 4311
runs/Dec14_19-08-48_132-145-140-45/1671044964.476709/events.out.tfevents.1671044964.132-145-140-45.1598466.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:009cd0e80b8ba9c5683708fa63d4b2b983bbfb624c96ef59a3b566b435a7c7da
3
+ size 5864
runs/Dec14_19-08-48_132-145-140-45/events.out.tfevents.1671044964.132-145-140-45.1598466.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ca4c46fb88d750e940405304e4a532d88a53b1800c33ac9058e5f787380b8f
3
+ size 10894
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cba2e3c972f6ddedbda56d25f4cd1efc0f88bae273d74a2256414dc2a071f223
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c7fbec30a355ab216bff56aaaf037f4afe0c75cf7ed4d9ff39c96f4dbfee91
3
  size 3579
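The remaining entries are Git LFS pointer files: only the sha256 oid (and, for the new event logs, the byte size) changes, while the binaries themselves live in LFS storage. A quick sanity check for a locally downloaded pytorch_model.bin against the new pointer; the local path is hypothetical:

    import hashlib
    import os

    path = "pytorch_model.bin"  # hypothetical local copy
    expected_oid = "a05aa841841d192dfcf9039758a9124e7e20b2ab8da5125aa82332fa0c718563"
    expected_size = 3055754841

    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    assert os.path.getsize(path) == expected_size, "size mismatch"
    assert digest.hexdigest() == expected_oid, "sha256 mismatch"
    print("pytorch_model.bin matches the LFS pointer")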