File size: 47,204 Bytes
5db55c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 100,
  "global_step": 3039,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09871668311944719,
      "grad_norm": 7.595918655395508,
      "learning_rate": 6.578947368421054e-06,
      "loss": 0.4785,
      "step": 100
    },
    {
      "epoch": 0.09871668311944719,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.48153268812956745,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8239186253209559,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.6804266245309105,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7476298637171638,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6014421644194009,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.5954963687693909,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.650756003676077,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.48153268812956745,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.0823918625320956,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.22680887484363688,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.14952597274343274,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.48153268812956745,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8239186253209559,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.6804266245309105,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.7476298637171638,
      "eval_loss": 0.4484286308288574,
      "eval_runtime": 204.03,
      "eval_samples_per_second": 9.925,
      "eval_steps_per_second": 0.622,
      "step": 100
    },
    {
      "epoch": 0.19743336623889438,
      "grad_norm": 10.92927074432373,
      "learning_rate": 1.3157894736842108e-05,
      "loss": 0.4112,
      "step": 200
    },
    {
      "epoch": 0.19743336623889438,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5033083152281256,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8463855421686747,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7041279873592732,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.772615050365396,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.624250951720462,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6187061477761295,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6738741593374042,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5033083152281256,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08463855421686747,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2347093291197577,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1545230100730792,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5033083152281256,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8463855421686747,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7041279873592732,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.772615050365396,
      "eval_loss": 0.3707010746002197,
      "eval_runtime": 203.8165,
      "eval_samples_per_second": 9.935,
      "eval_steps_per_second": 0.623,
      "step": 200
    },
    {
      "epoch": 0.29615004935834155,
      "grad_norm": 33.57826232910156,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.2838,
      "step": 300
    },
    {
      "epoch": 0.29615004935834155,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5243432747382974,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.860655737704918,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7234841003357693,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7913292514319573,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6432678473897245,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6380460561998335,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6920347683997495,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5243432747382974,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.0860655737704918,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.24116136677858976,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.15826585028639148,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5243432747382974,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.860655737704918,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7234841003357693,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.7913292514319573,
      "eval_loss": 0.32815688848495483,
      "eval_runtime": 203.3881,
      "eval_samples_per_second": 9.956,
      "eval_steps_per_second": 0.624,
      "step": 300
    },
    {
      "epoch": 0.39486673247778875,
      "grad_norm": 2.814175605773926,
      "learning_rate": 1.9297989031078612e-05,
      "loss": 0.2422,
      "step": 400
    },
    {
      "epoch": 0.39486673247778875,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5371321350977681,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8726051747975508,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7371123839620778,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8044143788267826,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6563831674366017,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6514166541262787,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7051169138321652,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5371321350977681,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08726051747975509,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.24570412798735927,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1608828757653565,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5371321350977681,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8726051747975508,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7371123839620778,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8044143788267826,
      "eval_loss": 0.28870803117752075,
      "eval_runtime": 203.6125,
      "eval_samples_per_second": 9.945,
      "eval_steps_per_second": 0.624,
      "step": 400
    },
    {
      "epoch": 0.49358341559723595,
      "grad_norm": 7.553986072540283,
      "learning_rate": 1.856672760511883e-05,
      "loss": 0.2369,
      "step": 500
    },
    {
      "epoch": 0.49358341559723595,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5465139245506616,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8817894528935414,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7465929290934229,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8143393245111594,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6652980546980072,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6607006284309909,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.714385034123883,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5465139245506616,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08817894528935415,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.24886430969780762,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16286786490223187,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5465139245506616,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8817894528935414,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7465929290934229,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8143393245111594,
      "eval_loss": 0.28374621272087097,
      "eval_runtime": 204.4559,
      "eval_samples_per_second": 9.904,
      "eval_steps_per_second": 0.621,
      "step": 500
    },
    {
      "epoch": 0.5923000987166831,
      "grad_norm": 32.98969268798828,
      "learning_rate": 1.783546617915905e-05,
      "loss": 0.2899,
      "step": 600
    },
    {
      "epoch": 0.5923000987166831,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5526367766146554,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8908749753110804,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7583448548291527,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8260912502468892,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6738304576911812,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.669296996730104,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7232271970632966,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5526367766146554,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08908749753110803,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.25278161827638423,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16521825004937787,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5526367766146554,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8908749753110804,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7583448548291527,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8260912502468892,
      "eval_loss": 0.26842400431632996,
      "eval_runtime": 203.2737,
      "eval_samples_per_second": 9.962,
      "eval_steps_per_second": 0.625,
      "step": 600
    },
    {
      "epoch": 0.6910167818361304,
      "grad_norm": 9.886919021606445,
      "learning_rate": 1.710420475319927e-05,
      "loss": 0.1801,
      "step": 700
    },
    {
      "epoch": 0.6910167818361304,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5653268812956745,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8955164921983014,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7669365988544341,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8332510369346238,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.683737510172223,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6793955094382355,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7320247334733945,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5653268812956745,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08955164921983014,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.255645532951478,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16665020738692476,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5653268812956745,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8955164921983014,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7669365988544341,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8332510369346238,
      "eval_loss": 0.2664617896080017,
      "eval_runtime": 203.9076,
      "eval_samples_per_second": 9.931,
      "eval_steps_per_second": 0.623,
      "step": 700
    },
    {
      "epoch": 0.7897334649555775,
      "grad_norm": 17.79576873779297,
      "learning_rate": 1.637294332723949e-05,
      "loss": 0.2279,
      "step": 800
    },
    {
      "epoch": 0.7897334649555775,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5654256369741261,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8918131542563698,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7636282836263085,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8272763183883073,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6824116708533701,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6777940607081065,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7298590424609255,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5654256369741261,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08918131542563698,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2545427612087695,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16545526367766147,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5654256369741261,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8918131542563698,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7636282836263085,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8272763183883073,
      "eval_loss": 0.2617240250110626,
      "eval_runtime": 203.217,
      "eval_samples_per_second": 9.965,
      "eval_steps_per_second": 0.625,
      "step": 800
    },
    {
      "epoch": 0.8884501480750246,
      "grad_norm": 29.348552703857422,
      "learning_rate": 1.564168190127971e-05,
      "loss": 0.2051,
      "step": 900
    },
    {
      "epoch": 0.8884501480750246,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5677463954177365,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8971953387319771,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7670353545328856,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8318190795970768,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6851778091536691,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6808611691104925,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7334481505960582,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5677463954177365,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.08971953387319773,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.25567845151096186,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16636381591941535,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5677463954177365,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.8971953387319771,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7670353545328856,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8318190795970768,
      "eval_loss": 0.26834186911582947,
      "eval_runtime": 203.4633,
      "eval_samples_per_second": 9.953,
      "eval_steps_per_second": 0.624,
      "step": 900
    },
    {
      "epoch": 0.9871668311944719,
      "grad_norm": 0.5696656703948975,
      "learning_rate": 1.491042047531993e-05,
      "loss": 0.2097,
      "step": 1000
    },
    {
      "epoch": 0.9871668311944719,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5728323128579893,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9064783725064192,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7784910132332609,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8429290934228718,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.692767963880535,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6886411963231704,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.741697294005231,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5728323128579893,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09064783725064192,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.259497004411087,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1685858186845744,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5728323128579893,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9064783725064192,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7784910132332609,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8429290934228718,
      "eval_loss": 0.2445395439863205,
      "eval_runtime": 203.8284,
      "eval_samples_per_second": 9.935,
      "eval_steps_per_second": 0.623,
      "step": 1000
    },
    {
      "epoch": 1.085883514313919,
      "grad_norm": 5.538768291473389,
      "learning_rate": 1.4179159049360148e-05,
      "loss": 0.1047,
      "step": 1100
    },
    {
      "epoch": 1.085883514313919,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5813253012048193,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9092435315030615,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7825399960497729,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8449535848311278,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6984402538800855,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6944231453526992,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7466897096167746,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5813253012048193,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09092435315030614,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2608466653499243,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16899071696622553,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5813253012048193,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9092435315030615,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7825399960497729,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8449535848311278,
      "eval_loss": 0.23973241448402405,
      "eval_runtime": 203.3277,
      "eval_samples_per_second": 9.959,
      "eval_steps_per_second": 0.625,
      "step": 1100
    },
    {
      "epoch": 1.1846001974333662,
      "grad_norm": 6.3217902183532715,
      "learning_rate": 1.3447897623400368e-05,
      "loss": 0.0984,
      "step": 1200
    },
    {
      "epoch": 1.1846001974333662,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5921390479952597,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151688722101521,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7911317400750543,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8541378629271184,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7083777420863926,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7045087061752225,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.755835586028989,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5921390479952597,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09151688722101521,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2637105800250181,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1708275725854237,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5921390479952597,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9151688722101521,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7911317400750543,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8541378629271184,
      "eval_loss": 0.2229250818490982,
      "eval_runtime": 203.6954,
      "eval_samples_per_second": 9.941,
      "eval_steps_per_second": 0.623,
      "step": 1200
    },
    {
      "epoch": 1.2833168805528135,
      "grad_norm": 11.505134582519531,
      "learning_rate": 1.2716636197440586e-05,
      "loss": 0.0498,
      "step": 1300
    },
    {
      "epoch": 1.2833168805528135,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5782638751728224,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9055895714003556,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7789354137862927,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8386825992494569,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.6954513367155198,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6912109797503966,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7433337553688117,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5782638751728224,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09055895714003556,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2596451379287642,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.16773651984989138,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5782638751728224,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9055895714003556,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7789354137862927,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8386825992494569,
      "eval_loss": 0.2586788237094879,
      "eval_runtime": 204.0885,
      "eval_samples_per_second": 9.922,
      "eval_steps_per_second": 0.622,
      "step": 1300
    },
    {
      "epoch": 1.3820335636722607,
      "grad_norm": 1.1614787578582764,
      "learning_rate": 1.1985374771480804e-05,
      "loss": 0.0993,
      "step": 1400
    },
    {
      "epoch": 1.3820335636722607,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.59411416156429,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151194943709263,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7925636974126012,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8543353742840213,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7096847447019631,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.705814299351989,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7568020206373794,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.59411416156429,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09151194943709264,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2641878991375337,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17086707485680427,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.59411416156429,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9151194943709263,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7925636974126012,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8543353742840213,
      "eval_loss": 0.2147156298160553,
      "eval_runtime": 204.4732,
      "eval_samples_per_second": 9.903,
      "eval_steps_per_second": 0.621,
      "step": 1400
    },
    {
      "epoch": 1.4807502467917077,
      "grad_norm": 1.6429851055145264,
      "learning_rate": 1.1254113345521024e-05,
      "loss": 0.0621,
      "step": 1500
    },
    {
      "epoch": 1.4807502467917077,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5962867864902232,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9217855026664034,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7997234841003358,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.860853249061821,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7136002941229976,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7100783500904636,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7616828218263099,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5962867864902232,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09217855026664035,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2665744947001119,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1721706498123642,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5962867864902232,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9217855026664034,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7997234841003358,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.860853249061821,
      "eval_loss": 0.20447228848934174,
      "eval_runtime": 204.051,
      "eval_samples_per_second": 9.924,
      "eval_steps_per_second": 0.622,
      "step": 1500
    },
    {
      "epoch": 1.579466929911155,
      "grad_norm": 15.424310684204102,
      "learning_rate": 1.0522851919561243e-05,
      "loss": 0.0922,
      "step": 1600
    },
    {
      "epoch": 1.579466929911155,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5950029626703536,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9201560339719533,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7980446375666601,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.858038712225953,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7121969229630567,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7085377027234775,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7601050276866773,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.5950029626703536,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09201560339719535,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.26601487918888667,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17160774244519061,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.5950029626703536,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9201560339719533,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.7980446375666601,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.858038712225953,
      "eval_loss": 0.20542284846305847,
      "eval_runtime": 203.1105,
      "eval_samples_per_second": 9.97,
      "eval_steps_per_second": 0.625,
      "step": 1600
    },
    {
      "epoch": 1.678183613030602,
      "grad_norm": 4.0268096923828125,
      "learning_rate": 9.791590493601464e-06,
      "loss": 0.1093,
      "step": 1700
    },
    {
      "epoch": 1.678183613030602,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6026071499111199,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9248469286984001,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8095496741062611,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8681117914280071,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7204789219182511,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7169305441908291,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7677587208908659,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6026071499111199,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09248469286984001,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.26984989136875365,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1736223582856014,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6026071499111199,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9248469286984001,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8095496741062611,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8681117914280071,
      "eval_loss": 0.1992715299129486,
      "eval_runtime": 203.9449,
      "eval_samples_per_second": 9.929,
      "eval_steps_per_second": 0.623,
      "step": 1700
    },
    {
      "epoch": 1.7769002961500493,
      "grad_norm": 0.6978006958961487,
      "learning_rate": 9.060329067641682e-06,
      "loss": 0.0795,
      "step": 1800
    },
    {
      "epoch": 1.7769002961500493,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.605816709460794,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9286984001580091,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8117222990321944,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8722101520837449,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7237388658743508,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7203004837460861,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7712328450315743,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.605816709460794,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09286984001580093,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2705740996773981,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17444203041674897,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.605816709460794,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9286984001580091,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8117222990321944,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8722101520837449,
      "eval_loss": 0.19426828622817993,
      "eval_runtime": 204.0488,
      "eval_samples_per_second": 9.924,
      "eval_steps_per_second": 0.622,
      "step": 1800
    },
    {
      "epoch": 1.8756169792694966,
      "grad_norm": 2.5955662727355957,
      "learning_rate": 8.329067641681902e-06,
      "loss": 0.1181,
      "step": 1900
    },
    {
      "epoch": 1.8756169792694966,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6043353742840213,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9297847126209757,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8089077621963263,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8701362828362631,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7219374022367849,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7185761194661708,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7700958544922684,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6043353742840213,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09297847126209759,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2696359207321088,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17402725656725263,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6043353742840213,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9297847126209757,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8089077621963263,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8701362828362631,
      "eval_loss": 0.20659147202968597,
      "eval_runtime": 204.0716,
      "eval_samples_per_second": 9.923,
      "eval_steps_per_second": 0.622,
      "step": 1900
    },
    {
      "epoch": 1.9743336623889438,
      "grad_norm": 0.8803901672363281,
      "learning_rate": 7.597806215722121e-06,
      "loss": 0.0709,
      "step": 2000
    },
    {
      "epoch": 1.9743336623889438,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.603199683981829,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9302784910132332,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8102903416946474,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8703831720323919,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7214873578820934,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7181931668908422,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7699451899898953,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.603199683981829,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09302784910132333,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.27009678056488245,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1740766344064784,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.603199683981829,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9302784910132332,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8102903416946474,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8703831720323919,
      "eval_loss": 0.19996753334999084,
      "eval_runtime": 203.9747,
      "eval_samples_per_second": 9.928,
      "eval_steps_per_second": 0.623,
      "step": 2000
    },
    {
      "epoch": 2.073050345508391,
      "grad_norm": 0.9776083827018738,
      "learning_rate": 6.866544789762341e-06,
      "loss": 0.0423,
      "step": 2100
    },
    {
      "epoch": 2.073050345508391,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6150503653960103,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9359075646849694,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8209065771281848,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8796168279676081,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7317976394363255,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.728681000348013,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7793256141306921,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6150503653960103,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09359075646849695,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.27363552570939487,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17592336559352165,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6150503653960103,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9359075646849694,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8209065771281848,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8796168279676081,
      "eval_loss": 0.19277189671993256,
      "eval_runtime": 203.5377,
      "eval_samples_per_second": 9.949,
      "eval_steps_per_second": 0.624,
      "step": 2100
    },
    {
      "epoch": 2.171767028627838,
      "grad_norm": 0.3466501235961914,
      "learning_rate": 6.13528336380256e-06,
      "loss": 0.0365,
      "step": 2200
    },
    {
      "epoch": 2.171767028627838,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6160379221805253,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9352162749358088,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8210547106458621,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8787280268615445,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7324401240286031,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7293587895689079,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7796830803735836,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6160379221805253,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.0935216274935809,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2736849035486207,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17574560537230893,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6160379221805253,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9352162749358088,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8210547106458621,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8787280268615445,
      "eval_loss": 0.2016632854938507,
      "eval_runtime": 203.3562,
      "eval_samples_per_second": 9.958,
      "eval_steps_per_second": 0.625,
      "step": 2200
    },
    {
      "epoch": 2.270483711747285,
      "grad_norm": 11.319628715515137,
      "learning_rate": 5.40402193784278e-06,
      "loss": 0.0488,
      "step": 2300
    },
    {
      "epoch": 2.270483711747285,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6060635986569227,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9303772466916848,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.810784120086905,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8715188623345842,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7233919610739875,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7200670950782208,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7713978686997449,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6060635986569227,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09303772466916849,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.27026137336230166,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17430377246691683,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6060635986569227,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9303772466916848,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.810784120086905,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8715188623345842,
      "eval_loss": 0.2220190316438675,
      "eval_runtime": 203.7981,
      "eval_samples_per_second": 9.936,
      "eval_steps_per_second": 0.623,
      "step": 2300
    },
    {
      "epoch": 2.3692003948667324,
      "grad_norm": 1.2127763032913208,
      "learning_rate": 4.672760511882998e-06,
      "loss": 0.0405,
      "step": 2400
    },
    {
      "epoch": 2.3692003948667324,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.612482717756271,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9345249851866483,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8190302192376062,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8769010468101915,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7299302543873705,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7267631415592252,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7775318729565346,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.612482717756271,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09345249851866484,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.27301007307920205,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17538020936203833,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.612482717756271,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9345249851866483,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8190302192376062,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8769010468101915,
      "eval_loss": 0.20903073251247406,
      "eval_runtime": 204.2344,
      "eval_samples_per_second": 9.915,
      "eval_steps_per_second": 0.622,
      "step": 2400
    },
    {
      "epoch": 2.4679170779861797,
      "grad_norm": 0.3473336398601532,
      "learning_rate": 3.941499085923218e-06,
      "loss": 0.0327,
      "step": 2500
    },
    {
      "epoch": 2.4679170779861797,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6181611692672329,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8215484890381197,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8832214102310882,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7347609073841157,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7317605707764854,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7821769875234544,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6181611692672329,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324512,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.27384949634603983,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17664428204621765,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6181611692672329,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8215484890381197,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8832214102310882,
      "eval_loss": 0.1959671527147293,
      "eval_runtime": 204.2804,
      "eval_samples_per_second": 9.913,
      "eval_steps_per_second": 0.622,
      "step": 2500
    },
    {
      "epoch": 2.566633761105627,
      "grad_norm": 3.084174871444702,
      "learning_rate": 3.210237659963437e-06,
      "loss": 0.0369,
      "step": 2600
    },
    {
      "epoch": 2.566633761105627,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6188524590163934,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.882678253999605,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7355447502017791,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7324816047954649,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7827478877040044,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6188524590163934,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324513,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17653565079992103,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6188524590163934,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.882678253999605,
      "eval_loss": 0.19852839410305023,
      "eval_runtime": 203.3999,
      "eval_samples_per_second": 9.956,
      "eval_steps_per_second": 0.624,
      "step": 2600
    },
    {
      "epoch": 2.665350444225074,
      "grad_norm": 0.2841149568557739,
      "learning_rate": 2.4789762340036565e-06,
      "loss": 0.0493,
      "step": 2700
    },
    {
      "epoch": 2.665350444225074,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6227039304760024,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9392652577523207,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8246099150701165,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8839126999802489,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7377982498848669,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7347973078888687,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847760881684369,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6227039304760024,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.0939265257752321,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2748699716900388,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.1767825399960498,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6227039304760024,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9392652577523207,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8246099150701165,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8839126999802489,
      "eval_loss": 0.1986970454454422,
      "eval_runtime": 203.1067,
      "eval_samples_per_second": 9.97,
      "eval_steps_per_second": 0.625,
      "step": 2700
    },
    {
      "epoch": 2.7640671273445214,
      "grad_norm": 35.21987533569336,
      "learning_rate": 1.7477148080438758e-06,
      "loss": 0.0466,
      "step": 2800
    },
    {
      "epoch": 2.7640671273445214,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6232964645467114,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9393640134307722,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262887616037922,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8854434129962473,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7388327302504635,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7358363978944812,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7856277834747031,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6232964645467114,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09393640134307724,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2754295872012641,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17708868259924945,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6232964645467114,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9393640134307722,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8262887616037922,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8854434129962473,
      "eval_loss": 0.20075508952140808,
      "eval_runtime": 203.5383,
      "eval_samples_per_second": 9.949,
      "eval_steps_per_second": 0.624,
      "step": 2800
    },
    {
      "epoch": 2.8627838104639682,
      "grad_norm": 1.2376320362091064,
      "learning_rate": 1.0164533820840951e-06,
      "loss": 0.03,
      "step": 2900
    },
    {
      "epoch": 2.8627838104639682,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6220620185660676,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9396109026269011,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8844558562117322,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7376002793094979,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346429237637674,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847496074277119,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.6220620185660676,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09396109026269013,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17689117124234646,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.6220620185660676,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9396109026269011,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8844558562117322,
      "eval_loss": 0.2035462111234665,
      "eval_runtime": 202.781,
      "eval_samples_per_second": 9.986,
      "eval_steps_per_second": 0.626,
      "step": 2900
    },
    {
      "epoch": 2.9615004935834155,
      "grad_norm": 0.5317863821983337,
      "learning_rate": 2.8519195612431445e-07,
      "loss": 0.0633,
      "step": 3000
    },
    {
      "epoch": 2.9615004935834155,
      "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.621963262887616,
      "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9401046810191586,
      "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262393837645665,
      "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8843077226940549,
      "eval_custom_dataset_evaluation_cosine_map@100": 0.7376242387041089,
      "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346985522104195,
      "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7849111131501391,
      "eval_custom_dataset_evaluation_cosine_precision@1": 0.621963262887616,
      "eval_custom_dataset_evaluation_cosine_precision@10": 0.09401046810191586,
      "eval_custom_dataset_evaluation_cosine_precision@3": 0.2754131279215221,
      "eval_custom_dataset_evaluation_cosine_precision@5": 0.17686154453881098,
      "eval_custom_dataset_evaluation_cosine_recall@1": 0.621963262887616,
      "eval_custom_dataset_evaluation_cosine_recall@10": 0.9401046810191586,
      "eval_custom_dataset_evaluation_cosine_recall@3": 0.8262393837645665,
      "eval_custom_dataset_evaluation_cosine_recall@5": 0.8843077226940549,
      "eval_loss": 0.20363624393939972,
      "eval_runtime": 203.946,
      "eval_samples_per_second": 9.929,
      "eval_steps_per_second": 0.623,
      "step": 3000
    }
  ],
  "logging_steps": 100,
  "max_steps": 3039,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}