musabg commited on
Commit
6414281
1 Parent(s): c774912

Upload folder using huggingface_hub

Browse files
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e58fa21b802c1a0b2a0c75f37c266463af58b130387c328056d45fb5d21c2b
3
  size 13476859646
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d51eb87875752e699b0b9ce682165fb110c71a672b28fc084bbf55fc1977239
3
  size 13476859646
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65b9ad91fdc9f51cd72e45ae3f5d5a2cec0ce89f50b969fac93276b158b69886
3
  size 9877989650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0109702a17586a6669f6211be7906e986e5292f56b1e53682a9decc684dff422
3
  size 9877989650
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffa84aab9ba936e1d2ad2bfb3ab68920e0a803b0c7b7f6d4a7c7659a65967abb
3
  size 9894801206
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec7a4785a52bef4efdb94137e6c54a479ed55762251e2ecc1658ee1bf38ff38
3
  size 9894801206
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8658ec14f5fd9db2c4ae9773f8acd5078a09602a15a5484c91a7c83423099458
3
  size 7180990841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519cc27e43a806ed30c155517838a9b34b0e96de21ac23267a2e2fdfd4c4c2e8
3
  size 7180990841
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e3c5cb412e12159a59afe5657ce4b5e0a06e7fb420bedbb5228fe1245702762
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6bc3b332b1d7b34dd8e7d7ed0389c868155059ddb1d908e9ac3feb6672b23c
3
  size 14583
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:741230672078323886b763e522c728741456a587860909fc529ce815a7aca5ec
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de5e0c7dadcd828a8d62fffc136e170202022509240a895985c7bc45cabbced
3
  size 14583
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ea587886b41579993bb5d20c79047b968ae2d71d22ba4c739b07ce31d7486a6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c420d12d8aa09a561480241f19154d4aedd8a866de54ed145d69f860bae6f94
3
  size 14583
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab727740f74dd67e60283d27b4339609a1dda888b067cc06520e2f1d7dc17db
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec873fd7c31f869e7956f098c0d1e17d2296924b3c55e4971a059dd097690b6f
3
  size 14583
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e43b372746f1c43e6b8e3b0d9739066714de8bc4930ffc8a745bcec6595d691
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fab38bc378f5a3363deb1f9a4cf2ada87fd73ebe45f0144d5d4b18eed6403727
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6783216783216783,
5
- "global_step": 480,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2894,11 +2894,1459 @@
2894
  "learning_rate": 8.570056464180998e-06,
2895
  "loss": 0.2311,
2896
  "step": 480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2897
  }
2898
  ],
2899
  "max_steps": 858,
2900
  "num_train_epochs": 3,
2901
- "total_flos": 1.2217975017622733e+18,
2902
  "trial_name": null,
2903
  "trial_params": null
2904
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5174825174825175,
5
+ "global_step": 720,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2894
  "learning_rate": 8.570056464180998e-06,
2895
  "loss": 0.2311,
2896
  "step": 480
2897
+ },
2898
+ {
2899
+ "epoch": 1.68,
2900
+ "learning_rate": 8.532695255446384e-06,
2901
+ "loss": 0.2439,
2902
+ "step": 481
2903
+ },
2904
+ {
2905
+ "epoch": 1.69,
2906
+ "learning_rate": 8.49535496725217e-06,
2907
+ "loss": 0.238,
2908
+ "step": 482
2909
+ },
2910
+ {
2911
+ "epoch": 1.69,
2912
+ "learning_rate": 8.458036131988792e-06,
2913
+ "loss": 0.2322,
2914
+ "step": 483
2915
+ },
2916
+ {
2917
+ "epoch": 1.69,
2918
+ "learning_rate": 8.420739281740806e-06,
2919
+ "loss": 0.2276,
2920
+ "step": 484
2921
+ },
2922
+ {
2923
+ "epoch": 1.7,
2924
+ "learning_rate": 8.383464948279319e-06,
2925
+ "loss": 0.2401,
2926
+ "step": 485
2927
+ },
2928
+ {
2929
+ "epoch": 1.7,
2930
+ "learning_rate": 8.346213663054388e-06,
2931
+ "loss": 0.2328,
2932
+ "step": 486
2933
+ },
2934
+ {
2935
+ "epoch": 1.7,
2936
+ "learning_rate": 8.308985957187466e-06,
2937
+ "loss": 0.2292,
2938
+ "step": 487
2939
+ },
2940
+ {
2941
+ "epoch": 1.71,
2942
+ "learning_rate": 8.271782361463806e-06,
2943
+ "loss": 0.2211,
2944
+ "step": 488
2945
+ },
2946
+ {
2947
+ "epoch": 1.71,
2948
+ "learning_rate": 8.234603406324909e-06,
2949
+ "loss": 0.2379,
2950
+ "step": 489
2951
+ },
2952
+ {
2953
+ "epoch": 1.71,
2954
+ "learning_rate": 8.197449621860944e-06,
2955
+ "loss": 0.2438,
2956
+ "step": 490
2957
+ },
2958
+ {
2959
+ "epoch": 1.72,
2960
+ "learning_rate": 8.16032153780322e-06,
2961
+ "loss": 0.2556,
2962
+ "step": 491
2963
+ },
2964
+ {
2965
+ "epoch": 1.72,
2966
+ "learning_rate": 8.123219683516604e-06,
2967
+ "loss": 0.217,
2968
+ "step": 492
2969
+ },
2970
+ {
2971
+ "epoch": 1.72,
2972
+ "learning_rate": 8.08614458799198e-06,
2973
+ "loss": 0.2489,
2974
+ "step": 493
2975
+ },
2976
+ {
2977
+ "epoch": 1.73,
2978
+ "learning_rate": 8.04909677983872e-06,
2979
+ "loss": 0.2447,
2980
+ "step": 494
2981
+ },
2982
+ {
2983
+ "epoch": 1.73,
2984
+ "learning_rate": 8.01207678727713e-06,
2985
+ "loss": 0.2486,
2986
+ "step": 495
2987
+ },
2988
+ {
2989
+ "epoch": 1.73,
2990
+ "learning_rate": 7.975085138130938e-06,
2991
+ "loss": 0.2513,
2992
+ "step": 496
2993
+ },
2994
+ {
2995
+ "epoch": 1.74,
2996
+ "learning_rate": 7.938122359819745e-06,
2997
+ "loss": 0.2451,
2998
+ "step": 497
2999
+ },
3000
+ {
3001
+ "epoch": 1.74,
3002
+ "learning_rate": 7.901188979351527e-06,
3003
+ "loss": 0.2301,
3004
+ "step": 498
3005
+ },
3006
+ {
3007
+ "epoch": 1.74,
3008
+ "learning_rate": 7.864285523315097e-06,
3009
+ "loss": 0.2475,
3010
+ "step": 499
3011
+ },
3012
+ {
3013
+ "epoch": 1.75,
3014
+ "learning_rate": 7.827412517872634e-06,
3015
+ "loss": 0.2394,
3016
+ "step": 500
3017
+ },
3018
+ {
3019
+ "epoch": 1.75,
3020
+ "learning_rate": 7.790570488752137e-06,
3021
+ "loss": 0.2304,
3022
+ "step": 501
3023
+ },
3024
+ {
3025
+ "epoch": 1.76,
3026
+ "learning_rate": 7.753759961239965e-06,
3027
+ "loss": 0.2185,
3028
+ "step": 502
3029
+ },
3030
+ {
3031
+ "epoch": 1.76,
3032
+ "learning_rate": 7.716981460173319e-06,
3033
+ "loss": 0.2383,
3034
+ "step": 503
3035
+ },
3036
+ {
3037
+ "epoch": 1.76,
3038
+ "learning_rate": 7.680235509932791e-06,
3039
+ "loss": 0.2208,
3040
+ "step": 504
3041
+ },
3042
+ {
3043
+ "epoch": 1.77,
3044
+ "learning_rate": 7.643522634434856e-06,
3045
+ "loss": 0.232,
3046
+ "step": 505
3047
+ },
3048
+ {
3049
+ "epoch": 1.77,
3050
+ "learning_rate": 7.606843357124426e-06,
3051
+ "loss": 0.2213,
3052
+ "step": 506
3053
+ },
3054
+ {
3055
+ "epoch": 1.77,
3056
+ "learning_rate": 7.570198200967363e-06,
3057
+ "loss": 0.2409,
3058
+ "step": 507
3059
+ },
3060
+ {
3061
+ "epoch": 1.78,
3062
+ "learning_rate": 7.5335876884430495e-06,
3063
+ "loss": 0.2161,
3064
+ "step": 508
3065
+ },
3066
+ {
3067
+ "epoch": 1.78,
3068
+ "learning_rate": 7.497012341536924e-06,
3069
+ "loss": 0.2244,
3070
+ "step": 509
3071
+ },
3072
+ {
3073
+ "epoch": 1.78,
3074
+ "learning_rate": 7.460472681733031e-06,
3075
+ "loss": 0.2406,
3076
+ "step": 510
3077
+ },
3078
+ {
3079
+ "epoch": 1.79,
3080
+ "learning_rate": 7.423969230006609e-06,
3081
+ "loss": 0.2504,
3082
+ "step": 511
3083
+ },
3084
+ {
3085
+ "epoch": 1.79,
3086
+ "learning_rate": 7.387502506816638e-06,
3087
+ "loss": 0.235,
3088
+ "step": 512
3089
+ },
3090
+ {
3091
+ "epoch": 1.79,
3092
+ "learning_rate": 7.351073032098437e-06,
3093
+ "loss": 0.2376,
3094
+ "step": 513
3095
+ },
3096
+ {
3097
+ "epoch": 1.8,
3098
+ "learning_rate": 7.314681325256232e-06,
3099
+ "loss": 0.2281,
3100
+ "step": 514
3101
+ },
3102
+ {
3103
+ "epoch": 1.8,
3104
+ "learning_rate": 7.278327905155783e-06,
3105
+ "loss": 0.2215,
3106
+ "step": 515
3107
+ },
3108
+ {
3109
+ "epoch": 1.8,
3110
+ "learning_rate": 7.242013290116944e-06,
3111
+ "loss": 0.2446,
3112
+ "step": 516
3113
+ },
3114
+ {
3115
+ "epoch": 1.81,
3116
+ "learning_rate": 7.205737997906307e-06,
3117
+ "loss": 0.2385,
3118
+ "step": 517
3119
+ },
3120
+ {
3121
+ "epoch": 1.81,
3122
+ "learning_rate": 7.169502545729798e-06,
3123
+ "loss": 0.2401,
3124
+ "step": 518
3125
+ },
3126
+ {
3127
+ "epoch": 1.81,
3128
+ "learning_rate": 7.133307450225322e-06,
3129
+ "loss": 0.2354,
3130
+ "step": 519
3131
+ },
3132
+ {
3133
+ "epoch": 1.82,
3134
+ "learning_rate": 7.097153227455379e-06,
3135
+ "loss": 0.2367,
3136
+ "step": 520
3137
+ },
3138
+ {
3139
+ "epoch": 1.82,
3140
+ "learning_rate": 7.0610403928997114e-06,
3141
+ "loss": 0.246,
3142
+ "step": 521
3143
+ },
3144
+ {
3145
+ "epoch": 1.83,
3146
+ "learning_rate": 7.024969461447973e-06,
3147
+ "loss": 0.2227,
3148
+ "step": 522
3149
+ },
3150
+ {
3151
+ "epoch": 1.83,
3152
+ "learning_rate": 6.9889409473923445e-06,
3153
+ "loss": 0.2342,
3154
+ "step": 523
3155
+ },
3156
+ {
3157
+ "epoch": 1.83,
3158
+ "learning_rate": 6.952955364420255e-06,
3159
+ "loss": 0.2398,
3160
+ "step": 524
3161
+ },
3162
+ {
3163
+ "epoch": 1.84,
3164
+ "learning_rate": 6.91701322560701e-06,
3165
+ "loss": 0.22,
3166
+ "step": 525
3167
+ },
3168
+ {
3169
+ "epoch": 1.84,
3170
+ "learning_rate": 6.881115043408512e-06,
3171
+ "loss": 0.2343,
3172
+ "step": 526
3173
+ },
3174
+ {
3175
+ "epoch": 1.84,
3176
+ "learning_rate": 6.845261329653923e-06,
3177
+ "loss": 0.244,
3178
+ "step": 527
3179
+ },
3180
+ {
3181
+ "epoch": 1.85,
3182
+ "learning_rate": 6.809452595538403e-06,
3183
+ "loss": 0.2396,
3184
+ "step": 528
3185
+ },
3186
+ {
3187
+ "epoch": 1.85,
3188
+ "learning_rate": 6.7736893516157795e-06,
3189
+ "loss": 0.2296,
3190
+ "step": 529
3191
+ },
3192
+ {
3193
+ "epoch": 1.85,
3194
+ "learning_rate": 6.7379721077913095e-06,
3195
+ "loss": 0.2295,
3196
+ "step": 530
3197
+ },
3198
+ {
3199
+ "epoch": 1.86,
3200
+ "learning_rate": 6.70230137331437e-06,
3201
+ "loss": 0.245,
3202
+ "step": 531
3203
+ },
3204
+ {
3205
+ "epoch": 1.86,
3206
+ "learning_rate": 6.666677656771239e-06,
3207
+ "loss": 0.2305,
3208
+ "step": 532
3209
+ },
3210
+ {
3211
+ "epoch": 1.86,
3212
+ "learning_rate": 6.631101466077801e-06,
3213
+ "loss": 0.2527,
3214
+ "step": 533
3215
+ },
3216
+ {
3217
+ "epoch": 1.87,
3218
+ "learning_rate": 6.595573308472338e-06,
3219
+ "loss": 0.2522,
3220
+ "step": 534
3221
+ },
3222
+ {
3223
+ "epoch": 1.87,
3224
+ "learning_rate": 6.56009369050829e-06,
3225
+ "loss": 0.2481,
3226
+ "step": 535
3227
+ },
3228
+ {
3229
+ "epoch": 1.87,
3230
+ "learning_rate": 6.524663118047015e-06,
3231
+ "loss": 0.2197,
3232
+ "step": 536
3233
+ },
3234
+ {
3235
+ "epoch": 1.88,
3236
+ "learning_rate": 6.489282096250602e-06,
3237
+ "loss": 0.2209,
3238
+ "step": 537
3239
+ },
3240
+ {
3241
+ "epoch": 1.88,
3242
+ "learning_rate": 6.453951129574644e-06,
3243
+ "loss": 0.2253,
3244
+ "step": 538
3245
+ },
3246
+ {
3247
+ "epoch": 1.88,
3248
+ "learning_rate": 6.4186707217610735e-06,
3249
+ "loss": 0.2205,
3250
+ "step": 539
3251
+ },
3252
+ {
3253
+ "epoch": 1.89,
3254
+ "learning_rate": 6.3834413758309525e-06,
3255
+ "loss": 0.2536,
3256
+ "step": 540
3257
+ },
3258
+ {
3259
+ "epoch": 1.89,
3260
+ "learning_rate": 6.348263594077319e-06,
3261
+ "loss": 0.229,
3262
+ "step": 541
3263
+ },
3264
+ {
3265
+ "epoch": 1.9,
3266
+ "learning_rate": 6.3131378780580134e-06,
3267
+ "loss": 0.2434,
3268
+ "step": 542
3269
+ },
3270
+ {
3271
+ "epoch": 1.9,
3272
+ "learning_rate": 6.278064728588542e-06,
3273
+ "loss": 0.2428,
3274
+ "step": 543
3275
+ },
3276
+ {
3277
+ "epoch": 1.9,
3278
+ "learning_rate": 6.243044645734917e-06,
3279
+ "loss": 0.2441,
3280
+ "step": 544
3281
+ },
3282
+ {
3283
+ "epoch": 1.91,
3284
+ "learning_rate": 6.208078128806549e-06,
3285
+ "loss": 0.2493,
3286
+ "step": 545
3287
+ },
3288
+ {
3289
+ "epoch": 1.91,
3290
+ "learning_rate": 6.173165676349103e-06,
3291
+ "loss": 0.234,
3292
+ "step": 546
3293
+ },
3294
+ {
3295
+ "epoch": 1.91,
3296
+ "learning_rate": 6.138307786137415e-06,
3297
+ "loss": 0.2335,
3298
+ "step": 547
3299
+ },
3300
+ {
3301
+ "epoch": 1.92,
3302
+ "learning_rate": 6.103504955168382e-06,
3303
+ "loss": 0.2273,
3304
+ "step": 548
3305
+ },
3306
+ {
3307
+ "epoch": 1.92,
3308
+ "learning_rate": 6.0687576796538685e-06,
3309
+ "loss": 0.2285,
3310
+ "step": 549
3311
+ },
3312
+ {
3313
+ "epoch": 1.92,
3314
+ "learning_rate": 6.0340664550136494e-06,
3315
+ "loss": 0.23,
3316
+ "step": 550
3317
+ },
3318
+ {
3319
+ "epoch": 1.93,
3320
+ "learning_rate": 5.999431775868329e-06,
3321
+ "loss": 0.2425,
3322
+ "step": 551
3323
+ },
3324
+ {
3325
+ "epoch": 1.93,
3326
+ "learning_rate": 5.96485413603231e-06,
3327
+ "loss": 0.2407,
3328
+ "step": 552
3329
+ },
3330
+ {
3331
+ "epoch": 1.93,
3332
+ "learning_rate": 5.930334028506726e-06,
3333
+ "loss": 0.2289,
3334
+ "step": 553
3335
+ },
3336
+ {
3337
+ "epoch": 1.94,
3338
+ "learning_rate": 5.895871945472434e-06,
3339
+ "loss": 0.2091,
3340
+ "step": 554
3341
+ },
3342
+ {
3343
+ "epoch": 1.94,
3344
+ "learning_rate": 5.861468378282984e-06,
3345
+ "loss": 0.2212,
3346
+ "step": 555
3347
+ },
3348
+ {
3349
+ "epoch": 1.94,
3350
+ "learning_rate": 5.827123817457631e-06,
3351
+ "loss": 0.2393,
3352
+ "step": 556
3353
+ },
3354
+ {
3355
+ "epoch": 1.95,
3356
+ "learning_rate": 5.792838752674309e-06,
3357
+ "loss": 0.2308,
3358
+ "step": 557
3359
+ },
3360
+ {
3361
+ "epoch": 1.95,
3362
+ "learning_rate": 5.75861367276269e-06,
3363
+ "loss": 0.2339,
3364
+ "step": 558
3365
+ },
3366
+ {
3367
+ "epoch": 1.95,
3368
+ "learning_rate": 5.724449065697182e-06,
3369
+ "loss": 0.2286,
3370
+ "step": 559
3371
+ },
3372
+ {
3373
+ "epoch": 1.96,
3374
+ "learning_rate": 5.690345418589978e-06,
3375
+ "loss": 0.2509,
3376
+ "step": 560
3377
+ },
3378
+ {
3379
+ "epoch": 1.96,
3380
+ "learning_rate": 5.656303217684133e-06,
3381
+ "loss": 0.2434,
3382
+ "step": 561
3383
+ },
3384
+ {
3385
+ "epoch": 1.97,
3386
+ "learning_rate": 5.622322948346595e-06,
3387
+ "loss": 0.2346,
3388
+ "step": 562
3389
+ },
3390
+ {
3391
+ "epoch": 1.97,
3392
+ "learning_rate": 5.588405095061322e-06,
3393
+ "loss": 0.245,
3394
+ "step": 563
3395
+ },
3396
+ {
3397
+ "epoch": 1.97,
3398
+ "learning_rate": 5.55455014142234e-06,
3399
+ "loss": 0.2255,
3400
+ "step": 564
3401
+ },
3402
+ {
3403
+ "epoch": 1.98,
3404
+ "learning_rate": 5.5207585701268805e-06,
3405
+ "loss": 0.2267,
3406
+ "step": 565
3407
+ },
3408
+ {
3409
+ "epoch": 1.98,
3410
+ "learning_rate": 5.4870308629684675e-06,
3411
+ "loss": 0.2529,
3412
+ "step": 566
3413
+ },
3414
+ {
3415
+ "epoch": 1.98,
3416
+ "learning_rate": 5.453367500830069e-06,
3417
+ "loss": 0.2457,
3418
+ "step": 567
3419
+ },
3420
+ {
3421
+ "epoch": 1.99,
3422
+ "learning_rate": 5.419768963677233e-06,
3423
+ "loss": 0.2378,
3424
+ "step": 568
3425
+ },
3426
+ {
3427
+ "epoch": 1.99,
3428
+ "learning_rate": 5.3862357305512524e-06,
3429
+ "loss": 0.2198,
3430
+ "step": 569
3431
+ },
3432
+ {
3433
+ "epoch": 1.99,
3434
+ "learning_rate": 5.352768279562315e-06,
3435
+ "loss": 0.2448,
3436
+ "step": 570
3437
+ },
3438
+ {
3439
+ "epoch": 2.0,
3440
+ "learning_rate": 5.319367087882717e-06,
3441
+ "loss": 0.2713,
3442
+ "step": 571
3443
+ },
3444
+ {
3445
+ "epoch": 2.0,
3446
+ "learning_rate": 5.286032631740023e-06,
3447
+ "loss": 0.2335,
3448
+ "step": 572
3449
+ },
3450
+ {
3451
+ "epoch": 2.0,
3452
+ "eval_loss": 0.3002837300300598,
3453
+ "eval_runtime": 165.4835,
3454
+ "eval_samples_per_second": 4.514,
3455
+ "eval_steps_per_second": 0.145,
3456
+ "step": 572
3457
+ },
3458
+ {
3459
+ "epoch": 2.0,
3460
+ "learning_rate": 5.2527653864103124e-06,
3461
+ "loss": 0.1816,
3462
+ "step": 573
3463
+ },
3464
+ {
3465
+ "epoch": 2.01,
3466
+ "learning_rate": 5.219565826211382e-06,
3467
+ "loss": 0.1793,
3468
+ "step": 574
3469
+ },
3470
+ {
3471
+ "epoch": 2.01,
3472
+ "learning_rate": 5.18643442449598e-06,
3473
+ "loss": 0.1813,
3474
+ "step": 575
3475
+ },
3476
+ {
3477
+ "epoch": 2.01,
3478
+ "learning_rate": 5.15337165364507e-06,
3479
+ "loss": 0.1827,
3480
+ "step": 576
3481
+ },
3482
+ {
3483
+ "epoch": 2.02,
3484
+ "learning_rate": 5.1203779850610865e-06,
3485
+ "loss": 0.1763,
3486
+ "step": 577
3487
+ },
3488
+ {
3489
+ "epoch": 2.02,
3490
+ "learning_rate": 5.087453889161229e-06,
3491
+ "loss": 0.1937,
3492
+ "step": 578
3493
+ },
3494
+ {
3495
+ "epoch": 2.02,
3496
+ "learning_rate": 5.054599835370724e-06,
3497
+ "loss": 0.1733,
3498
+ "step": 579
3499
+ },
3500
+ {
3501
+ "epoch": 2.03,
3502
+ "learning_rate": 5.021816292116175e-06,
3503
+ "loss": 0.1892,
3504
+ "step": 580
3505
+ },
3506
+ {
3507
+ "epoch": 2.03,
3508
+ "learning_rate": 4.989103726818836e-06,
3509
+ "loss": 0.1766,
3510
+ "step": 581
3511
+ },
3512
+ {
3513
+ "epoch": 2.03,
3514
+ "learning_rate": 4.956462605887994e-06,
3515
+ "loss": 0.1728,
3516
+ "step": 582
3517
+ },
3518
+ {
3519
+ "epoch": 2.04,
3520
+ "learning_rate": 4.92389339471428e-06,
3521
+ "loss": 0.178,
3522
+ "step": 583
3523
+ },
3524
+ {
3525
+ "epoch": 2.04,
3526
+ "learning_rate": 4.891396557663056e-06,
3527
+ "loss": 0.1998,
3528
+ "step": 584
3529
+ },
3530
+ {
3531
+ "epoch": 2.05,
3532
+ "learning_rate": 4.858972558067784e-06,
3533
+ "loss": 0.1779,
3534
+ "step": 585
3535
+ },
3536
+ {
3537
+ "epoch": 2.05,
3538
+ "learning_rate": 4.826621858223431e-06,
3539
+ "loss": 0.1827,
3540
+ "step": 586
3541
+ },
3542
+ {
3543
+ "epoch": 2.05,
3544
+ "learning_rate": 4.794344919379872e-06,
3545
+ "loss": 0.1717,
3546
+ "step": 587
3547
+ },
3548
+ {
3549
+ "epoch": 2.06,
3550
+ "learning_rate": 4.762142201735299e-06,
3551
+ "loss": 0.1767,
3552
+ "step": 588
3553
+ },
3554
+ {
3555
+ "epoch": 2.06,
3556
+ "learning_rate": 4.730014164429689e-06,
3557
+ "loss": 0.1754,
3558
+ "step": 589
3559
+ },
3560
+ {
3561
+ "epoch": 2.06,
3562
+ "learning_rate": 4.697961265538231e-06,
3563
+ "loss": 0.1693,
3564
+ "step": 590
3565
+ },
3566
+ {
3567
+ "epoch": 2.07,
3568
+ "learning_rate": 4.665983962064807e-06,
3569
+ "loss": 0.1814,
3570
+ "step": 591
3571
+ },
3572
+ {
3573
+ "epoch": 2.07,
3574
+ "learning_rate": 4.6340827099354734e-06,
3575
+ "loss": 0.1857,
3576
+ "step": 592
3577
+ },
3578
+ {
3579
+ "epoch": 2.07,
3580
+ "learning_rate": 4.60225796399197e-06,
3581
+ "loss": 0.1727,
3582
+ "step": 593
3583
+ },
3584
+ {
3585
+ "epoch": 2.08,
3586
+ "learning_rate": 4.570510177985213e-06,
3587
+ "loss": 0.1726,
3588
+ "step": 594
3589
+ },
3590
+ {
3591
+ "epoch": 2.08,
3592
+ "learning_rate": 4.538839804568857e-06,
3593
+ "loss": 0.1771,
3594
+ "step": 595
3595
+ },
3596
+ {
3597
+ "epoch": 2.08,
3598
+ "learning_rate": 4.5072472952928015e-06,
3599
+ "loss": 0.1735,
3600
+ "step": 596
3601
+ },
3602
+ {
3603
+ "epoch": 2.09,
3604
+ "learning_rate": 4.475733100596795e-06,
3605
+ "loss": 0.1716,
3606
+ "step": 597
3607
+ },
3608
+ {
3609
+ "epoch": 2.09,
3610
+ "learning_rate": 4.444297669803981e-06,
3611
+ "loss": 0.1667,
3612
+ "step": 598
3613
+ },
3614
+ {
3615
+ "epoch": 2.09,
3616
+ "learning_rate": 4.412941451114499e-06,
3617
+ "loss": 0.1808,
3618
+ "step": 599
3619
+ },
3620
+ {
3621
+ "epoch": 2.1,
3622
+ "learning_rate": 4.381664891599111e-06,
3623
+ "loss": 0.1858,
3624
+ "step": 600
3625
+ },
3626
+ {
3627
+ "epoch": 2.1,
3628
+ "learning_rate": 4.350468437192801e-06,
3629
+ "loss": 0.1798,
3630
+ "step": 601
3631
+ },
3632
+ {
3633
+ "epoch": 2.1,
3634
+ "learning_rate": 4.319352532688444e-06,
3635
+ "loss": 0.172,
3636
+ "step": 602
3637
+ },
3638
+ {
3639
+ "epoch": 2.11,
3640
+ "learning_rate": 4.288317621730434e-06,
3641
+ "loss": 0.1827,
3642
+ "step": 603
3643
+ },
3644
+ {
3645
+ "epoch": 2.11,
3646
+ "learning_rate": 4.257364146808394e-06,
3647
+ "loss": 0.1782,
3648
+ "step": 604
3649
+ },
3650
+ {
3651
+ "epoch": 2.12,
3652
+ "learning_rate": 4.226492549250829e-06,
3653
+ "loss": 0.1728,
3654
+ "step": 605
3655
+ },
3656
+ {
3657
+ "epoch": 2.12,
3658
+ "learning_rate": 4.1957032692188685e-06,
3659
+ "loss": 0.1727,
3660
+ "step": 606
3661
+ },
3662
+ {
3663
+ "epoch": 2.12,
3664
+ "learning_rate": 4.164996745699966e-06,
3665
+ "loss": 0.1807,
3666
+ "step": 607
3667
+ },
3668
+ {
3669
+ "epoch": 2.13,
3670
+ "learning_rate": 4.134373416501652e-06,
3671
+ "loss": 0.1742,
3672
+ "step": 608
3673
+ },
3674
+ {
3675
+ "epoch": 2.13,
3676
+ "learning_rate": 4.103833718245282e-06,
3677
+ "loss": 0.1783,
3678
+ "step": 609
3679
+ },
3680
+ {
3681
+ "epoch": 2.13,
3682
+ "learning_rate": 4.073378086359834e-06,
3683
+ "loss": 0.1821,
3684
+ "step": 610
3685
+ },
3686
+ {
3687
+ "epoch": 2.14,
3688
+ "learning_rate": 4.043006955075667e-06,
3689
+ "loss": 0.171,
3690
+ "step": 611
3691
+ },
3692
+ {
3693
+ "epoch": 2.14,
3694
+ "learning_rate": 4.0127207574183576e-06,
3695
+ "loss": 0.1798,
3696
+ "step": 612
3697
+ },
3698
+ {
3699
+ "epoch": 2.14,
3700
+ "learning_rate": 3.9825199252025185e-06,
3701
+ "loss": 0.1747,
3702
+ "step": 613
3703
+ },
3704
+ {
3705
+ "epoch": 2.15,
3706
+ "learning_rate": 3.952404889025626e-06,
3707
+ "loss": 0.1792,
3708
+ "step": 614
3709
+ },
3710
+ {
3711
+ "epoch": 2.15,
3712
+ "learning_rate": 3.922376078261905e-06,
3713
+ "loss": 0.1712,
3714
+ "step": 615
3715
+ },
3716
+ {
3717
+ "epoch": 2.15,
3718
+ "learning_rate": 3.892433921056184e-06,
3719
+ "loss": 0.1724,
3720
+ "step": 616
3721
+ },
3722
+ {
3723
+ "epoch": 2.16,
3724
+ "learning_rate": 3.862578844317817e-06,
3725
+ "loss": 0.1755,
3726
+ "step": 617
3727
+ },
3728
+ {
3729
+ "epoch": 2.16,
3730
+ "learning_rate": 3.832811273714569e-06,
3731
+ "loss": 0.1834,
3732
+ "step": 618
3733
+ },
3734
+ {
3735
+ "epoch": 2.16,
3736
+ "learning_rate": 3.8031316336665725e-06,
3737
+ "loss": 0.1735,
3738
+ "step": 619
3739
+ },
3740
+ {
3741
+ "epoch": 2.17,
3742
+ "learning_rate": 3.7735403473402475e-06,
3743
+ "loss": 0.1738,
3744
+ "step": 620
3745
+ },
3746
+ {
3747
+ "epoch": 2.17,
3748
+ "learning_rate": 3.7440378366423e-06,
3749
+ "loss": 0.176,
3750
+ "step": 621
3751
+ },
3752
+ {
3753
+ "epoch": 2.17,
3754
+ "learning_rate": 3.714624522213681e-06,
3755
+ "loss": 0.1803,
3756
+ "step": 622
3757
+ },
3758
+ {
3759
+ "epoch": 2.18,
3760
+ "learning_rate": 3.6853008234236023e-06,
3761
+ "loss": 0.1704,
3762
+ "step": 623
3763
+ },
3764
+ {
3765
+ "epoch": 2.18,
3766
+ "learning_rate": 3.6560671583635467e-06,
3767
+ "loss": 0.1804,
3768
+ "step": 624
3769
+ },
3770
+ {
3771
+ "epoch": 2.19,
3772
+ "learning_rate": 3.626923943841325e-06,
3773
+ "loss": 0.1823,
3774
+ "step": 625
3775
+ },
3776
+ {
3777
+ "epoch": 2.19,
3778
+ "learning_rate": 3.5978715953751207e-06,
3779
+ "loss": 0.184,
3780
+ "step": 626
3781
+ },
3782
+ {
3783
+ "epoch": 2.19,
3784
+ "learning_rate": 3.568910527187557e-06,
3785
+ "loss": 0.1731,
3786
+ "step": 627
3787
+ },
3788
+ {
3789
+ "epoch": 2.2,
3790
+ "learning_rate": 3.5400411521998123e-06,
3791
+ "loss": 0.1812,
3792
+ "step": 628
3793
+ },
3794
+ {
3795
+ "epoch": 2.2,
3796
+ "learning_rate": 3.5112638820257115e-06,
3797
+ "loss": 0.1786,
3798
+ "step": 629
3799
+ },
3800
+ {
3801
+ "epoch": 2.2,
3802
+ "learning_rate": 3.4825791269658782e-06,
3803
+ "loss": 0.1806,
3804
+ "step": 630
3805
+ },
3806
+ {
3807
+ "epoch": 2.21,
3808
+ "learning_rate": 3.453987296001866e-06,
3809
+ "loss": 0.1803,
3810
+ "step": 631
3811
+ },
3812
+ {
3813
+ "epoch": 2.21,
3814
+ "learning_rate": 3.4254887967903373e-06,
3815
+ "loss": 0.1745,
3816
+ "step": 632
3817
+ },
3818
+ {
3819
+ "epoch": 2.21,
3820
+ "learning_rate": 3.397084035657243e-06,
3821
+ "loss": 0.1852,
3822
+ "step": 633
3823
+ },
3824
+ {
3825
+ "epoch": 2.22,
3826
+ "learning_rate": 3.3687734175920505e-06,
3827
+ "loss": 0.1852,
3828
+ "step": 634
3829
+ },
3830
+ {
3831
+ "epoch": 2.22,
3832
+ "learning_rate": 3.3405573462419362e-06,
3833
+ "loss": 0.1529,
3834
+ "step": 635
3835
+ },
3836
+ {
3837
+ "epoch": 2.22,
3838
+ "learning_rate": 3.3124362239060627e-06,
3839
+ "loss": 0.1798,
3840
+ "step": 636
3841
+ },
3842
+ {
3843
+ "epoch": 2.23,
3844
+ "learning_rate": 3.284410451529816e-06,
3845
+ "loss": 0.1723,
3846
+ "step": 637
3847
+ },
3848
+ {
3849
+ "epoch": 2.23,
3850
+ "learning_rate": 3.2564804286991137e-06,
3851
+ "loss": 0.1769,
3852
+ "step": 638
3853
+ },
3854
+ {
3855
+ "epoch": 2.23,
3856
+ "learning_rate": 3.2286465536346857e-06,
3857
+ "loss": 0.1831,
3858
+ "step": 639
3859
+ },
3860
+ {
3861
+ "epoch": 2.24,
3862
+ "learning_rate": 3.2009092231864047e-06,
3863
+ "loss": 0.1845,
3864
+ "step": 640
3865
+ },
3866
+ {
3867
+ "epoch": 2.24,
3868
+ "learning_rate": 3.173268832827643e-06,
3869
+ "loss": 0.1609,
3870
+ "step": 641
3871
+ },
3872
+ {
3873
+ "epoch": 2.24,
3874
+ "learning_rate": 3.145725776649602e-06,
3875
+ "loss": 0.1716,
3876
+ "step": 642
3877
+ },
3878
+ {
3879
+ "epoch": 2.25,
3880
+ "learning_rate": 3.1182804473557295e-06,
3881
+ "loss": 0.1853,
3882
+ "step": 643
3883
+ },
3884
+ {
3885
+ "epoch": 2.25,
3886
+ "learning_rate": 3.0909332362560875e-06,
3887
+ "loss": 0.172,
3888
+ "step": 644
3889
+ },
3890
+ {
3891
+ "epoch": 2.26,
3892
+ "learning_rate": 3.0636845332617994e-06,
3893
+ "loss": 0.1818,
3894
+ "step": 645
3895
+ },
3896
+ {
3897
+ "epoch": 2.26,
3898
+ "learning_rate": 3.036534726879473e-06,
3899
+ "loss": 0.1801,
3900
+ "step": 646
3901
+ },
3902
+ {
3903
+ "epoch": 2.26,
3904
+ "learning_rate": 3.0094842042056706e-06,
3905
+ "loss": 0.1942,
3906
+ "step": 647
3907
+ },
3908
+ {
3909
+ "epoch": 2.27,
3910
+ "learning_rate": 2.982533350921383e-06,
3911
+ "loss": 0.177,
3912
+ "step": 648
3913
+ },
3914
+ {
3915
+ "epoch": 2.27,
3916
+ "learning_rate": 2.9556825512865418e-06,
3917
+ "loss": 0.1784,
3918
+ "step": 649
3919
+ },
3920
+ {
3921
+ "epoch": 2.27,
3922
+ "learning_rate": 2.9289321881345257e-06,
3923
+ "loss": 0.179,
3924
+ "step": 650
3925
+ },
3926
+ {
3927
+ "epoch": 2.28,
3928
+ "learning_rate": 2.902282642866716e-06,
3929
+ "loss": 0.1772,
3930
+ "step": 651
3931
+ },
3932
+ {
3933
+ "epoch": 2.28,
3934
+ "learning_rate": 2.8757342954470537e-06,
3935
+ "loss": 0.1745,
3936
+ "step": 652
3937
+ },
3938
+ {
3939
+ "epoch": 2.28,
3940
+ "learning_rate": 2.8492875243966114e-06,
3941
+ "loss": 0.1796,
3942
+ "step": 653
3943
+ },
3944
+ {
3945
+ "epoch": 2.29,
3946
+ "learning_rate": 2.8229427067882165e-06,
3947
+ "loss": 0.1744,
3948
+ "step": 654
3949
+ },
3950
+ {
3951
+ "epoch": 2.29,
3952
+ "learning_rate": 2.7967002182410596e-06,
3953
+ "loss": 0.177,
3954
+ "step": 655
3955
+ },
3956
+ {
3957
+ "epoch": 2.29,
3958
+ "learning_rate": 2.7705604329153434e-06,
3959
+ "loss": 0.177,
3960
+ "step": 656
3961
+ },
3962
+ {
3963
+ "epoch": 2.3,
3964
+ "learning_rate": 2.7445237235069453e-06,
3965
+ "loss": 0.1765,
3966
+ "step": 657
3967
+ },
3968
+ {
3969
+ "epoch": 2.3,
3970
+ "learning_rate": 2.7185904612421177e-06,
3971
+ "loss": 0.1784,
3972
+ "step": 658
3973
+ },
3974
+ {
3975
+ "epoch": 2.3,
3976
+ "learning_rate": 2.6927610158721708e-06,
3977
+ "loss": 0.1796,
3978
+ "step": 659
3979
+ },
3980
+ {
3981
+ "epoch": 2.31,
3982
+ "learning_rate": 2.6670357556682245e-06,
3983
+ "loss": 0.1842,
3984
+ "step": 660
3985
+ },
3986
+ {
3987
+ "epoch": 2.31,
3988
+ "learning_rate": 2.6414150474159405e-06,
3989
+ "loss": 0.1843,
3990
+ "step": 661
3991
+ },
3992
+ {
3993
+ "epoch": 2.31,
3994
+ "learning_rate": 2.615899256410306e-06,
3995
+ "loss": 0.1816,
3996
+ "step": 662
3997
+ },
3998
+ {
3999
+ "epoch": 2.32,
4000
+ "learning_rate": 2.5904887464504115e-06,
4001
+ "loss": 0.1872,
4002
+ "step": 663
4003
+ },
4004
+ {
4005
+ "epoch": 2.32,
4006
+ "learning_rate": 2.565183879834272e-06,
4007
+ "loss": 0.1793,
4008
+ "step": 664
4009
+ },
4010
+ {
4011
+ "epoch": 2.33,
4012
+ "learning_rate": 2.53998501735367e-06,
4013
+ "loss": 0.1846,
4014
+ "step": 665
4015
+ },
4016
+ {
4017
+ "epoch": 2.33,
4018
+ "learning_rate": 2.514892518288988e-06,
4019
+ "loss": 0.1696,
4020
+ "step": 666
4021
+ },
4022
+ {
4023
+ "epoch": 2.33,
4024
+ "learning_rate": 2.4899067404041156e-06,
4025
+ "loss": 0.1821,
4026
+ "step": 667
4027
+ },
4028
+ {
4029
+ "epoch": 2.34,
4030
+ "learning_rate": 2.465028039941316e-06,
4031
+ "loss": 0.1667,
4032
+ "step": 668
4033
+ },
4034
+ {
4035
+ "epoch": 2.34,
4036
+ "learning_rate": 2.4402567716161806e-06,
4037
+ "loss": 0.186,
4038
+ "step": 669
4039
+ },
4040
+ {
4041
+ "epoch": 2.34,
4042
+ "learning_rate": 2.415593288612541e-06,
4043
+ "loss": 0.1727,
4044
+ "step": 670
4045
+ },
4046
+ {
4047
+ "epoch": 2.35,
4048
+ "learning_rate": 2.3910379425774544e-06,
4049
+ "loss": 0.1783,
4050
+ "step": 671
4051
+ },
4052
+ {
4053
+ "epoch": 2.35,
4054
+ "learning_rate": 2.366591083616178e-06,
4055
+ "loss": 0.1595,
4056
+ "step": 672
4057
+ },
4058
+ {
4059
+ "epoch": 2.35,
4060
+ "learning_rate": 2.3422530602871874e-06,
4061
+ "loss": 0.1577,
4062
+ "step": 673
4063
+ },
4064
+ {
4065
+ "epoch": 2.36,
4066
+ "learning_rate": 2.318024219597196e-06,
4067
+ "loss": 0.1816,
4068
+ "step": 674
4069
+ },
4070
+ {
4071
+ "epoch": 2.36,
4072
+ "learning_rate": 2.2939049069962183e-06,
4073
+ "loss": 0.182,
4074
+ "step": 675
4075
+ },
4076
+ {
4077
+ "epoch": 2.36,
4078
+ "learning_rate": 2.26989546637263e-06,
4079
+ "loss": 0.1792,
4080
+ "step": 676
4081
+ },
4082
+ {
4083
+ "epoch": 2.37,
4084
+ "learning_rate": 2.2459962400482848e-06,
4085
+ "loss": 0.1826,
4086
+ "step": 677
4087
+ },
4088
+ {
4089
+ "epoch": 2.37,
4090
+ "learning_rate": 2.222207568773619e-06,
4091
+ "loss": 0.1798,
4092
+ "step": 678
4093
+ },
4094
+ {
4095
+ "epoch": 2.37,
4096
+ "learning_rate": 2.1985297917227922e-06,
4097
+ "loss": 0.1721,
4098
+ "step": 679
4099
+ },
4100
+ {
4101
+ "epoch": 2.38,
4102
+ "learning_rate": 2.1749632464888594e-06,
4103
+ "loss": 0.1786,
4104
+ "step": 680
4105
+ },
4106
+ {
4107
+ "epoch": 2.38,
4108
+ "learning_rate": 2.1515082690789535e-06,
4109
+ "loss": 0.1832,
4110
+ "step": 681
4111
+ },
4112
+ {
4113
+ "epoch": 2.38,
4114
+ "learning_rate": 2.1281651939094996e-06,
4115
+ "loss": 0.1741,
4116
+ "step": 682
4117
+ },
4118
+ {
4119
+ "epoch": 2.39,
4120
+ "learning_rate": 2.1049343538014354e-06,
4121
+ "loss": 0.1692,
4122
+ "step": 683
4123
+ },
4124
+ {
4125
+ "epoch": 2.39,
4126
+ "learning_rate": 2.0818160799754826e-06,
4127
+ "loss": 0.181,
4128
+ "step": 684
4129
+ },
4130
+ {
4131
+ "epoch": 2.4,
4132
+ "learning_rate": 2.0588107020474056e-06,
4133
+ "loss": 0.1675,
4134
+ "step": 685
4135
+ },
4136
+ {
4137
+ "epoch": 2.4,
4138
+ "learning_rate": 2.03591854802333e-06,
4139
+ "loss": 0.1842,
4140
+ "step": 686
4141
+ },
4142
+ {
4143
+ "epoch": 2.4,
4144
+ "learning_rate": 2.0131399442950507e-06,
4145
+ "loss": 0.1693,
4146
+ "step": 687
4147
+ },
4148
+ {
4149
+ "epoch": 2.41,
4150
+ "learning_rate": 1.990475215635388e-06,
4151
+ "loss": 0.1825,
4152
+ "step": 688
4153
+ },
4154
+ {
4155
+ "epoch": 2.41,
4156
+ "learning_rate": 1.967924685193552e-06,
4157
+ "loss": 0.1864,
4158
+ "step": 689
4159
+ },
4160
+ {
4161
+ "epoch": 2.41,
4162
+ "learning_rate": 1.94548867449054e-06,
4163
+ "loss": 0.1732,
4164
+ "step": 690
4165
+ },
4166
+ {
4167
+ "epoch": 2.42,
4168
+ "learning_rate": 1.9231675034145515e-06,
4169
+ "loss": 0.1752,
4170
+ "step": 691
4171
+ },
4172
+ {
4173
+ "epoch": 2.42,
4174
+ "learning_rate": 1.9009614902164175e-06,
4175
+ "loss": 0.1724,
4176
+ "step": 692
4177
+ },
4178
+ {
4179
+ "epoch": 2.42,
4180
+ "learning_rate": 1.8788709515050808e-06,
4181
+ "loss": 0.1729,
4182
+ "step": 693
4183
+ },
4184
+ {
4185
+ "epoch": 2.43,
4186
+ "learning_rate": 1.8568962022430637e-06,
4187
+ "loss": 0.177,
4188
+ "step": 694
4189
+ },
4190
+ {
4191
+ "epoch": 2.43,
4192
+ "learning_rate": 1.8350375557419875e-06,
4193
+ "loss": 0.186,
4194
+ "step": 695
4195
+ },
4196
+ {
4197
+ "epoch": 2.43,
4198
+ "learning_rate": 1.813295323658103e-06,
4199
+ "loss": 0.1835,
4200
+ "step": 696
4201
+ },
4202
+ {
4203
+ "epoch": 2.44,
4204
+ "learning_rate": 1.791669815987852e-06,
4205
+ "loss": 0.1777,
4206
+ "step": 697
4207
+ },
4208
+ {
4209
+ "epoch": 2.44,
4210
+ "learning_rate": 1.7701613410634367e-06,
4211
+ "loss": 0.1762,
4212
+ "step": 698
4213
+ },
4214
+ {
4215
+ "epoch": 2.44,
4216
+ "learning_rate": 1.7487702055484345e-06,
4217
+ "loss": 0.1754,
4218
+ "step": 699
4219
+ },
4220
+ {
4221
+ "epoch": 2.45,
4222
+ "learning_rate": 1.7274967144334155e-06,
4223
+ "loss": 0.1846,
4224
+ "step": 700
4225
+ },
4226
+ {
4227
+ "epoch": 2.45,
4228
+ "learning_rate": 1.7063411710316047e-06,
4229
+ "loss": 0.1677,
4230
+ "step": 701
4231
+ },
4232
+ {
4233
+ "epoch": 2.45,
4234
+ "learning_rate": 1.6853038769745466e-06,
4235
+ "loss": 0.1734,
4236
+ "step": 702
4237
+ },
4238
+ {
4239
+ "epoch": 2.46,
4240
+ "learning_rate": 1.6643851322078176e-06,
4241
+ "loss": 0.1817,
4242
+ "step": 703
4243
+ },
4244
+ {
4245
+ "epoch": 2.46,
4246
+ "learning_rate": 1.643585234986733e-06,
4247
+ "loss": 0.1695,
4248
+ "step": 704
4249
+ },
4250
+ {
4251
+ "epoch": 2.47,
4252
+ "learning_rate": 1.6229044818721062e-06,
4253
+ "loss": 0.1866,
4254
+ "step": 705
4255
+ },
4256
+ {
4257
+ "epoch": 2.47,
4258
+ "learning_rate": 1.6023431677260215e-06,
4259
+ "loss": 0.1666,
4260
+ "step": 706
4261
+ },
4262
+ {
4263
+ "epoch": 2.47,
4264
+ "learning_rate": 1.5819015857076215e-06,
4265
+ "loss": 0.1903,
4266
+ "step": 707
4267
+ },
4268
+ {
4269
+ "epoch": 2.48,
4270
+ "learning_rate": 1.5615800272689352e-06,
4271
+ "loss": 0.1652,
4272
+ "step": 708
4273
+ },
4274
+ {
4275
+ "epoch": 2.48,
4276
+ "learning_rate": 1.5413787821507141e-06,
4277
+ "loss": 0.1727,
4278
+ "step": 709
4279
+ },
4280
+ {
4281
+ "epoch": 2.48,
4282
+ "learning_rate": 1.5212981383783154e-06,
4283
+ "loss": 0.1794,
4284
+ "step": 710
4285
+ },
4286
+ {
4287
+ "epoch": 2.49,
4288
+ "learning_rate": 1.5013383822575766e-06,
4289
+ "loss": 0.18,
4290
+ "step": 711
4291
+ },
4292
+ {
4293
+ "epoch": 2.49,
4294
+ "learning_rate": 1.4814997983707457e-06,
4295
+ "loss": 0.1676,
4296
+ "step": 712
4297
+ },
4298
+ {
4299
+ "epoch": 2.49,
4300
+ "learning_rate": 1.4617826695724224e-06,
4301
+ "loss": 0.17,
4302
+ "step": 713
4303
+ },
4304
+ {
4305
+ "epoch": 2.5,
4306
+ "learning_rate": 1.4421872769855262e-06,
4307
+ "loss": 0.1875,
4308
+ "step": 714
4309
+ },
4310
+ {
4311
+ "epoch": 2.5,
4312
+ "learning_rate": 1.4227138999972801e-06,
4313
+ "loss": 0.1725,
4314
+ "step": 715
4315
+ },
4316
+ {
4317
+ "epoch": 2.5,
4318
+ "learning_rate": 1.403362816255236e-06,
4319
+ "loss": 0.183,
4320
+ "step": 716
4321
+ },
4322
+ {
4323
+ "epoch": 2.51,
4324
+ "learning_rate": 1.3841343016633168e-06,
4325
+ "loss": 0.1806,
4326
+ "step": 717
4327
+ },
4328
+ {
4329
+ "epoch": 2.51,
4330
+ "learning_rate": 1.3650286303778715e-06,
4331
+ "loss": 0.1901,
4332
+ "step": 718
4333
+ },
4334
+ {
4335
+ "epoch": 2.51,
4336
+ "learning_rate": 1.3460460748037773e-06,
4337
+ "loss": 0.1834,
4338
+ "step": 719
4339
+ },
4340
+ {
4341
+ "epoch": 2.52,
4342
+ "learning_rate": 1.3271869055905495e-06,
4343
+ "loss": 0.1769,
4344
+ "step": 720
4345
  }
4346
  ],
4347
  "max_steps": 858,
4348
  "num_train_epochs": 3,
4349
+ "total_flos": 1.8324177213299098e+18,
4350
  "trial_name": null,
4351
  "trial_params": null
4352
  }