sedrickkeh commited on
Commit
f12c3a8
·
verified ·
1 Parent(s): eb36138

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970a61347e0ccb4787616cbe5db776cd93fbfbe0e1240bd5fc509ee86ac936d2
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93028c4ad9f23fe74f1c4cc050aa75224997398228e1fa7d01db38fa914c06cd
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5919abe0329bd3749b5845d0cbb9045bfcda8353124ef44394784a2054951ba
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50bc87a9fe7dac9ec0d1ecc6156994accd57adf0fc419c3e5af0efb0bb341098
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02c96f768b1b3ae3b3261af33dae58270421ee700a4c9855e34596b7931b5584
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0539f9d3676e64c39198f75cf53579ccc88a20835168fcc5ba571e84481f4e46
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aae6ffeeffd8b12aed63e306e4cf3c12cc00f8e5ac72b517a1a58b9d067d9251
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4ab7118c1c57bf09a523c3f9b4dd337522df0f4bef1e2768009b21c4560400
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -60,3 +60,65 @@
60
  {"current_steps": 600, "total_steps": 1827, "loss": 0.8157, "lr": 5e-06, "epoch": 0.9848173984407058, "percentage": 32.84, "elapsed_time": "9:42:44", "remaining_time": "19:51:42"}
61
  {"current_steps": 609, "total_steps": 1827, "eval_loss": 0.8145917654037476, "epoch": 0.9995896594173164, "percentage": 33.33, "elapsed_time": "10:02:32", "remaining_time": "20:05:05"}
62
  {"current_steps": 610, "total_steps": 1827, "loss": 0.8731, "lr": 5e-06, "epoch": 1.001231021748051, "percentage": 33.39, "elapsed_time": "10:04:26", "remaining_time": "20:05:54"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  {"current_steps": 600, "total_steps": 1827, "loss": 0.8157, "lr": 5e-06, "epoch": 0.9848173984407058, "percentage": 32.84, "elapsed_time": "9:42:44", "remaining_time": "19:51:42"}
61
  {"current_steps": 609, "total_steps": 1827, "eval_loss": 0.8145917654037476, "epoch": 0.9995896594173164, "percentage": 33.33, "elapsed_time": "10:02:32", "remaining_time": "20:05:05"}
62
  {"current_steps": 610, "total_steps": 1827, "loss": 0.8731, "lr": 5e-06, "epoch": 1.001231021748051, "percentage": 33.39, "elapsed_time": "10:04:26", "remaining_time": "20:05:54"}
63
+ {"current_steps": 620, "total_steps": 1827, "loss": 0.7758, "lr": 5e-06, "epoch": 1.017644645055396, "percentage": 33.94, "elapsed_time": "10:14:09", "remaining_time": "19:55:37"}
64
+ {"current_steps": 630, "total_steps": 1827, "loss": 0.7704, "lr": 5e-06, "epoch": 1.034058268362741, "percentage": 34.48, "elapsed_time": "10:23:52", "remaining_time": "19:45:20"}
65
+ {"current_steps": 640, "total_steps": 1827, "loss": 0.774, "lr": 5e-06, "epoch": 1.0504718916700861, "percentage": 35.03, "elapsed_time": "10:33:34", "remaining_time": "19:35:05"}
66
+ {"current_steps": 650, "total_steps": 1827, "loss": 0.7706, "lr": 5e-06, "epoch": 1.0668855149774312, "percentage": 35.58, "elapsed_time": "10:43:19", "remaining_time": "19:24:54"}
67
+ {"current_steps": 660, "total_steps": 1827, "loss": 0.7812, "lr": 5e-06, "epoch": 1.0832991382847763, "percentage": 36.12, "elapsed_time": "10:53:03", "remaining_time": "19:14:42"}
68
+ {"current_steps": 670, "total_steps": 1827, "loss": 0.7786, "lr": 5e-06, "epoch": 1.0997127615921214, "percentage": 36.67, "elapsed_time": "11:02:47", "remaining_time": "19:04:33"}
69
+ {"current_steps": 680, "total_steps": 1827, "loss": 0.7749, "lr": 5e-06, "epoch": 1.1161263848994665, "percentage": 37.22, "elapsed_time": "11:12:31", "remaining_time": "18:54:23"}
70
+ {"current_steps": 690, "total_steps": 1827, "loss": 0.7782, "lr": 5e-06, "epoch": 1.1325400082068118, "percentage": 37.77, "elapsed_time": "11:22:14", "remaining_time": "18:44:12"}
71
+ {"current_steps": 700, "total_steps": 1827, "loss": 0.771, "lr": 5e-06, "epoch": 1.1489536315141566, "percentage": 38.31, "elapsed_time": "11:31:59", "remaining_time": "18:34:06"}
72
+ {"current_steps": 710, "total_steps": 1827, "loss": 0.769, "lr": 5e-06, "epoch": 1.165367254821502, "percentage": 38.86, "elapsed_time": "11:41:43", "remaining_time": "18:23:58"}
73
+ {"current_steps": 720, "total_steps": 1827, "loss": 0.7735, "lr": 5e-06, "epoch": 1.181780878128847, "percentage": 39.41, "elapsed_time": "11:51:25", "remaining_time": "18:13:49"}
74
+ {"current_steps": 730, "total_steps": 1827, "loss": 0.7735, "lr": 5e-06, "epoch": 1.198194501436192, "percentage": 39.96, "elapsed_time": "12:01:09", "remaining_time": "18:03:42"}
75
+ {"current_steps": 740, "total_steps": 1827, "loss": 0.7735, "lr": 5e-06, "epoch": 1.2146081247435372, "percentage": 40.5, "elapsed_time": "12:10:51", "remaining_time": "17:53:34"}
76
+ {"current_steps": 750, "total_steps": 1827, "loss": 0.7773, "lr": 5e-06, "epoch": 1.2310217480508823, "percentage": 41.05, "elapsed_time": "12:20:35", "remaining_time": "17:43:29"}
77
+ {"current_steps": 760, "total_steps": 1827, "loss": 0.7776, "lr": 5e-06, "epoch": 1.2474353713582274, "percentage": 41.6, "elapsed_time": "12:30:18", "remaining_time": "17:33:23"}
78
+ {"current_steps": 770, "total_steps": 1827, "loss": 0.7746, "lr": 5e-06, "epoch": 1.2638489946655724, "percentage": 42.15, "elapsed_time": "12:40:02", "remaining_time": "17:23:20"}
79
+ {"current_steps": 780, "total_steps": 1827, "loss": 0.7707, "lr": 5e-06, "epoch": 1.2802626179729175, "percentage": 42.69, "elapsed_time": "12:49:47", "remaining_time": "17:13:17"}
80
+ {"current_steps": 790, "total_steps": 1827, "loss": 0.7725, "lr": 5e-06, "epoch": 1.2966762412802626, "percentage": 43.24, "elapsed_time": "12:59:28", "remaining_time": "17:03:11"}
81
+ {"current_steps": 800, "total_steps": 1827, "loss": 0.7744, "lr": 5e-06, "epoch": 1.3130898645876077, "percentage": 43.79, "elapsed_time": "13:09:11", "remaining_time": "16:53:08"}
82
+ {"current_steps": 810, "total_steps": 1827, "loss": 0.7733, "lr": 5e-06, "epoch": 1.3295034878949528, "percentage": 44.33, "elapsed_time": "13:18:53", "remaining_time": "16:43:03"}
83
+ {"current_steps": 820, "total_steps": 1827, "loss": 0.7714, "lr": 5e-06, "epoch": 1.3459171112022978, "percentage": 44.88, "elapsed_time": "13:28:36", "remaining_time": "16:33:00"}
84
+ {"current_steps": 830, "total_steps": 1827, "loss": 0.7715, "lr": 5e-06, "epoch": 1.362330734509643, "percentage": 45.43, "elapsed_time": "13:38:18", "remaining_time": "16:22:57"}
85
+ {"current_steps": 840, "total_steps": 1827, "loss": 0.7726, "lr": 5e-06, "epoch": 1.3787443578169882, "percentage": 45.98, "elapsed_time": "13:47:59", "remaining_time": "16:12:53"}
86
+ {"current_steps": 850, "total_steps": 1827, "loss": 0.7747, "lr": 5e-06, "epoch": 1.395157981124333, "percentage": 46.52, "elapsed_time": "13:57:43", "remaining_time": "16:02:53"}
87
+ {"current_steps": 860, "total_steps": 1827, "loss": 0.7744, "lr": 5e-06, "epoch": 1.4115716044316784, "percentage": 47.07, "elapsed_time": "14:07:26", "remaining_time": "15:52:53"}
88
+ {"current_steps": 870, "total_steps": 1827, "loss": 0.7727, "lr": 5e-06, "epoch": 1.4279852277390233, "percentage": 47.62, "elapsed_time": "14:17:10", "remaining_time": "15:42:53"}
89
+ {"current_steps": 880, "total_steps": 1827, "loss": 0.7744, "lr": 5e-06, "epoch": 1.4443988510463686, "percentage": 48.17, "elapsed_time": "14:26:53", "remaining_time": "15:32:53"}
90
+ {"current_steps": 890, "total_steps": 1827, "loss": 0.77, "lr": 5e-06, "epoch": 1.4608124743537136, "percentage": 48.71, "elapsed_time": "14:36:33", "remaining_time": "15:22:51"}
91
+ {"current_steps": 900, "total_steps": 1827, "loss": 0.7742, "lr": 5e-06, "epoch": 1.4772260976610587, "percentage": 49.26, "elapsed_time": "14:46:17", "remaining_time": "15:12:52"}
92
+ {"current_steps": 910, "total_steps": 1827, "loss": 0.7674, "lr": 5e-06, "epoch": 1.4936397209684038, "percentage": 49.81, "elapsed_time": "14:55:59", "remaining_time": "15:02:53"}
93
+ {"current_steps": 920, "total_steps": 1827, "loss": 0.7716, "lr": 5e-06, "epoch": 1.5100533442757489, "percentage": 50.36, "elapsed_time": "15:05:40", "remaining_time": "14:52:52"}
94
+ {"current_steps": 930, "total_steps": 1827, "loss": 0.7711, "lr": 5e-06, "epoch": 1.526466967583094, "percentage": 50.9, "elapsed_time": "15:15:21", "remaining_time": "14:42:52"}
95
+ {"current_steps": 940, "total_steps": 1827, "loss": 0.7724, "lr": 5e-06, "epoch": 1.542880590890439, "percentage": 51.45, "elapsed_time": "15:25:04", "remaining_time": "14:32:54"}
96
+ {"current_steps": 950, "total_steps": 1827, "loss": 0.7693, "lr": 5e-06, "epoch": 1.5592942141977841, "percentage": 52.0, "elapsed_time": "15:34:45", "remaining_time": "14:22:55"}
97
+ {"current_steps": 960, "total_steps": 1827, "loss": 0.7712, "lr": 5e-06, "epoch": 1.5757078375051292, "percentage": 52.55, "elapsed_time": "15:44:28", "remaining_time": "14:12:58"}
98
+ {"current_steps": 970, "total_steps": 1827, "loss": 0.7709, "lr": 5e-06, "epoch": 1.5921214608124743, "percentage": 53.09, "elapsed_time": "15:54:10", "remaining_time": "14:03:01"}
99
+ {"current_steps": 980, "total_steps": 1827, "loss": 0.7772, "lr": 5e-06, "epoch": 1.6085350841198194, "percentage": 53.64, "elapsed_time": "16:03:54", "remaining_time": "13:53:05"}
100
+ {"current_steps": 990, "total_steps": 1827, "loss": 0.7722, "lr": 5e-06, "epoch": 1.6249487074271647, "percentage": 54.19, "elapsed_time": "16:13:37", "remaining_time": "13:43:09"}
101
+ {"current_steps": 1000, "total_steps": 1827, "loss": 0.7694, "lr": 5e-06, "epoch": 1.6413623307345095, "percentage": 54.73, "elapsed_time": "16:23:20", "remaining_time": "13:33:13"}
102
+ {"current_steps": 1010, "total_steps": 1827, "loss": 0.7673, "lr": 5e-06, "epoch": 1.6577759540418548, "percentage": 55.28, "elapsed_time": "16:33:03", "remaining_time": "13:23:17"}
103
+ {"current_steps": 1020, "total_steps": 1827, "loss": 0.7713, "lr": 5e-06, "epoch": 1.6741895773491997, "percentage": 55.83, "elapsed_time": "16:42:47", "remaining_time": "13:13:22"}
104
+ {"current_steps": 1030, "total_steps": 1827, "loss": 0.7687, "lr": 5e-06, "epoch": 1.690603200656545, "percentage": 56.38, "elapsed_time": "16:52:28", "remaining_time": "13:03:26"}
105
+ {"current_steps": 1040, "total_steps": 1827, "loss": 0.7697, "lr": 5e-06, "epoch": 1.7070168239638899, "percentage": 56.92, "elapsed_time": "17:02:11", "remaining_time": "12:53:31"}
106
+ {"current_steps": 1050, "total_steps": 1827, "loss": 0.7691, "lr": 5e-06, "epoch": 1.7234304472712352, "percentage": 57.47, "elapsed_time": "17:11:54", "remaining_time": "12:43:36"}
107
+ {"current_steps": 1060, "total_steps": 1827, "loss": 0.7741, "lr": 5e-06, "epoch": 1.7398440705785803, "percentage": 58.02, "elapsed_time": "17:21:37", "remaining_time": "12:33:42"}
108
+ {"current_steps": 1070, "total_steps": 1827, "loss": 0.7661, "lr": 5e-06, "epoch": 1.7562576938859253, "percentage": 58.57, "elapsed_time": "17:31:19", "remaining_time": "12:23:47"}
109
+ {"current_steps": 1080, "total_steps": 1827, "loss": 0.7689, "lr": 5e-06, "epoch": 1.7726713171932704, "percentage": 59.11, "elapsed_time": "17:41:00", "remaining_time": "12:13:52"}
110
+ {"current_steps": 1090, "total_steps": 1827, "loss": 0.7705, "lr": 5e-06, "epoch": 1.7890849405006155, "percentage": 59.66, "elapsed_time": "17:50:41", "remaining_time": "12:03:56"}
111
+ {"current_steps": 1100, "total_steps": 1827, "loss": 0.7726, "lr": 5e-06, "epoch": 1.8054985638079606, "percentage": 60.21, "elapsed_time": "18:00:21", "remaining_time": "11:54:01"}
112
+ {"current_steps": 1110, "total_steps": 1827, "loss": 0.7698, "lr": 5e-06, "epoch": 1.8219121871153057, "percentage": 60.76, "elapsed_time": "18:10:05", "remaining_time": "11:44:08"}
113
+ {"current_steps": 1120, "total_steps": 1827, "loss": 0.767, "lr": 5e-06, "epoch": 1.8383258104226508, "percentage": 61.3, "elapsed_time": "18:19:49", "remaining_time": "11:34:15"}
114
+ {"current_steps": 1130, "total_steps": 1827, "loss": 0.7686, "lr": 5e-06, "epoch": 1.8547394337299958, "percentage": 61.85, "elapsed_time": "18:29:32", "remaining_time": "11:24:22"}
115
+ {"current_steps": 1140, "total_steps": 1827, "loss": 0.7717, "lr": 5e-06, "epoch": 1.8711530570373411, "percentage": 62.4, "elapsed_time": "18:39:16", "remaining_time": "11:14:30"}
116
+ {"current_steps": 1150, "total_steps": 1827, "loss": 0.7715, "lr": 5e-06, "epoch": 1.887566680344686, "percentage": 62.94, "elapsed_time": "18:49:00", "remaining_time": "11:04:38"}
117
+ {"current_steps": 1160, "total_steps": 1827, "loss": 0.7719, "lr": 5e-06, "epoch": 1.9039803036520313, "percentage": 63.49, "elapsed_time": "18:58:44", "remaining_time": "10:54:46"}
118
+ {"current_steps": 1170, "total_steps": 1827, "loss": 0.7715, "lr": 5e-06, "epoch": 1.9203939269593762, "percentage": 64.04, "elapsed_time": "19:08:27", "remaining_time": "10:44:53"}
119
+ {"current_steps": 1180, "total_steps": 1827, "loss": 0.7653, "lr": 5e-06, "epoch": 1.9368075502667215, "percentage": 64.59, "elapsed_time": "19:18:10", "remaining_time": "10:35:02"}
120
+ {"current_steps": 1190, "total_steps": 1827, "loss": 0.7678, "lr": 5e-06, "epoch": 1.9532211735740663, "percentage": 65.13, "elapsed_time": "19:27:54", "remaining_time": "10:25:10"}
121
+ {"current_steps": 1200, "total_steps": 1827, "loss": 0.7687, "lr": 5e-06, "epoch": 1.9696347968814116, "percentage": 65.68, "elapsed_time": "19:37:39", "remaining_time": "10:15:19"}
122
+ {"current_steps": 1210, "total_steps": 1827, "loss": 0.7667, "lr": 5e-06, "epoch": 1.9860484201887567, "percentage": 66.23, "elapsed_time": "19:47:23", "remaining_time": "10:05:28"}
123
+ {"current_steps": 1218, "total_steps": 1827, "eval_loss": 0.8003594875335693, "epoch": 1.9991793188346327, "percentage": 66.67, "elapsed_time": "20:06:24", "remaining_time": "10:03:12"}
124
+ {"current_steps": 1220, "total_steps": 1827, "loss": 0.8157, "lr": 5e-06, "epoch": 2.002462043496102, "percentage": 66.78, "elapsed_time": "20:09:07", "remaining_time": "10:01:35"}