chchen committed on
Commit
4f029bb
1 Parent(s): 97c56f5

Training in progress, step 1500

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +51 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:808e1a5168b9e55c08c5814ea7ae3f9581106344b9fffde10f65785dd48e58db
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:084ac3848c3bd3d696b2dc3290d9093b4f938c66ba135c9eb5ae15196b1614a7
3
  size 83945296
trainer_log.jsonl CHANGED
@@ -100,3 +100,54 @@
100
  {"current_steps": 990, "total_steps": 1686, "loss": 0.8683, "accuracy": 0.53125, "learning_rate": 1.8239381598343576e-06, "epoch": 1.76039119804401, "percentage": 58.72, "elapsed_time": "5:03:09", "remaining_time": "3:33:07"}
101
  {"current_steps": 1000, "total_steps": 1686, "loss": 0.8737, "accuracy": 0.5625, "learning_rate": 1.779207600392312e-06, "epoch": 1.7781729273171816, "percentage": 59.31, "elapsed_time": "5:04:51", "remaining_time": "3:29:07"}
102
  {"current_steps": 1000, "total_steps": 1686, "eval_loss": 0.8773505687713623, "epoch": 1.7781729273171816, "percentage": 59.31, "elapsed_time": "5:08:00", "remaining_time": "3:31:17"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  {"current_steps": 990, "total_steps": 1686, "loss": 0.8683, "accuracy": 0.53125, "learning_rate": 1.8239381598343576e-06, "epoch": 1.76039119804401, "percentage": 58.72, "elapsed_time": "5:03:09", "remaining_time": "3:33:07"}
101
  {"current_steps": 1000, "total_steps": 1686, "loss": 0.8737, "accuracy": 0.5625, "learning_rate": 1.779207600392312e-06, "epoch": 1.7781729273171816, "percentage": 59.31, "elapsed_time": "5:04:51", "remaining_time": "3:29:07"}
102
  {"current_steps": 1000, "total_steps": 1686, "eval_loss": 0.8773505687713623, "epoch": 1.7781729273171816, "percentage": 59.31, "elapsed_time": "5:08:00", "remaining_time": "3:31:17"}
103
+ {"current_steps": 1010, "total_steps": 1686, "loss": 0.8833, "accuracy": 0.4937500059604645, "learning_rate": 1.7347273253353552e-06, "epoch": 1.7959546565903532, "percentage": 59.91, "elapsed_time": "5:09:41", "remaining_time": "3:27:16"}
104
+ {"current_steps": 1020, "total_steps": 1686, "loss": 0.8927, "accuracy": 0.581250011920929, "learning_rate": 1.690512779774029e-06, "epoch": 1.8137363858635251, "percentage": 60.5, "elapsed_time": "5:11:19", "remaining_time": "3:23:16"}
105
+ {"current_steps": 1030, "total_steps": 1686, "loss": 0.8352, "accuracy": 0.612500011920929, "learning_rate": 1.6465793165482838e-06, "epoch": 1.831518115136697, "percentage": 61.09, "elapsed_time": "5:13:00", "remaining_time": "3:19:21"}
106
+ {"current_steps": 1040, "total_steps": 1686, "loss": 0.8252, "accuracy": 0.612500011920929, "learning_rate": 1.6029421908964305e-06, "epoch": 1.849299844409869, "percentage": 61.68, "elapsed_time": "5:14:32", "remaining_time": "3:15:22"}
107
+ {"current_steps": 1050, "total_steps": 1686, "loss": 0.8694, "accuracy": 0.543749988079071, "learning_rate": 1.559616555157985e-06, "epoch": 1.8670815736830408, "percentage": 62.28, "elapsed_time": "5:16:13", "remaining_time": "3:11:32"}
108
+ {"current_steps": 1060, "total_steps": 1686, "loss": 0.9289, "accuracy": 0.5, "learning_rate": 1.516617453512252e-06, "epoch": 1.8848633029562125, "percentage": 62.87, "elapsed_time": "5:17:52", "remaining_time": "3:07:43"}
109
+ {"current_steps": 1070, "total_steps": 1686, "loss": 0.8714, "accuracy": 0.543749988079071, "learning_rate": 1.473959816754449e-06, "epoch": 1.9026450322293842, "percentage": 63.46, "elapsed_time": "5:19:27", "remaining_time": "3:03:54"}
110
+ {"current_steps": 1080, "total_steps": 1686, "loss": 0.9088, "accuracy": 0.5375000238418579, "learning_rate": 1.4316584571112213e-06, "epoch": 1.920426761502556, "percentage": 64.06, "elapsed_time": "5:21:07", "remaining_time": "3:00:11"}
111
+ {"current_steps": 1090, "total_steps": 1686, "loss": 0.9054, "accuracy": 0.550000011920929, "learning_rate": 1.389728063097306e-06, "epoch": 1.938208490775728, "percentage": 64.65, "elapsed_time": "5:22:47", "remaining_time": "2:56:30"}
112
+ {"current_steps": 1100, "total_steps": 1686, "loss": 0.8794, "accuracy": 0.625, "learning_rate": 1.348183194415179e-06, "epoch": 1.9559902200488999, "percentage": 65.24, "elapsed_time": "5:24:21", "remaining_time": "2:52:47"}
113
+ {"current_steps": 1110, "total_steps": 1686, "loss": 0.8756, "accuracy": 0.512499988079071, "learning_rate": 1.3070382768994015e-06, "epoch": 1.9737719493220716, "percentage": 65.84, "elapsed_time": "5:26:02", "remaining_time": "2:49:11"}
114
+ {"current_steps": 1120, "total_steps": 1686, "loss": 0.8606, "accuracy": 0.59375, "learning_rate": 1.2663075975074746e-06, "epoch": 1.9915536785952432, "percentage": 66.43, "elapsed_time": "5:27:42", "remaining_time": "2:45:36"}
115
+ {"current_steps": 1130, "total_steps": 1686, "loss": 0.9829, "accuracy": 0.46875, "learning_rate": 1.2260052993589034e-06, "epoch": 2.009335407868415, "percentage": 67.02, "elapsed_time": "5:29:22", "remaining_time": "2:42:04"}
116
+ {"current_steps": 1140, "total_steps": 1686, "loss": 0.7897, "accuracy": 0.65625, "learning_rate": 1.1861453768242099e-06, "epoch": 2.027117137141587, "percentage": 67.62, "elapsed_time": "5:31:01", "remaining_time": "2:38:32"}
117
+ {"current_steps": 1150, "total_steps": 1686, "loss": 0.9448, "accuracy": 0.5, "learning_rate": 1.1467416706655982e-06, "epoch": 2.044898866414759, "percentage": 68.21, "elapsed_time": "5:32:45", "remaining_time": "2:35:05"}
118
+ {"current_steps": 1160, "total_steps": 1686, "loss": 0.8405, "accuracy": 0.5625, "learning_rate": 1.1078078632309559e-06, "epoch": 2.062680595687931, "percentage": 68.8, "elapsed_time": "5:34:22", "remaining_time": "2:31:37"}
119
+ {"current_steps": 1170, "total_steps": 1686, "loss": 0.8792, "accuracy": 0.543749988079071, "learning_rate": 1.0693574737028627e-06, "epoch": 2.0804623249611023, "percentage": 69.4, "elapsed_time": "5:36:03", "remaining_time": "2:28:12"}
120
+ {"current_steps": 1180, "total_steps": 1686, "loss": 0.8285, "accuracy": 0.5375000238418579, "learning_rate": 1.0314038534042586e-06, "epoch": 2.098244054234274, "percentage": 69.99, "elapsed_time": "5:37:43", "remaining_time": "2:24:49"}
121
+ {"current_steps": 1190, "total_steps": 1686, "loss": 0.8813, "accuracy": 0.5375000238418579, "learning_rate": 9.939601811623946e-07, "epoch": 2.116025783507446, "percentage": 70.58, "elapsed_time": "5:39:19", "remaining_time": "2:21:25"}
122
+ {"current_steps": 1200, "total_steps": 1686, "loss": 0.8608, "accuracy": 0.6000000238418579, "learning_rate": 9.570394587326825e-07, "epoch": 2.133807512780618, "percentage": 71.17, "elapsed_time": "5:40:58", "remaining_time": "2:18:05"}
123
+ {"current_steps": 1210, "total_steps": 1686, "loss": 0.831, "accuracy": 0.581250011920929, "learning_rate": 9.206545062840302e-07, "epoch": 2.15158924205379, "percentage": 71.77, "elapsed_time": "5:42:35", "remaining_time": "2:14:46"}
124
+ {"current_steps": 1220, "total_steps": 1686, "loss": 0.8349, "accuracy": 0.4937500059604645, "learning_rate": 8.848179579472285e-07, "epoch": 2.1693709713269618, "percentage": 72.36, "elapsed_time": "5:44:15", "remaining_time": "2:11:29"}
125
+ {"current_steps": 1230, "total_steps": 1686, "loss": 0.7646, "accuracy": 0.625, "learning_rate": 8.495422574279403e-07, "epoch": 2.1871527006001332, "percentage": 72.95, "elapsed_time": "5:45:50", "remaining_time": "2:08:12"}
126
+ {"current_steps": 1240, "total_steps": 1686, "loss": 0.8959, "accuracy": 0.550000011920929, "learning_rate": 8.148396536858063e-07, "epoch": 2.204934429873305, "percentage": 73.55, "elapsed_time": "5:47:28", "remaining_time": "2:04:58"}
127
+ {"current_steps": 1250, "total_steps": 1686, "loss": 0.8822, "accuracy": 0.5687500238418579, "learning_rate": 7.807221966811815e-07, "epoch": 2.222716159146477, "percentage": 74.14, "elapsed_time": "5:49:09", "remaining_time": "2:01:47"}
128
+ {"current_steps": 1260, "total_steps": 1686, "loss": 0.8162, "accuracy": 0.5062500238418579, "learning_rate": 7.47201733190962e-07, "epoch": 2.240497888419649, "percentage": 74.73, "elapsed_time": "5:50:49", "remaining_time": "1:58:36"}
129
+ {"current_steps": 1270, "total_steps": 1686, "loss": 0.8504, "accuracy": 0.59375, "learning_rate": 7.142899026949721e-07, "epoch": 2.258279617692821, "percentage": 75.33, "elapsed_time": "5:52:29", "remaining_time": "1:55:27"}
130
+ {"current_steps": 1280, "total_steps": 1686, "loss": 0.8347, "accuracy": 0.581250011920929, "learning_rate": 6.819981333343273e-07, "epoch": 2.2760613469659923, "percentage": 75.92, "elapsed_time": "5:54:04", "remaining_time": "1:52:18"}
131
+ {"current_steps": 1290, "total_steps": 1686, "loss": 0.9299, "accuracy": 0.5375000238418579, "learning_rate": 6.503376379431839e-07, "epoch": 2.293843076239164, "percentage": 76.51, "elapsed_time": "5:55:43", "remaining_time": "1:49:12"}
132
+ {"current_steps": 1300, "total_steps": 1686, "loss": 0.8139, "accuracy": 0.6312500238418579, "learning_rate": 6.193194101552502e-07, "epoch": 2.311624805512336, "percentage": 77.11, "elapsed_time": "5:57:25", "remaining_time": "1:46:07"}
133
+ {"current_steps": 1310, "total_steps": 1686, "loss": 0.873, "accuracy": 0.6000000238418579, "learning_rate": 5.889542205864083e-07, "epoch": 2.329406534785508, "percentage": 77.7, "elapsed_time": "5:58:59", "remaining_time": "1:43:02"}
134
+ {"current_steps": 1320, "total_steps": 1686, "loss": 0.8988, "accuracy": 0.48750001192092896, "learning_rate": 5.592526130947862e-07, "epoch": 2.34718826405868, "percentage": 78.29, "elapsed_time": "6:00:39", "remaining_time": "1:40:00"}
135
+ {"current_steps": 1330, "total_steps": 1686, "loss": 0.8506, "accuracy": 0.5625, "learning_rate": 5.302249011195507e-07, "epoch": 2.3649699933318518, "percentage": 78.88, "elapsed_time": "6:02:11", "remaining_time": "1:36:56"}
136
+ {"current_steps": 1340, "total_steps": 1686, "loss": 0.8955, "accuracy": 0.612500011920929, "learning_rate": 5.018811640997307e-07, "epoch": 2.382751722605023, "percentage": 79.48, "elapsed_time": "6:03:55", "remaining_time": "1:33:58"}
137
+ {"current_steps": 1350, "total_steps": 1686, "loss": 0.8855, "accuracy": 0.543749988079071, "learning_rate": 4.7423124397427105e-07, "epoch": 2.400533451878195, "percentage": 80.07, "elapsed_time": "6:05:26", "remaining_time": "1:30:57"}
138
+ {"current_steps": 1360, "total_steps": 1686, "loss": 0.8703, "accuracy": 0.6187499761581421, "learning_rate": 4.472847417645787e-07, "epoch": 2.418315181151367, "percentage": 80.66, "elapsed_time": "6:07:00", "remaining_time": "1:27:58"}
139
+ {"current_steps": 1370, "total_steps": 1686, "loss": 0.8472, "accuracy": 0.574999988079071, "learning_rate": 4.210510142406993e-07, "epoch": 2.436096910424539, "percentage": 81.26, "elapsed_time": "6:08:35", "remaining_time": "1:25:01"}
140
+ {"current_steps": 1380, "total_steps": 1686, "loss": 0.87, "accuracy": 0.5625, "learning_rate": 3.9553917067232966e-07, "epoch": 2.4538786396977104, "percentage": 81.85, "elapsed_time": "6:10:12", "remaining_time": "1:22:05"}
141
+ {"current_steps": 1390, "total_steps": 1686, "loss": 0.8546, "accuracy": 0.5562499761581421, "learning_rate": 3.707580696657509e-07, "epoch": 2.4716603689708823, "percentage": 82.44, "elapsed_time": "6:11:46", "remaining_time": "1:19:10"}
142
+ {"current_steps": 1400, "total_steps": 1686, "loss": 0.8858, "accuracy": 0.5625, "learning_rate": 3.4671631608781815e-07, "epoch": 2.489442098244054, "percentage": 83.04, "elapsed_time": "6:13:19", "remaining_time": "1:16:15"}
143
+ {"current_steps": 1410, "total_steps": 1686, "loss": 0.8579, "accuracy": 0.5625, "learning_rate": 3.234222580780405e-07, "epoch": 2.507223827517226, "percentage": 83.63, "elapsed_time": "6:14:52", "remaining_time": "1:13:22"}
144
+ {"current_steps": 1420, "total_steps": 1686, "loss": 0.8918, "accuracy": 0.550000011920929, "learning_rate": 3.0088398414982375e-07, "epoch": 2.525005556790398, "percentage": 84.22, "elapsed_time": "6:16:29", "remaining_time": "1:10:31"}
145
+ {"current_steps": 1430, "total_steps": 1686, "loss": 0.8665, "accuracy": 0.6312500238418579, "learning_rate": 2.7910932038184487e-07, "epoch": 2.54278728606357, "percentage": 84.82, "elapsed_time": "6:18:04", "remaining_time": "1:07:41"}
146
+ {"current_steps": 1440, "total_steps": 1686, "loss": 0.8421, "accuracy": 0.5687500238418579, "learning_rate": 2.5810582770057325e-07, "epoch": 2.5605690153367417, "percentage": 85.41, "elapsed_time": "6:19:47", "remaining_time": "1:04:52"}
147
+ {"current_steps": 1450, "total_steps": 1686, "loss": 0.9052, "accuracy": 0.5249999761581421, "learning_rate": 2.3788079925484402e-07, "epoch": 2.578350744609913, "percentage": 86.0, "elapsed_time": "6:21:29", "remaining_time": "1:02:05"}
148
+ {"current_steps": 1460, "total_steps": 1686, "loss": 0.8163, "accuracy": 0.612500011920929, "learning_rate": 2.1844125788342661e-07, "epoch": 2.596132473883085, "percentage": 86.6, "elapsed_time": "6:23:04", "remaining_time": "0:59:17"}
149
+ {"current_steps": 1470, "total_steps": 1686, "loss": 0.8278, "accuracy": 0.59375, "learning_rate": 1.9979395367644428e-07, "epoch": 2.613914203156257, "percentage": 87.19, "elapsed_time": "6:24:43", "remaining_time": "0:56:31"}
150
+ {"current_steps": 1480, "total_steps": 1686, "loss": 0.8828, "accuracy": 0.5062500238418579, "learning_rate": 1.81945361631512e-07, "epoch": 2.631695932429429, "percentage": 87.78, "elapsed_time": "6:26:19", "remaining_time": "0:53:46"}
151
+ {"current_steps": 1490, "total_steps": 1686, "loss": 0.8283, "accuracy": 0.581250011920929, "learning_rate": 1.6490167940538343e-07, "epoch": 2.6494776617026004, "percentage": 88.37, "elapsed_time": "6:28:47", "remaining_time": "0:51:08"}
152
+ {"current_steps": 1500, "total_steps": 1686, "loss": 0.8923, "accuracy": 0.5375000238418579, "learning_rate": 1.4866882516191339e-07, "epoch": 2.6672593909757722, "percentage": 88.97, "elapsed_time": "6:30:49", "remaining_time": "0:48:27"}
153
+ {"current_steps": 1500, "total_steps": 1686, "eval_loss": 0.8734214901924133, "epoch": 2.6672593909757722, "percentage": 88.97, "elapsed_time": "6:34:47", "remaining_time": "0:48:57"}