Training in progress, step 42
Browse files- model.safetensors +1 -1
- trainer_log.jsonl +67 -36
- training_args.bin +1 -1
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3441185608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:497db5d132eaa270b0002bd6cf6b6ced840e8096df45acfa7bfcb3b1c7eb2069
|
3 |
size 3441185608
|
trainer_log.jsonl
CHANGED
@@ -1,36 +1,67 @@
|
|
1 |
-
{"current_steps": 1, "total_steps":
|
2 |
-
{"current_steps": 2, "total_steps":
|
3 |
-
{"current_steps": 3, "total_steps":
|
4 |
-
{"current_steps": 4, "total_steps":
|
5 |
-
{"current_steps": 5, "total_steps":
|
6 |
-
{"current_steps": 6, "total_steps":
|
7 |
-
{"current_steps": 7, "total_steps":
|
8 |
-
{"current_steps": 8, "total_steps":
|
9 |
-
{"current_steps": 9, "total_steps":
|
10 |
-
{"current_steps": 10, "total_steps":
|
11 |
-
{"current_steps": 11, "total_steps":
|
12 |
-
{"current_steps": 12, "total_steps":
|
13 |
-
{"current_steps": 13, "total_steps":
|
14 |
-
{"current_steps": 14, "total_steps":
|
15 |
-
{"current_steps": 15, "total_steps":
|
16 |
-
{"current_steps": 16, "total_steps":
|
17 |
-
{"current_steps": 17, "total_steps":
|
18 |
-
{"current_steps": 18, "total_steps":
|
19 |
-
{"current_steps": 19, "total_steps":
|
20 |
-
{"current_steps": 20, "total_steps":
|
21 |
-
{"current_steps": 21, "total_steps":
|
22 |
-
{"current_steps": 22, "total_steps":
|
23 |
-
{"current_steps": 23, "total_steps":
|
24 |
-
{"current_steps": 24, "total_steps":
|
25 |
-
{"current_steps": 25, "total_steps":
|
26 |
-
{"current_steps": 26, "total_steps":
|
27 |
-
{"current_steps": 27, "total_steps":
|
28 |
-
{"current_steps": 28, "total_steps":
|
29 |
-
{"current_steps": 29, "total_steps":
|
30 |
-
{"current_steps": 30, "total_steps":
|
31 |
-
{"current_steps": 31, "total_steps":
|
32 |
-
{"current_steps": 32, "total_steps":
|
33 |
-
{"current_steps": 33, "total_steps":
|
34 |
-
{"current_steps": 34, "total_steps":
|
35 |
-
{"current_steps": 35, "total_steps":
|
36 |
-
{"current_steps":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"current_steps": 1, "total_steps": 255, "loss": 1.5712, "lr": 0.0, "epoch": 0.0196078431372549, "percentage": 0.39, "elapsed_time": "0:00:03", "remaining_time": "0:14:04"}
|
2 |
+
{"current_steps": 2, "total_steps": 255, "loss": 1.8824, "lr": 3.3333333333333333e-06, "epoch": 0.0392156862745098, "percentage": 0.78, "elapsed_time": "0:00:05", "remaining_time": "0:11:43"}
|
3 |
+
{"current_steps": 3, "total_steps": 255, "loss": 1.7331, "lr": 6.666666666666667e-06, "epoch": 0.058823529411764705, "percentage": 1.18, "elapsed_time": "0:00:06", "remaining_time": "0:08:50"}
|
4 |
+
{"current_steps": 4, "total_steps": 255, "loss": 1.4968, "lr": 1e-05, "epoch": 0.0784313725490196, "percentage": 1.57, "elapsed_time": "0:00:07", "remaining_time": "0:07:23"}
|
5 |
+
{"current_steps": 5, "total_steps": 255, "loss": 1.6074, "lr": 9.999611462404874e-06, "epoch": 0.09803921568627451, "percentage": 1.96, "elapsed_time": "0:00:07", "remaining_time": "0:06:31"}
|
6 |
+
{"current_steps": 6, "total_steps": 255, "loss": 1.3465, "lr": 9.998445910004082e-06, "epoch": 0.11764705882352941, "percentage": 2.35, "elapsed_time": "0:00:08", "remaining_time": "0:05:56"}
|
7 |
+
{"current_steps": 7, "total_steps": 255, "loss": 1.4326, "lr": 9.996503523941994e-06, "epoch": 0.13725490196078433, "percentage": 2.75, "elapsed_time": "0:00:09", "remaining_time": "0:05:30"}
|
8 |
+
{"current_steps": 8, "total_steps": 255, "loss": 1.3606, "lr": 9.993784606094612e-06, "epoch": 0.1568627450980392, "percentage": 3.14, "elapsed_time": "0:00:10", "remaining_time": "0:05:11"}
|
9 |
+
{"current_steps": 9, "total_steps": 255, "loss": 1.3686, "lr": 9.990289579022661e-06, "epoch": 0.17647058823529413, "percentage": 3.53, "elapsed_time": "0:00:10", "remaining_time": "0:04:56"}
|
10 |
+
{"current_steps": 10, "total_steps": 255, "loss": 1.2102, "lr": 9.986018985905901e-06, "epoch": 0.19607843137254902, "percentage": 3.92, "elapsed_time": "0:00:11", "remaining_time": "0:04:44"}
|
11 |
+
{"current_steps": 11, "total_steps": 255, "loss": 1.2232, "lr": 9.980973490458728e-06, "epoch": 0.21568627450980393, "percentage": 4.31, "elapsed_time": "0:00:12", "remaining_time": "0:04:34"}
|
12 |
+
{"current_steps": 12, "total_steps": 255, "loss": 1.1679, "lr": 9.975153876827008e-06, "epoch": 0.23529411764705882, "percentage": 4.71, "elapsed_time": "0:00:13", "remaining_time": "0:04:25"}
|
13 |
+
{"current_steps": 13, "total_steps": 255, "loss": 0.9655, "lr": 9.968561049466214e-06, "epoch": 0.2549019607843137, "percentage": 5.1, "elapsed_time": "0:00:13", "remaining_time": "0:04:18"}
|
14 |
+
{"current_steps": 14, "total_steps": 255, "loss": 1.0298, "lr": 9.961196033000862e-06, "epoch": 0.27450980392156865, "percentage": 5.49, "elapsed_time": "0:00:14", "remaining_time": "0:04:11"}
|
15 |
+
{"current_steps": 15, "total_steps": 255, "loss": 1.3297, "lr": 9.953059972065264e-06, "epoch": 0.29411764705882354, "percentage": 5.88, "elapsed_time": "0:00:15", "remaining_time": "0:04:06"}
|
16 |
+
{"current_steps": 16, "total_steps": 255, "loss": 1.1663, "lr": 9.944154131125643e-06, "epoch": 0.3137254901960784, "percentage": 6.27, "elapsed_time": "0:00:16", "remaining_time": "0:04:01"}
|
17 |
+
{"current_steps": 17, "total_steps": 255, "loss": 1.0548, "lr": 9.934479894283607e-06, "epoch": 0.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:16", "remaining_time": "0:03:56"}
|
18 |
+
{"current_steps": 18, "total_steps": 255, "loss": 1.2631, "lr": 9.924038765061042e-06, "epoch": 0.35294117647058826, "percentage": 7.06, "elapsed_time": "0:00:17", "remaining_time": "0:03:52"}
|
19 |
+
{"current_steps": 19, "total_steps": 255, "loss": 1.1965, "lr": 9.912832366166443e-06, "epoch": 0.37254901960784315, "percentage": 7.45, "elapsed_time": "0:00:18", "remaining_time": "0:03:48"}
|
20 |
+
{"current_steps": 20, "total_steps": 255, "loss": 1.0466, "lr": 9.900862439242719e-06, "epoch": 0.39215686274509803, "percentage": 7.84, "elapsed_time": "0:00:19", "remaining_time": "0:03:45"}
|
21 |
+
{"current_steps": 21, "total_steps": 255, "loss": 0.9809, "lr": 9.888130844596525e-06, "epoch": 0.4117647058823529, "percentage": 8.24, "elapsed_time": "0:00:19", "remaining_time": "0:03:41"}
|
22 |
+
{"current_steps": 22, "total_steps": 255, "loss": 1.1545, "lr": 9.874639560909118e-06, "epoch": 0.43137254901960786, "percentage": 8.63, "elapsed_time": "0:00:20", "remaining_time": "0:03:38"}
|
23 |
+
{"current_steps": 23, "total_steps": 255, "loss": 0.9701, "lr": 9.860390684928873e-06, "epoch": 0.45098039215686275, "percentage": 9.02, "elapsed_time": "0:00:21", "remaining_time": "0:03:36"}
|
24 |
+
{"current_steps": 24, "total_steps": 255, "loss": 0.7863, "lr": 9.84538643114539e-06, "epoch": 0.47058823529411764, "percentage": 9.41, "elapsed_time": "0:00:22", "remaining_time": "0:03:33"}
|
25 |
+
{"current_steps": 25, "total_steps": 255, "loss": 1.0002, "lr": 9.829629131445342e-06, "epoch": 0.49019607843137253, "percentage": 9.8, "elapsed_time": "0:00:22", "remaining_time": "0:03:30"}
|
26 |
+
{"current_steps": 26, "total_steps": 255, "loss": 0.8831, "lr": 9.81312123475006e-06, "epoch": 0.5098039215686274, "percentage": 10.2, "elapsed_time": "0:00:23", "remaining_time": "0:03:28"}
|
27 |
+
{"current_steps": 27, "total_steps": 255, "loss": 0.9045, "lr": 9.795865306634939e-06, "epoch": 0.5294117647058824, "percentage": 10.59, "elapsed_time": "0:00:24", "remaining_time": "0:03:26"}
|
28 |
+
{"current_steps": 28, "total_steps": 255, "loss": 0.8205, "lr": 9.777864028930705e-06, "epoch": 0.5490196078431373, "percentage": 10.98, "elapsed_time": "0:00:25", "remaining_time": "0:03:24"}
|
29 |
+
{"current_steps": 29, "total_steps": 255, "loss": 0.9167, "lr": 9.759120199306613e-06, "epoch": 0.5686274509803921, "percentage": 11.37, "elapsed_time": "0:00:25", "remaining_time": "0:03:22"}
|
30 |
+
{"current_steps": 30, "total_steps": 255, "loss": 0.9964, "lr": 9.73963673083566e-06, "epoch": 0.5882352941176471, "percentage": 11.76, "elapsed_time": "0:00:26", "remaining_time": "0:03:20"}
|
31 |
+
{"current_steps": 31, "total_steps": 255, "loss": 0.9844, "lr": 9.719416651541839e-06, "epoch": 0.6078431372549019, "percentage": 12.16, "elapsed_time": "0:00:27", "remaining_time": "0:03:18"}
|
32 |
+
{"current_steps": 32, "total_steps": 255, "loss": 0.971, "lr": 9.698463103929542e-06, "epoch": 0.6274509803921569, "percentage": 12.55, "elapsed_time": "0:00:28", "remaining_time": "0:03:16"}
|
33 |
+
{"current_steps": 33, "total_steps": 255, "loss": 1.0724, "lr": 9.67677934449517e-06, "epoch": 0.6470588235294118, "percentage": 12.94, "elapsed_time": "0:00:28", "remaining_time": "0:03:14"}
|
34 |
+
{"current_steps": 34, "total_steps": 255, "loss": 1.0107, "lr": 9.654368743221022e-06, "epoch": 0.6666666666666666, "percentage": 13.33, "elapsed_time": "0:00:29", "remaining_time": "0:03:13"}
|
35 |
+
{"current_steps": 35, "total_steps": 255, "loss": 0.9819, "lr": 9.631234783051544e-06, "epoch": 0.6862745098039216, "percentage": 13.73, "elapsed_time": "0:00:30", "remaining_time": "0:03:11"}
|
36 |
+
{"current_steps": 36, "total_steps": 255, "loss": 0.863, "lr": 9.60738105935204e-06, "epoch": 0.7058823529411765, "percentage": 14.12, "elapsed_time": "0:00:31", "remaining_time": "0:03:10"}
|
37 |
+
{"current_steps": 37, "total_steps": 255, "loss": 0.9197, "lr": 9.582811279349881e-06, "epoch": 0.7254901960784313, "percentage": 14.51, "elapsed_time": "0:00:31", "remaining_time": "0:03:08"}
|
38 |
+
{"current_steps": 38, "total_steps": 255, "loss": 1.0188, "lr": 9.557529261558367e-06, "epoch": 0.7450980392156863, "percentage": 14.9, "elapsed_time": "0:00:32", "remaining_time": "0:03:07"}
|
39 |
+
{"current_steps": 39, "total_steps": 255, "loss": 0.8321, "lr": 9.531538935183252e-06, "epoch": 0.7647058823529411, "percentage": 15.29, "elapsed_time": "0:00:33", "remaining_time": "0:03:05"}
|
40 |
+
{"current_steps": 40, "total_steps": 255, "loss": 0.9346, "lr": 9.504844339512096e-06, "epoch": 0.7843137254901961, "percentage": 15.69, "elapsed_time": "0:00:34", "remaining_time": "0:03:04"}
|
41 |
+
{"current_steps": 41, "total_steps": 255, "loss": 0.884, "lr": 9.477449623286505e-06, "epoch": 0.803921568627451, "percentage": 16.08, "elapsed_time": "0:00:35", "remaining_time": "0:03:02"}
|
42 |
+
{"current_steps": 42, "total_steps": 255, "loss": 0.8966, "lr": 9.449359044057344e-06, "epoch": 0.8235294117647058, "percentage": 16.47, "elapsed_time": "0:00:35", "remaining_time": "0:03:01"}
|
43 |
+
{"current_steps": 43, "total_steps": 255, "loss": 0.9415, "lr": 9.420576967523049e-06, "epoch": 0.8431372549019608, "percentage": 16.86, "elapsed_time": "0:01:55", "remaining_time": "0:09:27"}
|
44 |
+
{"current_steps": 44, "total_steps": 255, "loss": 0.9063, "lr": 9.391107866851143e-06, "epoch": 0.8627450980392157, "percentage": 17.25, "elapsed_time": "0:01:55", "remaining_time": "0:09:15"}
|
45 |
+
{"current_steps": 45, "total_steps": 255, "loss": 0.7435, "lr": 9.360956321983028e-06, "epoch": 0.8823529411764706, "percentage": 17.65, "elapsed_time": "0:01:56", "remaining_time": "0:09:04"}
|
46 |
+
{"current_steps": 46, "total_steps": 255, "loss": 0.9956, "lr": 9.330127018922195e-06, "epoch": 0.9019607843137255, "percentage": 18.04, "elapsed_time": "0:01:57", "remaining_time": "0:08:53"}
|
47 |
+
{"current_steps": 47, "total_steps": 255, "loss": 0.9303, "lr": 9.298624749005953e-06, "epoch": 0.9215686274509803, "percentage": 18.43, "elapsed_time": "0:01:58", "remaining_time": "0:08:43"}
|
48 |
+
{"current_steps": 48, "total_steps": 255, "loss": 0.7471, "lr": 9.266454408160779e-06, "epoch": 0.9411764705882353, "percentage": 18.82, "elapsed_time": "0:01:59", "remaining_time": "0:08:33"}
|
49 |
+
{"current_steps": 49, "total_steps": 255, "loss": 0.7865, "lr": 9.233620996141421e-06, "epoch": 0.9607843137254902, "percentage": 19.22, "elapsed_time": "0:01:59", "remaining_time": "0:08:24"}
|
50 |
+
{"current_steps": 50, "total_steps": 255, "loss": 0.7501, "lr": 9.200129615753858e-06, "epoch": 0.9803921568627451, "percentage": 19.61, "elapsed_time": "0:02:00", "remaining_time": "0:08:14"}
|
51 |
+
{"current_steps": 51, "total_steps": 255, "loss": 0.7297, "lr": 9.165985472062245e-06, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:02:01", "remaining_time": "0:08:06"}
|
52 |
+
{"current_steps": 52, "total_steps": 255, "loss": 0.5673, "lr": 9.131193871579975e-06, "epoch": 1.0196078431372548, "percentage": 20.39, "elapsed_time": "0:02:04", "remaining_time": "0:08:06"}
|
53 |
+
{"current_steps": 53, "total_steps": 255, "loss": 0.7061, "lr": 9.09576022144496e-06, "epoch": 1.0392156862745099, "percentage": 20.78, "elapsed_time": "0:02:05", "remaining_time": "0:07:57"}
|
54 |
+
{"current_steps": 54, "total_steps": 255, "loss": 0.5602, "lr": 9.059690028579285e-06, "epoch": 1.0588235294117647, "percentage": 21.18, "elapsed_time": "0:02:06", "remaining_time": "0:07:49"}
|
55 |
+
{"current_steps": 55, "total_steps": 255, "loss": 0.5984, "lr": 9.022988898833342e-06, "epoch": 1.0784313725490196, "percentage": 21.57, "elapsed_time": "0:02:06", "remaining_time": "0:07:41"}
|
56 |
+
{"current_steps": 56, "total_steps": 255, "loss": 0.6954, "lr": 8.985662536114614e-06, "epoch": 1.0980392156862746, "percentage": 21.96, "elapsed_time": "0:02:07", "remaining_time": "0:07:33"}
|
57 |
+
{"current_steps": 57, "total_steps": 255, "loss": 0.6879, "lr": 8.947716741501178e-06, "epoch": 1.1176470588235294, "percentage": 22.35, "elapsed_time": "0:02:08", "remaining_time": "0:07:25"}
|
58 |
+
{"current_steps": 58, "total_steps": 255, "loss": 0.5973, "lr": 8.90915741234015e-06, "epoch": 1.1372549019607843, "percentage": 22.75, "elapsed_time": "0:02:09", "remaining_time": "0:07:18"}
|
59 |
+
{"current_steps": 59, "total_steps": 255, "loss": 0.6397, "lr": 8.869990541331137e-06, "epoch": 1.156862745098039, "percentage": 23.14, "elapsed_time": "0:02:09", "remaining_time": "0:07:11"}
|
60 |
+
{"current_steps": 60, "total_steps": 255, "loss": 0.676, "lr": 8.83022221559489e-06, "epoch": 1.1764705882352942, "percentage": 23.53, "elapsed_time": "0:02:10", "remaining_time": "0:07:04"}
|
61 |
+
{"current_steps": 61, "total_steps": 255, "loss": 0.6512, "lr": 8.789858615727266e-06, "epoch": 1.196078431372549, "percentage": 23.92, "elapsed_time": "0:02:11", "remaining_time": "0:06:58"}
|
62 |
+
{"current_steps": 62, "total_steps": 255, "loss": 0.6142, "lr": 8.748906014838672e-06, "epoch": 1.215686274509804, "percentage": 24.31, "elapsed_time": "0:02:12", "remaining_time": "0:06:51"}
|
63 |
+
{"current_steps": 63, "total_steps": 255, "loss": 0.6279, "lr": 8.707370777579134e-06, "epoch": 1.2352941176470589, "percentage": 24.71, "elapsed_time": "0:02:12", "remaining_time": "0:06:45"}
|
64 |
+
{"current_steps": 64, "total_steps": 255, "loss": 0.6442, "lr": 8.665259359149132e-06, "epoch": 1.2549019607843137, "percentage": 25.1, "elapsed_time": "0:02:13", "remaining_time": "0:06:39"}
|
65 |
+
{"current_steps": 65, "total_steps": 255, "loss": 0.6373, "lr": 8.622578304296364e-06, "epoch": 1.2745098039215685, "percentage": 25.49, "elapsed_time": "0:02:14", "remaining_time": "0:06:33"}
|
66 |
+
{"current_steps": 66, "total_steps": 255, "loss": 0.4815, "lr": 8.579334246298593e-06, "epoch": 1.2941176470588236, "percentage": 25.88, "elapsed_time": "0:02:15", "remaining_time": "0:06:27"}
|
67 |
+
{"current_steps": 67, "total_steps": 255, "loss": 0.5527, "lr": 8.535533905932739e-06, "epoch": 1.3137254901960784, "percentage": 26.27, "elapsed_time": "0:02:16", "remaining_time": "0:06:21"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8273
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d20515e18cbcd276b6ec9653705809e64b4d4559ce13c446d6735b639325180
|
3 |
size 8273
|