lmytimagedata commited on
Commit
a197cf9
·
verified ·
1 Parent(s): 38f2d5b

Upload checkpoint-50000

Browse files
checkpoint-50000/config.json ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "batch_size": 64,
3
+ "buffer_size": 64,
4
+ "eval_mix": "qiwu_test",
5
+ "frozen_keys": [
6
+ "*hf_model*"
7
+ ],
8
+ "lora": false,
9
+ "lora_config": {
10
+ "bias": "none",
11
+ "lora_alpha": 16,
12
+ "lora_dropout": 0.05,
13
+ "r": 8
14
+ },
15
+ "model": {
16
+ "heads": {
17
+ "action": {
18
+ "args": [],
19
+ "kwargs": {
20
+ "action_dim": 7,
21
+ "action_horizon": 4,
22
+ "dropout_rate": 0.0,
23
+ "n_diffusion_samples": 1,
24
+ "readout_key": "readout_action",
25
+ "token_embedding_size": 768,
26
+ "use_map": false
27
+ },
28
+ "module": "octo.model.components.action_heads",
29
+ "name": "DiffusionActionHead"
30
+ }
31
+ },
32
+ "max_horizon": 10,
33
+ "observation_tokenizers": {
34
+ "primary": {
35
+ "args": [],
36
+ "kwargs": {
37
+ "encoder": {
38
+ "args": [],
39
+ "kwargs": {
40
+ "in_features": 6
41
+ },
42
+ "module": "octo.model.components.vit_encoders",
43
+ "name": "SmallStem16"
44
+ },
45
+ "obs_stack_keys": [
46
+ "image_primary"
47
+ ],
48
+ "task_stack_keys": [
49
+ "image_primary"
50
+ ]
51
+ },
52
+ "module": "octo.model.components.tokenizers",
53
+ "name": "ImageTokenizer"
54
+ },
55
+ "state": {
56
+ "args": [],
57
+ "kwargs": {
58
+ "bin_type": "normal",
59
+ "high": 2.0,
60
+ "low": -2.0,
61
+ "n_bins": 256,
62
+ "obs_keys": [
63
+ "state"
64
+ ]
65
+ },
66
+ "module": "model.octo.components.tokenizers",
67
+ "name": "LowdimObsTokenizer"
68
+ },
69
+ "wrist": {
70
+ "args": [],
71
+ "kwargs": {
72
+ "encoder": {
73
+ "args": [],
74
+ "kwargs": {
75
+ "in_features": 6
76
+ },
77
+ "module": "octo.model.components.vit_encoders",
78
+ "name": "SmallStem16"
79
+ },
80
+ "obs_stack_keys": [
81
+ "image_wrist"
82
+ ],
83
+ "task_stack_keys": [
84
+ "image_wrist"
85
+ ]
86
+ },
87
+ "module": "octo.model.components.tokenizers",
88
+ "name": "ImageTokenizer"
89
+ }
90
+ },
91
+ "readouts": {
92
+ "action": 1
93
+ },
94
+ "repeat_task_tokens": true,
95
+ "task_tokenizers": {
96
+ "language": {
97
+ "args": [],
98
+ "kwargs": {
99
+ "encoder": "t5-base",
100
+ "finetune_encoder": false
101
+ },
102
+ "module": "octo.model.components.tokenizers",
103
+ "name": "LanguageTokenizer"
104
+ }
105
+ },
106
+ "token_embedding_size": 768,
107
+ "transformer_kwargs": {
108
+ "add_position_embedding": false,
109
+ "attention_dropout_rate": 0.0,
110
+ "dropout_rate": 0.0,
111
+ "mlp_dim": 3072,
112
+ "num_attention_heads": 12,
113
+ "num_layers": 12
114
+ },
115
+ "use_correct_attention": true
116
+ },
117
+ "no_state": false,
118
+ "obs_token_nums": {
119
+ "primary": 256,
120
+ "state": 7,
121
+ "wrist": 64
122
+ },
123
+ "output_dir": "ljp_qiwu_merged_20250617",
124
+ "pretrained": {
125
+ "kwargs": {
126
+ "subpath": "base"
127
+ },
128
+ "model": "hf://chuanmew/octo_torch"
129
+ },
130
+ "run_name": "ljp_qiwu_merged_20250617",
131
+ "sampler_num_samples": 2560000,
132
+ "seed": 42,
133
+ "subsample_length": 99999,
134
+ "text_processor": {
135
+ "args": [],
136
+ "kwargs": {
137
+ "encode_with_model": false,
138
+ "tokenizer_kwargs": {
139
+ "max_length": 16,
140
+ "padding": "max_length",
141
+ "return_tensors": "np",
142
+ "truncation": true
143
+ },
144
+ "tokenizer_name": "t5-base"
145
+ },
146
+ "module": "octo.components.text_processing",
147
+ "name": "HFTokenizer"
148
+ },
149
+ "train_mix": "ljp_qiwu_merged_20250519to20250529_20250604",
150
+ "training_arguments": {
151
+ "bf16": true,
152
+ "dataloader_pin_memory": true,
153
+ "ddp_find_unused_parameters": false,
154
+ "eval_steps": 5000,
155
+ "gradient_accumulation_steps": 1,
156
+ "learning_rate": 0.0003,
157
+ "logging_nan_inf_filter": false,
158
+ "logging_steps": 5000,
159
+ "lr_scheduler_kwargs": {
160
+ "decay_type": "cosine",
161
+ "min_lr_ratio": 0.1,
162
+ "num_decay_steps": 50000
163
+ },
164
+ "lr_scheduler_type": "warmup_stable_decay",
165
+ "max_grad_norm": 1,
166
+ "max_steps": 50000,
167
+ "optim": "adamw_torch_fused",
168
+ "per_device_eval_batch_size": 1,
169
+ "per_device_train_batch_size": 1,
170
+ "report_to": "wandb",
171
+ "save_safetensors": true,
172
+ "save_steps": 50000,
173
+ "torch_compile": true,
174
+ "warmup_steps": 2000,
175
+ "weight_decay": 0.01
176
+ },
177
+ "training_keys": [
178
+ "*action*",
179
+ "*observation_tokenizers*",
180
+ "*transformer.transformer*",
181
+ "*task_projections*",
182
+ "*obs_projections*",
183
+ "*readout_embeddings*",
184
+ "*task_pos_embeddings*",
185
+ "*obs_pos_embeddings*"
186
+ ]
187
+ }
checkpoint-50000/dataset_statistics.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "action": {
4
+ "mean": [
5
+ -0.001587849808856845,
6
+ 0.0010738142300397158,
7
+ -0.000697988027241081,
8
+ -0.006544447969645262,
9
+ 0.009556379169225693,
10
+ 0.020673424005508423,
11
+ 0.45607081055641174
12
+ ],
13
+ "std": [
14
+ 0.03518082574009895,
15
+ 0.02438654750585556,
16
+ 0.0410635769367218,
17
+ 0.11703108996152878,
18
+ 0.10797987133264542,
19
+ 0.17201891541481018,
20
+ 0.46286532282829285
21
+ ],
22
+ "min": [
23
+ -0.2540467381477356,
24
+ -0.16329476237297058,
25
+ -0.19035658240318298,
26
+ -1.4179747104644775,
27
+ -0.8620505332946777,
28
+ -2.1886229515075684,
29
+ 0.0
30
+ ],
31
+ "max": [
32
+ 0.18435201048851013,
33
+ 0.15960073471069336,
34
+ 0.2124226987361908,
35
+ 1.345801591873169,
36
+ 1.2207748889923096,
37
+ 2.1311380863189697,
38
+ 1.0
39
+ ],
40
+ "mask": [
41
+ true,
42
+ true,
43
+ true,
44
+ true,
45
+ true,
46
+ true,
47
+ false
48
+ ]
49
+ },
50
+ "state": {
51
+ "mean": [
52
+ -1.0741499662399292,
53
+ -1.0714620351791382,
54
+ -1.357372760772705,
55
+ -0.6174096465110779,
56
+ -0.880132257938385,
57
+ 0.6550877094268799,
58
+ 0.4558027982711792
59
+ ],
60
+ "std": [
61
+ 0.2520765960216522,
62
+ 0.2545098066329956,
63
+ 0.1924552172422409,
64
+ 0.559248685836792,
65
+ 0.41956013441085815,
66
+ 1.6006327867507935,
67
+ 0.46324989199638367
68
+ ],
69
+ "min": [
70
+ -2.1934425830841064,
71
+ -1.8970158100128174,
72
+ -2.2100930213928223,
73
+ -3.1213817596435547,
74
+ -2.0163440704345703,
75
+ -6.283377170562744,
76
+ 0.0
77
+ ],
78
+ "max": [
79
+ -0.30637508630752563,
80
+ -0.13648474216461182,
81
+ -0.47022661566734314,
82
+ 2.9531495571136475,
83
+ 1.5494160652160645,
84
+ 3.9177753925323486,
85
+ 1.0
86
+ ],
87
+ "mask": [
88
+ true,
89
+ true,
90
+ true,
91
+ true,
92
+ true,
93
+ true,
94
+ false
95
+ ]
96
+ }
97
+ }
98
+ ]
checkpoint-50000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766b98569fcfd684c0c784c74a997d8d821e49dd378b31027ec717593b5f8f1d
3
+ size 405079824
checkpoint-50000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84708e6e31c0f7b2f3625075b8e64466ab7415d46182fe238b4e3f63fb9ce6d5
3
+ size 371730170
checkpoint-50000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a650642393a3a381a8a2823234abb6a563a46c3dcc0e697ec52ae03202e97fe
3
+ size 15984
checkpoint-50000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937a6237622e2c1cab9836e08409d6dca738bbfaac7903ec67708ff3eb967473
3
+ size 15984
checkpoint-50000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ff7dc9506f1716fd24bcfa48cccfe1f49e6205c22585de7b5eaa0a211a7eb7
3
+ size 15984
checkpoint-50000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddfd0e87c3201ae1c793a61be552947fc95ef008a4c1c4ebf7ec7afdd54414a
3
+ size 15984
checkpoint-50000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f9d92856c22d5b435e814cf3cb979c1f63908e6d47c831c5870cff56f147496
3
+ size 1064
checkpoint-50000/trainer_state.json ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
+ "eval_steps": 5000,
7
+ "global_step": 50000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5,
14
+ "grad_norm": 16.625,
15
+ "learning_rate": 0.00029339525062130936,
16
+ "loss": 0.6821,
17
+ "step": 5000
18
+ },
19
+ {
20
+ "epoch": 0.5,
21
+ "eval_loss": 0.8639410734176636,
22
+ "eval_runtime": 14.9757,
23
+ "eval_samples_per_second": 1349.986,
24
+ "eval_steps_per_second": 337.547,
25
+ "step": 5000
26
+ },
27
+ {
28
+ "epoch": 1.0,
29
+ "grad_norm": 11.0,
30
+ "learning_rate": 0.00027422227979597064,
31
+ "loss": 0.6693,
32
+ "step": 10000
33
+ },
34
+ {
35
+ "epoch": 1.0,
36
+ "eval_loss": 1.0540602207183838,
37
+ "eval_runtime": 13.9502,
38
+ "eval_samples_per_second": 1449.223,
39
+ "eval_steps_per_second": 362.36,
40
+ "step": 10000
41
+ },
42
+ {
43
+ "epoch": 1.5,
44
+ "grad_norm": 10.25,
45
+ "learning_rate": 0.0002443578712278313,
46
+ "loss": 0.4299,
47
+ "step": 15000
48
+ },
49
+ {
50
+ "epoch": 1.5,
51
+ "eval_loss": 1.183585286140442,
52
+ "eval_runtime": 14.3742,
53
+ "eval_samples_per_second": 1406.479,
54
+ "eval_steps_per_second": 351.672,
55
+ "step": 15000
56
+ },
57
+ {
58
+ "epoch": 2.0,
59
+ "grad_norm": 7.40625,
60
+ "learning_rate": 0.00020672536130511077,
61
+ "loss": 0.3463,
62
+ "step": 20000
63
+ },
64
+ {
65
+ "epoch": 2.0,
66
+ "eval_loss": 1.3365648984909058,
67
+ "eval_runtime": 13.9804,
68
+ "eval_samples_per_second": 1446.098,
69
+ "eval_steps_per_second": 361.578,
70
+ "step": 20000
71
+ },
72
+ {
73
+ "epoch": 2.5,
74
+ "grad_norm": 5.4375,
75
+ "learning_rate": 0.00016500848230015908,
76
+ "loss": 0.2826,
77
+ "step": 25000
78
+ },
79
+ {
80
+ "epoch": 2.5,
81
+ "eval_loss": 1.6001883745193481,
82
+ "eval_runtime": 14.1078,
83
+ "eval_samples_per_second": 1433.035,
84
+ "eval_steps_per_second": 358.312,
85
+ "step": 25000
86
+ },
87
+ {
88
+ "epoch": 3.0,
89
+ "grad_norm": 10.25,
90
+ "learning_rate": 0.00012329077298856825,
91
+ "loss": 0.229,
92
+ "step": 30000
93
+ },
94
+ {
95
+ "epoch": 3.0,
96
+ "eval_loss": 1.7588920593261719,
97
+ "eval_runtime": 13.9516,
98
+ "eval_samples_per_second": 1449.085,
99
+ "eval_steps_per_second": 362.325,
100
+ "step": 30000
101
+ },
102
+ {
103
+ "epoch": 3.5,
104
+ "grad_norm": 5.4375,
105
+ "learning_rate": 8.565585342212883e-05,
106
+ "loss": 0.1854,
107
+ "step": 35000
108
+ },
109
+ {
110
+ "epoch": 3.5,
111
+ "eval_loss": 2.074185371398926,
112
+ "eval_runtime": 14.3285,
113
+ "eval_samples_per_second": 1410.965,
114
+ "eval_steps_per_second": 352.794,
115
+ "step": 35000
116
+ },
117
+ {
118
+ "epoch": 4.0,
119
+ "grad_norm": 5.21875,
120
+ "learning_rate": 5.5787691745907415e-05,
121
+ "loss": 0.1527,
122
+ "step": 40000
123
+ },
124
+ {
125
+ "epoch": 4.0,
126
+ "eval_loss": 2.3301103115081787,
127
+ "eval_runtime": 14.184,
128
+ "eval_samples_per_second": 1425.335,
129
+ "eval_steps_per_second": 356.387,
130
+ "step": 40000
131
+ },
132
+ {
133
+ "epoch": 4.5,
134
+ "grad_norm": 7.15625,
135
+ "learning_rate": 3.6609991728491735e-05,
136
+ "loss": 0.1346,
137
+ "step": 45000
138
+ },
139
+ {
140
+ "epoch": 4.5,
141
+ "eval_loss": 2.443498373031616,
142
+ "eval_runtime": 14.4995,
143
+ "eval_samples_per_second": 1394.32,
144
+ "eval_steps_per_second": 348.632,
145
+ "step": 45000
146
+ },
147
+ {
148
+ "epoch": 5.0,
149
+ "grad_norm": 4.875,
150
+ "learning_rate": 3.000000026647932e-05,
151
+ "loss": 0.1258,
152
+ "step": 50000
153
+ },
154
+ {
155
+ "epoch": 5.0,
156
+ "eval_loss": 2.586019277572632,
157
+ "eval_runtime": 14.4123,
158
+ "eval_samples_per_second": 1402.757,
159
+ "eval_steps_per_second": 350.741,
160
+ "step": 50000
161
+ }
162
+ ],
163
+ "logging_steps": 5000,
164
+ "max_steps": 50000,
165
+ "num_input_tokens_seen": 0,
166
+ "num_train_epochs": 5,
167
+ "save_steps": 50000,
168
+ "stateful_callbacks": {
169
+ "TrainerControl": {
170
+ "args": {
171
+ "should_epoch_stop": false,
172
+ "should_evaluate": false,
173
+ "should_log": false,
174
+ "should_save": true,
175
+ "should_training_stop": true
176
+ },
177
+ "attributes": {}
178
+ }
179
+ },
180
+ "total_flos": 0.0,
181
+ "train_batch_size": 1,
182
+ "trial_name": null,
183
+ "trial_params": null
184
+ }
checkpoint-50000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f76da97d105f4a011c8c8ceea81db6ba502c8ad553055fff92ef2afca7eda75
3
+ size 5432