yuntian-deng committed on
Commit
2cd7747
·
verified ·
1 Parent(s): 4bfd6c8

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +126 -0
config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "save_path": "saved_standard_challenging_context32_nocond_cont_cont_all_cont_eval",
3
+ "model": {
4
+ "base_learning_rate": 8e-05,
5
+ "target": "latent_diffusion.ldm.models.diffusion.ddpm.LatentDiffusion",
6
+ "params": {
7
+ "linear_start": 0.0015,
8
+ "linear_end": 0.0195,
9
+ "num_timesteps_cond": 1,
10
+ "log_every_t": 200,
11
+ "timesteps": 1000,
12
+ "first_stage_key": "image",
13
+ "cond_stage_key": "action_",
14
+ "scheduler_sampling_rate": 0.0,
15
+ "hybrid_key": "c_concat",
16
+ "image_size": [
17
+ 64,
18
+ 48
19
+ ],
20
+ "channels": 3,
21
+ "cond_stage_trainable": false,
22
+ "conditioning_key": "hybrid",
23
+ "monitor": "val/loss_simple_ema",
24
+ "unet_config": {
25
+ "target": "latent_diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel",
26
+ "params": {
27
+ "image_size": [
28
+ 64,
29
+ 48
30
+ ],
31
+ "in_channels": 8,
32
+ "out_channels": 4,
33
+ "model_channels": 192,
34
+ "attention_resolutions": [
35
+ 8,
36
+ 4,
37
+ 2
38
+ ],
39
+ "num_res_blocks": 2,
40
+ "channel_mult": [
41
+ 1,
42
+ 2,
43
+ 3,
44
+ 5
45
+ ],
46
+ "num_head_channels": 32,
47
+ "use_spatial_transformer": false,
48
+ "transformer_depth": 1
49
+ }
50
+ },
51
+ "temporal_encoder_config": {
52
+ "target": "latent_diffusion.ldm.modules.encoders.temporal_encoder.TemporalEncoder",
53
+ "params": {
54
+ "input_channels": 6,
55
+ "hidden_size": 1024,
56
+ "num_layers": 1,
57
+ "dropout": 0.1,
58
+ "output_channels": 4,
59
+ "output_height": 48,
60
+ "output_width": 64
61
+ }
62
+ },
63
+ "first_stage_config": {
64
+ "target": "latent_diffusion.ldm.models.autoencoder.AutoencoderKL",
65
+ "params": {
66
+ "embed_dim": 4,
67
+ "monitor": "val/rec_loss",
68
+ "ddconfig": {
69
+ "double_z": true,
70
+ "z_channels": 4,
71
+ "resolution": 256,
72
+ "in_channels": 3,
73
+ "out_ch": 3,
74
+ "ch": 128,
75
+ "ch_mult": [
76
+ 1,
77
+ 2,
78
+ 4,
79
+ 4
80
+ ],
81
+ "num_res_blocks": 2,
82
+ "attn_resolutions": [],
83
+ "dropout": 0.0
84
+ },
85
+ "lossconfig": {
86
+ "target": "torch.nn.Identity"
87
+ }
88
+ }
89
+ },
90
+ "cond_stage_config": "__is_unconditional__"
91
+ }
92
+ },
93
+ "data": {
94
+ "target": "data.data_processing.datasets.DataModule",
95
+ "params": {
96
+ "batch_size": 8,
97
+ "num_workers": 1,
98
+ "wrap": false,
99
+ "shuffle": true,
100
+ "drop_last": true,
101
+ "pin_memory": true,
102
+ "prefetch_factor": 2,
103
+ "persistent_workers": true,
104
+ "train": {
105
+ "target": "data.data_processing.datasets.ActionsData",
106
+ "params": {
107
+ "data_csv_path": "desktop_sequences_filtered_with_desktop_1.5k.challenging.train.target_frames.csv",
108
+ "normalization": "standard",
109
+ "context_length": 32
110
+ }
111
+ }
112
+ }
113
+ },
114
+ "lightning": {
115
+ "trainer": {
116
+ "benchmark": false,
117
+ "max_epochs": 6400,
118
+ "limit_val_batches": 0,
119
+ "accelerator": "gpu",
120
+ "gpus": 1,
121
+ "accumulate_grad_batches": 999999,
122
+ "gradient_clip_val": 1,
123
+ "checkpoint_callback": true
124
+ }
125
+ }
126
+ }