File size: 3,483 Bytes
bee5301
 
 
 
 
 
 
 
 
 
 
 
 
 
46442c8
 
bee5301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
{
  "data": {
    "augmentations": {
      "blur_p": 0.2,
      "flip_p": 0.5,
      "gamma_p": 0.8,
      "grayscale_p": 0.2,
      "jitter_p": 0.8,
      "random_blur": 2.0,
      "random_gamma": 0.2,
      "random_jitter": 0.4,
      "random_scale": 2.0,
      "shape_constraints": {
        "height_min": 15,
        "pixels_max": 600000,
        "pixels_min": 200000,
        "ratio_bounds": [
          0.5,
          2.5
        ],
        "sample": true,
        "shape_mult": 14,
        "width_min": 15
      },
      "test_context": 1.0
    },
    "crop": "garg",
    "data_root": "datasets",
    "image_shape": [
      480,
      640
    ],
    "normalization": "imagenet",
    "num_copies": 2,
    "num_frames": 1,
    "sampling": {
      "ETH3D": 1.0,
      "Waymo": 1.0
    },
    "train_datasets": [
      "ETH3D",
      "Waymo"
    ],
    "val_datasets": [
      "IBims"
    ]
  },
  "eps": 1e-06,
  "generic": {
    "deterministic": true,
    "seed": 13
  },
  "model": {
    "expansion": 4,
    "layer_scale": 1.0,
    "name": "UniDepthV2",
    "num_heads": 8,
    "pixel_decoder": {
      "depths": [
        2,
        2,
        2
      ],
      "dropout": 0.0,
      "hidden_dim": 384,
      "kernel_size": 3,
      "name": "Decoder",
      "out_dim": 48
    },
    "pixel_encoder": {
      "cls_token_embed_dims": [
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768
      ],
      "depths": [
        3,
        6,
        9,
        12
      ],
      "embed_dim": 768,
      "embed_dims": [
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768,
        768
      ],
      "freeze_norm": true,
      "frozen_stages": 0,
      "lr": 2e-06,
      "name": "dinov2_vitb14",
      "num_register_tokens": 0,
      "output_idx": [
        3,
        6,
        9,
        12
      ],
      "patch_size": 14,
      "pretrained": null,
      "stacking_fn": "last",
      "use_norm": true,
      "wd": 0.1
    }
  },
  "training": {
    "batch_size": 8,
    "clipping": 1.0,
    "cycle_beta": false,
    "drop_path": 0.0,
    "ema": true,
    "f16": true,
    "ld": 1.0,
    "losses": {
      "camera": {
        "alpha": 1.0,
        "fn": "l2",
        "gamma": 1.0,
        "input_fn": "linear",
        "name": "Regression",
        "output_fn": "sqrt",
        "weight": 0.25
      },
      "confidence": {
        "alpha": 1.0,
        "fn": "l1",
        "gamma": 1.0,
        "input_fn": "linear",
        "name": "Regression",
        "output_fn": "sqrt",
        "weight": 0.1
      },
      "depth": {
        "dims": [
          -2,
          -1
        ],
        "input_fn": "log",
        "integrated": 0.15,
        "name": "SILog",
        "output_fn": "sqrt",
        "weight": 1.0
      },
      "invariance": {
        "name": "SelfDistill",
        "output_fn": "sqrt",
        "weight": 0.1
      },
      "ssi": {
        "input_fn": "log1i",
        "min_samples": 6,
        "name": "EdgeGuidedLocalSSI",
        "output_fn": "sqrt",
        "use_global": true,
        "weight": 1.0
      }
    },
    "lr": 0.0001,
    "lr_final": 1e-06,
    "lr_warmup": 1.0,
    "n_iters": 300000,
    "nsteps_accumulation_gradient": 2,
    "use_checkpoint": false,
    "validation_interval": 2,
    "warmup_iters": 75000,
    "wd": 0.1,
    "wd_final": 0.1
  }
}