Image Segmentation
Transformers
PyTorch
upernet
Inference Endpoints
mccaly committed on
Commit
26f3ff2
1 Parent(s): 6fec5b6

Upload 2 files

Browse files
iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b67e032728959023a14b7a6a4831009367d58331cd2d188fa6540e7e40a05121
3
+ size 975633422
upernet_swin_small_patch4_window7_512x1024_80k.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ model = dict(
3
+ type='EncoderDecoder',
4
+ pretrained='pretrained/swin_small_patch4_window7_224.pth',
5
+ backbone=dict(
6
+ type='SwinTransformer',
7
+ embed_dim=96,
8
+ depths=[2, 2, 18, 2],
9
+ num_heads=[3, 6, 12, 24],
10
+ window_size=7,
11
+ mlp_ratio=4.0,
12
+ qkv_bias=True,
13
+ qk_scale=None,
14
+ drop_rate=0.0,
15
+ attn_drop_rate=0.0,
16
+ drop_path_rate=0.3,
17
+ ape=False,
18
+ patch_norm=True,
19
+ out_indices=(0, 1, 2, 3),
20
+ use_checkpoint=False),
21
+ decode_head=dict(
22
+ type='UPerHead',
23
+ in_channels=[96, 192, 384, 768],
24
+ in_index=[0, 1, 2, 3],
25
+ pool_scales=(1, 2, 3, 6),
26
+ channels=512,
27
+ dropout_ratio=0.1,
28
+ num_classes=104,
29
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
30
+ align_corners=False,
31
+ loss_decode=dict(
32
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
33
+ auxiliary_head=dict(
34
+ type='FCNHead',
35
+ in_channels=384,
36
+ in_index=2,
37
+ channels=256,
38
+ num_convs=1,
39
+ concat_input=False,
40
+ dropout_ratio=0.1,
41
+ num_classes=104,
42
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
43
+ align_corners=False,
44
+ loss_decode=dict(
45
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
46
+ train_cfg=dict(),
47
+ test_cfg=dict(mode='whole'))
48
+ dataset_type = 'CustomDataset'
49
+ data_root = './data/FoodSeg103/Images/'
50
+ img_norm_cfg = dict(
51
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
52
+ crop_size = (512, 1024)
53
+ train_pipeline = [
54
+ dict(type='LoadImageFromFile'),
55
+ dict(type='LoadAnnotations'),
56
+ dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
57
+ dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
58
+ dict(type='RandomFlip', prob=0.5),
59
+ dict(type='PhotoMetricDistortion'),
60
+ dict(
61
+ type='Normalize',
62
+ mean=[123.675, 116.28, 103.53],
63
+ std=[58.395, 57.12, 57.375],
64
+ to_rgb=True),
65
+ dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255),
66
+ dict(type='DefaultFormatBundle'),
67
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
68
+ ]
69
+ test_pipeline = [
70
+ dict(type='LoadImageFromFile'),
71
+ dict(
72
+ type='MultiScaleFlipAug',
73
+ img_scale=(2048, 1024),
74
+ flip=False,
75
+ transforms=[
76
+ dict(type='Resize', keep_ratio=True),
77
+ dict(type='RandomFlip'),
78
+ dict(
79
+ type='Normalize',
80
+ mean=[123.675, 116.28, 103.53],
81
+ std=[58.395, 57.12, 57.375],
82
+ to_rgb=True),
83
+ dict(type='ImageToTensor', keys=['img']),
84
+ dict(type='Collect', keys=['img'])
85
+ ])
86
+ ]
87
+ data = dict(
88
+ samples_per_gpu=2,
89
+ workers_per_gpu=2,
90
+ train=dict(
91
+ type='CustomDataset',
92
+ data_root='./data/FoodSeg103/Images/',
93
+ img_dir='img_dir/train',
94
+ ann_dir='ann_dir/train',
95
+ pipeline=[
96
+ dict(type='LoadImageFromFile'),
97
+ dict(type='LoadAnnotations'),
98
+ dict(
99
+ type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
100
+ dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75),
101
+ dict(type='RandomFlip', prob=0.5),
102
+ dict(type='PhotoMetricDistortion'),
103
+ dict(
104
+ type='Normalize',
105
+ mean=[123.675, 116.28, 103.53],
106
+ std=[58.395, 57.12, 57.375],
107
+ to_rgb=True),
108
+ dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255),
109
+ dict(type='DefaultFormatBundle'),
110
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
111
+ ]),
112
+ val=dict(
113
+ type='CustomDataset',
114
+ data_root='./data/FoodSeg103/Images/',
115
+ img_dir='img_dir/test',
116
+ ann_dir='ann_dir/test',
117
+ pipeline=[
118
+ dict(type='LoadImageFromFile'),
119
+ dict(
120
+ type='MultiScaleFlipAug',
121
+ img_scale=(2048, 1024),
122
+ flip=False,
123
+ transforms=[
124
+ dict(type='Resize', keep_ratio=True),
125
+ dict(type='RandomFlip'),
126
+ dict(
127
+ type='Normalize',
128
+ mean=[123.675, 116.28, 103.53],
129
+ std=[58.395, 57.12, 57.375],
130
+ to_rgb=True),
131
+ dict(type='ImageToTensor', keys=['img']),
132
+ dict(type='Collect', keys=['img'])
133
+ ])
134
+ ]),
135
+ test=dict(
136
+ type='CustomDataset',
137
+ data_root='./data/FoodSeg103/Images/',
138
+ img_dir='img_dir/test',
139
+ ann_dir='ann_dir/test',
140
+ pipeline=[
141
+ dict(type='LoadImageFromFile'),
142
+ dict(
143
+ type='MultiScaleFlipAug',
144
+ img_scale=(2048, 1024),
145
+ flip=False,
146
+ transforms=[
147
+ dict(type='Resize', keep_ratio=True),
148
+ dict(type='RandomFlip'),
149
+ dict(
150
+ type='Normalize',
151
+ mean=[123.675, 116.28, 103.53],
152
+ std=[58.395, 57.12, 57.375],
153
+ to_rgb=True),
154
+ dict(type='ImageToTensor', keys=['img']),
155
+ dict(type='Collect', keys=['img'])
156
+ ])
157
+ ]))
158
+ log_config = dict(
159
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
160
+ dist_params = dict(backend='nccl')
161
+ log_level = 'INFO'
162
+ load_from = None
163
+ resume_from = None
164
+ workflow = [('train', 1)]
165
+ cudnn_benchmark = True
166
+ optimizer = dict(
167
+ type='AdamW',
168
+ lr=6e-05,
169
+ betas=(0.9, 0.999),
170
+ weight_decay=0.01,
171
+ paramwise_cfg=dict(
172
+ custom_keys=dict(
173
+ absolute_pos_embed=dict(decay_mult=0.0),
174
+ relative_position_bias_table=dict(decay_mult=0.0),
175
+ norm=dict(decay_mult=0.0))))
176
+ optimizer_config = dict()
177
+ lr_config = dict(
178
+ policy='poly',
179
+ warmup='linear',
180
+ warmup_iters=1500,
181
+ warmup_ratio=1e-06,
182
+ power=1.0,
183
+ min_lr=0.0,
184
+ by_epoch=False)
185
+ runner = dict(type='IterBasedRunner', max_iters=80000)
186
+ checkpoint_config = dict(by_epoch=False, interval=8000)
187
+ evaluation = dict(interval=8000, metric='mIoU')
188
+ work_dir = './work_dirs/upernet_swin_small_patch4_window7_512x1024_80k'
189
+ gpu_ids = range(0, 1)