|
# runtime
max_epochs = 270
# Also read by the pipeline-switch hook in this file to compute its
# `switch_epoch` (max_epochs - stage2_num_epochs).
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    # Decay multipliers of 0 are set for norm and bias parameters.
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    # Warm-up expressed in iterations (by_epoch=False): begin=0, end=1000.
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # Cosine annealing over the second half of training: from epoch
        # `max_epochs // 2` to `max_epochs` (135 -> 270 with the values
        # above), with eta_min set to 5% of `base_lr`.
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
| 37 | + |
# codec settings
# Shared by the head (input size, split ratio, decoder) and, elsewhere in this
# file, by the pipelines that affine-warp inputs and generate targets.
codec = dict(
    type='SimCCLabel',
    input_size=(288, 384),
    sigma=(6., 6.93),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            # Load the pretrained weights directly from the official
            # OpenMMLab download host, not through a third-party proxy
            # domain.
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=133,
        input_size=codec['input_size'],
        # (9, 12) equals input_size (288, 384) divided by 32 -- presumably
        # the backbone output stride; re-check if input_size changes.
        in_featuremap_size=(9, 12),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True, ))
| 96 | + |
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = '/data/'

# Default file backend: local.
backend_args = dict(backend='local')
# Alternative template (petrel backend, mapping `data_root` to an s3 path):
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))
| 109 | + |
# pipelines
# Initial training pipeline. Compared with `train_pipeline_stage2` below it
# uses a wider scale range, a larger rotate factor, and CoarseDropout with
# p=1.0.
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        scale_factor=[0.6, 1.4],
        rotate_factor=80,
    ),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0,
            ),
        ],
    ),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs'),
]

# Validation pipeline: load, crop to the bbox, warp to the codec input size,
# and pack -- no random augmentation.
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# Second-stage training pipeline (passed to the pipeline-switch hook in this
# file): shift_factor=0., scale range [0.75, 1.25], rotate factor 60, and
# CoarseDropout with p=0.5.
train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60,
    ),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5,
            ),
        ],
    ),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs'),
]
| 175 | + |
# Training sources: the COCO-WholeBody train split plus one UBody dataset per
# scene. Every entry carries an empty `pipeline`; the dataloader's
# CombinedDataset in this file is given the pipeline instead.
datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

# Built with a generator expression so that no loop variable is left behind
# as a stray module-level name in the config namespace.
datasets.extend(
    dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file=f'UBody/annotations/{scene_name}/keypoint_annotation.json',
        data_prefix=dict(img=f'UBody/images/{scene_name}/'),
        pipeline=[],
    ) for scene_name in scene)
| 202 | + |
# data loaders
# Training draws shuffled samples from all entries of `datasets`, wrapped in a
# CombinedDataset that carries the COCO-WholeBody metainfo file and
# `train_pipeline`.
train_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ),
)

# Validation uses the COCO-WholeBody val split with a person-detection result
# file as `bbox_file`, without shuffling.
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=(f'{data_root}coco/person_detection_results/'
                   'COCO_val2017_detections_AP_H_56_person.json'),
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ),
)

# Testing reuses the validation dataloader config (same dict object).
test_dataloader = val_dataloader
| 234 | + |
# hooks
default_hooks = dict(
    # Best-checkpoint tracking keyed on 'coco-wholebody/AP' with rule
    # 'greater'; max_keep_ckpts is 1.
    checkpoint=dict(
        save_best='coco-wholebody/AP',
        rule='greater',
        max_keep_ckpts=1,
    ),
)

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49,
    ),
    # Hands `train_pipeline_stage2` to the switch hook, with the switch epoch
    # set to `max_epochs - stage2_num_epochs`.
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2,
    ),
]
| 252 | + |
# evaluators
# The annotation path is built from `data_root`, pointing at the same
# COCO-WholeBody val annotation file the validation dataset uses.
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=f'{data_root}coco/annotations/coco_wholebody_val_v1.0.json',
)
# Testing reuses the validation evaluator config (same dict object).
test_evaluator = val_evaluator
0 commit comments