|
1 | 1 | _base_ = ('../../third_party/mmyolo/configs/yolov8/'
|
2 |
| - 'yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py') |
| 2 | + 'yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py') |
3 | 3 | custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
|
4 | 4 |
|
5 | 5 | # hyper-parameters
|
|
11 | 11 | text_channels = 512
|
12 | 12 | neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
|
13 | 13 | neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
|
14 |
| -base_lr = 2e-3 |
| 14 | +base_lr = 2e-4 |
15 | 15 | weight_decay = 0.05
|
16 | 16 | train_batch_size_per_gpu = 16
|
17 |
| -load_from = '/group/40034/adriancheng/notebooks/rep_models/yolo_world_v2_x_obj365v1_goldg_cc3mlite_pretrain_1280ft-14996a36_repconv.pth' |
| 17 | +load_from = '../FastDet/output_models/pretrain_yolow-v8_s_clipv2_frozen_te_noprompt_t2i_bn_2e-3adamw_scale_lr_wd_32xb16-100e_obj365v1_goldg_cc3mram250k_train_lviseval-e3592307_rep_conv.pth' |
18 | 18 | persistent_workers = False
|
| 19 | +mixup_prob = 0.15 |
| 20 | +copypaste_prob = 0.3 |
19 | 21 |
|
20 | 22 | # model settings
|
21 | 23 | model = dict(type='SimpleYOLOWorldDetector',
|
|
28 | 30 | type='MultiModalYOLOBackbone',
|
29 | 31 | text_model=None,
|
30 | 32 | image_model={{_base_.model.backbone}},
|
31 |
| - frozen_stages=4, |
32 | 33 | with_text_model=False),
|
33 | 34 | neck=dict(type='YOLOWorldPAFPN',
|
34 |
| - guide_channels=num_classes, |
| 35 | + guide_channels=text_channels, |
35 | 36 | embed_channels=neck_embed_channels,
|
36 | 37 | num_heads=neck_num_heads,
|
37 |
| - block_cfg=dict(type='RepConvMaxSigmoidCSPLayerWithTwoConv', |
38 |
| - guide_channels=num_classes)), |
| 38 | + block_cfg=dict(type='EfficientCSPLayerWithTwoConv')), |
39 | 39 | bbox_head=dict(head_module=dict(type='RepYOLOWorldHeadModule',
|
40 | 40 | embed_dims=text_channels,
|
41 | 41 | num_guide=num_classes,
|
|
53 | 53 | img_scale=_base_.img_scale,
|
54 | 54 | pad_val=114.0,
|
55 | 55 | pre_transform=_base_.pre_transform),
|
56 |
| - dict(type='YOLOv5CopyPaste', prob=_base_.copypaste_prob), |
| 56 | + dict(type='YOLOv5CopyPaste', prob=copypaste_prob), |
57 | 57 | dict(
|
58 | 58 | type='YOLOv5RandomAffine',
|
59 | 59 | max_rotate_degree=0.0,
|
|
69 | 69 | train_pipeline = [
|
70 | 70 | *_base_.pre_transform, *mosaic_affine_transform,
|
71 | 71 | dict(type='YOLOv5MixUp',
|
72 |
| - prob=_base_.mixup_prob, |
| 72 | + prob=mixup_prob, |
73 | 73 | pre_transform=[*_base_.pre_transform, *mosaic_affine_transform]),
|
74 | 74 | *_base_.last_transform[:-1], *final_transform
|
75 | 75 | ]
|
|
135 | 135 | lr=base_lr,
|
136 | 136 | weight_decay=weight_decay,
|
137 | 137 | batch_size_per_gpu=train_batch_size_per_gpu),
|
138 |
| - paramwise_cfg=dict(bias_decay_mult=0.0, |
139 |
| - norm_decay_mult=0.0, |
140 |
| - custom_keys={ |
141 |
| - 'backbone.text_model': |
142 |
| - dict(lr_mult=0.01), |
143 |
| - 'logit_scale': |
144 |
| - dict(weight_decay=0.0), |
145 |
| - 'embeddings': |
146 |
| - dict(weight_decay=0.0) |
147 |
| - }), |
148 | 138 | constructor='YOLOWv5OptimizerConstructor')
|
149 | 139 |
|
150 | 140 | # evaluation settings
|
|
153 | 143 | proposal_nums=(100, 1, 10),
|
154 | 144 | ann_file='data/coco/annotations/instances_val2017.json',
|
155 | 145 | metric='bbox')
|
156 |
| -find_unused_parameters = True |
0 commit comments