Reimplement "FCOS: Fully Convolutional One-Stage Object Detection" (#…

…586) * add fcos * use P5 instead of C5 * add relu before extra convs in FPN * add singleclass_nms, use caffe2 lr * fix log interval * use caffe2init and relu in extra layers * fix scale layer, use p5 instead of c5 * fix focs target * refactor code * delete useless file * clean * refactor code * change num_classes to cls_out_channels * fix bug of in get_bboxes * fix bug in test * add r101 2x cfg * ms use value mode, add x101-64x4d cfg * add more comment and rename some variable * rename centers to points, modify doc string of distance2bbox * add fcos detector, replace frozen with requires_grad * add README.md * add r101-1x performance, rename cfg, add detector FCOS * update fcos r50 2x performance, remove fpn caffe2 initialize * fix flake8 error * rename cfg * fix grammar error of some comments * minor fix comment * change work_dir to be consistent with config name * add FCOS support in README
xvjiarui · May 17, 2019 · 8d38fd8 · 8d38fd8
1 parent 981ee2f
commit 8d38fd8
Show file tree

Hide file tree

Showing 18 changed files with 882 additions and 26 deletions.
diff --git a/README.md b/README.md
@@ -92,6 +92,7 @@ Results and models are available in the [Model zoo](MODEL_ZOO.md).
 | SSD                | ✗        | ✗        | ✗        | ✓        |
 | RetinaNet          | ✓        | ✓        | ☐        | ✗        |
 | Hybrid Task Cascade| ✓        | ✓        | ☐        | ✗        |
+| FCOS               | ✓        | ✓        | ☐        | ✗        |
 
 Other features
 - [x] DCNv2

diff --git a/configs/fcos/README.md b/configs/fcos/README.md
@@ -0,0 +1,25 @@
+# FCOS: Fully Convolutional One-Stage Object Detection
+
+## Introduction
+
+```
+@article{tian2019fcos,
+  title={FCOS: Fully Convolutional One-Stage Object Detection},
+  author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
+  journal={arXiv preprint arXiv:1904.01355},
+  year={2019}
+}
+```
+
+## Results and Models
+
+| Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
+|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
+| R-50-FPN  | caffe   | 1x      | 6.9      | 0.396               | 13.6           | 36.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_1x-9f253a93.pth) |
+| R-50-FPN  | caffe   | 2x      | -        | -                   | -              | 38.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_2x-f7329d80.pth) |
+| R-101-FPN | caffe   | 1x      | 10.4     | 0.558               | 11.6           | 39.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_1x-e4889733.pth) |
+| R-101-FPN | caffe   | 2x      | -        | -                   | -              | 40.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_2x-42e6f62d.pth) |
+| X-101-64x4d-FPN | caffe   |2x | 9.7      | 0.892               | 7.0            | 42.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_x101_64x4d_fpn_2x-a36c0872.pth) |
+
+**Notes:**
+- To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models.
diff --git a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
@@ -0,0 +1,124 @@
+# model settings
+model = dict(
+    type='FCOS',
+    pretrained='open-mmlab://resnet101_caffe',
+    backbone=dict(
+        type='ResNet',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=False),
+        style='caffe'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='FCOSHead',
+        num_classes=81,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128]))
+# training and testing settings
+train_cfg = dict(
+    assigner=dict(
+        type='MaxIoUAssigner',
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.4,
+        min_pos_iou=0,
+        ignore_iof_thr=-1),
+    smoothl1_beta=0.11,
+    gamma=2.0,
+    alpha=0.25,
+    allowed_border=-1,
+    pos_weight=-1,
+    debug=False)
+test_cfg = dict(
+    nms_pre=1000,
+    min_bbox_size=0,
+    score_thr=0.05,
+    nms=dict(type='nms', iou_thr=0.5),
+    max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=[(1333, 640), (1333, 800)],
+        multiscale_mode='value',
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+device_ids = range(4)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
diff --git a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
@@ -0,0 +1,125 @@
+# model settings
+model = dict(
+    type='FCOS',
+    pretrained='open-mmlab://resnext101_64x4d',
+    backbone=dict(
+        type='ResNeXt',
+        depth=101,
+        groups=64,
+        base_width=4,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch'),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        start_level=1,
+        add_extra_convs=True,
+        extra_convs_on_inputs=False,  # use P5
+        num_outs=5,
+        relu_before_extra_convs=True),
+    bbox_head=dict(
+        type='FCOSHead',
+        num_classes=81,
+        in_channels=256,
+        stacked_convs=4,
+        feat_channels=256,
+        strides=[8, 16, 32, 64, 128]))
+# training and testing settings
+train_cfg = dict(
+    assigner=dict(
+        type='MaxIoUAssigner',
+        pos_iou_thr=0.5,
+        neg_iou_thr=0.4,
+        min_pos_iou=0,
+        ignore_iof_thr=-1),
+    smoothl1_beta=0.11,
+    gamma=2.0,
+    alpha=0.25,
+    allowed_border=-1,
+    pos_weight=-1,
+    debug=False)
+test_cfg = dict(
+    nms_pre=1000,
+    min_bbox_size=0,
+    score_thr=0.05,
+    nms=dict(type='nms', iou_thr=0.5),
+    max_per_img=100)
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=[(1333, 640), (1333, 800)],
+        multiscale_mode='value',
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(
+    type='SGD',
+    lr=0.01,
+    momentum=0.9,
+    weight_decay=0.0001,
+    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='constant',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]