-
Notifications
You must be signed in to change notification settings - Fork 165
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reimplement "FCOS: Fully Convolutional One-Stage Object Detection" (#…
…586) * add fcos * use P5 instead of C5 * add relu before extra convs in FPN * add singleclass_nms, use caffe2 lr * fix log interval * use caffe2init and relu in extra layers * fix scale layer, use p5 instead of c5 * fix focs target * refactor code * delete useless file * clean * refactor code * change num_classes to cls_out_channels * fix bug of in get_bboxes * fix bug in test * add r101 2x cfg * ms use value mode, add x101-64x4d cfg * add more comment and rename some variable * rename centers to points, modify doc string of distance2bbox * add fcos detector, replace frozen with requires_grad * add README.md * add r101-1x performance, rename cfg, add detector FCOS * update fcos r50 2x performance, remove fpn caffe2 initialize * fix flake8 error * rename cfg * fix grammar error of some comments * minor fix comment * change work_dir to be consistent with config name * add FCOS support in README
- Loading branch information
Showing
18 changed files
with
882 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# FCOS: Fully Convolutional One-Stage Object Detection | ||
|
||
## Introduction | ||
|
||
``` | ||
@article{tian2019fcos, | ||
title={FCOS: Fully Convolutional One-Stage Object Detection}, | ||
author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, | ||
journal={arXiv preprint arXiv:1904.01355}, | ||
year={2019} | ||
} | ||
``` | ||
|
||
## Results and Models | ||
|
||
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | | ||
|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| | ||
| R-50-FPN | caffe | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_1x-9f253a93.pth) | | ||
| R-50-FPN | caffe | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_2x-f7329d80.pth) | | ||
| R-101-FPN | caffe | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_1x-e4889733.pth) | | ||
| R-101-FPN | caffe | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_2x-42e6f62d.pth) | | ||
| X-101-64x4d-FPN | pytorch | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_x101_64x4d_fpn_2x-a36c0872.pth) | | ||
|
||
**Notes:** | ||
- To be consistent with the authors' implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models. |
124 changes: 124 additions & 0 deletions
124
configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# model settings
# FCOS detector built from a caffe-style ResNet-101 backbone, an FPN neck
# and an FCOSHead.
model = dict(
    type='FCOS',
    pretrained='open-mmlab://resnet101_caffe',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        # BN parameters are not trained (requires_grad=False).
        norm_cfg=dict(type='BN', requires_grad=False),
        style='caffe'),
    neck=dict(
        type='FPN',
        # One entry per backbone output listed in out_indices.
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        extra_convs_on_inputs=False,  # use P5
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSHead',
        num_classes=81,
        in_channels=256,  # equals the neck's out_channels
        stacked_convs=4,
        feat_channels=256,
        # Five strides, one per neck output (num_outs=5).
        strides=[8, 16, 32, 64, 128]))
# training and testing settings
# NOTE(review): assigner / smoothl1_beta / gamma / alpha look like
# anchor-based (RetinaNet-style) options -- confirm FCOSHead reads them.
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    smoothl1_beta=0.11,
    gamma=2.0,
    alpha=0.25,
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
# Post-processing options used at inference time.
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Mean subtraction only (unit std) and no RGB conversion, paired with the
# caffe-style backbone weights.
img_norm_cfg = dict(
    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
data = dict(
    # 4 images/GPU on 4 GPUs, as stated in the FCOS README note for R-101.
    imgs_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        # Multi-scale training ("mstrain_640_800" in the config name);
        # presumably 'value' mode picks one of the listed scales per image.
        img_scale=[(1333, 640), (1333, 800)],
        multiscale_mode='value',
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=False,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=False,
        with_label=True),
    # Testing uses the same val2017 annotations and images as `val`.
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    # Bias parameters: 2x learning-rate multiplier, zero weight-decay
    # multiplier.
    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
optimizer_config = dict(grad_clip=None)
# learning policy
# Step schedule with steps at epochs 16 and 22 of the 24 total epochs
# (the "2x" schedule in the config name).
lr_config = dict(
    policy='step',
    warmup='constant',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 22])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
device_ids = range(4)  # 4 GPUs
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu'
load_from = None
resume_from = None
workflow = [('train', 1)]
125 changes: 125 additions & 0 deletions
125
configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
# model settings
# FCOS detector built from a pytorch-style ResNeXt-101 (64x4d) backbone,
# an FPN neck and an FCOSHead.
model = dict(
    type='FCOS',
    pretrained='open-mmlab://resnext101_64x4d',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,  # the "64x4d" variant: groups=64, base_width=4
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        # One entry per backbone output listed in out_indices.
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        extra_convs_on_inputs=False,  # use P5
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSHead',
        num_classes=81,
        in_channels=256,  # equals the neck's out_channels
        stacked_convs=4,
        feat_channels=256,
        # Five strides, one per neck output (num_outs=5).
        strides=[8, 16, 32, 64, 128]))
# training and testing settings
# NOTE(review): assigner / smoothl1_beta / gamma / alpha look like
# anchor-based (RetinaNet-style) options -- confirm FCOSHead reads them.
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    smoothl1_beta=0.11,
    gamma=2.0,
    alpha=0.25,
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
# Post-processing options used at inference time.
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Per-channel mean/std normalization with RGB conversion, paired with the
# pytorch-style backbone weights.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    # 2 images/GPU on 8 GPUs, as stated in the FCOS README note for X-101.
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        # Multi-scale training ("mstrain_640_800" in the config name);
        # presumably 'value' mode picks one of the listed scales per image.
        img_scale=[(1333, 640), (1333, 800)],
        multiscale_mode='value',
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=False,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=False,
        with_label=True),
    # Testing uses the same val2017 annotations and images as `val`.
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    # Bias parameters: 2x learning-rate multiplier, zero weight-decay
    # multiplier.
    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
optimizer_config = dict(grad_clip=None)
# learning policy
# Step schedule with steps at epochs 16 and 22 of the 24 total epochs
# (the "2x" schedule in the config name).
lr_config = dict(
    policy='step',
    warmup='constant',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 22])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
device_ids = range(8)  # 8 GPUs
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x'
load_from = None
resume_from = None
workflow = [('train', 1)]
Oops, something went wrong.