Skip to content

Commit 98a88ca

Browse files
committed
add dwpose
1 parent 1e6bdc0 commit 98a88ca

7 files changed

Lines changed: 1037 additions & 3 deletions

File tree

README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
3535
# "lineart_coarse", "lineart_realistic", "mediapipe_face", "mlsd", "normal_bae", "normal_midas",
3636
# "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
3737
# "scribble_hed", "scribble_pidinet", "shuffle", "softedge_hed", "softedge_hedsafe",
38-
# "softedge_pidinet", "softedge_pidsafe"]
38+
# "softedge_pidinet", "softedge_pidsafe", "dwpose"]
3939
processor_id = 'scribble_hed'
4040
processor = Processor(processor_id)
4141

@@ -47,7 +47,7 @@ Each model can be loaded individually by importing and instantiating them as fol
4747
from PIL import Image
4848
import requests
4949
from io import BytesIO
50-
from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector
50+
from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector
5151

5252
# load image
5353
url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
@@ -69,6 +69,15 @@ sam = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkp
6969
mobile_sam = SamDetector.from_pretrained("dhkim2810/MobileSAM", model_type="vit_t", filename="mobile_sam.pt")
7070
leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
7171

72+
# download the specific configs and checkpoints listed below, then pass their paths to DWposeDetector
73+
# det_config: ./src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
74+
# det_ckpt: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
75+
# pose_config: ./src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
76+
# pose_ckpt: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth
77+
import torch
78+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
79+
dwpose = DWposeDetector(det_config, det_ckpt, pose_config, pose_ckpt, device)
80+
7281
# instantiate
7382
canny = CannyDetector()
7483
content = ContentShuffleDetector()
@@ -91,4 +100,5 @@ processed_image_leres = leres(img)
91100
processed_image_canny = canny(img)
92101
processed_image_content = content(img)
93102
processed_image_mediapipe_face = face_detector(img)
103+
processed_image_dwpose = dwpose(img)
94104
```
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Openpose
2+
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3+
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4+
# 3rd Edited by ControlNet
5+
# 4th Edited by ControlNet (added face and correct hands)
6+
7+
import os
8+
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
9+
10+
import cv2
11+
import torch
12+
import numpy as np
13+
from PIL import Image
14+
15+
from . import util
16+
from .wholebody import Wholebody
17+
18+
19+
def draw_pose(pose, H, W):
    """Draw body, hand and face keypoints on a fresh black canvas.

    Args:
        pose: Mapping with the keys ``'bodies'``, ``'faces'`` and ``'hands'``;
            ``pose['bodies']`` must itself provide ``'candidate'`` and
            ``'subset'``.
        H: Canvas height in pixels.
        W: Canvas width in pixels.

    Returns:
        The canvas produced by passing an ``(H, W, 3)`` ``uint8`` zero array
        through ``util.draw_bodypose``, ``util.draw_handpose`` and
        ``util.draw_facepose``, in that order.
    """
    # Look every key up before drawing so a malformed ``pose`` fails early.
    body_info, face_points, hand_points = pose['bodies'], pose['faces'], pose['hands']
    body_candidate, body_subset = body_info['candidate'], body_info['subset']

    blank = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    with_body = util.draw_bodypose(blank, body_candidate, body_subset)
    with_hands = util.draw_handpose(with_body, hand_points)
    return util.draw_facepose(with_hands, face_points)
32+
33+
class DWposeDetector:
    """Callable annotator that turns an image into a whole-body pose map.

    The heavy lifting is delegated to a ``Wholebody`` estimator; this class
    only prepares the input, regroups the returned keypoints into body, face
    and hand sets, and renders them with ``draw_pose``.
    """

    def __init__(self, det_config, det_ckpt, pose_config, pose_ckpt, device):
        """Create the underlying ``Wholebody`` estimator.

        All five arguments are forwarded to ``Wholebody`` unchanged.
        """
        self.pose_estimation = Wholebody(det_config, det_ckpt, pose_config, pose_ckpt, device)

    def __call__(self, oriImg, output_type="pil", detect_resolution=512, image_resolution=512):
        """Estimate the pose in ``oriImg`` and draw it.

        Args:
            oriImg: Input image; anything with ``.copy()`` that ``np.array``
                can convert. It is treated as RGB (converted to BGR first).
            output_type: ``"pil"`` returns a ``PIL.Image``; any other value
                returns the NumPy array without the BGR-to-RGB conversion
                that the PIL branch applies.
            detect_resolution: Resolution handed to ``util.resize_image``
                before pose estimation.
            image_resolution: Resolution handed to ``util.resize_image`` to
                determine the size of the returned map.

        Returns:
            The rendered pose map, as a ``PIL.Image`` or a NumPy array
            depending on ``output_type``.
        """
        oriImg = oriImg.copy()
        input_image = cv2.cvtColor(np.array(oriImg), cv2.COLOR_RGB2BGR)

        input_image = util.HWC3(input_image)
        input_image = util.resize_image(input_image, detect_resolution)
        H, W, _ = input_image.shape

        with torch.no_grad():
            candidate, subset = self.pose_estimation(input_image)
            nums, _, locs = candidate.shape

            # Normalise x / y by the detection image size.
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)

            # The first 18 keypoints per person form the body. Copy them
            # before masking so body coordinates stay untouched.
            body = candidate[:, :18].copy().reshape(nums * 18, locs)

            # Body "subset" table: a confident keypoint stores its row index
            # into the flattened ``body`` array, anything else stores -1.
            # Built from the unmodified scores in one vectorised step, with
            # the score dtype preserved.
            flat_index = np.arange(nums * 18).reshape(nums, 18)
            score = np.where(subset[:, :18] > 0.3, flat_index, -1).astype(subset.dtype)

            # Blank out low-confidence keypoints for the face / hand slices.
            un_visible = subset < 0.3
            candidate[un_visible] = -1

            # Columns 18:24 (the foot keypoints) are not drawn.
            faces = candidate[:, 24:92]

            # Stack the 92:113 and 113: slices so each hand is its own row.
            hands = np.vstack([candidate[:, 92:113], candidate[:, 113:]])

            bodies = dict(candidate=body, subset=score)
            pose = dict(bodies=bodies, hands=hands, faces=faces)

            detected_map = draw_pose(pose, H, W)
            detected_map = util.HWC3(detected_map)

            # The output size follows the input resized to image_resolution.
            img = util.resize_image(input_image, image_resolution)
            H, W, _ = img.shape

            detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

            if output_type == "pil":
                detected_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))

            return detected_map
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# Pose-estimator config: a 'TopdownPoseEstimator' with a CSPNeXt backbone and
# an RTMCCHead predicting 133 keypoints on 288x384 inputs, trained on
# COCO-WholeBody plus the UBody scenes listed below.

# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    dict(
        # linear warm-up over the first 1000 iterations
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # cosine annealing over the second half of training
        # (epoch max_epochs // 2 up to max_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(288, 384),
    sigma=(6., 6.93),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=133,
        input_size=codec['input_size'],
        in_featuremap_size=(9, 12),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True, ))

# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = '/data/'

backend_args = dict(backend='local')
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))

# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# second-stage training pipeline, installed by the PipelineSwitchHook below:
# no bbox shift, narrower scale / rotation ranges, CoarseDropout at p=0.5
train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

# training datasets: COCO-WholeBody first, then one entry per UBody scene
datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

# A generator expression keeps the loop variable out of the module namespace,
# so no stray index name is left behind at the top level of this config.
datasets.extend(
    dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file=f'UBody/annotations/{scene_name}/keypoint_annotation.json',
        data_prefix=dict(img=f'UBody/images/{scene_name}/'),
        pipeline=[],
    )
    for scene_name in scene
)

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        # switch to the stage-2 pipeline for the last stage2_num_epochs epochs
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# evaluators
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=f'{data_root}coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

0 commit comments

Comments
 (0)