Commit 41674a5 (parent d803985): 14 changed files with 1,295 additions and 0 deletions. Binary files and large diffs are not rendered below.
@@ -0,0 +1,21 @@
## Main directories

.
├── model                            # network module definitions, dataloaders, etc.
├── utils                            # utility functions, e.g. file I/O and mesh processing
├── evaluate_cascade.py              # evaluate the cascaded network
├── evaluate_3Pole.py                # evaluate the one-stage network
├── train_stage1.py                  # train stage 1 of the cascaded framework
├── train_stage2.py                  # train stage 2 of the cascaded framework
├── retrieve_stage1_result.py        # retrieve stage-1 results
├── stage2_trainer.py                # callable training module for the 3-category classification
├── train_3PoleSDF_in_one_stage.py   # train the one-stage framework
└── finetune_model.py                # fine-tune a pretrained model
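How these entry points are chained is not shown in this commit, so the ordering below is only a sketch inferred from the file names; the real scripts are run from the command line and may take arguments that this diff does not reveal.

```python
# Hypothetical orchestration of the cascaded framework, inferred only from the
# file names above; the real scripts' argument handling is not shown in this diff.
import subprocess

for script in ("train_stage1.py",            # stage 1 of the cascade
               "retrieve_stage1_result.py",  # cache stage-1 predictions for stage 2
               "train_stage2.py",            # stage 2: 3-category classification
               "evaluate_cascade.py"):       # evaluate the full cascade
    subprocess.run(["python", script], check=True)
```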
@@ -0,0 +1,81 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, ReLU, BatchNormalization, GlobalAveragePooling2D


def ImageEncoder():

    input_image = Input(shape=(224, 224, 4))

    # Block1
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block1_conv1')(input_image)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block1_conv2')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Block2
    x = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block2_conv1')(x1)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block2_conv2')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Block3
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv1')(x2)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv2')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv3')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv4')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Block4
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv1')(x3)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv2')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv3')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv4')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Block5
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv1')(x4)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv2')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv3')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv4')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Global features
    x = GlobalAveragePooling2D()(x5)
    x = Dense(units=1024, activation=None, name='output_predictions')(x)

    # Output global and local features
    model = Model(inputs=input_image, outputs=([x1, x2, x3, x4, x5], x), name='Classifier')

    return model
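As a quick way to see what the encoder returns, here is a small sanity-check sketch (not part of the commit): it builds the model and prints the five intermediate feature-map shapes plus the 1024-d global vector, assuming the 224×224 RGBA input declared above. The two-element unpacking mirrors how DeepImpNet consumes the encoder output below.

```python
# Sketch only: verify the multi-scale outputs of ImageEncoder on a dummy RGBA batch.
import numpy as np

encoder = ImageEncoder()
dummy = np.zeros((2, 224, 224, 4), dtype=np.float32)   # two 224x224 RGBA images
local_feats, global_feat = encoder(dummy)

# With 'same' padding and 2x2 pooling the expected shapes are:
# x1 (2,112,112,64), x2 (2,56,56,128), x3 (2,28,28,256), x4 (2,14,14,512), x5 (2,7,7,512)
for i, f in enumerate(local_feats, start=1):
    print(f"x{i}:", f.shape)
print("global:", global_feat.shape)                    # (2, 1024)
```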
@@ -0,0 +1,58 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, ReLU
from tensorflow.keras import Model


class Classifier(Model):
    def __init__(self):
        super(Classifier, self).__init__()
        self.layer_add1 = Dense(1024, activation=None, trainable=True, name="dense_add1")
        self.bn_a1 = BatchNormalization()
        self.relu_a1 = ReLU()

        self.layer1 = Dense(1024, activation=None, trainable=True, name="dense4")
        self.bn1 = BatchNormalization()
        self.relu1 = ReLU()

        self.layer_add2 = Dense(768, activation=None, trainable=True, name="dense_add2")
        self.bn_a2 = BatchNormalization()
        self.relu_a2 = ReLU()

        self.layer2 = Dense(512, activation=None, trainable=True, name="dense5")
        self.bn2 = BatchNormalization()
        self.relu2 = ReLU()

        self.layer3 = Dense(256, activation=None, trainable=True, name="dense6")
        self.bn3 = BatchNormalization()
        self.relu3 = ReLU()

        self.layer4 = Dense(128, activation=None, trainable=True, name="dense7")
        self.bn4 = BatchNormalization()
        self.relu4 = ReLU()

        self.layer5 = Dense(3, activation=None, trainable=True, name="dense8")

    def call(self, img_feat, point_feat):
        # Fuse per-point image features with per-point geometry features on the
        # channel axis, then reduce to 3-way logits through Dense + BN + ReLU blocks.
        x = tf.keras.layers.Concatenate(axis=2)([img_feat, point_feat])
        x = self.layer_add1(x)
        x = self.relu_a1(self.bn_a1(x))

        x = self.layer1(x)
        x = self.relu1(self.bn1(x))

        x = self.layer_add2(x)
        x = self.relu_a2(self.bn_a2(x))

        x = self.layer2(x)
        x = self.relu2(self.bn2(x))

        x = self.layer3(x)
        x = self.relu3(self.bn3(x))

        x = self.layer4(x)
        x = self.relu4(self.bn4(x))

        x = self.layer5(x)

        return x
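To make the expected input layout concrete, here is a short sketch (not in the commit): the head takes per-point image features and per-point geometry features of shape [views, points, channels], concatenates them on the last axis, and emits 3-way logits per point. The channel counts below are illustrative; in this commit the local image branch concatenates 64+128+256+512+512 = 1472 channels and the point branch ends at 768.

```python
# Sketch only: feed the classifier head with dummy per-point features.
import tensorflow as tf

clf = Classifier()
img_feat = tf.zeros((2, 100, 1472))    # [views, points, image-feature channels]
point_feat = tf.zeros((2, 100, 768))   # [views, points, point-feature channels]
logits = clf(img_feat, point_feat)     # same two-argument call DeepImpNet uses below
print(logits.shape)                    # (2, 100, 3)
```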
@@ -0,0 +1,113 @@
import tensorflow as tf
from tensorflow.keras import Model
from src.model.mlpClassifier import Classifier
from src.model.pointConv import PointConv
from src.utils.transform_utils import grid_sample
from src.model.imgEncoder import ImageEncoder


class DeepImpNet(Model):
    def __init__(self):
        super(DeepImpNet, self).__init__()
        # modules / functions
        self.pointConv = PointConv()
        self.img_encoder = ImageEncoder()
        self.local_classifier = Classifier()
        self.global_classifier = Classifier()

    def projection_shapenet(self, pts, camera_dict):
        '''
        Compute the projected UV coordinates of 3D points given the camera matrices.
        :param pts: [Nv, N, 3] tensor of 3D points, N is the number of points
        :param camera_dict: dict with cam_pos [Nv, 1, 3] camera positions,
                            cam_rot [Nv, 3, 3] camera rotations, and cam_K [Nv, 3, 3] intrinsics
        :return uvz: [Nv, N, 3] image-space xy coordinates plus camera-space depth for each point
        '''
        point_num = pts.shape[1]

        # parse camera
        cam_pos = camera_dict['cam_pos']
        cam_rot = camera_dict['cam_rot']
        cam_K = camera_dict['cam_K']

        cam_pos = tf.convert_to_tensor(cam_pos, tf.float32)
        cam_rot = tf.convert_to_tensor(cam_rot, tf.float32)
        cam_K = tf.convert_to_tensor(cam_K, tf.float32)

        cam_pos = tf.tile(cam_pos, [1, point_num, 1])

        # projection
        pts_cam = tf.einsum('aij,ajk->aik', pts, cam_rot) + cam_pos
        X, Y, Z = pts_cam[:, :, 0], pts_cam[:, :, 1], pts_cam[:, :, 2]
        pts_cam = pts_cam / Z[:, :, None]
        pts_img = tf.einsum('aij,ajk->aik', cam_K, tf.transpose(pts_cam, perm=[0, 2, 1]))
        pts_img = tf.transpose(pts_img, perm=[0, 2, 1])
        uv = pts_img[:, :, 0:2]
        uvz = tf.concat([uv, Z[:, :, None]], 2)

        return uvz

    def index(self, feat, uv):
        '''
        Extract local features according to the UV coordinates.
        :param feat: [Nv, H, W, F] image features, Nv is the number of images, F is the number of feature channels
        :param uv: [Nv, N, 2] UV coordinates, N is the number of points
        :return: [N, Nv, F] sampled local features
        '''
        uv = tf.expand_dims(uv, 1)
        local_feat = grid_sample(feat, uv)
        local_feat = tf.squeeze(local_feat)

        if len(local_feat.shape) == 2:
            local_feat = tf.expand_dims(local_feat, axis=0)
        local_feat = tf.transpose(local_feat, perm=[1, 0, 2])

        return local_feat

    @tf.function
    def __call__(self, imgs, pts, camera_dict, view_num=1):
        '''
        Forward pass of the network.
        :param imgs: [B, H, W, C] input images, B is the number of input images
        :param pts: [B, N, 3] N is the number of sampling points
        :param camera_dict: dict of camera matrices, cam_rot [B, 3, 3], cam_pos [B, 1, 3], cam_K [B, 3, 3]
        :return: [B, N, 3] probabilities of the 3-way classification
        '''
        # transfer sampled points from world coordinates to image coordinates
        img_xyz = self.projection_shapenet(pts, camera_dict)
        point_num = pts.shape[1]

        # normalize image coordinates to [-1, 1]
        img_scale = float(imgs.shape[1]) - 1
        img_xyz = tf.clip_by_value(img_xyz, 0, img_scale)
        img_u_normalized = 2 * img_xyz[:, :, 0] / img_scale - 1.0
        img_v_normalized = 2 * img_xyz[:, :, 1] / img_scale - 1.0
        img_uv_normalized = tf.concat([img_u_normalized[:, :, None], img_v_normalized[:, :, None]], axis=2)

        # compute point features (using world coordinates)
        xyz = tf.expand_dims(pts, 1)
        point_coord_feat = self.pointConv(xyz)
        point_coord_feat = tf.squeeze(point_coord_feat)

        # encode image features
        feat_local, feat_global = self.img_encoder(imgs)
        feat_global = tf.expand_dims(feat_global, 1)

        # broadcast the global image feature to every point
        feat_global = tf.tile(feat_global, [1, point_num, 1])

        # sample local image features for each point at every encoder scale
        img_local_feat_list = [self.index(feat, img_uv_normalized) for feat in feat_local]
        final_img_local_feat = tf.concat(img_local_feat_list, 2)
        final_img_local_feat = tf.transpose(final_img_local_feat, perm=[1, 0, 2])

        # predict from the local and global branches
        pred1 = self.local_classifier(final_img_local_feat, point_coord_feat)
        pred2 = self.global_classifier(feat_global, point_coord_feat)

        pred_final = tf.nn.softmax(pred1 + pred2)

        return pred_final
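A hedged end-to-end sketch (not from the commit) of how the pieces above fit together: two rendered views, a batch of query points, and a camera dictionary go in, and per-point 3-way probabilities come out. The camera matrices below are placeholders (identity rotation, a translation that puts the points in front of the camera, toy intrinsics) chosen only so the code runs; they are not real ShapeNet cameras, and the example assumes the repository's src package, including src.utils.transform_utils.grid_sample, is importable.

```python
# Sketch only: dummy forward pass through DeepImpNet with placeholder cameras.
import numpy as np
import tensorflow as tf

net = DeepImpNet()
Nv, N = 2, 256                                   # two views, 256 query points per view
imgs = tf.zeros((Nv, 224, 224, 4))               # RGBA renderings
pts = tf.random.uniform((Nv, N, 3), -0.5, 0.5)   # query points in world coordinates

eye = np.eye(3, dtype=np.float32)
camera_dict = {
    'cam_rot': np.tile(eye[None], (Nv, 1, 1)),                                   # [Nv, 3, 3]
    'cam_pos': np.tile(np.array([[[0.0, 0.0, 2.0]]], np.float32), (Nv, 1, 1)),   # [Nv, 1, 3]
    'cam_K':   np.tile(np.array([[[112.0, 0.0, 112.0],
                                  [0.0, 112.0, 112.0],
                                  [0.0,   0.0,   1.0]]], np.float32), (Nv, 1, 1)),  # [Nv, 3, 3]
}

probs = net(imgs, pts, camera_dict)   # [Nv, N, 3] softmax over the three classes
print(probs.shape)
```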
@@ -0,0 +1,33 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from tensorflow.keras.layers import Conv1D, ReLU, BatchNormalization
from tensorflow.keras import Model


class PointConv(Model):
    def __init__(self):
        super(PointConv, self).__init__()
        self.conv1 = Conv1D(filters=64, kernel_size=1, strides=1, activation=None)
        self.bn1 = BatchNormalization()
        self.relu1 = ReLU()

        self.conv2 = Conv1D(filters=256, kernel_size=1, strides=1, activation=None)
        self.bn2 = BatchNormalization()
        self.relu2 = ReLU()

        self.conv3 = Conv1D(filters=512, kernel_size=1, strides=1, activation=None)
        self.bn3 = BatchNormalization()
        self.relu3 = ReLU()

        self.conv4 = Conv1D(filters=768, kernel_size=1, strides=1, activation=None)
        self.bn4 = BatchNormalization()
        self.relu4 = ReLU()

    def call(self, x):
        x = self.conv1(x)
        x = self.relu1(self.bn1(x))
        x = self.conv2(x)
        x = self.relu2(self.bn2(x))
        x = self.conv3(x)
        x = self.relu3(self.bn3(x))
        x = self.conv4(x)
        x = self.bn4(x)  # last block: BN without ReLU (relu4 is defined but not applied here)
        return x
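For context, PointConv is effectively a shared per-point MLP: every kernel-size-1 Conv1D acts on each point independently, lifting xyz coordinates from 3 to 768 channels. A minimal sketch (not in the commit), using the same extra leading axis that DeepImpNet adds with tf.expand_dims(pts, 1) before calling it:

```python
# Sketch only: lift a dummy point cloud with PointConv.
import tensorflow as tf

pc = PointConv()
pts = tf.random.uniform((2, 1, 1024, 3))   # [views, 1, points, xyz], matching DeepImpNet's usage
feat = pc(pts)                             # kernel-size-1 convs act on each point independently
print(feat.shape)                          # (2, 1, 1024, 768)
```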