source code for SVR

chenweikai · Jun 13, 2022 · 41674a5 · 41674a5
1 parent d803985
commit 41674a5
Show file tree

Hide file tree

Showing 14 changed files with 1,295 additions and 0 deletions.
diff --git a/single_view_recon/README.md b/single_view_recon/README.md
@@ -0,0 +1,21 @@
+## 主要目录
+
+    .
+    ├── model                           # network module definitions, data loader, etc.
+    ├── utils                           # utility functions, e.g. file operations and mesh processing
+    ├── evaluate_cascade.py             # code for evaluating cascaded network       
+    ├── evaluate_3Pole.py               # code for evaluating one-stage network 
+    ├── train_stage1.py                 # code for training stage 1 of cascaded framework    
+    ├── train_stage2.py                 # code for training stage 2 of cascaded framework 
+    ├── retrieve_stage1_result.py       # code for retrieving stage-1 results
+    ├── stage2_trainer.py               # callable training module for training 3-category classification 
+    ├── train_3PoleSDF_in_one_stage.py  # code for training one-stage framework
+    ├── finetune_model.py               # code for fine tuning pretrained model
+
+
+
+
+
+
+
+
diff --git a/single_view_recon/model/__pycache__/dataLoader.cpython-38.pyc b/single_view_recon/model/__pycache__/dataLoader.cpython-38.pyc
diff --git a/single_view_recon/model/__pycache__/network.cpython-38.pyc b/single_view_recon/model/__pycache__/network.cpython-38.pyc
diff --git a/single_view_recon/model/__pycache__/pointConv.cpython-38.pyc b/single_view_recon/model/__pycache__/pointConv.cpython-38.pyc
diff --git a/single_view_recon/model/dataLoader.py b/single_view_recon/model/dataLoader.py
diff --git a/single_view_recon/model/imgEncoder.py b/single_view_recon/model/imgEncoder.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from tensorflow.keras import layers, models, Input
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, ReLU, BatchNormalization, GlobalAveragePooling2D
+
+
+def ImageEncoder():
+
+    input_image = Input(shape=(224, 224, 4))
+
+    # Block1
+    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block1_conv1')(input_image)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block1_conv2')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
+
+    # Block2
+    x = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block2_conv1')(x1)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block2_conv2')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
+
+    # Block3
+    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv1')(x2)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv2')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv3')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block3_conv4')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
+
+    # Block4
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv1')(x3)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv2')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv3')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block4_conv4')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
+
+    # Block5
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv1')(x4)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv2')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv3')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = Conv2D(filters=512, kernel_size=(3, 3), strides=(1, 1), padding='same', name='block5_conv4')(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
+
+    # Global features
+    x = GlobalAveragePooling2D()(x5)
+    x = Dense(units=1024, activation= None, name='output_predictions')(x)
+
+    # Output global and local features
+    model = Model(inputs=input_image, outputs=([x1, x2, x3, x4, x5], x), name='Classifier')
+
+    return model
diff --git a/single_view_recon/model/mlpClassifier.py b/single_view_recon/model/mlpClassifier.py
@@ -0,0 +1,58 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+import tensorflow as tf
+from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, ReLU
+from tensorflow.keras import Model
+
+
+class Classifier(Model):
+    def __init__(self):
+        super(Classifier, self).__init__()
+        self.layer_add1 = Dense(1024, activation=None, trainable=True, name="dense_add1")
+        self.bn_a1 = BatchNormalization()
+        self.relu_a1 = ReLU()
+
+        self.layer1 = Dense(1024, activation=None, trainable=True, name="dense4")
+        self.bn1 = BatchNormalization()
+        self.relu1 = ReLU()
+
+        self.layer_add2 = Dense(768, activation=None, trainable=True, name="dense_add2")
+        self.bn_a2 = BatchNormalization()
+        self.relu_a2 = ReLU()
+
+        self.layer2 = Dense(512, activation=None, trainable=True, name="dense5")
+        self.bn2 = BatchNormalization()
+        self.relu2 = ReLU()
+
+        self.layer3 = Dense(256, activation=None, trainable=True, name="dense6")
+        self.bn3 = BatchNormalization()
+        self.relu3 = ReLU()
+
+        self.layer4 = Dense(128, activation=None, trainable=True, name="dense7")
+        self.bn4 = BatchNormalization()
+        self.relu4 = ReLU()
+
+        self.layer5 = Dense(3, activation=None, trainable=True, name="dense8")
+
+    def call(self, img_feat, point_feat):
+        x = tf.keras.layers.Concatenate(axis=2)([img_feat, point_feat])
+        x = self.layer_add1(x)
+        x = self.relu_a1(self.bn_a1(x))
+
+        x = self.layer1(x)
+        x = self.relu1(self.bn1(x))
+
+        x = self.layer_add2(x)
+        x = self.relu_a2(self.bn_a2(x))
+
+        x = self.layer2(x)
+        x = self.relu2(self.bn2(x))
+
+        x = self.layer3(x)
+        x = self.relu3(self.bn3(x))
+
+        x = self.layer4(x)
+        x = self.relu4(self.bn4(x))
+
+        x = self.layer5(x)
+
+        return x
diff --git a/single_view_recon/model/network.py b/single_view_recon/model/network.py
@@ -0,0 +1,113 @@
+import tensorflow as tf
+from tensorflow.keras import Model
+from src.model.mlpClassifier import Classifier
+from src.model.pointConv import PointConv
+from src.utils.transform_utils import grid_sample
+from src.model.imgEncoder import ImageEncoder
+
+class DeepImpNet(Model):
+    def __init__(self):
+        super(DeepImpNet, self).__init__()
+        # modules / functions
+        self.pointConv = PointConv()
+        self.img_encoder = ImageEncoder()
+        self.local_classifier = Classifier()
+        self.global_classifier = Classifier()
+
+    def projection_shapenet(self, pts, camera_dict):
+        '''
+        Compute the orthogonally projected UV coordinates of 3D points given transform matrices
+        :param pts: [Nv, N ,3] Tensor of 3D points, N is number of points
+        :param cam_pos: [Nv, 3, 1] camera position
+        :param cam_rot: [Nv, 3, 3] camera rotation
+        :param image_size: resolution of image
+        :param f: derived from the intrinsic parameters K
+        :return uv: [Nv, N, 3] xyz coordinates for each point on multiview images
+        '''
+
+        point_num = pts.shape[1]
+
+        #parse camera
+        cam_pos = camera_dict['cam_pos']
+        cam_rot = camera_dict['cam_rot']
+        cam_K = camera_dict['cam_K']
+
+        cam_pos = tf.convert_to_tensor(cam_pos, tf.float32)
+        cam_rot = tf.convert_to_tensor(cam_rot, tf.float32)
+        cam_K = tf.convert_to_tensor(cam_K, tf.float32)
+
+        cam_pos = tf.tile(cam_pos, [1, point_num, 1])
+
+        # projection
+        pts_cam = tf.einsum('aij,ajk->aik', pts, cam_rot) + cam_pos
+        X, Y, Z = pts_cam[:, :, 0], pts_cam[:, :, 1], pts_cam[:, :, 2]
+        pts_cam = pts_cam / Z[:, :, None]
+        pts_img = tf.einsum('aij,ajk->aik', cam_K, tf.transpose(pts_cam, perm=[0, 2, 1]))
+        pts_img = tf.transpose(pts_img, perm=[0, 2, 1])
+        uv = pts_img[:, :, 0:2]
+        uvz = tf.concat([uv, Z[:, :, None]], 2)
+
+        return uvz
+
+    def index(self, feat, uv):
+        '''
+        Extract the local feature from according to the UV coordinates
+        :param feat: [Nv, H, W, F] image features, Nv is the number of images, F is num. of feat. channels
+        :param uv: [Nv, N, 2] uv coordinate, Nv is the number of images, N is number of points
+        :return [N, Nv, F] 
+        '''
+        uv = tf.expand_dims(uv, 1)
+        local_feat = grid_sample(feat, uv)
+        local_feat = tf.squeeze(local_feat)
+
+        if len(local_feat.shape) == 2:
+            local_feat = tf.expand_dims(local_feat, axis = 0)
+        local_feat = tf.transpose(local_feat, perm = [1,0,2])
+
+        return local_feat
+
+    @tf.function
+    def __call__(self, imgs, pts, camera_dict, view_num=1):
+        '''
+        Forward pass of the network
+        :param imgs: [B, H, W, C] input images, B is number of input images
+        :param pts: [B, N, 3] N is number of sampling points
+        :param camera_dict:  A dict contains camera matrices, cam_rot [B,3,3], cam_pos [B,1,3], cam_K [B,3,3]
+        :return [B, N, 3] possibility of 3-way classification
+        '''
+
+        # transfer sampled points from world coordinates to image coordinates
+        img_xyz = self.projection_shapenet(pts, camera_dict)
+        point_num = pts.shape[1]
+
+        # normalize image coordinate to [-1,1]
+        img_scale = float(imgs.shape[1]) - 1
+        img_xyz = tf.clip_by_value(img_xyz, 0, img_scale)
+        img_u_normalized = 2 * img_xyz[:, :, 0] / img_scale - 1.0
+        img_v_normalized = 2 * img_xyz[:, :, 1] / img_scale - 1.0
+        img_uv_normalized = tf.concat([img_u_normalized[:, :, None], img_v_normalized[:, :, None]], axis=2)
+
+        # compute point feature (using world coordinate)
+        xyz = tf.expand_dims(pts, 1)
+        point_coord_feat = self.pointConv(xyz)
+        point_coord_feat = tf.squeeze(point_coord_feat)
+
+        # encode image feature
+        feat_local, feat_global = self.img_encoder(imgs)
+        feat_global = tf.expand_dims(feat_global, 1)
+
+        # compute global image feature
+        feat_global = tf.tile(feat_global, [1, point_num, 1])
+
+        # compute local image feature for points
+        img_local_feat_list = [self.index(feat, img_uv_normalized) for feat in feat_local]
+        final_img_local_feat = tf.concat(img_local_feat_list, 2)
+        final_img_local_feat = tf.transpose(final_img_local_feat, perm=[1,0,2])
+
+        # pred sdf from two branches
+        pred1 = self.local_classifier(final_img_local_feat, point_coord_feat)
+        pred2 = self.global_classifier(feat_global,  point_coord_feat)
+
+        pred_final = tf.nn.softmax(pred1 + pred2)
+
+        return pred_final
diff --git a/single_view_recon/model/pointConv.py b/single_view_recon/model/pointConv.py
@@ -0,0 +1,33 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from tensorflow.keras.layers import Conv1D, Conv2D, ReLU, BatchNormalization
+from tensorflow.keras import Model
+
+class PointConv(Model):
+    def __init__(self):
+        super(PointConv, self).__init__()
+        self.conv1 = Conv1D(filters = 64, kernel_size = 1, strides = 1, activation = None)
+        self.bn1 = BatchNormalization()
+        self.relu1 = ReLU()
+
+        self.conv2 = Conv1D(filters = 256, kernel_size = 1, strides = 1, activation = None)
+        self.bn2 = BatchNormalization()
+        self.relu2 = ReLU()
+
+        self.conv3 = Conv1D(filters = 512, kernel_size = 1, strides = 1, activation = None)
+        self.bn3 = BatchNormalization()
+        self.relu3 = ReLU()
+
+        self.conv4 = Conv1D(filters = 768, kernel_size = 1, strides = 1, activation = None)
+        self.bn4 = BatchNormalization()
+        self.relu4 = ReLU()
+
+    def call(self, x):
+        x = self.conv1(x)
+        x = self.relu1(self.bn1(x))
+        x = self.conv2(x)
+        x = self.relu2(self.bn2(x))
+        x = self.conv3(x)
+        x = self.relu3(self.bn3(x))
+        x = self.conv4(x)
+        x = self.bn4(x)
+        return x