From c73d830bc82b26e458644d982f3475c70e21aa6e Mon Sep 17 00:00:00 2001
From: xuzhen <zhenx@zju.edu.cn>
Date: Mon, 4 Mar 2024 15:41:34 +0800
Subject: [PATCH] easyvolcap: adding more useful scripts

---
 configs/specs/geometry.yaml                   | 13 +++++----
 .../dataloaders/datasets/geometry_dataset.py  |  8 ++++--
 .../models/cameras/optimizable_camera.py      |  9 ++++++
 .../visualizers/geometry_visualizer.py        | 28 ++++++++++++-------
 easyvolcap/utils/data_utils.py                |  8 +++++-
 scripts/colmap/unflatten_dataset.py           |  6 ++--
 scripts/tools/0_to_00.py                      | 27 +++++++++++-------
 scripts/tools/compress_videos.py              | 27 ++++++++++++++++++
 scripts/tools/reorder_data.py                 | 26 +++++++++++++++++
 9 files changed, 120 insertions(+), 32 deletions(-)
 create mode 100644 scripts/tools/compress_videos.py
 create mode 100644 scripts/tools/reorder_data.py

diff --git a/configs/specs/geometry.yaml b/configs/specs/geometry.yaml
index 2cbbfcb..a2db231 100644
--- a/configs/specs/geometry.yaml
+++ b/configs/specs/geometry.yaml
@@ -5,13 +5,14 @@ val_dataloader_cfg:
     dataset_cfg:
         type: GeometryDataset
         # skip_loading_images: True
+        cache_raw: False
         remove_outlier: False
-        vhull_thresh: 1.0
-        vhull_padding: 0.0 # padding could just be larger, this should be mostly fine
+        # vhull_thresh: 1.0
+        # vhull_padding: 0.0 # padding could just be larger, this should be mostly fine
 
-        vhull_count_factor: 1.0
-        vhull_thresh_factor: 1.0
-        vhull_ctof_factor: 20.0 # larger size?
+        # vhull_count_factor: 1.0
+        # vhull_thresh_factor: 1.0
+        # vhull_ctof_factor: 20.0 # larger size?
         view_sample: [0, null, 1] # use all views so that we can filter points to evaluate
 
 model_cfg:
@@ -24,7 +25,7 @@ runner_cfg:
     visualizer_cfg:
         type: GeometryVisualizer
         types: [MESH]
-        occ_thresh: 0.35 # leave out more points?
+        occ_thresh: 0.15 # leave out more points?
         result_dir: data/geometry
     evaluator_cfg:
         type: GeometryEvaluator
diff --git a/easyvolcap/dataloaders/datasets/geometry_dataset.py b/easyvolcap/dataloaders/datasets/geometry_dataset.py
index 616b0ff..65550d7 100644
--- a/easyvolcap/dataloaders/datasets/geometry_dataset.py
+++ b/easyvolcap/dataloaders/datasets/geometry_dataset.py
@@ -28,8 +28,9 @@ def __init__(self,
 
         def carve_using_bytes(H, W, K, R, T, latent_index):
             if hasattr(self, 'mks_bytes'):
-                bytes = [self.mks_bytes[i * self.n_latents + latent_index] for i in range(len(H))]  # get mask bytes of this frame
-                msks = parallel_execution(bytes, normalize=True, action=load_image_from_bytes, sequential=True)
+                msks = [self.mks_bytes[i * self.n_latents + latent_index] for i in range(len(H))]  # get mask bytes of this frame
+                if not self.cache_raw:
+                    msks = parallel_execution(msks, normalize=True, action=load_image_from_bytes, sequential=True)
                 msks = to_tensor(msks)
 
                 # Fill blank canvas for each mask
@@ -78,6 +79,9 @@ def carve_using_bytes(H, W, K, R, T, latent_index):
             self.valid.append(valid)
             self.inds.append(inds)
 
+    def __len__(self):
+        return self.n_latents  # dataset length is the number of latents; __getitem__ picks self.xyz by latent index
+
     def __getitem__(self, index: int):
         output = self.get_metadata(index)
         output.xyz = self.xyz[output.latent_index]
diff --git a/easyvolcap/models/cameras/optimizable_camera.py b/easyvolcap/models/cameras/optimizable_camera.py
index 421c414..1a7e4d4 100644
--- a/easyvolcap/models/cameras/optimizable_camera.py
+++ b/easyvolcap/models/cameras/optimizable_camera.py
@@ -76,6 +76,15 @@ def __init__(self,
         if freeze_camera:
             freeze_module(self)
 
+        self.pre_handle = self._register_load_state_dict_pre_hook(self._load_state_dict_pre_hook)
+
+    def _load_state_dict_pre_hook(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
+        """Swap the first two axes of a `pose_resd` entry saved as (n_views, n_frames, ...) before it gets loaded (historical layout)."""
+        key = prefix + 'pose_resd'
+        resd = state_dict.get(key)  # None when the state dict holds no pose residual
+        if resd is not None and resd.shape[0] == self.n_views and resd.shape[1] == self.n_frames:  # NOTE(review): n_views == n_frames makes both layouts match and always transposes
+            state_dict[key] = resd.transpose(0, 1)
+
     def forward_srcs(self, batch: dotdict):
         s_inds = batch.src_inds  # B, S, selected source views
         t_inds = batch.t_inds
diff --git a/easyvolcap/runners/visualizers/geometry_visualizer.py b/easyvolcap/runners/visualizers/geometry_visualizer.py
index 924fe9d..c5c82e2 100644
--- a/easyvolcap/runners/visualizers/geometry_visualizer.py
+++ b/easyvolcap/runners/visualizers/geometry_visualizer.py
@@ -36,12 +36,14 @@ def __init__(self,
                      Visualization.POINT.name: (lambda mesh, filename: export_pts(**mesh, filename=filename)),
                  },
                  verbose: bool = True,
-                 max_pending_pools: int = 100,  # maximum number of pending tasks in the thread pool
+                 pool_limit: int = 10,  # maximum number of pending thread pools before the oldest ones are joined
 
                  occ_thresh: float = 0.5,
+                 sdf_thresh: float = 0.0,
                  **kwargs,
                  ):
         self.occ_thresh = occ_thresh
+        self.sdf_thresh = sdf_thresh
 
         result_dir = join(result_dir, cfg.exp_name)  # MARK: global configuration
         result_dir = join(result_dir, save_tag) if save_tag != '' else result_dir
@@ -49,27 +51,32 @@ def __init__(self,
         self.types = [Visualization[t] for t in types]  # types of visualization
         self.exts = exts  # file extensions for each type of visualization
         self.exports = exports
+        self.verbose = verbose
 
         self.thread_pools: List[ThreadPool] = []
-        self.max_pending_pools = max_pending_pools
+        self.pool_limit = pool_limit
         self.geo_pattern = f'{{type}}/frame{{frame:04d}}_camera{{camera:04d}}{{ext}}'
 
         if verbose:
-            log(f'Visualization output: {yellow(join(result_dir, os.path.dirname(self.geo_pattern)))}')  # use yellow for output path
+            log(f'Visualization output: {yellow(join(result_dir, dirname(self.geo_pattern)))}')  # use yellow for output path
             log(f'Visualization types:', line(types))
 
     def generate_type(self, output: dotdict, batch: dotdict, type: Visualization = Visualization.MESH):
         if type == Visualization.MESH:
-            occ = output.occ
+            if 'sdf' in output:
+                occ = -output.sdf + 0.5
+                self.occ_thresh = -self.sdf_thresh + 0.5
+            else:
+                occ = output.occ
             voxel_size = batch.meta.voxel_size
             W, H, D = batch.meta.W[0].item(), batch.meta.H[0].item(), batch.meta.D[0].item()  # !: BATCH
-            cube = torch.zeros(np.prod(batch.valid.shape), dtype=occ.dtype, device='cpu')[None]  # 1, WHD
-            cube = multi_scatter_(cube[..., None], batch.inds.to('cpu', non_blocking=True), occ.to('cpu', non_blocking=True))  # dim = -2 # B, WHD, 1 assigned B, P, 1
+            cube = torch.full((np.prod(batch.valid.shape),), -10.0, dtype=occ.dtype, device='cpu')[None]  # 1, WHD
+            cube = multi_scatter_(cube[..., None], batch.inds.cpu(), occ.cpu())  # dim = -2 # B, WHD, 1 assigned B, P, 1
             cube = cube.view(-1, W, H, D)  # B, W, H, D
 
             # We leave the results on CPU but as tensors instead of numpy arrays
             torch.cuda.synchronize()  # some of the batched data are asynchronously moved to the cpu
-            verts, faces = mcubes.marching_cubes(cube.detach().cpu().float().numpy()[0], self.occ_thresh)
+            verts, faces = mcubes.marching_cubes(cube.float().numpy()[0], self.occ_thresh)
             verts = torch.as_tensor(verts, dtype=torch.float)[None]
             faces = torch.as_tensor(faces.astype(np.int32), dtype=torch.int)[None]
             verts = verts * voxel_size.to(verts.dtype) + batch.meta.bounds[:, 0].to(verts.dtype)  # !: BATCH
@@ -77,6 +84,7 @@ def generate_type(self, output: dotdict, batch: dotdict, type: Visualization = V
             mesh = dotdict()
             mesh.verts = verts
             mesh.faces = faces
+            log(f'Number of vertices: {verts.shape[-2]}, faces: {faces.shape[-2]} (frame: {batch.meta.frame_index.item()}, camera: {batch.meta.camera_index.item()})')  # verts is 1, V, 3 and faces is 1, F, 3: numel() would report three times the count
         else:
             raise NotImplementedError(f'Unimplemented visualization type: {type}')
         return mesh
@@ -119,11 +127,11 @@ def visualize(self, output: dotdict, batch: dotdict):
         return geo_stats
 
     def limit_thread_pools(self):
-        if len(self.thread_pools) > self.max_pending_pools:
-            for pool in self.thread_pools[:self.max_pending_pools]:
+        if len(self.thread_pools) > self.pool_limit:  # more pools are pending than allowed
+            for pool in self.thread_pools[:self.pool_limit]:  # close and join the first pool_limit pools
                 pool.close()
                 pool.join()
-            self.thread_pools = self.thread_pools[self.max_pending_pools:]
+            self.thread_pools = self.thread_pools[self.pool_limit:]  # keep tracking only the pools that were not joined
 
     def summarize(self):
         for pool in self.thread_pools:  # finish all pending taskes before generating videos
diff --git a/easyvolcap/utils/data_utils.py b/easyvolcap/utils/data_utils.py
index 4d82125..99a04cf 100644
--- a/easyvolcap/utils/data_utils.py
+++ b/easyvolcap/utils/data_utils.py
@@ -105,6 +105,7 @@ def read_pfm(filename):
 
 def generate_video(result_str: str,
                    output: str,
+                   verbose: bool = False,
                    fps: int = 30,
                    crf: int = 17,
                    cqv: int = 19,
@@ -118,14 +119,19 @@ def generate_video(result_str: str,
     cmd = [
         'ffmpeg',
         '-hwaccel', hwaccel,
+    ] + ([
         '-hide_banner',
         '-loglevel', 'error',
+    ] if not verbose else []) + ([
         '-framerate', fps,
+    ] if fps > 0 else []) + ([
         '-f', 'image2',
         '-pattern_type', 'glob',
+    ] if not (splitext(result_str)[-1] or result_str.endswith('*')) else []) + ([
+        '-r', fps,
+    ] if fps > 0 else []) + [
         '-nostdin',  # otherwise you cannot chain commands together
         '-y',
-        '-r', fps,
         '-i', result_str,
         '-c:v', vcodec,
         '-preset', preset,
diff --git a/scripts/colmap/unflatten_dataset.py b/scripts/colmap/unflatten_dataset.py
index 10f4d1f..bbeb7ef 100644
--- a/scripts/colmap/unflatten_dataset.py
+++ b/scripts/colmap/unflatten_dataset.py
@@ -1,3 +1,6 @@
+"""
+Find the images folder, rename it to images_flatten, and create a new images folder with the same structure as separated cameras
+"""
 # Rearrange images and camera parameters
 import shutil
 import argparse
@@ -6,9 +9,6 @@
 
 @catch_throw
 def main():
-    """
-    Find the images folder, rename it to images_flatten, and create a new images folder with the same structure as separated cameras
-    """
     parser = argparse.ArgumentParser()
     parser.add_argument('--data_root', default='data/zju/ipstage')
     parser.add_argument('--images_dirs', nargs='+', default=['images', 'masks'])
diff --git a/scripts/tools/0_to_00.py b/scripts/tools/0_to_00.py
index dce3f07..7e0373d 100644
--- a/scripts/tools/0_to_00.py
+++ b/scripts/tools/0_to_00.py
@@ -6,21 +6,28 @@
 from easyvolcap.utils.easy_utils import read_camera, write_camera
 
 
+@catch_throw
 def main():
+    """Zero-pad numeric entry names (0 -> 00) inside the data folders and the camera names of intri.yml / extri.yml."""
     parser = argparse.ArgumentParser()
-    parser.add_argument('input', default='data/mobile_stage/female')
+    parser.add_argument('--data_root', default='data/mobile_stage/female')
+    parser.add_argument('--dirs', nargs='+', default=['images', 'videos', 'masks', 'cameras'])  # nargs keeps command line values a list of folders
     args = parser.parse_args()
 
-    for cam in os.listdir(join(args.input, 'images')):
-        new_name = f"{int(cam):02d}"
-        if new_name != cam:
-            os.system(f'mv {join(args.input, "images", cam)} {join(args.input, "images", new_name)}')
+    for dir in args.dirs:
+        if not exists(join(args.data_root, dir)): continue
+        for cam in os.listdir(join(args.data_root, dir)):
+            idx, ext = splitext(cam)
+            new_name = f"{int(idx):02d}{ext}"  # pad the numeric stem, keep the extension
+            if new_name != cam:
+                os.rename(join(args.data_root, dir, cam), join(args.data_root, dir, new_name))  # no shell: paths with spaces work and failures raise
 
-    cams = read_camera(join(args.input, 'intri.yml'), join(args.input, 'extri.yml'))
-    new_cams = {}
-    for cam in cams:
-        new_cams[f"{int(cam):02d}"] = cams[cam]
-    write_camera(new_cams, args.input)
+    if exists(join(args.data_root, 'intri.yml')) and exists(join(args.data_root, 'extri.yml')):
+        cams = read_camera(join(args.data_root, 'intri.yml'), join(args.data_root, 'extri.yml'))
+        new_cams = {}
+        for cam in cams:
+            new_cams[f"{int(cam):02d}"] = cams[cam]
+        write_camera(new_cams, args.data_root)
 
 
 if __name__ == '__main__':
diff --git a/scripts/tools/compress_videos.py b/scripts/tools/compress_videos.py
new file mode 100644
index 0000000..d43944e
--- /dev/null
+++ b/scripts/tools/compress_videos.py
@@ -0,0 +1,27 @@
+"""
+Compress the video folder to something like libx265
+"""
+
+from easyvolcap.utils.console_utils import *
+from easyvolcap.utils.data_utils import generate_video
+
+
+@catch_throw
+def main():
+    """Re-encode every .mp4 of the videos folder into the output folder through generate_video."""
+    # Default values handed to build_parser, replaced by whatever the parsed command line returns
+    defaults = dotdict(data_root='data/bullet/final', videos_dir='videos', output_dir='videos_compressed')
+    args = dotdict(vars(build_parser(defaults, description=__doc__).parse_args()))
+
+    src_dir = join(args.data_root, args.videos_dir)
+    dst_dir = join(args.data_root, args.output_dir)
+    for name in sorted(os.listdir(src_dir)):
+        if name.endswith('.mp4'):  # anything that is not an mp4 is left alone
+            dst = join(dst_dir, name)
+            os.makedirs(dirname(dst), exist_ok=True)  # only created once there is a video to write
+            # fps=-1 is passed so that generate_video adds no frame rate arguments, verbose=True keeps the ffmpeg log
+            generate_video(join(src_dir, name), dst, fps=-1, verbose=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/tools/reorder_data.py b/scripts/tools/reorder_data.py
new file mode 100644
index 0000000..7aeedc5
--- /dev/null
+++ b/scripts/tools/reorder_data.py
@@ -0,0 +1,26 @@
+import os
+import argparse
+from os.path import join
+
+from easyvolcap.utils.console_utils import *
+from easyvolcap.utils.easy_utils import read_camera, write_camera
+
+
+@catch_throw
+def main():
+    """Rename the entries of every data folder to consecutive zero-padded indices, following their sorted names."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data_root', default='data/bullet/final')
+    parser.add_argument('--dirs', nargs='+', default=['images', 'videos', 'masks', 'cameras'])  # nargs keeps command line values a list of folders
+    args = parser.parse_args()
+
+    for dir in args.dirs:
+        if not exists(join(args.data_root, dir)): continue
+        for idx, cam in enumerate(sorted(os.listdir(join(args.data_root, dir)))):
+            new_name = f"{idx:02d}{splitext(cam)[-1]}"  # sorted position becomes the stem, the extension is kept
+            if new_name != cam:
+                os.rename(join(args.data_root, dir, cam), join(args.data_root, dir, new_name))  # no shell: paths with spaces work and failures raise
+
+
+if __name__ == '__main__':
+    main()