Commit 23b786b

easyvolcap: adding recording for timer and visualizer & other qol updates

1 parent de20b6d commit 23b786b

File tree: 13 files changed, +138 -32 lines

configs/datasets/NHR/NHR.yaml (+2)

@@ -5,6 +5,8 @@ dataloader_cfg: # we see the term "dataloader" as one word?
     vhull_thresh: 0.95
     count_thresh: 6 # common views

+    use_aligned_cameras: True
+
     vhull_thresh_factor: 0.75
     vhull_count_factor: 1.0

configs/datasets/mobile_stage/mobile_stage.yaml (+2)

@@ -8,6 +8,8 @@ dataloader_cfg: # we see the term "dataloader" as one word?
     view_sample: [0, null, 1]
     frame_sample: [0, null, 1] # only train for a thousand frames

+    use_aligned_cameras: True
+
     vhull_thresh: 0.85 # 21 cameras?
     count_thresh: 6 # more visibility
     vhull_thresh_factor: 0.9 # FIXME: 313 need 1.5, 390, 394 requires 1.0

configs/datasets/my_zjumocap/my_zjumocap.yaml (+2)

@@ -5,6 +5,8 @@ dataloader_cfg: # we see the term "dataloader" as one word?
     view_sample: [0, null, 1]
     frame_sample: [0, 200, 1] # only train for a thousand frames

+    use_aligned_cameras: True
+
     # MARK: This is for now the best vhull extraction setting
     vhull_thresh: 0.95 # 18 cameras?
     count_thresh: 16 # common views
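
All three datasets opt into the new use_aligned_cameras flag here. Assuming the key sits under dataloader_cfg.dataset_cfg (the hunk headers only show dataloader_cfg) and the dotted-key override syntax used in runtime_as_ply.py below, it could presumably also be toggled per run without editing the yaml; this command line is hypothetical:

    # hypothetical override, mirroring the config syntax used elsewhere in this commit
    evc -t test -c configs/datasets/NHR/NHR.yaml val_dataloader_cfg.dataset_cfg.use_aligned_cameras=False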

easyvolcap/engine/__init__.py (+2 -1)

@@ -115,7 +115,8 @@ def parse_cfg(args):
             )
         ) # empty config
     else:
-        raise FileNotFoundError(f"Config file {blue(args.config)} not found")
+        raise FileNotFoundError(f"Config file {args.config} not found")
+        # raise FileNotFoundError(f"Config file {markup_to_ansi(blue(args.config))} not found")


 parser = get_parser()

easyvolcap/models/samplers/gaussiant_sampler.py (+7 -4)

@@ -21,11 +21,11 @@

 from easyvolcap.utils.console_utils import *
 from easyvolcap.utils.console_utils import dotdict
-from easyvolcap.utils.gaussian_utils import GaussianModel
-from easyvolcap.utils.data_utils import load_pts, export_pts, to_x, to_cuda, to_cpu, to_tensor, remove_batch
-from easyvolcap.utils.net_utils import normalize, typed, update_optimizer_state
-from easyvolcap.utils.chunk_utils import multi_gather, multi_scatter
 from easyvolcap.utils.bound_utils import get_bounds
+from easyvolcap.utils.chunk_utils import multi_gather, multi_scatter
+from easyvolcap.utils.gaussian_utils import GaussianModel, in_frustrum
+from easyvolcap.utils.net_utils import normalize, typed, update_optimizer_state
+from easyvolcap.utils.data_utils import load_pts, export_pts, to_x, to_cuda, to_cpu, to_tensor, remove_batch

 from easyvolcap.models.cameras.optimizable_camera import OptimizableCamera
 from easyvolcap.models.samplers.point_planes_sampler import PointPlanesSampler

@@ -128,6 +128,9 @@ def render_gaussians(self, xyz: torch.Tensor, sh: torch.Tensor, scale3: torch.Te
         # Prepare the camera transformation for Gaussian
         gaussian_camera = to_x(prepare_gaussian_camera(batch), torch.float)

+        # is_in_frustrum = in_frustrum(xyz, gaussian_camera.full_proj_transform)
+        # print('Number of points to render:', is_in_frustrum.sum().item())
+
         # Prepare rasterization settings for gaussian
         raster_settings = GaussianRasterizationSettings(
             image_height=gaussian_camera.image_height,

easyvolcap/runners/evaluators/volumetric_video_evaluator.py (+9 -8)

@@ -38,16 +38,17 @@ def evaluate(self, output: dotdict, batch: dotdict):
         for compute in self.compute_metrics:
             metrics[compute.__name__] = compute(img, img_gt) # actual computation of the metrics

-        self.metrics.append(metrics)
+        if len(metrics):
+            self.metrics.append(metrics)

-        # For recording
-        c = batch.meta.camera_index.item()
-        f = batch.meta.frame_index.item()
-        log(f'camera: {c}', f'frame: {f}', metrics)
-        metrics.camera = c
-        metrics.frame = f
-        scalar_stats = dotdict({f'{k}_frame{f:04d}_cam{c:04d}': v for k, v in metrics.items()})
+            # For recording
+            c = batch.meta.camera_index.item()
+            f = batch.meta.frame_index.item()
+            log(f'camera: {c}', f'frame: {f}', metrics)
+            metrics.camera = c
+            metrics.frame = f

+        scalar_stats = dotdict({f'{k}_frame{f:04d}_cam{c:04d}': v for k, v in metrics.items()})
         return scalar_stats

     def summarize(self):
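
Note that scalar_stats now sits outside the new guard yet stays safe when metrics is empty: f and c are only referenced inside the dict comprehension's f-string, and the comprehension body never executes for zero items. A minimal illustration of that behavior:

    metrics = {}  # stands in for an empty dotdict: no metrics were computed
    scalar_stats = {f'{k}_frame{f:04d}_cam{c:04d}': v for k, v in metrics.items()}
    print(scalar_stats)  # {} -- f and c are undefined but never evaluated, so no NameError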

easyvolcap/runners/visualizers/volumetric_video_visualizer.py (+8 -2)

@@ -35,12 +35,13 @@ def __init__(self,
                     Visualization.ALPHA.name,
                 ],

-                stream_delay: int = 5, # after this number of pending copy, start synchronizing the stream and saving to disk
-                pool_limit: int = 5, # maximum number of pending tasks in the thread pool, keep this small to avoid using too much resource
+                stream_delay: int = 2, # after this number of pending copy, start synchronizing the stream and saving to disk
+                pool_limit: int = 10, # maximum number of pending tasks in the thread pool, keep this small to avoid using too much resource
                 video_fps: int = 60,
                 verbose: bool = True,

                 dpt_curve: str = 'normalize', # looks good
+                dpt_mult: float = 1.0,
                 dpt_cm: str = 'virdis' if args.type != 'gui' else 'linear', # looks good
                 ):
        super().__init__()

@@ -71,6 +72,7 @@ def __init__(self,
         self.video_fps = video_fps
         self.verbose = verbose
         self.dpt_curve = dpt_curve
+        self.dpt_mult = dpt_mult
         self.dpt_cm = dpt_cm

         if self.verbose:

@@ -102,11 +104,15 @@ def norm_curve_fn(norm):
             img = output.dpt_map
         else:
             img = depth_curve_fn(output.dpt_map, cm=self.dpt_cm)
+            # img = (img - 0.5) * self.dpt_mult + 0.5
+            img = img * self.dpt_mult
         if self.store_ground_truth and 'dpt' in batch:
             if self.dpt_curve == 'linear':
                 img_gt = batch.dpt
             else:
                 img_gt = depth_curve_fn(batch.dpt, cm=self.dpt_cm)
+                # img_gt = (img_gt - 0.5) * self.dpt_mult + 0.5
+                img_gt = img_gt * self.dpt_mult

         elif type == Visualization.FEATURE:
             # This visualizes the xyzt + xyz feature output
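
The kept variant scales the depth visualization towards black or white, while the commented-out variant would stretch contrast about mid-gray. A small worked example of the difference (dpt_mult and the [0, 1] value range follow the diff; the sample pixels are made up):

    import torch

    dpt_mult = 2.0
    img = torch.tensor([0.25, 0.50, 0.75])     # hypothetical normalized depth pixels

    scaled = img * dpt_mult                    # variant kept in the diff
    recentered = (img - 0.5) * dpt_mult + 0.5  # variant left commented out

    print(scaled)      # tensor([0.5000, 1.0000, 1.5000]): brightens uniformly, can exceed 1
    print(recentered)  # tensor([0.0000, 0.5000, 1.0000]): stretches contrast, mid-gray fixed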

easyvolcap/runners/volumetric_video_runner.py (+17 -1)

@@ -83,7 +83,8 @@ def __init__(self,

                 # Debugging
                 collect_timing: bool = False, # will lose 1 fps over copying
-                timer_sync_cuda: bool = True,
+                timer_sync_cuda: bool = True, # will explicitly call torch.cuda.synchronize() before collecting
+                timer_record_to_file: bool = False, # will write to a json file for collected analysis of the timing
                 ):
        self.model = model # possibly already a ddp model?

@@ -148,6 +149,7 @@ def __init__(self,
         # Debugging
         self.collect_timing = collect_timing # another fancy self.timer (different from fps counter)
         self.timer_sync_cuda = timer_sync_cuda # this enables accurate time recording for each section, but would slow down the programs
+        self.timer_record_to_file = timer_record_to_file

     @property
     def collect_timing(self):

@@ -157,6 +159,10 @@ def collect_timing(self):
     def timer_sync_cuda(self):
         return timer.sync_cuda

+    @property
+    def timer_record_to_file(self):
+        return timer.record_to_file
+
     @collect_timing.setter
     def collect_timing(self, val: bool):
         timer.disabled = not val

@@ -165,6 +171,16 @@ def collect_timing(self, val: bool):
     def timer_sync_cuda(self, val: bool):
         timer.sync_cuda = val

+    @timer_record_to_file.setter
+    def timer_record_to_file(self, val: bool):
+        timer.record_to_file = val
+        if timer.record_to_file:
+            log(yellow(f'Will record timing results to {blue(join(self.recorder.record_dir, f"{self.exp_name}.json"))}'))
+            timer.exp_name = self.exp_name
+            timer.record_dir = self.recorder.record_dir
+            if not hasattr(timer, 'timing_record'):
+                timer.timing_record = dotdict()
+
     @property
     def total_iter(self):
         return self.epochs * self.ep_iter
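
These property pairs make the runner a thin proxy over the module-level timer singleton, so the GUI toggles in the viewer and the config flags mutate the same state. A minimal self-contained sketch of the pattern (FakeTimer and Runner are illustrative stand-ins, not easyvolcap's classes):

    class FakeTimer:                        # stands in for the global timer object
        record_to_file = False

    timer = FakeTimer()

    class Runner:
        @property
        def timer_record_to_file(self):     # reads go straight to the singleton
            return timer.record_to_file

        @timer_record_to_file.setter
        def timer_record_to_file(self, val: bool):
            timer.record_to_file = val      # writes update the singleton in place

    r = Runner()
    r.timer_record_to_file = True
    assert timer.record_to_file             # the runner holds no timing state of its own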

easyvolcap/runners/volumetric_video_viewer.py (+12 -2)

@@ -71,6 +71,7 @@ def __init__(self,
                 update_mem_time: float = 0.1, # be less stressful
                 use_quad_draw: bool = False, # different rendering solution
                 use_quad_cuda: bool = True,
+                use_vsync: bool = False,

                 # This is important for works like K-planes or IBR (or stableenerf), since it's not easy to perform interpolation (slow motion)
                 # For point clouds, only a fixed number of point clouds are produces since we performed discrete training (no interpolation)

@@ -99,6 +100,7 @@ def __init__(self,
         self.fullscreen = fullscreen
         self.window_size = window_size
         self.window_title = window_title
+        self.use_vsync = use_vsync
         self.use_window_focal = use_window_focal

         # Quad related configurations

@@ -901,7 +903,15 @@ def draw_banner_gui(self, batch: dotdict = dotdict(), output: dotdict = dotdict(
         imgui.pop_font()

         # Full frame timings
-        timer.disabled = not imgui_toggle.toggle('Collect timings', not timer.disabled, config=self.static.toggle_ios_style)[1]
+        self.runner.collect_timing = imgui_toggle.toggle('Collect timing', self.runner.collect_timing, config=self.static.toggle_ios_style)[1]
+        changed, value = imgui_toggle.toggle('Record timing', self.runner.timer_record_to_file, config=self.static.toggle_ios_style)
+        if changed:
+            self.runner.timer_record_to_file = value
+        self.runner.timer_sync_cuda = imgui_toggle.toggle('Sync timing', self.runner.timer_sync_cuda, config=self.static.toggle_ios_style)[1]
+        changed, self.use_vsync = imgui_toggle.toggle('Enable VSync', self.use_vsync, config=self.static.toggle_ios_style)
+        if changed:
+            glfw.swap_interval(self.use_vsync)
+
         if not timer.disabled:
             if imgui.collapsing_header('Timing'):
                 imgui.text(f'gui : {batch.gui_time * 1000:7.3f}ms')

@@ -1417,7 +1427,7 @@ def init_glfw(self):
         # Create a windowed mode window and its OpenGL context
         window = glfw.create_window(self.W, self.H, self.window_title, None, None)
         glfw.make_context_current(window)
-        glfw.swap_interval(0) # disable vsync
+        glfw.swap_interval(self.use_vsync) # disable vsync

         icon = load_image(self.icon_file)
         pixels = (icon * 255).astype(np.uint8)
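
For context on the swap_interval change: an interval of 1 synchronizes buffer swaps with the display refresh (VSync on) while 0 swaps immediately, and since Python bools are ints, passing use_vsync directly works. A minimal sketch (window size and title are arbitrary):

    import glfw

    use_vsync = False
    assert glfw.init()                 # must succeed before creating windows
    window = glfw.create_window(640, 480, 'vsync demo', None, None)
    glfw.make_context_current(window)  # swap_interval applies to the current context
    glfw.swap_interval(use_vsync)      # int(False) == 0: uncapped frame rate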

easyvolcap/utils/console_utils.py (+20 -1)

@@ -599,13 +599,25 @@ def wrapper(func: Callable):


 class Timer:
-    def __init__(self, name='', disabled: bool = False, sync_cuda: bool = True):
+    def __init__(self,
+                 name='base',
+                 exp_name='',
+                 record_dir: str = 'data/timing',
+                 disabled: bool = False,
+                 sync_cuda: bool = True,
+                 record_to_file: bool = False,
+                 ):
         self.sync_cuda = sync_cuda
         self.disabled = disabled
         self.name = name
+        self.exp_name = exp_name
         self.start_time = time.perf_counter() # manually record another start time incase timer is disabled during initialization
         self.start() # you can always restart multiple times to reuse this timer

+        self.record_to_file = record_to_file
+        if self.record_to_file:
+            self.timing_record = dotdict()
+
     def __enter__(self):
         self.start()

@@ -636,6 +648,13 @@ def record(self, event: str = ''):
         if self.disabled: return 0
         self.name = event
         diff = self.stop(print=bool(event), back=3)
+        if self.record_to_file and event:
+            if event not in self.timing_record:
+                self.timing_record[event] = []
+            self.timing_record[event].append(diff)
+
+            with open(join(self.record_dir, f'{self.exp_name}.json'), 'w') as f:
+                json.dump(self.timing_record, f, indent=4)
         self.start()
         return diff
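
A minimal usage sketch of the extended Timer, following the API in this diff; note the shown __init__ stores only exp_name, so record_dir is attached as a plain attribute here, just as the runner's timer_record_to_file setter does above:

    import os
    import time
    from easyvolcap.utils.console_utils import Timer

    os.makedirs('data/timing', exist_ok=True)            # record() writes its json here
    timer = Timer(exp_name='demo', record_to_file=True)  # __init__ already calls start()
    timer.record_dir = 'data/timing'

    time.sleep(0.01)          # stands in for a forward pass
    timer.record('forward')   # appends the elapsed time to timing_record['forward']
    time.sleep(0.01)          # stands in for a backward pass
    timer.record('backward')  # ...and rewrites data/timing/demo.json with every record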

easyvolcap/utils/data_utils.py (+3 -2)

@@ -231,7 +231,7 @@ def video_to_numpy(input_filename):
         'ffmpeg',
         '-hwaccel', 'cuda',
         '-v', 'quiet', '-stats',
-        # '-vcodec', 'hevc_cuvid',
+        '-vcodec', 'hevc_cuvid',
         '-i', input_filename,
         '-f', 'image2pipe',
         '-pix_fmt', 'rgb24',

@@ -244,7 +244,8 @@

     # Convert the raw data to numpy array and reshape
     video_np = np.frombuffer(raw_data, dtype=np.uint8)
-    video_np = video_np.reshape(-1, H, W, 3)
+    H2, W2 = (H + 1) // 2 * 2, (W + 1) // 2 * 2
+    video_np = video_np.reshape(-1, H2, W2, 3)[:, :H, :W, :]
     return video_np
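
The reshape fix presumably accounts for hardware HEVC decoding emitting frames padded to even dimensions (4:2:0 chroma subsampling works on 2x2 pixel blocks), so for an odd-sized video the raw rgb24 stream carries H2 x W2 pixels per frame and a direct (H, W) reshape would fail or shear the image. A minimal sketch with made-up sizes:

    import numpy as np

    H, W = 719, 1279                                  # hypothetical odd-sized video
    H2, W2 = (H + 1) // 2 * 2, (W + 1) // 2 * 2       # 720, 1280: rounded up to even
    raw = np.zeros(2 * H2 * W2 * 3, dtype=np.uint8)   # stands in for the piped frames
    video = raw.reshape(-1, H2, W2, 3)[:, :H, :W, :]  # reshape to padded size, crop back
    assert video.shape == (2, H, W, 3)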

easyvolcap/utils/gaussian_utils.py (+39 -3)

@@ -8,9 +8,41 @@
 from easyvolcap.utils.console_utils import *
 from easyvolcap.utils.sh_utils import eval_sh
 from easyvolcap.utils.blend_utils import batch_rodrigues
-from easyvolcap.utils.math_utils import torch_inverse_2x2
 from easyvolcap.utils.data_utils import to_x, add_batch, load_pts
 from easyvolcap.utils.net_utils import make_buffer, make_params, typed
+from easyvolcap.utils.math_utils import torch_inverse_2x2, point_padding
+
+
+# def in_frustrum(xyz: torch.Tensor, ixt: torch.Tensor, ext: torch.Tensor):
+def in_frustrum(xyz: torch.Tensor, full_proj_matrix: torch.Tensor, padding: float = 0.01):
+    # __forceinline__ __device__ bool in_frustum(int idx,
+    #     const float* orig_points,
+    #     const float* viewmatrix,
+    #     const float* projmatrix,
+    #     bool prefiltered,
+    #     float3& p_view,
+    #     const float padding = 0.01f // padding in ndc space
+    # )
+    # {
+    #     float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
+
+    #     // Bring points to screen space
+    #     float4 p_hom = transformPoint4x4(p_orig, projmatrix);
+    #     float p_w = 1.0f / (p_hom.w + 0.0000001f);
+    #     float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
+    #     p_view = transformPoint4x3(p_orig, viewmatrix); // write this outside
+
+    #     // if (idx % 32768 == 0) printf("Viewspace point: %f, %f, %f\n", p_view.x, p_view.y, p_view.z);
+    #     // if (idx % 32768 == 0) printf("Projected point: %f, %f, %f\n", p_proj.x, p_proj.y, p_proj.z);
+    #     return (p_proj.z > -1 - padding) && (p_proj.z < 1 + padding) && (p_proj.x > -1 - padding) && (p_proj.x < 1. + padding) && (p_proj.y > -1 - padding) && (p_proj.y < 1. + padding);
+    # }
+
+    # xyz: N, 3
+    # ndc = (xyz @ R.mT + T)[..., :3] @ K # N, 3
+    # ndc[..., :2] = ndc[..., :2] / ndc[..., 2:] / torch.as_tensor([W, H], device=ndc.device) # N, 2, normalized x and y
+    ndc = point_padding(xyz) @ full_proj_matrix
+    ndc = ndc[..., :3] / ndc[..., 3:]
+    return (ndc[..., 2] > -1 - padding) & (ndc[..., 2] < 1 + padding) & (ndc[..., 0] > -1 - padding) & (ndc[..., 0] < 1. + padding) & (ndc[..., 1] > -1 - padding) & (ndc[..., 1] < 1. + padding)  # N,


 @torch.jit.script

@@ -199,7 +231,8 @@ def prepare_gaussian_camera(batch):
 def convert_to_gaussian_camera(K: torch.Tensor,
                                R: torch.Tensor,
                                T: torch.Tensor,
-                               H: int, W: int,
+                               H: int,
+                               W: int,
                                znear: float = 0.01,
                                zfar: float = 100.
                                ):

@@ -220,7 +253,7 @@

     output.world_view_transform = getWorld2View(output.R, output.T).transpose(0, 1)
     output.projection_matrix = getProjectionMatrix(output.K, output.image_height, output.image_width, znear, zfar).transpose(0, 1)
-    output.full_proj_transform = torch.matmul(output.world_view_transform, output.projection_matrix)
+    output.full_proj_transform = torch.matmul(output.world_view_transform, output.projection_matrix) # 4, 4
     output.camera_center = output.world_view_transform.inverse()[3:, :3]

     # Set up rasterization configuration

@@ -686,6 +719,9 @@ def render(self, batch: dotdict):
         # Prepare the camera transformation for Gaussian
         gaussian_camera = to_x(prepare_gaussian_camera(batch), torch.float)

+        # is_in_frustrum = in_frustrum(xyz, gaussian_camera.full_proj_transform)
+        # print('Number of points to render:', is_in_frustrum.sum().item())
+
         # Prepare rasterization settings for gaussian
         raster_settings = GaussianRasterizationSettings(
             image_height=gaussian_camera.image_height,
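
A self-contained sketch of the in_frustrum test added above, with point_padding (assumed to append a homogeneous 1 to each point) inlined via torch.cat; the row-vector convention xyzw @ M matches full_proj_transform, which is built from matrices transposed with .transpose(0, 1). Checking all three ndc components against +-(1 + padding) is equivalent to the six-way conjunction in the diff:

    import torch

    def in_frustrum_sketch(xyz: torch.Tensor,               # N, 3 world-space points
                           full_proj_matrix: torch.Tensor,  # 4, 4 row-vector projection
                           padding: float = 0.01):          # slack in ndc space
        xyzw = torch.cat([xyz, torch.ones_like(xyz[..., :1])], dim=-1)  # N, 4 homogeneous
        ndc = xyzw @ full_proj_matrix                       # N, 4 clip-space coordinates
        ndc = ndc[..., :3] / ndc[..., 3:]                   # N, 3 perspective division
        return ((ndc > -1 - padding) & (ndc < 1 + padding)).all(dim=-1)  # N, bool mask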

scripts/tools/runtime_as_ply.py (+15 -8)

@@ -17,18 +17,17 @@
 def main():
     # fmt: off
     import sys
-    sys.path.append('.')

-    sep_ind = sys.argv.index('--')
+    sep_ind = sys.argv.index('--') if '--' in sys.argv else len(sys.argv)
     our_args = sys.argv[1:sep_ind]
     evv_args = sys.argv[sep_ind + 1:]
-    sys.argv = [sys.argv[0]] + ['-t','test'] + evv_args + ['val_dataloader_cfg.dataset_cfg.type=VolumetricVideoDataset'] # use default dataset
+    sys.argv = [sys.argv[0]] + ['-t','test'] + evv_args

-    parser = argparse.ArgumentParser()
-    parser.add_argument('--result_dir', type=str, default='data/geometry')
-    parser.add_argument('--frame_index', type=int, default=0)
-    parser.add_argument('--skip_align', action='store_true')
-    args = parser.parse_args(our_args)
+    args = dotdict()
+    args.result_dir = 'data/geometry'
+    args.frame_index = 0
+    args.skip_align = False
+    args = dotdict(vars(build_parser(args).parse_args(our_args)))

     sys.argv += [f'val_dataloader_cfg.dataset_cfg.frame_sample={args.frame_index},{args.frame_index+1},1']

@@ -47,11 +46,19 @@ def main():
     special_mapping = {
         f'sampler.pcds.{args.frame_index}': 'pts',
         f'sampler.rgbs.{args.frame_index}': 'color',
+        f'sampler.bg_sampler.pcds.{args.frame_index}': 'pts',
+        f'sampler.bg_sampler.rgbs.{args.frame_index}': 'color',
+        f'sampler.fg_sampler.pcds.{args.frame_index}': 'pts',
+        f'sampler.fg_sampler.rgbs.{args.frame_index}': 'color',
     }

     named_mapping = {
         f'sampler.rads.{args.frame_index}': 'radius',
         f'sampler.occs.{args.frame_index}': 'alpha',
+        f'sampler.bg_sampler.rads.{args.frame_index}': 'radius',
+        f'sampler.bg_sampler.occs.{args.frame_index}': 'alpha',
+        f'sampler.fg_sampler.rads.{args.frame_index}': 'radius',
+        f'sampler.fg_sampler.occs.{args.frame_index}': 'alpha',
     }

     # Save the model's registered parameters as numpy arrays in npz
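
The build_parser helper used here presumably turns the dotdict of defaults into an argparse parser; a rough sketch of what such a helper might do (an assumption about its behavior, not easyvolcap's actual implementation):

    import argparse

    def build_parser_sketch(defaults: dict) -> argparse.ArgumentParser:
        parser = argparse.ArgumentParser()
        for key, value in defaults.items():
            if isinstance(value, bool):  # booleans become store_true-style toggles
                parser.add_argument(f'--{key}', action='store_true', default=value)
            else:                        # everything else keeps its type and default
                parser.add_argument(f'--{key}', type=type(value), default=value)
        return parser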
