Save

zhangkaiitugithub · Nov 23, 2021 · 394318b · 394318b
1 parent 49128b7
commit 394318b
Show file tree

Hide file tree

Showing 17 changed files with 245 additions and 87 deletions.
diff --git a/opt/configs/custom.json b/opt/configs/custom.json
@@ -4,20 +4,22 @@
     "n_iters": 102400,
     "background_nlayers": 64,
     "background_reso": 1024,
-    "cam_scale_factor": 0.95,
+    "cam_scale_factor": 0.9,
     "upsamp_every": 25600,
+    "near_clip": 0.35,
     "lr_sigma": 3e1,
     "lr_sh": 1e-2,
     "lr_sigma_delay_steps": 0,
     "lr_fg_begin_step": 1000,
     "thresh_type": "weight",
     "weight_thresh": 1.28,
-    "lambda_tv": 5e-5,
+    "lambda_tv": 5e-3,
     "lambda_tv_sh": 5e-3,
-    "lambda_tv_background_sigma": 1e-3,
-    "lambda_tv_background_color": 1e-3,
+    "lambda_tv_background_sigma": 5e-3,
+    "lambda_tv_background_color": 5e-3,
     "lambda_beta": 1e-5,
     "lambda_sparsity": 1e-11,
-    "background_brightness": 1.0,
-    "tv_early_only": 0
+    "background_brightness": 0.5,
+    "tv_early_only": 0,
+    "tv_decay": 0.5
 }
diff --git a/opt/opt.py b/opt/opt.py
@@ -199,6 +199,8 @@
 group.add_argument('--tv_lumisphere_sparsity', type=float, default=0.01)
 group.add_argument('--tv_lumisphere_dir_factor', type=float, default=0.0)
 
+group.add_argument('--tv_decay', type=float, default=1.0)
+
 group.add_argument('--lambda_l2_sh', type=float, default=0.0)#1e-4)
 group.add_argument('--tv_early_only', type=int, default=1, help="Turn off TV regularization after the first split/prune")
 
@@ -233,6 +235,10 @@
 group.add_argument('--lr_decay', action='store_true', default=True)
 
 group.add_argument('--n_train', type=int, default=None, help='Number of training images. Defaults to use all avaiable.')
+
+group.add_argument('--nosphereinit', action='store_true', default=False,
+                     help='do not start with sphere bounds (please do not use for 360)')
+group.add_argument('--redense', action='store_true', default=False)
 args = parser.parse_args()
 config_util.maybe_merge_config_file(args)
 
@@ -274,7 +280,7 @@
 grid = svox2.SparseGrid(reso=reso_list[reso_id],
                         center=dset.scene_center,
                         radius=dset.scene_radius,
-                        use_sphere_bound=dset.use_sphere_bound,
+                        use_sphere_bound=dset.use_sphere_bound and not args.nosphereinit,
                         basis_dim=args.sh_dim,
                         use_z_order=True,
                         device=device,
@@ -389,8 +395,8 @@ def eval_step():
                                    dset_test.intrins.get('fy', img_id),
                                    dset_test.intrins.get('cx', img_id),
                                    dset_test.intrins.get('cy', img_id),
-                                   width=dset_test.get_image_size(i)[1],
-                                   height=dset_test.get_image_size(i)[0],
+                                   width=dset_test.get_image_size(img_id)[1],
+                                   height=dset_test.get_image_size(img_id)[0],
                                    ndc_coeffs=dset_test.ndc_coeffs)
                 rgb_pred_test = grid.volume_render_image(cam, use_kernel=True)
                 rgb_gt_test = dset_test.gt[img_id].to(device=device)
@@ -471,10 +477,7 @@ def train_step():
             batch_end = min(batch_begin + args.batch_size, epoch_size)
             batch_origins = dset.rays.origins[batch_begin: batch_end]
             batch_dirs = dset.rays.dirs[batch_begin: batch_end]
-            #  batch_dirs.normal_() # FIXME
-            #  batch_dirs /= batch_dirs.norm(dim=-1).unsqueeze(-1) # FIXME
             rgb_gt = dset.rays.gt[batch_begin: batch_end]
-            #  rgb_gt[:] = 0 # FIXME
             rays = svox2.Rays(batch_origins, batch_dirs)
 
             #  with Timing("volrend_fused"):
@@ -509,8 +512,7 @@ def train_step():
                 #          tv_sh = grid.tv_color()
                 #      summary_writer.add_scalar("loss_tv_sh", tv_sh, global_step=gstep_id)
                 #  with torch.no_grad():
-                #      tv_basis = grid.tv_basis()
-                #  summary_writer.add_scalar("loss_tv_basis", tv_basis, global_step=gstep_id)
+                #      tv_basis = grid.tv_basis() #  summary_writer.add_scalar("loss_tv_basis", tv_basis, global_step=gstep_id)
                 summary_writer.add_scalar("lr_sh", lr_sh, global_step=gstep_id)
                 summary_writer.add_scalar("lr_sigma", lr_sigma, global_step=gstep_id)
                 if grid.basis_type == svox2.BASIS_TYPE_3D_TEXTURE:
@@ -596,6 +598,9 @@ def train_step():
         print('Saving', ckpt_path)
         grid.save(ckpt_path)
 
+    if args.redense and epoch_id == 1:
+        grid.density_data.data[:] = args.init_sigma
+
     if (gstep_id_base - last_upsamp_step) >= args.upsamp_every:
         last_upsamp_step = gstep_id_base
         if reso_id < len(reso_list) - 1:
@@ -604,6 +609,10 @@ def train_step():
                 print('turning off TV regularization')
                 args.lambda_tv = 0.0
                 args.lambda_tv_sh = 0.0
+            elif args.tv_decay != 1.0:
+                args.lambda_tv *= args.tv_decay
+                args.lambda_tv_sh *= args.tv_decay
+
             reso_id += 1
             use_sparsify = True
             z_reso = reso_list[reso_id] if isinstance(reso_list[reso_id], int) else reso_list[reso_id][2]

diff --git a/opt/render_imgs.py b/opt/render_imgs.py
@@ -50,10 +50,6 @@
                     help="FPS of video")
 
 # Camera adjustment
-parser.add_argument('--near_clip',
-                    type=float,
-                    default=0.0,
-                    help="Near clip of poses (in voxels)")
 parser.add_argument('--crop',
                     type=float,
                     default=1.0,
@@ -103,8 +99,6 @@
     want_metrics = False
 
 # Handle various image transforms
-if args.near_clip != 0:
-    render_dir += f'_nclip{args.near_clip}'
 if not args.render_path:
     # Do not crop if not render_path
     args.crop = 1.0
@@ -155,8 +149,6 @@
     avg_lpips = 0.0
     n_images_gen = 0
     c2ws = dset.render_c2w.to(device=device) if args.render_path else dset.c2w.to(device=device)
-    if args.near_clip != 0.0:
-        grid.opt.near_clip = args.near_clip
     # DEBUGGING
     #  rad = [1.496031746031746, 1.6613756613756614, 1.0]
     #  half_sz = [grid.links.size(0) // 2, grid.links.size(1) // 2]
@@ -246,6 +238,6 @@
                     f.write(str(avg_lpips))
     if not args.no_vid and len(frames):
         vid_path = render_dir + '.mp4'
-        imageio.mimwrite(vid_path, frames, fps=args.fps)  # pip install imageio-ffmpeg
+        imageio.mimwrite(vid_path, frames, fps=args.fps, macro_block_size=8)  # pip install imageio-ffmpeg
 
 
diff --git a/opt/scripts/colmap2nsvf.py b/opt/scripts/colmap2nsvf.py
@@ -279,6 +279,14 @@ def main():
     base_dir = osp.dirname(osp.dirname(args.sparse_dir))
     pose_dir = osp.join(base_dir, "pose_colmap" if args.colmap_suffix else "pose")
     feat_dir = osp.join(base_dir, "feature")
+    base_scale_file = osp.join(base_dir, "base_scale.txt")
+    if osp.exists(base_scale_file):
+        with open(base_scale_file, 'r') as f:
+            base_scale = float(f.read())
+        print('base_scale', base_scale)
+    else:
+        base_scale = 1.0
+        print('base_scale defaulted to', base_scale)
     print("BASE_DIR", base_dir)
     print("POSE_DIR", pose_dir)
     print("FEATURE_DIR", feat_dir)
@@ -307,10 +315,10 @@ def create_or_recreate_dir(dirname):
 
     print("Get intrinsics")
     K = np.eye(4)
-    K[0, 0] = cameras[0].params[0]
-    K[1, 1] = cameras[0].params[0]
-    K[0, 2] = cameras[0].params[1]
-    K[1, 2] = cameras[0].params[2]
+    K[0, 0] = cameras[0].params[0] / base_scale
+    K[1, 1] = cameras[0].params[0] / base_scale
+    K[0, 2] = cameras[0].params[1] / base_scale
+    K[1, 2] = cameras[0].params[2] / base_scale
     print("f", K[0, 0], "c", K[0:2, 2])
     np.savetxt(osp.join(base_dir, "intrinsics_colmap.txt" if args.colmap_suffix else "intrinsics.txt"), K)
     del K

diff --git a/opt/scripts/create_split.py b/opt/scripts/create_split.py
@@ -15,7 +15,7 @@
 
 parser = argparse.ArgumentParser("Automatic dataset splitting")
 parser.add_argument('root_dir', type=str, help="COLMAP dataset root dir")
-parser.add_argument('--every', type=int, default=8, help="Every x images used for testing")
+parser.add_argument('--every', type=int, default=16, help="Every x images used for testing")
 parser.add_argument('--dry_run', action='store_true', help="Dry run, prints renames without modifying any files")
 parser.add_argument('--yes', '-y', action='store_true', help="Answer yes")
 parser.add_argument('--random', action='store_true', help="If set, chooses the split randomly rather than at a fixed interval "

diff --git a/opt/scripts/proc_colmap.sh b/opt/scripts/proc_colmap.sh
@@ -2,6 +2,6 @@
 
 # USAGE: bash proc_colmap.sh <dir of images>
 
-python run_colmap.py $1
+python run_colmap.py $1 ${@:2}
 python colmap2nsvf.py $1/sparse/0
 python create_split.py -y $1
diff --git a/opt/scripts/run_colmap.py b/opt/scripts/run_colmap.py
@@ -164,11 +164,12 @@ def resize_frames(vid_root, args):
         glob.glob(os.path.join(vid_root, args.image_input, '*.png')))
 
     print('Resizing images ...')
+    factor = 1.0
     for file_ind, file in enumerate(tqdm(files, desc=f'imresize: {vid_name}')):
         out_frame_fn = f'{frames_dir}/{file_ind:05}.png'
 
         # skip if both the output frame and the mask exist
-        if os.path.exists(out_frame_fn) and not args.overwrite:
+        if os.path.exists(out_frame_fn) and not overwrite:
             continue
 
         im = cv2.imread(file)
@@ -180,8 +181,9 @@ def resize_frames(vid_root, args):
             im = cv2.resize(src=im, dsize=dsize, interpolation=cv2.INTER_AREA)
 
         cv2.imwrite(out_frame_fn, im)
+    return factor
 
-def run_colmap(vid_root, args, overwrite=False):
+def run_colmap(vid_root, args, factor, overwrite=False):
     max_num_matches = 132768
     overlap_frames = 75  # only used with sequential matching
 
@@ -191,12 +193,15 @@ def run_colmap(vid_root, args, overwrite=False):
         colmap feature_extractor \
             --database_path={vid_root}/database.db \
             --image_path={vid_root}/{args.images_resized}\
-            --ImageReader.camera_model=SIMPLE_RADIAL \
             --ImageReader.single_camera=1 \
-            --ImageReader.default_focal_length_factor=0.95 \
+            --ImageReader.default_focal_length_factor=0.69388 \
             --SiftExtraction.peak_threshold=0.004 \
             --SiftExtraction.max_num_features=8192 \
             --SiftExtraction.edge_threshold=16'''
+    if args.noradial:
+        extractor_cmd += ' --ImageReader.camera_model=SIMPLE_PINHOLE'
+    else:
+        extractor_cmd += ' --ImageReader.camera_model=SIMPLE_RADIAL'
     if args.use_masks:
         extractor_cmd += ' --ImageReader.mask_path={vid_root}/masks'
     known_intrin = False
@@ -206,10 +211,13 @@ def run_colmap(vid_root, args, overwrite=False):
             known_intrin = True
             print('Using known intrinsics')
             intrins = np.loadtxt(intrin_path)
-            focal = (intrins[0, 0] + intrins[1, 1]) * 0.5
-            cx, cy = intrins[0, 2], intrins[1, 2]
+            focal = (intrins[0, 0] + intrins[1, 1]) * 0.5 / factor
+            cx, cy = intrins[0, 2] / factor, intrins[1, 2] / factor
             # f cx cy
-            extractor_cmd += f' --ImageReader.camera_params "{focal:.10f},{cx:.10f},{cy:.10f}"'
+            if args.noradial:
+                extractor_cmd += f' --ImageReader.camera_params "{focal:.10f},{cx:.10f},{cy:.10f}"'
+            else:
+                extractor_cmd += f' --ImageReader.camera_params "{focal:.10f},{cx:.10f},{cy:.10f},0.0"'
         else:
             print('--known-intrin given but intrinsics.txt does not exist in data')
     os.system(extractor_cmd)
@@ -250,15 +258,16 @@ def run_colmap(vid_root, args, overwrite=False):
 
     os.system(mapper_cmd)
 
-    undist_dir = os.path.join(vid_root, args.undistorted_output)
-    if not os.path.exists(undist_dir) or args.overwrite:
-        os.makedirs(undist_dir, exist_ok=True)
-        os.system(f'''
-            colmap image_undistorter \
-                --input_path={vid_root}/sparse/0 \
-                --image_path={vid_root}/{args.images_resized} \
-                --output_path={vid_root} \
-                --output_type=COLMAP''')
+    if not args.noradial:
+        undist_dir = os.path.join(vid_root, args.undistorted_output)
+        if not os.path.exists(undist_dir) or overwrite:
+            os.makedirs(undist_dir, exist_ok=True)
+            os.system(f'''
+                colmap image_undistorter \
+                    --input_path={vid_root}/sparse/0 \
+                    --image_path={vid_root}/{args.images_resized} \
+                    --output_path={vid_root} \
+                    --output_type=COLMAP''')
 
 
 def render_movie(vid_root, args):
@@ -356,13 +365,13 @@ def preprocess(vid_root, args):
                 os.rename(src_path, os.path.join(frames_dir, fname))
 
     overwrite = True
-    if not args.debug_only:
-        resize_frames(vid_root, args)
-        # colmap
-        if args.use_masks:
-            generate_masks(vid_root, args, overwrite=overwrite)
-        run_colmap(vid_root, args, overwrite=overwrite)
-    render_movie(vid_root, args)
+    factor = resize_frames(vid_root, args)
+    # colmap
+    if args.use_masks:
+        generate_masks(vid_root, args, overwrite=overwrite)
+    run_colmap(vid_root, args, factor, overwrite=overwrite)
+    if args.debug:
+        render_movie(vid_root, args)
 
 
 if __name__ == '__main__':
@@ -377,18 +386,21 @@ def preprocess(vid_root, args):
     parser.add_argument('--mask-output', default='masks', help='location to store motion masks')
     parser.add_argument('--known-intrin', action='store_true', default=False, help='use intrinsics in <root>/intrinsics.txt if available')
     parser.add_argument('--fix-intrin', action='store_true', default=False, help='fix intrinsics in bundle adjustment, only used if --known-intrin is given and intrinsics.txt exists')
-    parser.add_argument('--debug-only', action='store_true', default=False, help='only render debug video')
+    parser.add_argument('--debug', action='store_true', default=False, help='render debug video')
+    parser.add_argument('--noradial', action='store_true', default=False, help='do not use radial distortion')
     parser.add_argument('--use-masks', action='store_true', default=False, help='use automatic masks')
     parser.add_argument(
                     '--images-resized', default='images_resized', help='location for resized/renamed images')
     parser.add_argument(
         '--do-sequential', action='store_true', default=False, help='sequential rather than exhaustive matching')
     parser.add_argument('--max-width', type=int, default=1280, help='max image width')
-    parser.add_argument('--max-height', type=int, default=720, help='max image height')
+    parser.add_argument('--max-height', type=int, default=768, help='max image height')
     parser.add_argument(
             '--undistorted-output', default='images', help='location of undistorted images')
 
     args = parser.parse_args()
+    if args.noradial:
+        args.images_resized = args.undistorted_output
 
     from vendor import read_write_model