Merge branch 'master' of github.com:sxyu/svox2

zhangkaiitugithub · Dec 17, 2021 · 59984d6 · 59984d6
2 parents bf1bace + e0cfa0e
commit 59984d6
Show file tree

Hide file tree

Showing 11 changed files with 398 additions and 98 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2021, the Plenoxels authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ Citation:
 ```
 @misc{yu2021plenoxels,
       title={Plenoxels: Radiance Fields without Neural Networks}, 
-      author={Alex Yu and Sara Fridovich-Keil and Matthew Tancik and Qinhong Chen and Benjamin Recht and Angjoo Kanazawa},
+      author={{Alex Yu and Sara Fridovich-Keil} and Matthew Tancik and Qinhong Chen and Benjamin Recht and Angjoo Kanazawa},
       year={2021},
       eprint={2112.05131},
       archivePrefix={arXiv},
@@ -27,6 +27,7 @@ Citation:
 ```
 
 This contains the official optimization code.
+A JAX implementation is also available at <https://github.com/sarafridov/plenoxels>. However, note that the JAX version is currently feature-limited: it takes about 1 hour per epoch and only supports bounded scenes.
 
 ![Fast optimization](https://raw.githubusercontent.com/sxyu/svox2/master/github_img/fastopt.gif)
 
@@ -83,6 +84,8 @@ for forward-facing scenes, and
 The dataset format will be auto-detected from `data_dir`.
 Checkpoints will be in `ckpt/exp_name`.
 
+**For pretrained checkpoints please see:** https://drive.google.com/drive/folders/1SOEJDw8mot7kf5viUK9XryOAmZGe_vvE?usp=sharing
+
 ## Evaluation
 
 Use `opt/render_imgs.py`
@@ -124,12 +127,13 @@ For Tanks and Temples scenes
 python autotune.py -g '<space delimited GPU ids>' tasks/eval_tnt.json
 ```
 
-## Using a custom image set
+## Using a custom image set (360)
 
+Please take images all around the object and try to take images at different elevations.
 First make sure you have colmap installed. Then
 
-(in opt/)
-`bash scripts/proc_colmap.sh <img_dir>`
+(in opt/scripts)
+`bash proc_colmap.sh <img_dir>`
 
 Where `<img_dir>` should be a directory directly containing png/jpg images from a 
 normal perspective camera.
@@ -139,14 +143,21 @@ For custom datasets we adopt a data format similar to that in NSVF
 You should be able to use this dataset directly afterwards. The format will be auto-detected.
 
 To view the data use:
-`python scripts/view_data.py <img_dir>`
+`python view_data.py <img_dir>`
 
 This should launch a server at localhost:8889
 
 
-You may need to tune the TV. For forward-facing scenes, often making the TV weights 10x
-higher is helpful (`configs/llff_hitv.json`).
-For the real lego scene I used the config `configs/custom.json`.
+Now follow the "Voxel Optimization (aka Training)" section to train:
+
+`./launch.sh <exp_name> <GPU_id> <data_dir> -c configs/custom.json`
+
+You can also try `configs/custom_alt.json` which has some minor differences.
+You may need to tune the TV (total variation) regularization weights for best results.
+
+To render a video, please see the "rendering a spiral" section.
+To convert to an svox1-compatible PlenOctree (quality is not perfect, since interpolation is not implemented),
+you can try `to_svox1.py <ckpt>`.
 
 ## Random tip: how to make pip install faster for native extensions
 

diff --git a/environment.yml b/environment.yml
@@ -19,8 +19,11 @@ dependencies:
       - imageio
       - imageio-ffmpeg
       - pymcubes
+      - moviepy
+      - matplotlib
       - scipy>=1.6.0
   - pytorch
+  - torchvision
   - cudatoolkit
   - tqdm
 
diff --git a/opt/configs/custom_iphone.json → opt/configs/custom_alt.json b/opt/configs/custom_iphone.json → opt/configs/custom_alt.json
@@ -1,8 +1,6 @@
 {
     "reso": "[[128, 128, 128], [256, 256, 256], [512, 512, 512]]",
-    "scale": 1.0,
     "n_iters": 102400,
-    "cam_scale_factor": 1.5,
     "background_nlayers": 64,
     "background_reso": 1024,
     "cam_scale_factor": 0.95,

diff --git a/opt/opt.py b/opt/opt.py
@@ -156,6 +156,11 @@
                    default=0.1,
                    help='initialization sigma (for BG)')
 
+# Extra logging
+group.add_argument('--log_mse_image', action='store_true', default=False)
+group.add_argument('--log_depth_map', action='store_true', default=False)
+group.add_argument('--log_depth_map_use_thresh', type=float, default=None,
+        help="If specified, uses the Dex-neRF version of depth with given thresh; else returns expected term")
 
 
 group = parser.add_argument_group("misc experiments")
@@ -238,6 +243,7 @@
 
 group.add_argument('--nosphereinit', action='store_true', default=False,
                      help='do not start with sphere bounds (please do not use for 360)')
+
 args = parser.parse_args()
 config_util.maybe_merge_config_file(args)
 
@@ -405,9 +411,19 @@ def eval_step():
                     img_pred.clamp_max_(1.0)
                     summary_writer.add_image(f'test/image_{img_id:04d}',
                             img_pred, global_step=gstep_id_base, dataformats='HWC')
-                    mse_img = all_mses / all_mses.max()
-                    summary_writer.add_image(f'test/mse_map_{img_id:04d}',
-                            mse_img, global_step=gstep_id_base, dataformats='HWC')
+                    if args.log_mse_image:
+                        mse_img = all_mses / all_mses.max()
+                        summary_writer.add_image(f'test/mse_map_{img_id:04d}',
+                                mse_img, global_step=gstep_id_base, dataformats='HWC')
+                    if args.log_depth_map:
+                        depth_img = grid.volume_render_depth_image(cam,
+                                    args.log_depth_map_use_thresh if
+                                    args.log_depth_map_use_thresh else None
+                                )
+                        depth_img = viridis_cmap(depth_img.cpu())
+                        summary_writer.add_image(f'test/depth_map_{img_id:04d}',
+                                depth_img,
+                                global_step=gstep_id_base, dataformats='HWC')
 
                 rgb_pred_test = rgb_gt_test = None
                 mse_num : float = all_mses.mean().item()

diff --git a/opt/scripts/view_data.py b/opt/scripts/view_data.py
@@ -229,19 +229,12 @@ def look_for_dir(cands, required=True):
     image_files = sorted([x for x in os.listdir(images_dir) if x.lower().endswith('.png') or x.lower().endswith('.jpg')], key=sort_key)
 
     all_poses = []
-    pnum, seg_begin = None, 0
-    segs = []
     for i, pose_file in enumerate(pose_files):
         pose = np.loadtxt(path.join(pose_dir, pose_file)).reshape(4, 4)
-        splt = path.splitext(pose_file)[0].split('_')
-        num = int(splt[1] if len(splt) > 1 else splt[0])
-        if pnum is not None and num - pnum > 1 and seg_begin < num:
-            segs.append((seg_begin, num))
-            seg_begin = num
-        pnum = num
+        #  splt = path.splitext(pose_file)[0].split('_')
+        #  num = int(splt[1] if len(splt) > 1 else splt[0])
         all_poses.append(pose)
     all_poses = np.stack(all_poses)
-    segs.append((seg_begin, len(pose_files)))
 
     def get_transform(c2w):
         t = c2w[:, :3, 3]
@@ -323,18 +316,14 @@ def get_transform(c2w):
     center = origin - vforward * np.linalg.norm(t - origin, axis=-1).mean() * 0.7 * 3
     print('  camera center', center, 'vforward', vforward, 'world_up', world_up)
 
-    for i, seg in enumerate(segs):
-        print(seg)
-        print(R.shape, t.shape)
-        print(seg[0], seg[1])
-        scene.add_camera_frustum(name=f"traj_{i:04d}", focal_length=focal,
-                                 image_width=image_wh[0],
-                                 image_height=image_wh[1],
-                                 z=0.1,
-                                 r=R[seg[0]:seg[1]],
-                                 t=t[seg[0]:seg[1]],
-                                 connect=args.seg,
-                                 color=[1.0, 0.0, 0.0])
+    # All poses are drawn as one trajectory; the old per-segment index `i` no longer exists here.
+    scene.add_camera_frustum(name="traj_0000", focal_length=focal,
+                             image_width=image_wh[0], image_height=image_wh[1],
+                             z=0.1,
+                             r=R,
+                             t=t,
+                             connect=args.seg,
+                             color=[1.0, 0.0, 0.0])
 
     if pose_gt_dir is not None:
         print('Loading GT')

diff --git a/svox2/csrc/loss_kernel.cu b/svox2/csrc/loss_kernel.cu
@@ -61,6 +61,7 @@ void calculate_ray_scale(float ndc_coeffx,
             maxz, \
             out_name)
 
+__launch_bounds__(TV_GRAD_CUDA_THREADS, MIN_BLOCKS_PER_SM)
 __global__ void tv_kernel(
         torch::PackedTensorAccessor32<int32_t, 3, torch::RestrictPtrTraits> links,
         torch::PackedTensorAccessor64<float, 2, torch::RestrictPtrTraits> data,
@@ -497,10 +498,9 @@ torch::Tensor tv(torch::Tensor links, torch::Tensor data,
     int nl = (links.size(0) - 1) * (links.size(1) - 1) * (links.size(2) - 1);
     size_t Q = nl * size_t(end_dim - start_dim);
 
-    const int cuda_n_threads = 1024;
-    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, TV_GRAD_CUDA_THREADS);
     torch::Tensor result = torch::zeros({}, data.options());
-    device::tv_kernel<<<blocks, cuda_n_threads>>>(
+    device::tv_kernel<<<blocks, TV_GRAD_CUDA_THREADS>>>(
             links.packed_accessor32<int32_t, 3, torch::RestrictPtrTraits>(),
             data.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
             start_dim,

diff --git a/svox2/csrc/misc_kernel.cu b/svox2/csrc/misc_kernel.cu
@@ -11,9 +11,13 @@
 #include "cubemap_util.cuh"
 
 namespace {
+
+const int MISC_CUDA_THREADS = 256;
+const int MISC_MIN_BLOCKS_PER_SM = 4;
 namespace device {
 
 // Can also implement using convs.....
+__launch_bounds__(MISC_CUDA_THREADS, MISC_MIN_BLOCKS_PER_SM)
 __global__ void dilate_kernel(
         const torch::PackedTensorAccessor32<bool, 3, torch::RestrictPtrTraits> grid,
         // Output
@@ -49,6 +53,7 @@ __global__ void dilate_kernel(
 // ** Distance transforms
 // TODO: Maybe replace this with an euclidean distance transform eg PBA
 // Actual L-infty distance transform; turns out this is slower than the geometric way
+__launch_bounds__(MISC_CUDA_THREADS, MISC_MIN_BLOCKS_PER_SM)
 __global__ void accel_linf_dist_transform_kernel(
         torch::PackedTensorAccessor32<int32_t, 3, torch::RestrictPtrTraits> grid,
         int32_t* __restrict__ tmp,
@@ -101,6 +106,7 @@ __global__ void accel_linf_dist_transform_kernel(
 }
 
 // Geometric L-infty distance transform-ish thing
+__launch_bounds__(MISC_CUDA_THREADS, MISC_MIN_BLOCKS_PER_SM)
 __global__ void accel_dist_set_kernel(
         const torch::PackedTensorAccessor32<int32_t, 3, torch::RestrictPtrTraits> grid,
         bool* __restrict__ tmp) {
@@ -134,6 +140,7 @@ __global__ void accel_dist_set_kernel(
     }
 }
 
+__launch_bounds__(MISC_CUDA_THREADS, MISC_MIN_BLOCKS_PER_SM)
 __global__ void accel_dist_prop_kernel(
         torch::PackedTensorAccessor32<int32_t, 3, torch::RestrictPtrTraits> grid,
         const bool* __restrict__ tmp) {
@@ -299,6 +306,7 @@ __device__ __inline__ void grid_trace_ray(
 //             chnl_id);
 // }
 
+__launch_bounds__(MISC_CUDA_THREADS, MISC_MIN_BLOCKS_PER_SM)
 __global__ void grid_weight_render_kernel(
     const torch::PackedTensorAccessor32<float, 3, torch::RestrictPtrTraits>
         data,
@@ -336,10 +344,9 @@ torch::Tensor dilate(torch::Tensor grid) {
 
     int Q = grid.size(0) * grid.size(1) * grid.size(2);
 
-    const int cuda_n_threads = std::min<int>(Q, CUDA_MAX_THREADS);
-    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, MISC_CUDA_THREADS);
     torch::Tensor result = torch::empty_like(grid);
-    device::dilate_kernel<<<blocks, cuda_n_threads>>>(
+    device::dilate_kernel<<<blocks, MISC_CUDA_THREADS>>>(
             grid.packed_accessor32<bool, 3, torch::RestrictPtrTraits>(),
             // Output
             result.packed_accessor32<bool, 3, torch::RestrictPtrTraits>());
@@ -359,8 +366,7 @@ void accel_dist_prop(torch::Tensor grid) {
 
     int Q = grid.size(0) * grid.size(1) * grid.size(2);
 
-    const int cuda_n_threads = std::min<int>(Q, CUDA_MAX_THREADS);
-    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, MISC_CUDA_THREADS);
 
     int64_t req_size = 0;
     while (sz_x > 1 && sz_y > 1 && sz_z > 1) {
@@ -376,11 +382,11 @@ void accel_dist_prop(torch::Tensor grid) {
                   .device(grid.device())
                   .requires_grad(false);
     torch::Tensor tmp = torch::zeros({req_size}, tmp_options);
-    device::accel_dist_set_kernel<<<blocks, cuda_n_threads>>>(
+    device::accel_dist_set_kernel<<<blocks, MISC_CUDA_THREADS>>>(
             grid.packed_accessor32<int32_t, 3, torch::RestrictPtrTraits>(),
             tmp.data_ptr<bool>());
 
-    device::accel_dist_prop_kernel<<<blocks, cuda_n_threads>>>(
+    device::accel_dist_prop_kernel<<<blocks, MISC_CUDA_THREADS>>>(
             grid.packed_accessor32<int32_t, 3, torch::RestrictPtrTraits>(),
             tmp.data_ptr<bool>());
 
@@ -396,10 +402,9 @@ void accel_dist_prop(torch::Tensor grid) {
     //         int d1 = 3 - d0 - d2;
     //         int Q = grid.size(d0) * grid.size(d1);
     //
-    //         const int cuda_n_threads = std::min<int>(Q, CUDA_MAX_THREADS);
-    //         const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+    //         const int blocks = CUDA_N_BLOCKS_NEEDED(Q, MISC_CUDA_THREADS);
     //
-    //         device::accel_linf_dist_transform_kernel<<<blocks, cuda_n_threads>>>(
+    //         device::accel_linf_dist_transform_kernel<<<blocks, MISC_CUDA_THREADS>>>(
     //                 grid.packed_accessor32<int32_t, 3, torch::RestrictPtrTraits>(),
     //                 tmp,
     //                 d2);
@@ -425,10 +430,9 @@ void grid_weight_render(
     cam.check();
     const size_t Q = size_t(cam.width) * cam.height;
 
-    const int cuda_n_threads = 512;
-    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+    const int blocks = CUDA_N_BLOCKS_NEEDED(Q, MISC_CUDA_THREADS);
 
-    device::grid_weight_render_kernel<<<blocks, cuda_n_threads>>>(
+    device::grid_weight_render_kernel<<<blocks, MISC_CUDA_THREADS>>>(
         data.packed_accessor32<float, 3, torch::RestrictPtrTraits>(),
         cam,
         step_size,
@@ -455,10 +459,9 @@ void grid_weight_render(
 //     TORCH_CHECK(cubemap.size(1) == cubemap.size(2));
 //
 //     const size_t Q = size_t(dirs.size(0)) * cubemap.size(3);
-//     const int cuda_n_threads = 512;
-//     const int blocks = CUDA_N_BLOCKS_NEEDED(Q, cuda_n_threads);
+//     const int blocks = CUDA_N_BLOCKS_NEEDED(Q, MISC_CUDA_THREADS);
 //
-//     device::sample_cubemap_kernel<<<blocks, cuda_n_threads>>>(
+//     device::sample_cubemap_kernel<<<blocks, MISC_CUDA_THREADS>>>(
 //         cubemap.packed_accessor32<float, 4, torch::RestrictPtrTraits>(),
 //         dirs.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
 //         Q,