Skip to content

Commit

Permalink
Somewhat optimized backward (still bad)
Browse files Browse the repository at this point in the history
  • Loading branch information
sxyu committed Oct 18, 2021
1 parent 1158913 commit bd8d9b7
Show file tree
Hide file tree
Showing 13 changed files with 297 additions and 592 deletions.
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
CUDAExtension('svox2.csrc', [
'svox2/csrc/svox2.cpp',
'svox2/csrc/svox2_kernel.cu',
'svox2/csrc/render_lerp_kernel_multistage.cu',
'svox2/csrc/render_lerp_kernel_cuvol.cu',
'svox2/csrc/misc_kernel.cu',
], include_dirs=[osp.join(ROOT_DIR, "svox2", "csrc", "include"),],
Expand Down
1 change: 1 addition & 0 deletions svox2/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ endif (POLICY CMP0072)

project( svox2 )

set(CMAKE_CXX_STANDARD 14)
enable_language(CUDA)
message(STATUS "CUDA enabled")
set( CMAKE_CUDA_STANDARD 14 )
Expand Down
3 changes: 0 additions & 3 deletions svox2/csrc/include/cuda_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,3 @@ __device__ __inline__ void transform_coord(float* __restrict__ point,

// Subtract and fused multiply-add
#define lerp(a, b, w) fmaf(w, b - a, a)

#define _EXP(x) __expf(x)
#define _SIGMOID(x) (1 / (1 + _EXP(-(x))))
26 changes: 16 additions & 10 deletions svox2/csrc/include/data_spec_packed.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,23 @@
#pragma once
#include <torch/extension.h>
#include "data_spec.hpp"
#include "cuda_util.cuh"

namespace {
namespace device {

struct PackedSparseGridSpec {
PackedSparseGridSpec(SparseGridSpec& spec)
: density_data(spec.density_data.packed_accessor32<float, 2, torch::RestrictPtrTraits>()),
sh_data(spec.sh_data.packed_accessor64<float, 2, torch::RestrictPtrTraits>()),
links(spec.links.packed_accessor32<int32_t, 3, torch::RestrictPtrTraits>()),
basis_dim(spec.basis_dim),
_offset{spec._offset.data<float>()[0],
spec._offset.data<float>()[1],
spec._offset.data<float>()[2]},
_scaling{spec._scaling.data<float>()[0],
spec._scaling.data<float>()[1],
spec._scaling.data<float>()[2]} {
_offset{spec._offset.data_ptr<float>()[0],
spec._offset.data_ptr<float>()[1],
spec._offset.data_ptr<float>()[2]},
_scaling{spec._scaling.data_ptr<float>()[0],
spec._scaling.data_ptr<float>()[1],
spec._scaling.data_ptr<float>()[2]} {
}

torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> density_data;
Expand Down Expand Up @@ -50,17 +54,19 @@ struct SingleRaySpec {
SingleRaySpec() = default;
__device__ SingleRaySpec(const float* __restrict__ origin, const float* __restrict__ dir)
: origin{origin[0], origin[1], origin[2]},
dir{dir[0], dir[1], dir[2]},
vdir(dir) {}
dir{dir[0], dir[1], dir[2]} {}
__device__ void set(const float* __restrict__ origin, const float* __restrict__ dir) {
vdir = dir;
#pragma unroll 3
for (int i = 0; i < 3; ++i) {
this->origin[i] = origin[i];
this->dir[i] = dir[i];
}
}

float origin[3];
float dir[3];
const float* __restrict__ vdir;
float tmin, tmax, world_step;
};

} // namespace device
} // namespace
60 changes: 49 additions & 11 deletions svox2/csrc/include/render_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,16 @@ __device__ __inline__ void calc_sh(
const float xx = x * x, yy = y * y, zz = z * z;
const float xy = x * y, yz = y * z, xz = x * z;
switch (basis_dim) {
case 16:
out[9] = C3[0] * y * (3 * xx - yy);
out[10] = C3[1] * xy * z;
out[11] = C3[2] * y * (4 * zz - xx - yy);
out[12] = C3[3] * z * (2 * zz - 3 * xx - 3 * yy);
out[13] = C3[4] * x * (4 * zz - xx - yy);
out[14] = C3[5] * z * (xx - yy);
out[15] = C3[6] * x * (xx - 3 * yy);
[[fallthrough]];
// 16 not supported rn due to warp size
// case 16:
// out[9] = C3[0] * y * (3 * xx - yy);
// out[10] = C3[1] * xy * z;
// out[11] = C3[2] * y * (4 * zz - xx - yy);
// out[12] = C3[3] * z * (2 * zz - 3 * xx - 3 * yy);
// out[13] = C3[4] * x * (4 * zz - xx - yy);
// out[14] = C3[5] * z * (xx - yy);
// out[15] = C3[6] * x * (xx - 3 * yy);
// [[fallthrough]];
case 9:
out[4] = C2[0] * xy;
out[5] = C2[1] * yz;
Expand All @@ -72,9 +73,23 @@ __device__ __inline__ void calc_sh(
}
}

__host__ __device__ __inline__ static float _norm(
enum SphFuncType {
SPHFUNC_TYPE_SH = 0,
};

__device__ __inline__ void calc_sphfunc(
const int sphfunc_type, // Placeholder
const int basis_dim,
const float* __restrict__ dir,
float* __restrict__ out) {
// Placeholder
return calc_sh(basis_dim, dir, out);
}

__device__ __inline__ static float _norm(
float* dir) {
return sqrtf(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
// return sqrtf(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
return norm3df(dir[0], dir[1], dir[2]);
}

__device__ __inline__ float _intersect_aabb_unit(
Expand Down Expand Up @@ -121,5 +136,28 @@ __device__ __inline__ void cam2world_ray(
origin[0] = cam.c2w[0][3]; origin[1] = cam.c2w[1][3]; origin[2] = cam.c2w[2][3];
}

__device__ __inline__ void ray_find_bounds(
SingleRaySpec& __restrict__ ray,
const PackedSparseGridSpec& __restrict__ grid,
const RenderOptions& __restrict__ opt) {
// Warning: modifies ray.origin
transform_coord(ray.origin, grid._scaling, grid._offset);
// Warning: modifies ray.dir
ray.world_step = _get_delta_scale(grid._scaling, ray.dir) * opt.step_size;

ray.tmin = 0.0f;
ray.tmax = 1e9f;
#pragma unroll 3
for (int i = 0; i < 3; ++i) {
const float invdir = 1.0 / ray.dir[i];
const float t1 = (- ray.origin[i]) * invdir;
const float t2 = (grid.links.size(i) - 1.f - ray.origin[i]) * invdir;
if (ray.dir[i] != 0.f) {
ray.tmin = max(ray.tmin, min(t1, t2));
ray.tmax = min(ray.tmax, max(t1, t2));
}
}
}

} // namespace device
} // namespace
8 changes: 6 additions & 2 deletions svox2/csrc/include/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
CHECK_CPU(x); \
CHECK_CONTIGUOUS(x)

#if !defined(__CUDA_ARCH__)
#if defined(__CUDACC__)
#define _EXP(x) __expf(x)
#define _SIGMOID(x) (1 / (1 + _EXP(-(x))))

#else

#define _EXP(x) expf(x)
#define _SIGMOID(x) (1 / (1 + expf(-(x))))
// CUDA version is in cuda_util.cuh
#endif
#define _SQR(x) ((x) * (x))
10 changes: 5 additions & 5 deletions svox2/csrc/misc_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ torch::Tensor tv(torch::Tensor links, torch::Tensor data,
1.f / nl,
Q,
// Output
result.data<float>());
result.data_ptr<float>());
CUDA_CHECK_ERRORS;
return result;
}
Expand Down Expand Up @@ -387,7 +387,7 @@ void tv_grad(torch::Tensor links, torch::Tensor data,
scale / nl,
Q,
// Output
grad_data.data<float>());
grad_data.data_ptr<float>());
CUDA_CHECK_ERRORS;
}

Expand Down Expand Up @@ -419,7 +419,7 @@ void tv_aniso_grad(torch::Tensor links, torch::Tensor data,
scale / nl,
Q,
// Output
grad_data.data<float>());
grad_data.data_ptr<float>());
CUDA_CHECK_ERRORS;
}

Expand All @@ -442,8 +442,8 @@ void grid_weight_render(
data.packed_accessor32<float, 3, torch::RestrictPtrTraits>(),
cam,
step_size,
offset.data<float>(),
scaling.data<float>(),
offset.data_ptr<float>(),
scaling.data_ptr<float>(),
grid_weight_out.packed_accessor32<float, 3, torch::RestrictPtrTraits>());
CUDA_CHECK_ERRORS;
}
Expand Down
Loading

0 comments on commit bd8d9b7

Please sign in to comment.