Skip to content

Commit

Permalink
[Reland] Upgrade NVTX to NVTX3 (pytorch#97582)
Browse files Browse the repository at this point in the history
PR pytorch#90689 replaces NVTX with NVTX3. However, the torch::nvtoolsext target is created only when the third-party NVTX is used.
 This is clearly a logical error. We now move the creation code out of that branch so that the target is created in all cases. This should fix the issues reported in the comments of pytorch#90689.

It would be better to move the configurations of the failed FRL jobs into the CI tests so that such issues are caught before merging.
Pull Request resolved: pytorch#97582
Approved by: https://github.com/peterbell10
  • Loading branch information
cyyever authored and pytorchmergebot committed Aug 14, 2023
1 parent 461c703 commit 5bbfb96
Show file tree
Hide file tree
Showing 11 changed files with 25 additions and 26 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,6 @@
[submodule "third_party/mimalloc"]
path = third_party/mimalloc
url = https://github.com/microsoft/mimalloc.git
[submodule "third_party/NVTX"]
path = third_party/NVTX
url = https://github.com/NVIDIA/NVTX.git
5 changes: 3 additions & 2 deletions caffe2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1516,7 +1516,8 @@ if(USE_CUDA)
target_link_libraries(torch_cpu PRIVATE torch::cudart)
endif()
target_link_libraries(torch_cuda INTERFACE torch::cudart)
target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext)
target_link_libraries(torch_cuda PUBLIC c10_cuda)
target_link_libraries(torch_cuda PRIVATE torch::nvtoolsext)

target_include_directories(
torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
Expand Down Expand Up @@ -1571,7 +1572,7 @@ if(BUILD_SHARED_LIBS)
# not find them, because they're usually in non-standard locations)
if(USE_CUDA)
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext)
target_link_libraries(torch_global_deps torch::cudart)
endif()
if(USE_TBB)
target_link_libraries(torch_global_deps TBB::tbb)
Expand Down
14 changes: 1 addition & 13 deletions cmake/TorchConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -129,30 +129,18 @@ endif()

if(@USE_CUDA@)
if(MSVC)
if(NOT NVTOOLEXT_HOME)
set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt")
endif()
if(DEFINED ENV{NVTOOLSEXT_PATH})
set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH})
endif()
set(TORCH_CUDA_LIBRARIES
${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib
${CUDA_LIBRARIES})
list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include)
set(TORCH_CUDA_LIBRARIES ${CUDA_LIBRARIES})
find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
elseif(APPLE)
set(TORCH_CUDA_LIBRARIES
${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib
${CUDA_LIBRARIES})
else()
find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
set(TORCH_CUDA_LIBRARIES
${CUDA_CUDA_LIB}
${CUDA_NVRTC_LIB}
${LIBNVTOOLSEXT}
${CUDA_LIBRARIES})
endif()
if(@BUILD_SHARED_LIBS@)
Expand Down
11 changes: 4 additions & 7 deletions cmake/public/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION STREQUAL CUDAToolkit_VERSION OR
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'")
endif()

if(NOT TARGET CUDA::nvToolsExt)
message(FATAL_ERROR "Failed to find nvToolsExt")
endif()

message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
Expand Down Expand Up @@ -216,9 +212,10 @@ endif()

# nvToolsExt
add_library(torch::nvtoolsext INTERFACE IMPORTED)
set_property(
TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::nvToolsExt)
find_path(nvtx3_dir NAMES nvtx3 PATHS "${CUDA_INCLUDE_DIRS}" "${CMAKE_CURRENT_LIST_DIR}/../../third_party/NVTX/c/include" NO_DEFAULT_PATH)
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
target_include_directories(torch::nvtoolsext INTERFACE "${nvtx3_dir}")


# cublas
add_library(caffe2::cublas INTERFACE IMPORTED)
Expand Down
3 changes: 0 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,6 @@
# NVFUSER_SOURCE_DIR
# specify nvfuser root directory
#
# NVTOOLSEXT_PATH (Windows only)
# specify where nvtoolsext is installed
#
# ACL_ROOT_DIR
# specify where Compute Library is installed
#
Expand Down
1 change: 1 addition & 0 deletions third_party/NVTX
Submodule NVTX added at e17059
4 changes: 4 additions & 0 deletions third_party/nvfuser/csrc/instrumentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

#include <utils.h>

#ifndef FBCODE_CAFFE2
#include <nvtx3/nvToolsExt.h>
#else
#include <nvToolsExt.h>
#endif

// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <stdio.h>
Expand Down
1 change: 0 additions & 1 deletion torch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ if(USE_CUDA)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
endif()

list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
endif()

Expand Down
4 changes: 4 additions & 0 deletions torch/csrc/cuda/shared/nvtx.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#ifdef _WIN32
#include <wchar.h> // _wgetenv for nvtx
#endif
#ifndef FBCODE_CAFFE2
#include <nvtx3/nvToolsExt.h>
#else
#include <nvToolsExt.h>
#endif
#include <torch/csrc/utils/pybind.h>

namespace torch::cuda::shared {
Expand Down
4 changes: 4 additions & 0 deletions torch/csrc/profiler/stubs/cuda.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#include <sstream>

#ifndef FBCODE_CAFFE2
#include <nvtx3/nvToolsExt.h>
#else
#include <nvToolsExt.h>
#endif

#include <c10/cuda/CUDAGuard.h>
#include <c10/util/irange.h>
Expand Down
1 change: 1 addition & 0 deletions torch/utils/hipify/cuda_to_hip_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,7 @@
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
("nvtx3/nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)),
]
)
Expand Down

0 comments on commit 5bbfb96

Please sign in to comment.