Skip to content

Commit

Permalink
Merge pull request halide#376 from halide/gpu_device_selection
Browse files Browse the repository at this point in the history
Gpu device selection
  • Loading branch information
dsharletg committed Jul 15, 2014
2 parents fc80d3d + f1bbc6c commit 58ba5da
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ SOURCES = $(SOURCE_FILES:%.cpp=src/%.cpp)
OBJECTS = $(SOURCE_FILES:%.cpp=$(BUILD_DIR)/%.o)
HEADERS = $(HEADER_FILES:%.h=src/%.h)

RUNTIME_CPP_COMPONENTS = android_io cuda fake_thread_pool gcd_thread_pool ios_io android_clock linux_clock nogpu opencl posix_allocator posix_clock osx_clock windows_clock posix_error_handler posix_io nacl_io osx_io posix_math posix_thread_pool android_host_cpu_count linux_host_cpu_count osx_host_cpu_count tracing write_debug_image cuda_debug opencl_debug windows_cuda windows_cuda_debug windows_opencl windows_opencl_debug windows_io windows_thread_pool ssp opengl opengl_debug linux_opengl_context osx_opengl_context android_opengl_context posix_print
RUNTIME_CPP_COMPONENTS = android_io cuda fake_thread_pool gcd_thread_pool ios_io android_clock linux_clock nogpu opencl posix_allocator posix_clock osx_clock windows_clock posix_error_handler posix_io nacl_io osx_io posix_math posix_thread_pool android_host_cpu_count linux_host_cpu_count osx_host_cpu_count tracing write_debug_image cuda_debug opencl_debug windows_cuda windows_cuda_debug windows_opencl windows_opencl_debug windows_io windows_thread_pool ssp opengl opengl_debug linux_opengl_context osx_opengl_context android_opengl_context posix_print gpu_device_selection
RUNTIME_LL_COMPONENTS = arm posix_math ptx_dev x86_avx x86 x86_sse41 pnacl_math win32_math aarch64

INITIAL_MODULES = $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_32.o) $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_64.o) $(RUNTIME_LL_COMPONENTS:%=$(BUILD_DIR)/initmod.%_ll.o) $(PTX_DEVICE_INITIAL_MODULES:libdevice.%.bc=$(BUILD_DIR)/initmod_ptx.%_ll.o)
Expand Down
2 changes: 1 addition & 1 deletion apps/HelloAndroid/jni/native.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ JNIEXPORT void JNICALL Java_com_example_hellohalide_CameraPreview_processFrame(
uint8_t *dst = (uint8_t *)buf.bits;

// If we're using opencl, use the gpu backend for it.
setenv("HL_OCL_DEVICE", "gpu", 1);
halide_set_ocl_device_type("gpu");

// Make these static so that we can reuse device allocations across frames.
static buffer_t srcBuf = {0};
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ set(RUNTIME_CPP
nogpu
opencl
opengl
gpu_device_selection
osx_opengl_context
linux_opengl_context
android_opengl_context
Expand Down
5 changes: 5 additions & 0 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ DECLARE_CPP_INITMOD(windows_thread_pool)
DECLARE_CPP_INITMOD(tracing)
DECLARE_CPP_INITMOD(write_debug_image)
DECLARE_CPP_INITMOD(posix_print)
DECLARE_CPP_INITMOD(gpu_device_selection)

#ifdef WITH_ARM
DECLARE_LL_INITMOD(arm)
Expand Down Expand Up @@ -491,6 +492,9 @@ void link_modules(std::vector<llvm::Module *> &modules) {
"halide_opengl_create_context",
"halide_set_custom_print",
"halide_print",
"halide_set_gpu_device",
"halide_set_ocl_platform_name",
"halide_set_ocl_device_type",
"__stack_chk_guard",
"__stack_chk_fail",
""};
Expand Down Expand Up @@ -625,6 +629,7 @@ llvm::Module *get_initial_module_for_target(Target t, llvm::LLVMContext *c) {
}

// These modules are always used
modules.push_back(get_initmod_gpu_device_selection(c, bits_64));
modules.push_back(get_initmod_posix_math(c, bits_64));
modules.push_back(get_initmod_tracing(c, bits_64));
modules.push_back(get_initmod_write_debug_image(c, bits_64));
Expand Down
41 changes: 41 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,47 @@ extern int halide_dev_run(void *user_context,
void *args[]);
// @}

/** Set the platform name for OpenCL to use (e.g. "Intel" or
* "NVIDIA"). The argument is copied internally. The opencl runtime
* will select a platform that includes this as a substring. If never
* called, Halide uses the environment variable HL_OCL_PLATFORM_NAME,
* or defaults to the first available platform. */
extern void halide_set_ocl_platform_name(const char *n);

/** Halide calls this to get the desired OpenCL platform
* name. Implement this yourself to use a different platform per
* user_context. The default implementation returns the value set by
* halide_set_ocl_platform_name, or the value of the environment
* variable HL_OCL_PLATFORM_NAME. The output is valid until the next
* call to halide_set_ocl_platform_name. */
extern const char *halide_get_ocl_platform_name(void *user_context);

/** Set the device type for OpenCL to use. The argument is copied
* internally. It must be "cpu" or "gpu". If never called, Halide uses
* the environment variable HL_OCL_DEVICE_TYPE. */
extern void halide_set_ocl_device_type(const char *n);

/** Halide calls this to get the desired OpenCL device
* type. Implement this yourself to use a different device type per
* user_context. The default implementation returns the value set by
* halide_set_ocl_device_type, or the environment variable
* HL_OCL_DEVICE_TYPE. The result is valid until the next call to
* halide_set_ocl_device_type. */
extern const char *halide_get_ocl_device_type(void *user_context);

/** Selects which gpu device to use. 0 is usually the display
* device. If never called, Halide uses the environment variable
* HL_GPU_DEVICE. If that variable is unset, Halide uses the last
* device. Set this to -1 to use the last device. */
extern void halide_set_gpu_device(int n);

/** Halide calls this to get the desired halide gpu device
* setting. Implement this yourself to use a different gpu device per
* user_context. The default implementation returns the value set by
* halide_set_gpu_device, or the environment variable
* HL_GPU_DEVICE. */
extern int halide_get_gpu_device(void *user_context);

#ifdef __cplusplus
} // End extern "C"
#endif
Expand Down
21 changes: 6 additions & 15 deletions src/runtime/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,23 +201,14 @@ static CUresult create_context(void *user_context, CUcontext *ctx) {
return CUDA_ERROR_NO_DEVICE;
}

char *device_str = getenv("HL_GPU_DEVICE");
int device = halide_get_gpu_device(user_context);
if (device == -1) {
device = deviceCount - 1;
}

CUdevice dev;
// Get device
CUresult status;
if (device_str) {
status = cuDeviceGet(&dev, atoi(device_str));
} else {
// Try to get a device >0 first, since 0 should be our display device
// For now, don't try devices > 2 to maintain compatibility with previous behavior.
if (deviceCount > 2)
deviceCount = 2;
for (int id = deviceCount - 1; id >= 0; id--) {
status = cuDeviceGet(&dev, id);
if (status == CUDA_SUCCESS) break;
}
}
CUdevice dev;
CUresult status = cuDeviceGet(&dev, device);
if (status != CUDA_SUCCESS) {
halide_error(user_context, "CUDA: Failed to get device\n");
return status;
Expand Down
74 changes: 74 additions & 0 deletions src/runtime/gpu_device_selection.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Runtime settings for opencl and cuda device selection
#include "HalideRuntime.h"
#include "scoped_spin_lock.h"

extern "C" {

extern char *strncpy(char *dst, const char *src, size_t n);
extern int atoi(const char *);
extern char *getenv(const char *);

// Storage for the OpenCL platform name. Written by
// halide_set_ocl_platform_name and handed back to callers by
// halide_get_ocl_platform_name.
WEAK char halide_ocl_platform_name[256];
// Lock word passed to ScopedSpinLock in halide_get_ocl_platform_name.
WEAK int halide_ocl_platform_name_lock = 0;
// Set to true by halide_set_ocl_platform_name; while false, the getter
// falls back to the HL_OCL_PLATFORM_NAME environment variable.
WEAK bool halide_ocl_platform_name_initialized = false;
// Records the OpenCL platform name to use.
//
// A non-NULL n is copied into halide_ocl_platform_name (at most 255
// characters); a NULL n stores the empty string instead. In both cases
// the name is then flagged as initialized, so the getter no longer
// consults the HL_OCL_PLATFORM_NAME environment variable.
WEAK void halide_set_ocl_platform_name(const char *n) {
    if (!n) {
        // No name supplied: store the empty string.
        halide_ocl_platform_name[0] = 0;
    } else {
        // Copy at most 255 bytes, so this call never writes the last
        // byte of the 256-byte buffer.
        strncpy(halide_ocl_platform_name, n, 255);
    }
    halide_ocl_platform_name_initialized = true;
}

// Returns the OpenCL platform name Halide should use.
//
// If no name has been set yet, the value of the HL_OCL_PLATFORM_NAME
// environment variable is installed via halide_set_ocl_platform_name
// first; when that variable is absent the stored name becomes the empty
// string. The returned pointer refers to the global
// halide_ocl_platform_name buffer. user_context is not used by this
// default implementation.
WEAK const char *halide_get_ocl_platform_name(void *user_context) {
    // Taken for the whole call; NOTE(review): presumably released when
    // `lock` goes out of scope -- confirm in scoped_spin_lock.h.
    ScopedSpinLock lock(&halide_ocl_platform_name_lock);
    if (halide_ocl_platform_name_initialized) {
        return halide_ocl_platform_name;
    }
    // First use without an explicit setting: fall back to the environment.
    halide_set_ocl_platform_name(getenv("HL_OCL_PLATFORM_NAME"));
    return halide_ocl_platform_name;
}

// Storage for the OpenCL device type string. Written by
// halide_set_ocl_device_type and handed back to callers by
// halide_get_ocl_device_type.
WEAK char halide_ocl_device_type[256];
// Lock word passed to ScopedSpinLock in halide_get_ocl_device_type.
WEAK int halide_ocl_device_type_lock = 0;
// Set to true by halide_set_ocl_device_type; while false, the getter
// falls back to the HL_OCL_DEVICE_TYPE environment variable.
WEAK bool halide_ocl_device_type_initialized = false;
// Records the OpenCL device type string to use.
//
// A non-NULL n is copied into halide_ocl_device_type (at most 255
// characters); a NULL n stores the empty string instead. In both cases
// the type is then flagged as initialized, so the getter no longer
// consults the HL_OCL_DEVICE_TYPE environment variable.
WEAK void halide_set_ocl_device_type(const char *n) {
    if (!n) {
        // No type supplied: store the empty string.
        halide_ocl_device_type[0] = 0;
    } else {
        // Copy at most 255 bytes, so this call never writes the last
        // byte of the 256-byte buffer.
        strncpy(halide_ocl_device_type, n, 255);
    }
    halide_ocl_device_type_initialized = true;
}

// Returns the OpenCL device type string Halide should use.
//
// If no type has been set yet, the value of the HL_OCL_DEVICE_TYPE
// environment variable is installed via halide_set_ocl_device_type
// first; when that variable is absent the stored type becomes the empty
// string. The returned pointer refers to the global
// halide_ocl_device_type buffer. user_context is not used by this
// default implementation.
WEAK const char *halide_get_ocl_device_type(void *user_context) {
    // Taken for the whole call; NOTE(review): presumably released when
    // `lock` goes out of scope -- confirm in scoped_spin_lock.h.
    ScopedSpinLock lock(&halide_ocl_device_type_lock);
    if (halide_ocl_device_type_initialized) {
        return halide_ocl_device_type;
    }
    // First use without an explicit setting: fall back to the environment.
    halide_set_ocl_device_type(getenv("HL_OCL_DEVICE_TYPE"));
    return halide_ocl_device_type;
}

// Selected GPU device index. Written by halide_set_gpu_device, or by
// halide_get_gpu_device on first use (from HL_GPU_DEVICE, else -1).
WEAK int halide_gpu_device = 0;
// Lock word passed to ScopedSpinLock in halide_get_gpu_device.
WEAK int halide_gpu_device_lock = 0;
// True once halide_gpu_device holds a value chosen by the setter or by
// the getter's environment lookup.
WEAK bool halide_gpu_device_initialized = false;
// Stores d as the GPU device index to use and marks the setting as
// initialized, so halide_get_gpu_device returns d instead of reading
// the HL_GPU_DEVICE environment variable.
WEAK void halide_set_gpu_device(int d) {
// Store the value before raising the flag that tells the getter it is valid.
halide_gpu_device = d;
halide_gpu_device_initialized = true;
}
// Returns the GPU device index Halide should use.
//
// If no index has been set yet, it is initialized on first use: the
// HL_GPU_DEVICE environment variable is parsed with atoi when present,
// otherwise the index becomes -1. user_context is not used by this
// default implementation.
WEAK int halide_get_gpu_device(void *user_context) {
    // Taken for the whole call; NOTE(review): presumably released when
    // `lock` goes out of scope -- confirm in scoped_spin_lock.h.
    ScopedSpinLock lock(&halide_gpu_device_lock);
    if (halide_gpu_device_initialized) {
        return halide_gpu_device;
    }
    // First use without an explicit setting: consult the environment.
    const char *env = getenv("HL_GPU_DEVICE");
    halide_gpu_device = env ? atoi(env) : -1;
    halide_gpu_device_initialized = true;
    return halide_gpu_device;
}

}
18 changes: 8 additions & 10 deletions src/runtime/opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ extern int64_t halide_current_time_ns(void *user_context);
extern void free(void *);
extern void *malloc(size_t);
extern int snprintf(char *, size_t, const char *, ...);
extern char *getenv(const char *);
extern const char * strstr(const char *, const char *);
extern int atoi(const char *);

Expand Down Expand Up @@ -216,7 +215,7 @@ static int create_context(void *user_context, cl_context *ctx, cl_command_queue
cl_platform_id platform = NULL;

// Find the requested platform, or the first if none specified.
const char * name = getenv("HL_OCL_PLATFORM_NAME");
const char * name = halide_get_ocl_platform_name(user_context);
if (name != NULL) {
for (cl_uint i = 0; i < platformCount; ++i) {
const cl_uint maxPlatformName = 256;
Expand Down Expand Up @@ -254,7 +253,7 @@ static int create_context(void *user_context, cl_context *ctx, cl_command_queue

// Get the types of devices requested.
cl_device_type device_type = 0;
const char * dev_type = getenv("HL_OCL_DEVICE_TYPE");
const char * dev_type = halide_get_ocl_device_type(user_context);
if (dev_type != NULL) {
if (strstr("cpu", dev_type)) {
device_type |= CL_DEVICE_TYPE_CPU;
Expand All @@ -281,15 +280,14 @@ static int create_context(void *user_context, cl_context *ctx, cl_command_queue

// If the user indicated a specific device index to use, use
// that. Note that this is an index within the set of devices
// specified by the device type.
char *device_str = getenv("HL_GPU_DEVICE");
cl_uint device = deviceCount - 1;
if (device_str) {
device = atoi(device_str);
// specified by the device type. -1 means the last device.
int device = halide_get_gpu_device(user_context);
if (device == -1) {
device = deviceCount - 1;
}

if (device >= deviceCount) {
halide_error_varargs(user_context, "CL: Failed to get device %i\n", device);
if (device < 0 || device >= deviceCount) {
halide_error_varargs(user_context, "CL: Failed to get device %d\n", device);
return CL_DEVICE_NOT_FOUND;
}

Expand Down

0 comments on commit 58ba5da

Please sign in to comment.