Skip to content

Commit

Permalink
[FRONTEND] Enable triton to support register thirdparty backend at ru…
Browse files Browse the repository at this point in the history
…ntime (triton-lang#1643)

This PR provides a mechanism for supporting a third-party backend at
runtime, so that the backend can generate its own backend-specific code.

The mechanism provides a common class that abstracts the third-party
backend logic, plus two essential functions to register and retrieve a
third-party backend at runtime.

- `BaseBackend`: A common class to abstract the third-party backend
logic
- `register_backend`: Register a third-party backend with a given device
type
- `get_backend`: Get the third-party backend with a given device type

Generally, a third-party backend must inherit from `BaseBackend` and
implement all of its member functions according to the backend's
characteristics. Once the backend implementation is ready, the
third-party backend can call `register_backend` to register itself for a
given device type. During kernel compilation and execution, the mechanism
looks up the registered backend to generate the kernel and launcher code
for that device.

This PR adds a dummy backend that simulates a third-party backend and
demonstrates the usage.

-
[test_device_backend.py](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1):
To define a third-party backend and register the backend
-
[ExtensionBackend](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R123):
Inherit from the `BaseBackend` and implement some specific logic like
[filter out some compile
stages](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R129-R135)
- [Register the `ExtensionBackend` for
`CPU`](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R279)
  
-
[extension_backend.c](https://github.com/openai/triton/pull/1643/files#diff-169c1d08b3a0a7b343cfa3258fbc32b47e0f6c46305a112652fa1bdaaec89d29):
To provide the utility function to load kernel binary and get the
backend properties.
  • Loading branch information
EikanWang authored Jun 9, 2023
1 parent fdf2bd5 commit b27a91a
Show file tree
Hide file tree
Showing 13 changed files with 645 additions and 31 deletions.
48 changes: 43 additions & 5 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
id: set-matrix
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix::[["self-hosted", "A100"], ["self-hosted", "V100"], ["self-hosted", "gfx908"]]'
echo '::set-output name=matrix::[["self-hosted", "A100"], ["self-hosted", "V100"], ["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
else
echo '::set-output name=matrix::["ubuntu-latest"]'
fi
Expand Down Expand Up @@ -53,6 +53,11 @@ jobs:
run: |
echo "BACKEND=ROCM" >> "${GITHUB_ENV}"
- name: Set XPU ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'arc770')}}
run: |
echo "BACKEND=XPU" >> "${GITHUB_ENV}"
- name: Clear cache
run: |
rm -rf ~/.triton
Expand All @@ -62,13 +67,22 @@ jobs:
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Check pre-commit
if: ${{ matrix.runner != 'macos-10.15' }}
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] != 'arc770') }}
run: |
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files
- name: Check pre-commit arc770
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] == 'arc770') }}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files
- name: Install Triton
if: ${{ env.BACKEND != 'ROCM'}}
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
python3 -m pip install --upgrade pip
Expand All @@ -84,8 +98,23 @@ jobs:
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Install Triton on XPU
if: ${{ env.BACKEND == 'XPU'}}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
git submodule update --init --recursive
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
export TRITON_CODEGEN_INTEL_XPU_BACKEND=1
python3 -m pip uninstall -y triton
python3 setup.py build
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run lit tests
if: ${{ env.BACKEND != 'ROCM'}}
if: ${{ env.BACKEND == 'CUDA'}}
run: |
python3 -m pip install lit
cd python
Expand Down Expand Up @@ -115,7 +144,7 @@ jobs:
path: ~/.triton/artifacts.tar.gz

- name: Run CXX unittests
if: ${{ env.BACKEND != 'ROCM'}}
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
Expand All @@ -127,6 +156,15 @@ jobs:
cd python/test/unit/language
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py::test_empty_kernel"
- name: Run python tests on XPU
if: ${{ env.BACKEND == 'XPU'}}
run: |
source ${HOME}/triton_vars.sh
source ${HOME}/miniconda3/bin/activate
conda activate triton-xpu-ci
cd python/test/backend/third_party_backends
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu
- name: Regression tests
if: ${{ contains(matrix.runner, 'A100') }}
run: |
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "third_party/intel_xpu_backend"]
path = third_party/intel_xpu_backend
url = http://github.com/intel/intel-xpu-backend-for-triton
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ endif()
# Options
option(TRITON_BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
set(TRITON_CODEGEN_BACKENDS "" CACHE STRING "Enable different codegen backends")

# Ensure Python3 vars are set correctly
# used conditionally in this file and by lit tests
Expand Down Expand Up @@ -263,6 +264,14 @@ if(TRITON_BUILD_PYTHON_MODULE AND NOT WIN32)
target_link_libraries(triton ${CUTLASS_LIBRARIES} ${PYTHON_LDFLAGS})
endif()

list(LENGTH TRITON_CODEGEN_BACKENDS CODEGEN_BACKENDS_LEN)
if (${CODEGEN_BACKENDS_LEN} GREATER 0)
set(PYTHON_THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/python/triton/third_party)
foreach(CODEGEN_BACKEND ${TRITON_CODEGEN_BACKENDS})
add_subdirectory(third_party/${CODEGEN_BACKEND})
endforeach()
endif()

add_subdirectory(test)

add_subdirectory(unittest)
20 changes: 17 additions & 3 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ def get_build_type():
# TODO: change to release when stable enough
return "TritonRelBuildWithAsserts"


def get_codegen_backends():
    """Collect the codegen backends requested through the environment.

    Every environment variable whose name starts with ``TRITON_CODEGEN_``
    and for which ``check_env_flag(name)`` is truthy contributes one entry:
    the variable name with the prefix removed, lower-cased.

    Returns:
        list[str]: backend names, in the iteration order of ``os.environ``.

    Raises:
        AssertionError: if the prefix occurs more than once in a selected
            variable name (``str.replace`` would strip every occurrence).
    """
    prefix = "TRITON_CODEGEN_"
    selected = []
    for env_name in os.environ.keys():
        # Skip variables that are unrelated or not switched on.
        if not (env_name.startswith(prefix) and check_env_flag(env_name)):
            continue
        assert env_name.count(prefix) <= 1
        selected.append(env_name.replace(prefix, '').lower())
    return selected


# --- third party packages -----


Expand Down Expand Up @@ -210,6 +221,11 @@ def build_extension(self, ext):
cfg = get_build_type()
build_args = ["--config", cfg]

codegen_backends = get_codegen_backends()
if len(codegen_backends) > 0:
all_codegen_backends = ';'.join(codegen_backends)
cmake_args += ["-DTRITON_CODEGEN_BACKENDS=" + all_codegen_backends]

if platform.system() == "Windows":
cmake_args += [f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
if sys.maxsize > 2**32:
Expand Down Expand Up @@ -256,9 +272,7 @@ def build_extension(self, ext):
"triton/ops/blocksparse",
"triton/runtime",
"triton/runtime/backends",
"triton/third_party/cuda/bin",
"triton/third_party/cuda/include",
"triton/third_party/cuda/lib",
"triton/third_party",
"triton/tools",
],
install_requires=[
Expand Down
42 changes: 42 additions & 0 deletions python/test/backend/extension_backend.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Python-callable: get_device_properties(...)
 *
 * Returns a new dict describing the dummy extension device. The values are
 * hard-coded constants; nothing is queried from real hardware and the
 * arguments are ignored.
 *
 * Returns NULL (with a Python exception set) if the dict cannot be built.
 */
static PyObject *getDeviceProperties(PyObject *self, PyObject *args) {
  const int max_shared_mem = 1024;
  const int multiprocessor_count = 16;
  const int sm_clock_rate = 2100;
  const int mem_clock_rate = 2300;
  const int mem_bus_width = 2400;

  return Py_BuildValue("{s:i, s:i, s:i, s:i, s:i}",
                       "max_shared_mem", max_shared_mem,
                       "multiprocessor_count", multiprocessor_count,
                       "sm_clock_rate", sm_clock_rate,
                       "mem_clock_rate", mem_clock_rate,
                       "mem_bus_width", mem_bus_width);
}

/*
 * Python-callable: load_binary(...)
 *
 * Dummy implementation: no binary is actually loaded and the arguments are
 * ignored. Returns a new 4-tuple (mod, fun, n_regs, n_spills) in which every
 * element is 0.
 *
 * Returns NULL (with a Python exception set) if the tuple cannot be built.
 */
static PyObject *loadBinary(PyObject *self, PyObject *args) {
  (void)self;
  (void)args;

  // The "K" format unit reads an `unsigned long long` from the variadic
  // argument list, so the values are declared with exactly that type rather
  // than being cast to uint64_t (which may be `unsigned long`).
  const unsigned long long mod = 0;
  const unsigned long long fun = 0;
  const int n_regs = 0;
  const int n_spills = 0;

  return Py_BuildValue("(KKii)", mod, fun, n_regs, n_spills);
}

static PyMethodDef ModuleMethods[] = {
{"load_binary", loadBinary, METH_VARARGS,
"Load dummy binary for the extension device"},
{"get_device_properties", getDeviceProperties, METH_VARARGS,
"Get the properties for the extension device"},
{NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {PyModuleDef_HEAD_INIT, "ext_utils",
NULL, // documentation
-1, // size
ModuleMethods};

/*
 * Module initialisation entry point for `import ext_utils`.
 *
 * Returns the new module object, or NULL (with a Python exception set) if
 * creation fails.
 */
PyMODINIT_FUNC PyInit_ext_utils(void) {
  // PyModule_Create() already installs every function listed in
  // ModuleDef.m_methods, so no separate PyModule_AddFunctions() call is
  // needed (the previous one was redundant and its result went unchecked).
  return PyModule_Create(&ModuleDef);
}
Loading

0 comments on commit b27a91a

Please sign in to comment.