Skip to content

Commit

Permalink
[SPARSE] Add support for rocSPARSE backend (#544)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rbiessy authored Dec 18, 2024
1 parent def5402 commit 044479d
Show file tree
Hide file tree
Showing 38 changed files with 2,998 additions and 422 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interf

# sparse
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
option(ENABLE_ROCSPARSE_BACKEND "Enable the rocSPARSE backend for the SPARSE_BLAS interface" OFF)

set(ONEMATH_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
Expand Down Expand Up @@ -106,7 +107,8 @@ if(ENABLE_MKLGPU_BACKEND
endif()
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUSPARSE_BACKEND)
OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCSPARSE_BACKEND)
list(APPEND DOMAINS_LIST "sparse_blas")
endif()

Expand Down Expand Up @@ -134,7 +136,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
if(UNIX)
Expand Down
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
</thead>
<tbody>
<tr>
<td rowspan=13 align="center">oneMath</td>
<td rowspan=13 align="center">oneMath selector</td>
<td rowspan=14 align="center">oneMath</td>
<td rowspan=14 align="center">oneMath selector</td>
<td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
<td align="center">x86 CPU, Intel GPU</td>
</tr>
Expand Down Expand Up @@ -61,7 +61,11 @@ oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
<td align="center">AMD GPU</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT">AMD rocFFT</a></td>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT"> AMD rocFFT</a></td>
<td align="center">AMD GPU</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocSPARSE"> AMD rocSPARSE</a></td>
<td align="center">AMD GPU</td>
</tr>
<tr>
Expand Down Expand Up @@ -333,7 +337,7 @@ Supported compilers include:
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td rowspan=3 align="center">SPARSE_BLAS</td>
<td rowspan=4 align="center">SPARSE_BLAS</td>
<td align="center">x86 CPU</td>
<td align="center">Intel(R) oneMKL</td>
<td align="center">Intel DPC++</td>
Expand All @@ -351,6 +355,12 @@ Supported compilers include:
<td align="center">Open DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td align="center">AMD GPU</td>
<td align="center">AMD rocSPARSE</td>
<td align="center">Open DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
</tbody>
</table>

Expand Down Expand Up @@ -537,6 +547,7 @@ Product | Supported Version | License
[AMD rocRAND](https://github.com/ROCm/rocRAND) | 5.1.0 | [AMD License](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt)
[AMD rocSOLVER](https://github.com/ROCm/rocSOLVER) | 5.0.0 | [AMD License](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md)
[AMD rocFFT](https://github.com/ROCm/rocFFT) | rocm-5.4.3 | [AMD License](https://github.com/ROCm/rocFFT/blob/rocm-5.4.3/LICENSE.md)
[AMD rocSPARSE](https://github.com/ROCm/rocSPARSE) | 3.1.2 | [AMD License](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md)
[NETLIB LAPACK](https://www.netlib.org/) | [5d4180c](https://github.com/Reference-LAPACK/lapack/commit/5d4180cf8288ae6ad9a771d18793d15bd0c5643c) | [BSD like license](http://www.netlib.org/lapack/LICENSE.txt)
[portBLAS](https://github.com/codeplaysoftware/portBLAS) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portBLAS/blob/main/LICENSE)
[portFFT](https://github.com/codeplaysoftware/portFFT) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portFFT/blob/main/LICENSE)
Expand Down
6 changes: 3 additions & 3 deletions cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ if(is_dpcpp)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda)
elseif(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND
OR ENABLE_ROCSOLVER_BACKEND)
OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda
-Xsycl-target-backend --offload-arch=${HIP_TARGETS})
Expand All @@ -52,7 +52,7 @@ if(is_dpcpp)
--offload-arch=${HIP_TARGETS})
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
set_target_properties(ONEMATH::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"
Expand All @@ -69,7 +69,7 @@ if(is_dpcpp)
INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
endif()

if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
# Allow find_package(HIP) to find the correct path to libclang_rt.builtins.a
# HIP's CMake uses the command `${HIP_CXX_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt` to find this path.
# This can print a non-existing file if the compiler used is icpx.
Expand Down
20 changes: 12 additions & 8 deletions docs/building_the_project_with_dpcpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ The most important supported build options are:
* - ENABLE_ROCRAND_BACKEND
- True, False
- False
* - ENABLE_ROCSPARSE_BACKEND
- True, False
- False
* - ENABLE_MKLCPU_THREAD_TBB
- True, False
- True
Expand Down Expand Up @@ -197,14 +200,14 @@ Building for ROCm
^^^^^^^^^^^^^^^^^

The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``,
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND`` and
``ENABLE_ROCRAND_BACKEND``.
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND``,
``ENABLE_ROCRAND_BACKEND``, and ``ENABLE_ROCSPARSE_BACKEND``.

For *RocBLAS*, *RocSOLVER* and *RocRAND*, the target device architecture must be
set. This can be set with using the ``HIP_TARGETS`` parameter. For example, to
enable a build for MI200 series GPUs, ``-DHIP_TARGETS=gfx90a`` should be set.
Currently, DPC++ can only build for a single HIP target at a time. This may
change in future versions.
For *RocBLAS*, *RocSOLVER*, *RocRAND*, and *RocSPARSE*, the target device
architecture must be set. This can be set with using the ``HIP_TARGETS``
parameter. For example, to enable a build for MI200 series GPUs,
``-DHIP_TARGETS=gfx90a`` should be set. Currently, DPC++ can only build for a
single HIP target at a time. This may change in future versions.

A few often-used architectures are listed below:

Expand Down Expand Up @@ -393,7 +396,8 @@ disabled:
-DENABLE_MKLGPU_BACKEND=False \
-DENABLE_ROCFFT_BACKEND=True \
-DENABLE_ROCBLAS_BACKEND=True \
-DENABLE_ROCSOLVER_BACKEND=True \
-DENABLE_ROCSOLVER_BACKEND=True \
-DENABLE_ROCSPARSE_BACKEND=True \
-DHIP_TARGETS=gfx90a \
-DBUILD_FUNCTIONAL_TESTS=False
Expand Down
46 changes: 46 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,31 @@ Currently known limitations:
``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.


rocSPARSE backend
----------------

Currently known limitations:

- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
throw a ``oneapi::math::unimplemented`` exception.
- The COO format requires the indices to be sorted by row then by column. See
the `rocSPARSE COO documentation
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#coo-storage-format>`_.
Sparse operations using matrices with the COO format without the property
``matrix_property::sorted`` will throw a ``oneapi::math::unimplemented``
exception.
- The CSR format requires the column indices to be sorted within each row. See
the `rocSPARSE CSR documentation
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format>`_.
Sparse operations using matrices with the CSR format without the property
``matrix_property::sorted`` will throw a ``oneapi::math::unimplemented``
exception.
- The same sparse matrix handle cannot be reused for multiple operations
``spmm``, ``spmv``, or ``spsv``. Doing so will throw a
``oneapi::math::unimplemented`` exception. See `#332
<https://github.com/ROCm/rocSPARSE/issues/332>`_.


Operation algorithms mapping
----------------------------

Expand All @@ -89,33 +114,43 @@ spmm
* - ``spmm_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
- ``rocsparse_spmm_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
- ``rocsparse_spmm_alg_default``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMM_COO_ALG1``
- ``rocsparse_spmm_alg_coo_segmented``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMM_COO_ALG2``
- ``rocsparse_spmm_alg_coo_atomic``
* - ``coo_alg3``
- none
- ``CUSPARSE_SPMM_COO_ALG3``
- ``rocsparse_spmm_alg_coo_segmented_atomic``
* - ``coo_alg4``
- none
- ``CUSPARSE_SPMM_COO_ALG4``
- ``rocsparse_spmm_alg_default``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMM_CSR_ALG1``
- ``rocsparse_spmm_alg_csr``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMM_CSR_ALG2``
- ``rocsparse_spmm_alg_csr_row_split``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMM_CSR_ALG3``
- ``rocsparse_spmm_alg_csr_merge``


spmv
Expand All @@ -128,27 +163,35 @@ spmv
* - ``spmv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_default``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMV_COO_ALG1``
- ``rocsparse_spmv_alg_coo``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMV_COO_ALG2``
- ``rocsparse_spmv_alg_coo_atomic``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMV_CSR_ALG1``
- ``rocsparse_spmv_alg_csr_adaptive``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMV_CSR_ALG2``
- ``rocsparse_spmv_alg_csr_stream``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_csr_lrb``


spsv
Expand All @@ -161,9 +204,12 @@ spsv
* - ``spsv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
- ``rocsparse_spsv_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
- ``rocsparse_spsv_alg_default``
3 changes: 3 additions & 0 deletions examples/sparse_blas/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ endif()
if(ENABLE_CUSPARSE_BACKEND)
list(APPEND DEVICE_FILTERS "cuda:gpu")
endif()
if(ENABLE_ROCSPARSE_BACKEND)
list(APPEND DEVICE_FILTERS "hip:gpu")
endif()

message(STATUS "ONEAPI_DEVICE_SELECTOR will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) {
oneapi::math::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
oneapi::math::index_base::zero, ia, ja, a);

// rocSPARSE backend requires that the property sorted is set when using matrices in CSR format.
// Setting this property is also the best practice to get best performance.
oneapi::math::sparse::set_matrix_property(main_queue, A_handle,
oneapi::math::sparse::matrix_property::sorted);

// Create and initialize dense vector handles
oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions include/oneapi/math/detail/backends.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ enum class backend {
rocfft,
portfft,
cusparse,
rocsparse,
unsupported
};

Expand All @@ -63,6 +64,7 @@ static backendmap backend_map = { { backend::mklcpu, "mklcpu" },
{ backend::rocfft, "rocfft" },
{ backend::portfft, "portfft" },
{ backend::cusparse, "cusparse" },
{ backend::rocsparse, "rocsparse" },
{ backend::unsupported, "unsupported" } };
// clang-format on

Expand Down
6 changes: 6 additions & 0 deletions include/oneapi/math/detail/backends_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
{
#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
LIB_NAME("sparse_blas_cusparse")
#endif
} },
{ device::amdgpu,
{
#ifdef ONEMATH_ENABLE_ROCSPARSE_BACKEND
LIB_NAME("sparse_blas_rocsparse")
#endif
} } } },
};
Expand Down
3 changes: 3 additions & 0 deletions include/oneapi/math/sparse_blas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
#include "sparse_blas/detail/cusparse/sparse_blas_ct.hpp"
#endif
#ifdef ONEMATH_ENABLE_ROCSPARSE_BACKEND
#include "sparse_blas/detail/rocsparse/sparse_blas_ct.hpp"
#endif

#include "sparse_blas/detail/sparse_blas_rt.hpp"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/***************************************************************************
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/

#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_
#define _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_

#include "oneapi/math/detail/export.hpp"
#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
#include "oneapi/math/sparse_blas/types.hpp"

namespace oneapi::math::sparse::rocsparse {

#include "oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx"

} // namespace oneapi::math::sparse::rocsparse

#endif // _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_
Loading

0 comments on commit 044479d

Please sign in to comment.