24#ifndef MORPHEUS_COO_HIP_MULTIPLY_IMPL_HPP
25#define MORPHEUS_COO_HIP_MULTIPLY_IMPL_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_HIP)
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
35#include <impl/Morpheus_HIPUtils.hpp>
36#include <impl/Coo/Kernels/Morpheus_Multiply_Impl.hpp>
42template <
typename Matrix,
typename Vector>
43void __spmv_coo_flat(
const Matrix& A,
const Vector& x, Vector& y,
46template <
typename Matrix,
typename Vector>
47void __spmv_coo_serial(
const Matrix& A,
const Vector& x, Vector& y,
50template <
typename ExecSpace,
typename Matrix,
typename Vector>
52 const Matrix& A,
const Vector& x, Vector& y,
const bool init,
53 typename std::enable_if_t<
54 Morpheus::is_coo_matrix_format_container_v<Matrix> &&
55 Morpheus::is_dense_vector_format_container_v<Vector> &&
56 Morpheus::has_custom_backend_v<ExecSpace> &&
57 Morpheus::has_hip_execution_space_v<ExecSpace> &&
58 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* =
nullptr) {
59 switch (A.options()) {
60 case MATOPT_SHORT_ROWS: __spmv_coo_serial(A, x, y, init);
break;
61 default: __spmv_coo_flat(A, x, y, init);
65template <
typename Matrix,
typename Vector>
66void __spmv_coo_serial(
const Matrix& A,
const Vector& x, Vector& y,
68 using size_type =
typename Matrix::size_type;
69 using index_type =
typename Matrix::index_type;
70 using value_type =
typename Matrix::value_type;
71 const index_type* I = A.crow_indices().data();
72 const index_type* J = A.ccolumn_indices().data();
73 const value_type* V = A.cvalues().data();
75 const value_type* x_ptr = x.data();
76 value_type* y_ptr = y.data();
79 y.assign(y.size(), 0);
82 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
83 <<<1, 1>>>(A.nnnz(), I, J, V, x_ptr, y_ptr);
85#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
86 getLastHIPError(
"spmv_coo_serial_kernel: Kernel execution failed");
104template <
typename Matrix,
typename Vector>
105void __spmv_coo_flat(
const Matrix& A,
const Vector& x, Vector& y,
107 using size_type =
typename Matrix::size_type;
108 using index_type =
typename Matrix::index_type;
109 using value_type =
typename Matrix::value_type;
112 y.assign(y.size(), 0);
118 }
else if (A.nnnz() <
static_cast<size_type
>(WARP_SIZE)) {
120 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
121 <<<1, 1, 0>>>(A.nnnz(), A.crow_indices().data(),
122 A.ccolumn_indices().data(), A.cvalues().data(), x.data(),
124#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
125 getLastHIPError(
"spmv_coo_serial_kernel: Kernel execution failed");
130 const size_type BLOCK_SIZE = 256;
131 const size_type MAX_BLOCKS = max_active_blocks(
132 Kernels::spmv_coo_flat_kernel<index_type, value_type, BLOCK_SIZE>,
134 const size_type WARPS_PER_BLOCK = BLOCK_SIZE / WARP_SIZE;
136 const size_type num_units = A.nnnz() / WARP_SIZE;
137 const size_type num_warps = std::min(num_units, WARPS_PER_BLOCK * MAX_BLOCKS);
138 const size_type num_blocks = DIVIDE_INTO(num_warps, WARPS_PER_BLOCK);
139 const size_type num_iters = DIVIDE_INTO(num_units, num_warps);
141 const size_type interval_size = WARP_SIZE * num_iters;
143 const size_type tail =
144 num_units * WARP_SIZE;
147 const size_type active_warps =
148 (interval_size == 0) ? 0 : DIVIDE_INTO(tail, interval_size);
150 typename Matrix::index_array_type temp_rows(active_warps, 0);
151 typename Matrix::value_array_type temp_vals(active_warps, 0);
153 Kernels::spmv_coo_flat_kernel<size_type, index_type, value_type, BLOCK_SIZE>
154 <<<num_blocks, BLOCK_SIZE, 0>>>(
155 tail, interval_size, A.crow_indices().data(),
156 A.ccolumn_indices().data(), A.cvalues().data(), x.data(), y.data(),
157 temp_rows.data(), temp_vals.data());
158#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
159 getLastHIPError(
"spmv_coo_flat_kernel: Kernel execution failed");
162 Kernels::spmv_coo_reduce_update_kernel<size_type, index_type, value_type,
163 BLOCK_SIZE><<<1, BLOCK_SIZE, 0>>>(
164 active_warps, temp_rows.data(), temp_vals.data(), y.data());
165#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
166 getLastHIPError(
"spmv_coo_reduce_kernel: Kernel execution failed");
169 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
170 <<<1, 1, 0>>>(A.nnnz() - tail, A.crow_indices().data() + tail,
171 A.ccolumn_indices().data() + tail,
172 A.cvalues().data() + tail, x.data(), y.data());
173#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
174 getLastHIPError(
"spmv_coo_serial_kernel: Kernel execution failed");
Generic Morpheus interfaces.
Definition: dummy.cpp:24