24#ifndef MORPHEUS_COO_CUDA_MULTIPLY_IMPL_HPP
25#define MORPHEUS_COO_CUDA_MULTIPLY_IMPL_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_CUDA)
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
35#include <impl/Morpheus_CudaUtils.hpp>
36#include <impl/Coo/Kernels/Morpheus_Multiply_Impl.hpp>
42template <
typename Matrix,
typename Vector>
43void __spmv_coo_flat(
const Matrix& A,
const Vector& x, Vector& y,
46template <
typename Matrix,
typename Vector>
47void __spmv_coo_serial(
const Matrix& A,
const Vector& x, Vector& y,
50template <
typename ExecSpace,
typename Matrix,
typename Vector>
52 const Matrix& A,
const Vector& x, Vector& y,
const bool init,
53 typename std::enable_if_t<
54 Morpheus::is_coo_matrix_format_container_v<Matrix> &&
55 Morpheus::is_dense_vector_format_container_v<Vector> &&
56 Morpheus::has_custom_backend_v<ExecSpace> &&
57 Morpheus::has_cuda_execution_space_v<ExecSpace> &&
58 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* =
nullptr) {
59 switch (A.options()) {
60 case MATOPT_SHORT_ROWS: __spmv_coo_serial(A, x, y, init);
break;
61 default: __spmv_coo_flat(A, x, y, init);
65template <
typename Matrix,
typename Vector>
66void __spmv_coo_serial(
const Matrix& A,
const Vector& x, Vector& y,
68 using size_type =
typename Matrix::size_type;
69 using index_type =
typename Matrix::index_type;
70 using value_type =
typename Matrix::value_type;
71 const index_type* I = A.crow_indices().data();
72 const index_type* J = A.ccolumn_indices().data();
73 const value_type* V = A.cvalues().data();
75 const value_type* x_ptr = x.data();
76 value_type* y_ptr = y.data();
79 y.assign(y.size(), 0);
82 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
83 <<<1, 1>>>(A.nnnz(), I, J, V, x_ptr, y_ptr);
85#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
86 getLastCudaError(
"spmv_coo_serial_kernel: Kernel execution failed");
104template <
typename Matrix,
typename Vector>
105void __spmv_coo_flat(
const Matrix& A,
const Vector& x, Vector& y,
107 using size_type =
typename Matrix::size_type;
108 using index_type =
typename Matrix::index_type;
109 using value_type =
typename Matrix::value_type;
112 y.assign(y.size(), 0);
118 }
else if (A.nnnz() <
static_cast<size_type
>(WARP_SIZE)) {
120 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
121 <<<1, 1, 0>>>(A.nnnz(), A.crow_indices().data(),
122 A.ccolumn_indices().data(), A.cvalues().data(), x.data(),
124#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
125 getLastCudaError(
"spmv_coo_serial_kernel: Kernel execution failed");
130 const size_type BLOCK_SIZE = 256;
131 const size_type MAX_BLOCKS =
132 max_active_blocks(Kernels::spmv_coo_flat_kernel<size_type, index_type,
133 value_type, BLOCK_SIZE>,
135 const size_type WARPS_PER_BLOCK = BLOCK_SIZE / WARP_SIZE;
137 const size_type num_units = A.nnnz() / WARP_SIZE;
138 const size_type num_warps = std::min(num_units, WARPS_PER_BLOCK * MAX_BLOCKS);
139 const size_type num_blocks =
140 Impl::ceil_div<size_type>(num_warps, WARPS_PER_BLOCK);
141 const size_type num_iters = Impl::ceil_div<size_type>(num_units, num_warps);
143 const size_type interval_size = WARP_SIZE * num_iters;
145 const size_type tail =
146 num_units * WARP_SIZE;
149 const size_type active_warps =
150 (interval_size == 0) ? 0 : Impl::ceil_div<size_type>(tail, interval_size);
152 typename Matrix::index_array_type temp_rows(active_warps, 0);
153 typename Matrix::value_array_type temp_vals(active_warps, 0);
155 Kernels::spmv_coo_flat_kernel<size_type, index_type, value_type, BLOCK_SIZE>
156 <<<num_blocks, BLOCK_SIZE, 0>>>(
157 tail, interval_size, A.crow_indices().data(),
158 A.ccolumn_indices().data(), A.cvalues().data(), x.data(), y.data(),
159 temp_rows.data(), temp_vals.data());
160#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
161 getLastCudaError(
"spmv_coo_flat_kernel: Kernel execution failed");
164 Kernels::spmv_coo_reduce_update_kernel<size_type, index_type, value_type,
165 BLOCK_SIZE><<<1, BLOCK_SIZE, 0>>>(
166 active_warps, temp_rows.data(), temp_vals.data(), y.data());
167#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
168 getLastCudaError(
"spmv_coo_reduce_kernel: Kernel execution failed");
171 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
172 <<<1, 1, 0>>>(A.nnnz() - tail, A.crow_indices().data() + tail,
173 A.ccolumn_indices().data() + tail,
174 A.cvalues().data() + tail, x.data(), y.data());
175#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
176 getLastCudaError(
"spmv_coo_serial_kernel: Kernel execution failed");
Generic Morpheus interfaces.
Definition: dummy.cpp:24