24#ifndef MORPHEUS_DIA_CUDA_MULTIPLY_IMPL_HPP
25#define MORPHEUS_DIA_CUDA_MULTIPLY_IMPL_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_CUDA)
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
35#include <impl/Morpheus_CudaUtils.hpp>
36#include <impl/Dia/Kernels/Morpheus_Multiply_Impl.hpp>
41template <
typename ExecSpace,
typename Matrix,
typename Vector>
43 const Matrix& A,
const Vector& x, Vector& y,
const bool init,
44 typename std::enable_if_t<
45 Morpheus::is_dia_matrix_format_container_v<Matrix> &&
46 Morpheus::is_dense_vector_format_container_v<Vector> &&
47 Morpheus::has_custom_backend_v<ExecSpace> &&
48 Morpheus::has_cuda_execution_space_v<ExecSpace> &&
49 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* =
nullptr) {
50 using index_type =
typename Matrix::index_type;
51 using size_type =
typename Matrix::size_type;
52 using value_type =
typename Matrix::value_type;
54 const size_type BLOCK_SIZE = 256;
55 const size_type MAX_BLOCKS = max_active_blocks(
56 Kernels::spmv_dia_kernel<size_type, index_type, value_type, BLOCK_SIZE>,
57 BLOCK_SIZE, (size_type)
sizeof(index_type) * BLOCK_SIZE);
58 const size_type NUM_BLOCKS =
59 std::min<size_type>(MAX_BLOCKS, DIVIDE_INTO(A.nrows(), BLOCK_SIZE));
61 const index_type* D = A.cdiagonal_offsets().data();
62 const value_type* V = A.cvalues().data();
63 const value_type* x_ptr = x.data();
64 value_type* y_ptr = y.data();
66 const index_type num_diagonals = A.cvalues().ncols();
67 const index_type pitch = A.cvalues().nrows();
69 if (num_diagonals == 0) {
75 y.assign(y.size(), 0);
78 Kernels::spmv_dia_kernel<size_type, index_type, value_type, BLOCK_SIZE>
79 <<<NUM_BLOCKS, BLOCK_SIZE, 0>>>(A.nrows(), A.ncols(), num_diagonals,
80 pitch, D, V, x_ptr, y_ptr);
82#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
83 getLastCudaError(
"spmv_dia_kernel: Kernel execution failed");
Generic Morpheus interfaces.
Definition: dummy.cpp:24