Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Dia/Cuda/Morpheus_Multiply_Impl.hpp
1
24#ifndef MORPHEUS_DIA_CUDA_MULTIPLY_IMPL_HPP
25#define MORPHEUS_DIA_CUDA_MULTIPLY_IMPL_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_CUDA)
29
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34
35#include <impl/Morpheus_CudaUtils.hpp>
36#include <impl/Dia/Kernels/Morpheus_Multiply_Impl.hpp>
37
38namespace Morpheus {
39namespace Impl {
40
41template <typename ExecSpace, typename Matrix, typename Vector>
42inline void multiply(
43 const Matrix& A, const Vector& x, Vector& y, const bool init,
44 typename std::enable_if_t<
45 Morpheus::is_dia_matrix_format_container_v<Matrix> &&
46 Morpheus::is_dense_vector_format_container_v<Vector> &&
47 Morpheus::has_custom_backend_v<ExecSpace> &&
48 Morpheus::has_cuda_execution_space_v<ExecSpace> &&
49 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
50 using index_type = typename Matrix::index_type;
51 using size_type = typename Matrix::size_type;
52 using value_type = typename Matrix::value_type;
53
54 const size_type BLOCK_SIZE = 256;
55 const size_type MAX_BLOCKS = max_active_blocks(
56 Kernels::spmv_dia_kernel<size_type, index_type, value_type, BLOCK_SIZE>,
57 BLOCK_SIZE, (size_type)sizeof(index_type) * BLOCK_SIZE);
58 const size_type NUM_BLOCKS =
59 std::min<size_type>(MAX_BLOCKS, DIVIDE_INTO(A.nrows(), BLOCK_SIZE));
60
61 const index_type* D = A.cdiagonal_offsets().data();
62 const value_type* V = A.cvalues().data();
63 const value_type* x_ptr = x.data();
64 value_type* y_ptr = y.data();
65
66 const index_type num_diagonals = A.cvalues().ncols();
67 const index_type pitch = A.cvalues().nrows();
68
69 if (num_diagonals == 0) {
70 // empty matrix
71 return;
72 }
73
74 if (init) {
75 y.assign(y.size(), 0);
76 }
77
78 Kernels::spmv_dia_kernel<size_type, index_type, value_type, BLOCK_SIZE>
79 <<<NUM_BLOCKS, BLOCK_SIZE, 0>>>(A.nrows(), A.ncols(), num_diagonals,
80 pitch, D, V, x_ptr, y_ptr);
81
82#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
83 getLastCudaError("spmv_dia_kernel: Kernel execution failed");
84#endif
85}
86
87} // namespace Impl
88} // namespace Morpheus
89
90#endif // MORPHEUS_ENABLE_CUDA
91#endif // MORPHEUS_DIA_CUDA_MULTIPLY_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24