Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Csr/HIP/Morpheus_Multiply_Impl.hpp
1
24#ifndef MORPHEUS_CSR_HIP_MULTIPLY_IMPL_HPP
25#define MORPHEUS_CSR_HIP_MULTIPLY_IMPL_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_HIP)
29
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34
35#include <impl/Morpheus_HIPUtils.hpp>
36#include <impl/Csr/Kernels/Morpheus_Multiply_Impl.hpp>
37
38namespace Morpheus {
39namespace Impl {
40
41// forward decl
42template <typename Matrix, typename Vector>
43void __spmv_csr_vector(const Matrix& A, const Vector& x, Vector& y,
44 const bool init);
45
46template <typename Matrix, typename Vector>
47void __spmv_csr_scalar(const Matrix& A, const Vector& x, Vector& y,
48 const bool init);
49
50template <typename ExecSpace, typename Matrix, typename Vector>
51inline void multiply(
52 const Matrix& A, const Vector& x, Vector& y, const bool init,
53 typename std::enable_if_t<
54 Morpheus::is_csr_matrix_format_container_v<Matrix> &&
55 Morpheus::is_dense_vector_format_container_v<Vector> &&
56 Morpheus::has_custom_backend_v<ExecSpace> &&
57 Morpheus::has_hip_execution_space_v<ExecSpace> &&
58 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
59 switch (A.options()) {
60 case MATOPT_SHORT_ROWS: __spmv_csr_scalar(A, x, y, init); break;
61 default: __spmv_csr_vector(A, x, y, init);
62 }
63}
64
65template <typename Matrix, typename Vector>
66void __spmv_csr_scalar(const Matrix& A, const Vector& x, Vector& y,
67 const bool init) {
68 using size_type = typename Matrix::size_type;
69 using index_type = typename Matrix::index_type;
70 using value_type = typename Matrix::value_type;
71
72 const size_type BLOCK_SIZE = 256;
73 const size_type NUM_BLOCKS = Impl::ceil_div<size_type>(A.nrows(), BLOCK_SIZE);
74
75 const index_type* I = A.crow_offsets().data();
76 const index_type* J = A.ccolumn_indices().data();
77 const value_type* V = A.cvalues().data();
78
79 const value_type* x_ptr = x.data();
80 value_type* y_ptr = y.data();
81
82 if (init) {
83 y.assign(y.size(), 0);
84 }
85
86 Morpheus::Impl::Kernels::spmv_csr_scalar_kernel<size_type, index_type,
87 value_type>
88 <<<NUM_BLOCKS, BLOCK_SIZE, 0>>>(A.nrows(), I, J, V, x_ptr, y_ptr);
89#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
90 getLastCudaError("spmv_csr_scalar_kernel: Kernel execution failed");
91#endif
92}
93
94template <size_t THREADS_PER_VECTOR, typename Matrix, typename Vector>
95void __spmv_csr_vector_dispatch(const Matrix& A, const Vector& x, Vector& y,
96 const bool init) {
97 using size_type = typename Matrix::size_type;
98 using index_type = typename Matrix::index_type;
99 using value_type = typename Matrix::value_type;
100
101 const index_type* I = A.crow_offsets().data();
102 const index_type* J = A.ccolumn_indices().data();
103 const value_type* V = A.cvalues().data();
104
105 const value_type* x_ptr = x.data();
106 value_type* y_ptr = y.data();
107
108 const size_type THREADS_PER_BLOCK = 128;
109 const size_type VECTORS_PER_BLOCK = THREADS_PER_BLOCK / THREADS_PER_VECTOR;
110
111 if (init) {
112 y.assign(y.size(), 0);
113 }
114
115 const size_type MAX_BLOCKS = max_active_blocks(
116 Kernels::spmv_csr_vector_kernel<size_type, index_type, value_type,
117 VECTORS_PER_BLOCK, THREADS_PER_VECTOR>,
118 THREADS_PER_BLOCK, 0);
119
120 const size_type NUM_BLOCKS = std::min<size_type>(
121 MAX_BLOCKS, Impl::ceil_div<size_type>(A.nrows(), VECTORS_PER_BLOCK));
122
123 Kernels::spmv_csr_vector_kernel<size_type, index_type, value_type,
124 VECTORS_PER_BLOCK, THREADS_PER_VECTOR>
125 <<<NUM_BLOCKS, THREADS_PER_BLOCK, 0>>>(A.nrows(), I, J, V, x_ptr, y_ptr);
126
127#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
128 getLastCudaError("spmv_csr_vector_kernel: Kernel execution failed");
129#endif
130}
131
132template <typename Matrix, typename Vector>
133void __spmv_csr_vector(const Matrix& A, const Vector& x, Vector& y,
134 const bool init) {
135 using size_type = typename Matrix::size_type;
136
137 const size_type nnz_per_row = A.nnnz() / A.nrows();
138
139 if (nnz_per_row <= 2) {
140 __spmv_csr_vector_dispatch<2>(A, x, y, init);
141 return;
142 }
143 if (nnz_per_row <= 4) {
144 __spmv_csr_vector_dispatch<4>(A, x, y, init);
145 return;
146 }
147 if (nnz_per_row <= 8) {
148 __spmv_csr_vector_dispatch<8>(A, x, y, init);
149 return;
150 }
151 if (nnz_per_row <= 16) {
152 __spmv_csr_vector_dispatch<16>(A, x, y, init);
153 return;
154 }
155
156 if (nnz_per_row <= 32) {
157 __spmv_csr_vector_dispatch<32>(A, x, y, init);
158 return;
159 }
160
161 __spmv_csr_vector_dispatch<64>(A, x, y, init);
162}
163
164} // namespace Impl
165} // namespace Morpheus
166
167#endif // MORPHEUS_ENABLE_HIP
168#endif // MORPHEUS_CSR_HIP_MULTIPLY_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24