Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Coo/HIP/Morpheus_Multiply_Impl.hpp
1
24#ifndef MORPHEUS_COO_HIP_MULTIPLY_IMPL_HPP
25#define MORPHEUS_COO_HIP_MULTIPLY_IMPL_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_HIP)
29
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34
35#include <impl/Morpheus_HIPUtils.hpp>
36#include <impl/Coo/Kernels/Morpheus_Multiply_Impl.hpp>
37
38namespace Morpheus {
39namespace Impl {
40
41// forward decl
42template <typename Matrix, typename Vector>
43void __spmv_coo_flat(const Matrix& A, const Vector& x, Vector& y,
44 const bool init);
45
46template <typename Matrix, typename Vector>
47void __spmv_coo_serial(const Matrix& A, const Vector& x, Vector& y,
48 const bool init);
49
50template <typename ExecSpace, typename Matrix, typename Vector>
51inline void multiply(
52 const Matrix& A, const Vector& x, Vector& y, const bool init,
53 typename std::enable_if_t<
54 Morpheus::is_coo_matrix_format_container_v<Matrix> &&
55 Morpheus::is_dense_vector_format_container_v<Vector> &&
56 Morpheus::has_custom_backend_v<ExecSpace> &&
57 Morpheus::has_hip_execution_space_v<ExecSpace> &&
58 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
59 switch (A.options()) {
60 case MATOPT_SHORT_ROWS: __spmv_coo_serial(A, x, y, init); break;
61 default: __spmv_coo_flat(A, x, y, init);
62 }
63}
64
65template <typename Matrix, typename Vector>
66void __spmv_coo_serial(const Matrix& A, const Vector& x, Vector& y,
67 const bool init) {
68 using size_type = typename Matrix::size_type;
69 using index_type = typename Matrix::index_type;
70 using value_type = typename Matrix::value_type;
71 const index_type* I = A.crow_indices().data();
72 const index_type* J = A.ccolumn_indices().data();
73 const value_type* V = A.cvalues().data();
74
75 const value_type* x_ptr = x.data();
76 value_type* y_ptr = y.data();
77
78 if (init) {
79 y.assign(y.size(), 0);
80 }
81
82 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
83 <<<1, 1>>>(A.nnnz(), I, J, V, x_ptr, y_ptr);
84
85#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
86 getLastHIPError("spmv_coo_serial_kernel: Kernel execution failed");
87#endif
88}
89
91// COO SpMV kernel which flattens data irregularity (segmented reduction)
93// Copyright 2008-2014 NVIDIA Corporation
94// spmv_coo_flat
95// The input coo_matrix must be sorted by row. Columns within each row
96// may appear in any order and duplicate entries are also acceptable.
97// This sorted COO format is easily obtained by expanding the row pointer
98// of a CSR matrix (csr.Ap) into proper row indices and then copying
99// the arrays containing the CSR column indices (csr.Aj) and nonzero values
100// (csr.Ax) verbatim. A segmented reduction is used to compute the per-row
101// sums.
102//
103//
104template <typename Matrix, typename Vector>
105void __spmv_coo_flat(const Matrix& A, const Vector& x, Vector& y,
106 const bool init) {
107 using size_type = typename Matrix::size_type;
108 using index_type = typename Matrix::index_type;
109 using value_type = typename Matrix::value_type;
110
111 if (init) {
112 y.assign(y.size(), 0);
113 }
114
115 if (A.nnnz() == 0) {
116 // empty matrix
117 return;
118 } else if (A.nnnz() < static_cast<size_type>(WARP_SIZE)) {
119 // small matrix
120 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
121 <<<1, 1, 0>>>(A.nnnz(), A.crow_indices().data(),
122 A.ccolumn_indices().data(), A.cvalues().data(), x.data(),
123 y.data());
124#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
125 getLastHIPError("spmv_coo_serial_kernel: Kernel execution failed");
126#endif
127 return;
128 }
129
130 const size_type BLOCK_SIZE = 256;
131 const size_type MAX_BLOCKS = max_active_blocks(
132 Kernels::spmv_coo_flat_kernel<index_type, value_type, BLOCK_SIZE>,
133 BLOCK_SIZE, 0);
134 const size_type WARPS_PER_BLOCK = BLOCK_SIZE / WARP_SIZE;
135
136 const size_type num_units = A.nnnz() / WARP_SIZE;
137 const size_type num_warps = std::min(num_units, WARPS_PER_BLOCK * MAX_BLOCKS);
138 const size_type num_blocks = DIVIDE_INTO(num_warps, WARPS_PER_BLOCK);
139 const size_type num_iters = DIVIDE_INTO(num_units, num_warps);
140
141 const size_type interval_size = WARP_SIZE * num_iters;
142
143 const size_type tail =
144 num_units * WARP_SIZE; // do the last few nonzeros separately (fewer
145 // than WARP_SIZE elements)
146
147 const size_type active_warps =
148 (interval_size == 0) ? 0 : DIVIDE_INTO(tail, interval_size);
149
150 typename Matrix::index_array_type temp_rows(active_warps, 0);
151 typename Matrix::value_array_type temp_vals(active_warps, 0);
152
153 Kernels::spmv_coo_flat_kernel<size_type, index_type, value_type, BLOCK_SIZE>
154 <<<num_blocks, BLOCK_SIZE, 0>>>(
155 tail, interval_size, A.crow_indices().data(),
156 A.ccolumn_indices().data(), A.cvalues().data(), x.data(), y.data(),
157 temp_rows.data(), temp_vals.data());
158#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
159 getLastHIPError("spmv_coo_flat_kernel: Kernel execution failed");
160#endif
161
162 Kernels::spmv_coo_reduce_update_kernel<size_type, index_type, value_type,
163 BLOCK_SIZE><<<1, BLOCK_SIZE, 0>>>(
164 active_warps, temp_rows.data(), temp_vals.data(), y.data());
165#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
166 getLastHIPError("spmv_coo_reduce_kernel: Kernel execution failed");
167#endif
168
169 Kernels::spmv_coo_serial_kernel<size_type, index_type, value_type>
170 <<<1, 1, 0>>>(A.nnnz() - tail, A.crow_indices().data() + tail,
171 A.ccolumn_indices().data() + tail,
172 A.cvalues().data() + tail, x.data(), y.data());
173#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
174 getLastHIPError("spmv_coo_serial_kernel: Kernel execution failed");
175#endif
176}
177
178} // namespace Impl
179} // namespace Morpheus
180
181#endif // MORPHEUS_ENABLE_HIP
182#endif // MORPHEUS_COO_HIP_MULTIPLY_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24