Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Coo/OpenMP/Morpheus_Multiply_Impl.hpp
1
24#ifndef MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP
25#define MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_OPENMP)
29
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34#include <Morpheus_Scan.hpp>
35
36#include <impl/Morpheus_OpenMPUtils.hpp>
37
38#include <limits>
39
40namespace Morpheus {
41namespace Impl {
42
43template <typename T>
44int is_row_stop(T container, typename T::index_type start_idx,
45 typename T::index_type end_idx) {
46 return container[start_idx] != container[end_idx];
47}
48
49template <typename ExecSpace, typename Matrix, typename Vector>
50inline void multiply(
51 const Matrix& A, const Vector& x, Vector& y, const bool init,
52 typename std::enable_if_t<
53 Morpheus::is_coo_matrix_format_container_v<Matrix> &&
54 Morpheus::is_dense_vector_format_container_v<Vector> &&
55 Morpheus::has_custom_backend_v<ExecSpace> &&
56 Morpheus::has_openmp_execution_space_v<ExecSpace> &&
57 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
58 using size_type = typename Matrix::size_type;
59 using value_type = typename Vector::value_type;
60 using index_type = typename Matrix::index_type;
61
62 if (init) {
63 y.assign(y.size(), 0);
64 }
65
66 const size_type nrows = A.nrows();
67 const size_type nnnz = A.nnnz();
68 const size_type sentinel_row = nrows + 1;
69
70 index_type* rind = A.crow_indices().data();
71 index_type* cind = A.ccolumn_indices().data();
72 value_type* Aval = A.cvalues().data();
73 value_type* xval = x.data();
74 value_type* yval = y.data();
75
76#pragma omp parallel
77 {
78 const size_type num_threads = omp_get_num_threads();
79 const size_type work_per_thread =
80 Impl::ceil_div<size_type>(nnnz, num_threads);
81 const size_type thread_id = omp_get_thread_num();
82 const size_type begin = work_per_thread * thread_id;
83 const size_type end = std::min(begin + work_per_thread, nnnz);
84 size_type n = begin;
85
86 if (begin < end) {
87 const index_type first = begin > 0 ? rind[begin - 1] : sentinel_row;
88 const index_type last = end < nnnz ? rind[end] : sentinel_row;
89
90 // handle row overlap with previous thread
91 if (first != (index_type)sentinel_row) {
92 value_type partial_sum = value_type(0);
93 for (; n < end && rind[n] == first; n++) {
94 partial_sum += Aval[n] * xval[cind[n]];
95 }
96 Impl::atomic_add(&yval[first], partial_sum);
97 }
98
99 // handle non-overlapping rows
100 for (; n < end && A.crow_indices(n) != last; n++) {
101 yval[rind[n]] += Aval[n] * xval[cind[n]];
102 }
103
104 // handle row overlap with following thread
105 if (last != (index_type)sentinel_row) {
106 value_type partial_sum = value_type(0);
107 for (; n < end; n++) {
108 partial_sum += Aval[n] * xval[cind[n]];
109 }
110 Impl::atomic_add(&yval[last], partial_sum);
111 }
112 }
113 }
114}
115
116} // namespace Impl
117} // namespace Morpheus
118
119#endif // MORPHEUS_ENABLE_OPENMP
120#endif // MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24