24#ifndef MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP
25#define MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_OPENMP)
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34#include <Morpheus_Scan.hpp>
36#include <impl/Morpheus_OpenMPUtils.hpp>
44int is_row_stop(T container,
typename T::index_type start_idx,
45 typename T::index_type end_idx) {
46 return container[start_idx] != container[end_idx];
49template <
typename ExecSpace,
typename Matrix,
typename Vector>
51 const Matrix& A,
const Vector& x, Vector& y,
const bool init,
52 typename std::enable_if_t<
53 Morpheus::is_coo_matrix_format_container_v<Matrix> &&
54 Morpheus::is_dense_vector_format_container_v<Vector> &&
55 Morpheus::has_custom_backend_v<ExecSpace> &&
56 Morpheus::has_openmp_execution_space_v<ExecSpace> &&
57 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* =
nullptr) {
58 using size_type =
typename Matrix::size_type;
59 using value_type =
typename Vector::value_type;
60 using index_type =
typename Matrix::index_type;
63 y.assign(y.size(), 0);
66 const size_type nrows = A.nrows();
67 const size_type nnnz = A.nnnz();
68 const size_type sentinel_row = nrows + 1;
70 index_type* rind = A.crow_indices().data();
71 index_type* cind = A.ccolumn_indices().data();
72 value_type* Aval = A.cvalues().data();
73 value_type* xval = x.data();
74 value_type* yval = y.data();
78 const size_type num_threads = omp_get_num_threads();
79 const size_type work_per_thread =
80 Impl::ceil_div<size_type>(nnnz, num_threads);
81 const size_type thread_id = omp_get_thread_num();
82 const size_type begin = work_per_thread * thread_id;
83 const size_type end = std::min(begin + work_per_thread, nnnz);
87 const index_type first = begin > 0 ? rind[begin - 1] : sentinel_row;
88 const index_type last = end < nnnz ? rind[end] : sentinel_row;
91 if (first != (index_type)sentinel_row) {
92 value_type partial_sum = value_type(0);
93 for (; n < end && rind[n] == first; n++) {
94 partial_sum += Aval[n] * xval[cind[n]];
96 Impl::atomic_add(&yval[first], partial_sum);
100 for (; n < end && A.crow_indices(n) != last; n++) {
101 yval[rind[n]] += Aval[n] * xval[cind[n]];
105 if (last != (index_type)sentinel_row) {
106 value_type partial_sum = value_type(0);
107 for (; n < end; n++) {
108 partial_sum += Aval[n] * xval[cind[n]];
110 Impl::atomic_add(&yval[last], partial_sum);
Generic Morpheus interfaces.
Definition: dummy.cpp:24