24#ifndef MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP 
   25#define MORPHEUS_COO_OPENMP_MULTIPLY_IMPL_HPP 
   27#include <Morpheus_Macros.hpp> 
   28#if defined(MORPHEUS_ENABLE_OPENMP) 
   30#include <Morpheus_SpaceTraits.hpp> 
   31#include <Morpheus_FormatTraits.hpp> 
   32#include <Morpheus_FormatTags.hpp> 
   33#include <Morpheus_Spaces.hpp> 
   34#include <Morpheus_Scan.hpp> 
   36#include <impl/Morpheus_OpenMPUtils.hpp> 
   44int is_row_stop(T container, 
typename T::index_type start_idx,
 
   45                typename T::index_type end_idx) {
 
   46  return container[start_idx] != container[end_idx];
 
   49template <
typename ExecSpace, 
typename Matrix, 
typename Vector>
 
   51    const Matrix& A, 
const Vector& x, Vector& y, 
const bool init,
 
   52    typename std::enable_if_t<
 
   53        Morpheus::is_coo_matrix_format_container_v<Matrix> &&
 
   54        Morpheus::is_dense_vector_format_container_v<Vector> &&
 
   55        Morpheus::has_custom_backend_v<ExecSpace> &&
 
   56        Morpheus::has_openmp_execution_space_v<ExecSpace> &&
 
   57        Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = 
nullptr) {
 
   58  using size_type  = 
typename Matrix::size_type;
 
   59  using value_type = 
typename Vector::value_type;
 
   60  using index_type = 
typename Matrix::index_type;
 
   63    y.assign(y.size(), 0);
 
   66  const size_type nrows        = A.nrows();
 
   67  const size_type nnnz         = A.nnnz();
 
   68  const size_type sentinel_row = nrows + 1;
 
   70  index_type* rind = A.crow_indices().data();
 
   71  index_type* cind = A.ccolumn_indices().data();
 
   72  value_type* Aval = A.cvalues().data();
 
   73  value_type* xval = x.data();
 
   74  value_type* yval = y.data();
 
   78    const size_type num_threads = omp_get_num_threads();
 
   79    const size_type work_per_thread =
 
   80        Impl::ceil_div<size_type>(nnnz, num_threads);
 
   81    const size_type thread_id = omp_get_thread_num();
 
   82    const size_type begin     = work_per_thread * thread_id;
 
   83    const size_type end       = std::min(begin + work_per_thread, nnnz);
 
   87      const index_type first = begin > 0 ? rind[begin - 1] : sentinel_row;
 
   88      const index_type last  = end < nnnz ? rind[end] : sentinel_row;
 
   91      if (first != (index_type)sentinel_row) {
 
   92        value_type partial_sum = value_type(0);
 
   93        for (; n < end && rind[n] == first; n++) {
 
   94          partial_sum += Aval[n] * xval[cind[n]];
 
   96        Impl::atomic_add(&yval[first], partial_sum);
 
  100      for (; n < end && A.crow_indices(n) != last; n++) {
 
  101        yval[rind[n]] += Aval[n] * xval[cind[n]];
 
  105      if (last != (index_type)sentinel_row) {
 
  106        value_type partial_sum = value_type(0);
 
  107        for (; n < end; n++) {
 
  108          partial_sum += Aval[n] * xval[cind[n]];
 
  110        Impl::atomic_add(&yval[last], partial_sum);
 
Generic Morpheus interfaces.
Definition: dummy.cpp:24