Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Dia/Kokkos/Morpheus_Multiply_Impl.hpp
1
24#ifndef MORPHEUS_DIA_KOKKOS_MULTIPLY_IMPL_HPP
25#define MORPHEUS_DIA_KOKKOS_MULTIPLY_IMPL_HPP
26
27#include <Morpheus_SpaceTraits.hpp>
28#include <Morpheus_FormatTraits.hpp>
29#include <Morpheus_FormatTags.hpp>
30#include <Morpheus_Spaces.hpp>
31
32namespace Morpheus {
33namespace Impl {
34
35template <typename ExecSpace, typename Matrix, typename Vector>
36inline void multiply(
37 const Matrix& A, const Vector& x, Vector& y, const bool init,
38 typename std::enable_if_t<
39 Morpheus::is_dia_matrix_format_container_v<Matrix> &&
40 Morpheus::is_dense_vector_format_container_v<Vector> &&
41 Morpheus::has_generic_backend_v<ExecSpace> &&
42 Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
43 using execution_space = typename ExecSpace::execution_space;
44 using value_array_type = typename Matrix::value_array_type::value_array_type;
45 using index_array_type = typename Matrix::index_array_type::value_array_type;
46 using size_type = typename Matrix::size_type;
47 using index_type = typename Matrix::index_type;
48 using value_type = typename Matrix::value_type;
49 using member_type = typename Kokkos::TeamPolicy<execution_space>::member_type;
50
51 const value_array_type values = A.cvalues().const_view();
52 const typename Vector::value_array_type x_view = x.const_view();
53 const index_array_type diagonal_offsets = A.cdiagonal_offsets().const_view();
54 typename Vector::value_array_type y_view = y.view();
55 size_type ndiag = A.cdiagonal_offsets().size(), ncols = A.ncols();
56
57 const Kokkos::TeamPolicy<execution_space> policy(A.nrows(), Kokkos::AUTO,
58 Kokkos::AUTO);
59
60 Kokkos::parallel_for(
61 policy, KOKKOS_LAMBDA(const member_type& team_member) {
62 const index_type row = team_member.league_rank();
63
64 value_type sum = init ? value_type(0) : y_view[row];
65 Kokkos::parallel_reduce(
66 Kokkos::TeamThreadRange(team_member, ndiag),
67 [=](const size_type& n, value_type& lsum) {
68 const index_type col = row + diagonal_offsets[n];
69
70 if (col >= 0 && col < (index_type)ncols) {
71 lsum += values(row, n) * x_view[col];
72 }
73 },
74 sum);
75 y_view[row] = init ? sum : sum + y_view[row];
76 });
77}
78
79// template <typename ExecSpace, typename Matrix, typename Vector>
80// inline void multiply(
81// const Matrix& A, const Vector& x, Vector& y,
82// typename std::enable_if_t<
83// Morpheus::is_dia_matrix_format_container_v<Matrix> &&
84// Morpheus::is_dense_vector_format_container_v<Vector> &&
85// Morpheus::has_generic_backend_v<ExecSpace> &&
86// Morpheus::has_access_v<ExecSpace, Matrix, Vector>>* = nullptr) {
87// using execution_space = typename ExecSpace::execution_space;
88// using value_array_type = typename
89// Matrix::value_array_type::value_array_type; using index_array_type =
90// typename Matrix::index_array_type::value_array_type; using size_type =
91// typename Matrix::size_type; using index_type = typename
92// Matrix::index_type; using value_type = typename Matrix::value_type;
93// using range_policy =
94// Kokkos::RangePolicy<Kokkos::IndexType<size_type>, execution_space>;
95
96// const value_array_type values = A.cvalues().const_view();
97// const typename Vector::value_array_type x_view = x.const_view();
98// const index_array_type diagonal_offsets =
99// A.cdiagonal_offsets().const_view(); typename Vector2::value_array_type
100// y_view = y.view(); size_type ndiag = A.cvalues().ncols(), ncols =
101// A.ncols();
102
103// range_policy policy(0, A.nrows());
104
105// Kokkos::parallel_for(
106// policy, KOKKOS_LAMBDA(const size_type row) {
107// value_type sum = y_view[row];
108
109// for (size_type n = 0; n < ndiag; n++) {
110// const index_type col = row + diagonal_offsets[n];
111
112// if (col >= 0 && col < (index_type)ncols) {
113// sum += values(row, n) * x_view[col];
114// }
115// }
116
117// y_view[row] = sum;
118// });
119// }
120
121} // namespace Impl
122} // namespace Morpheus
123
124#endif // MORPHEUS_DIA_KOKKOS_MULTIPLY_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24