25#ifndef MORPHEUS_DIA_KERNELS_MULTIPLY_IMPL_HPP
26#define MORPHEUS_DIA_KERNELS_MULTIPLY_IMPL_HPP
28#include <Morpheus_Macros.hpp>
29#if defined(MORPHEUS_ENABLE_CUDA) || defined(MORPHEUS_ENABLE_HIP)
31#include <impl/Morpheus_Utils.hpp>
37template <
typename SizeType,
typename IndexType,
typename ValueType,
39__launch_bounds__(BLOCK_SIZE, 1) __global__
40 void spmv_dia_kernel(const SizeType num_rows, const SizeType num_cols,
41 const SizeType num_diagonals, const SizeType pitch,
42 const IndexType* diagonal_offsets,
43 const ValueType* values, const ValueType* x,
45 __shared__ IndexType offsets[BLOCK_SIZE];
47 const SizeType thread_id = BLOCK_SIZE * blockIdx.x + threadIdx.x;
48 const SizeType grid_size = BLOCK_SIZE * gridDim.x;
50 for (SizeType base = 0; base < num_diagonals; base += BLOCK_SIZE) {
52 const SizeType chunk_size =
53 Morpheus::Impl::min(SizeType(BLOCK_SIZE), num_diagonals - base);
55 if (threadIdx.x < chunk_size)
56 offsets[threadIdx.x] = diagonal_offsets[base + threadIdx.x];
61 for (SizeType row = thread_id; row < num_rows; row += grid_size) {
62 ValueType sum = ValueType(0);
65 SizeType idx = row + pitch * base;
67 for (SizeType n = 0; n < chunk_size; n++) {
68 const IndexType col = row + offsets[n];
70 if (col >= 0 && col < (IndexType)num_cols) {
71 const ValueType A_ij = values[idx];
Generic Morpheus interfaces.
Definition: dummy.cpp:24