Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
DenseVector/Cuda/Morpheus_Dot_Impl.hpp
1
24#ifndef MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP
25#define MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_CUDA)
29
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34#include <Morpheus_Reduction.hpp>
35
36#include <impl/DenseVector/Cuda/Morpheus_Workspace.hpp>
37
38#include <impl/Morpheus_CudaUtils.hpp>
39#include <impl/DenseVector/Kernels/Morpheus_Dot_Impl.hpp>
40
41#ifdef MORPHEUS_ENABLE_TPL_CUBLAS
42#include <Morpheus_TypeTraits.hpp>
43#include <cublas_v2.h>
44#endif // MORPHEUS_ENABLE_TPL_CUBLAS
45
46namespace Morpheus {
47namespace Impl {
48
// Forward declarations. dot() below calls these before their definitions
// appear later in this header.

// Reference (hand-written kernel) dot product of the first n entries of x, y.
template <typename Vector1, typename Vector2>
typename Vector2::value_type dot_ref(const typename Vector1::size_type n,
                                     const Vector1& x, const Vector2& y);

// cuBLAS-backed dot product, double precision.
template <typename SizeType>
double dot_cublas(const SizeType n, const double* x, int incx, const double* y,
                  int incy);

// cuBLAS-backed dot product, single precision.
// The return type must be `float` to match the definition further down: a
// function template's return type is part of its signature, so declaring it
// as `double` would introduce a second, distinct overload and make every
// float call ambiguous.
template <typename SizeType>
float dot_cublas(const SizeType n, const float* x, int incx, const float* y,
                 int incy);
58
59template <typename ExecSpace, typename Vector1, typename Vector2>
60typename Vector2::value_type dot(
61 const typename Vector1::size_type n, const Vector1& x, const Vector2& y,
62 typename std::enable_if_t<
63 Morpheus::is_dense_vector_format_container_v<Vector1> &&
64 Morpheus::is_dense_vector_format_container_v<Vector2> &&
65 Morpheus::has_custom_backend_v<ExecSpace> &&
66 Morpheus::has_cuda_execution_space_v<ExecSpace> &&
67 Morpheus::has_access_v<ExecSpace, Vector1, Vector2>>* = nullptr) {
68 using value_type1 = typename Vector1::non_const_value_type;
69 using value_type2 = typename Vector2::value_type;
70
71 value_type2 local_result;
72
73#ifdef MORPHEUS_ENABLE_TPL_CUBLAS
74 using index_type = typename Vector1::index_type;
75 using val_t =
76 typename std::remove_pointer_t<Morpheus::remove_cvref_t<value_type1>>;
77 if constexpr (std::is_floating_point_v<val_t>) {
78 index_type incx = 1, incy = 1;
79 local_result = dot_cublas(n, x.data(), incx, y.data(), incy);
80 } else {
81 local_result = dot_ref(n, x, y);
82 }
83#else
84 local_result = dot_ref(n, x, y);
85#endif // MORPHEUS_ENABLE_TPL_CUBLAS
86
87 return local_result;
88}
89
90template <typename SizeType>
91double dot_cublas(const SizeType n, const double* x, int incx, const double* y,
92 int incy) {
93 double lres = 0;
94 cublasdotspace.init();
95 cublasdotspace.allocate<double>(1);
96 cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
97 (double*)cublasdotspace.data<double>());
98
99 checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<double>(),
100 sizeof(double), cudaMemcpyDeviceToHost));
101
102 return lres;
103}
104
105template <typename SizeType>
106float dot_cublas(const SizeType n, const float* x, int incx, const float* y,
107 int incy) {
108 float lres = 0;
109 cublasdotspace.init();
110 cublasdotspace.allocate<float>(1);
111 cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
112 (float*)cublasdotspace.data<float>());
113
114 checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<float>(), sizeof(float),
115 cudaMemcpyDeviceToHost));
116
117 return lres;
118}
119
120template <typename Vector1, typename Vector2>
121typename Vector2::value_type dot_ref(const typename Vector1::size_type n,
122 const Vector1& x, const Vector2& y) {
123 using size_type = typename Vector1::size_type;
124 using value_type = typename Vector2::value_type;
125
126 value_type lres = 0;
127 cudotspace.allocate<value_type>(n);
128
129 Kernels::dot_kernel_part1<256, value_type, size_type>
130 <<<256, 256>>>(n, x.data(), y.data(), cudotspace.data<value_type>());
131#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
132 getLastCudaError("dot: Kernel execution failed");
133#endif
134
135 Kernels::dot_kernel_part2<256, value_type>
136 <<<1, 256>>>(cudotspace.data<value_type>());
137#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
138 getLastCudaError("dot: Kernel execution failed");
139#endif
140
141 cudaMemcpy(&lres, cudotspace.data<value_type>(), sizeof(value_type),
142 cudaMemcpyDeviceToHost);
143 return lres;
144}
145
146} // namespace Impl
147} // namespace Morpheus
148
149#endif // MORPHEUS_ENABLE_CUDA
150#endif // MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24