24#ifndef MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP
25#define MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_CUDA)
30#include <Morpheus_SpaceTraits.hpp>
31#include <Morpheus_FormatTraits.hpp>
32#include <Morpheus_FormatTags.hpp>
33#include <Morpheus_Spaces.hpp>
34#include <Morpheus_Reduction.hpp>
36#include <impl/DenseVector/Cuda/Morpheus_Workspace.hpp>
38#include <impl/Morpheus_CudaUtils.hpp>
39#include <impl/DenseVector/Kernels/Morpheus_Dot_Impl.hpp>
41#ifdef MORPHEUS_ENABLE_TPL_CUBLAS
42#include <Morpheus_TypeTraits.hpp>
49template <
typename Vector1,
typename Vector2>
50typename Vector2::value_type dot_ref(
const typename Vector1::size_type n,
51 const Vector1& x,
const Vector2& y);
52template <
typename SizeType>
53double dot_cublas(
const SizeType n,
const double* x,
int incx,
const double* y,
55template <
typename SizeType>
56double dot_cublas(
const SizeType n,
const float* x,
int incx,
const float* y,
59template <
typename ExecSpace,
typename Vector1,
typename Vector2>
60typename Vector2::value_type dot(
61 const typename Vector1::size_type n,
const Vector1& x,
const Vector2& y,
62 typename std::enable_if_t<
63 Morpheus::is_dense_vector_format_container_v<Vector1> &&
64 Morpheus::is_dense_vector_format_container_v<Vector2> &&
65 Morpheus::has_custom_backend_v<ExecSpace> &&
66 Morpheus::has_cuda_execution_space_v<ExecSpace> &&
67 Morpheus::has_access_v<ExecSpace, Vector1, Vector2>>* =
nullptr) {
68 using value_type1 =
typename Vector1::non_const_value_type;
69 using value_type2 =
typename Vector2::value_type;
71 value_type2 local_result;
73#ifdef MORPHEUS_ENABLE_TPL_CUBLAS
74 using index_type =
typename Vector1::index_type;
76 typename std::remove_pointer_t<Morpheus::remove_cvref_t<value_type1>>;
77 if constexpr (std::is_floating_point_v<val_t>) {
78 index_type incx = 1, incy = 1;
79 local_result = dot_cublas(n, x.data(), incx, y.data(), incy);
81 local_result = dot_ref(n, x, y);
84 local_result = dot_ref(n, x, y);
90template <
typename SizeType>
91double dot_cublas(
const SizeType n,
const double* x,
int incx,
const double* y,
94 cublasdotspace.init();
95 cublasdotspace.allocate<
double>(1);
96 cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
97 (
double*)cublasdotspace.data<
double>());
99 checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<
double>(),
100 sizeof(
double), cudaMemcpyDeviceToHost));
105template <
typename SizeType>
106float dot_cublas(
const SizeType n,
const float* x,
int incx,
const float* y,
109 cublasdotspace.init();
110 cublasdotspace.allocate<
float>(1);
111 cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
112 (
float*)cublasdotspace.data<
float>());
114 checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<
float>(),
sizeof(
float),
115 cudaMemcpyDeviceToHost));
120template <
typename Vector1,
typename Vector2>
121typename Vector2::value_type dot_ref(
const typename Vector1::size_type n,
122 const Vector1& x,
const Vector2& y) {
123 using size_type =
typename Vector1::size_type;
124 using value_type =
typename Vector2::value_type;
127 cudotspace.allocate<value_type>(n);
129 Kernels::dot_kernel_part1<256, value_type, size_type>
130 <<<256, 256>>>(n, x.data(), y.data(), cudotspace.data<value_type>());
131#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
132 getLastCudaError(
"dot: Kernel execution failed");
135 Kernels::dot_kernel_part2<256, value_type>
136 <<<1, 256>>>(cudotspace.data<value_type>());
137#if defined(DEBUG) || defined(MORPHEUS_DEBUG)
138 getLastCudaError(
"dot: Kernel execution failed");
141 cudaMemcpy(&lres, cudotspace.data<value_type>(),
sizeof(value_type),
142 cudaMemcpyDeviceToHost);
Generic Morpheus interfaces.
Definition: dummy.cpp:24