24#ifndef MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP 
   25#define MORPHEUS_DENSEVECTOR_CUDA_DOT_IMPL_HPP 
   27#include <Morpheus_Macros.hpp> 
   28#if defined(MORPHEUS_ENABLE_CUDA) 
   30#include <Morpheus_SpaceTraits.hpp> 
   31#include <Morpheus_FormatTraits.hpp> 
   32#include <Morpheus_FormatTags.hpp> 
   33#include <Morpheus_Spaces.hpp> 
   34#include <Morpheus_Reduction.hpp> 
   36#include <impl/DenseVector/Cuda/Morpheus_Workspace.hpp> 
   38#include <impl/Morpheus_CudaUtils.hpp> 
   39#include <impl/DenseVector/Kernels/Morpheus_Dot_Impl.hpp> 
   41#ifdef MORPHEUS_ENABLE_TPL_CUBLAS 
   42#include <Morpheus_TypeTraits.hpp> 
   49template <
typename Vector1, 
typename Vector2>
 
   50typename Vector2::value_type dot_ref(
const typename Vector1::size_type n,
 
   51                                     const Vector1& x, 
const Vector2& y);
 
   52template <
typename SizeType>
 
   53double dot_cublas(
const SizeType n, 
const double* x, 
int incx, 
const double* y,
 
   55template <
typename SizeType>
 
   56double dot_cublas(
const SizeType n, 
const float* x, 
int incx, 
const float* y,
 
   59template <
typename ExecSpace, 
typename Vector1, 
typename Vector2>
 
   60typename Vector2::value_type dot(
 
   61    const typename Vector1::size_type n, 
const Vector1& x, 
const Vector2& y,
 
   62    typename std::enable_if_t<
 
   63        Morpheus::is_dense_vector_format_container_v<Vector1> &&
 
   64        Morpheus::is_dense_vector_format_container_v<Vector2> &&
 
   65        Morpheus::has_custom_backend_v<ExecSpace> &&
 
   66        Morpheus::has_cuda_execution_space_v<ExecSpace> &&
 
   67        Morpheus::has_access_v<ExecSpace, Vector1, Vector2>>* = 
nullptr) {
 
   68  using value_type1 = 
typename Vector1::non_const_value_type;
 
   69  using value_type2 = 
typename Vector2::value_type;
 
   71  value_type2 local_result;
 
   73#ifdef MORPHEUS_ENABLE_TPL_CUBLAS 
   74  using index_type = 
typename Vector1::index_type;
 
   76      typename std::remove_pointer_t<Morpheus::remove_cvref_t<value_type1>>;
 
   77  if constexpr (std::is_floating_point_v<val_t>) {
 
   78    index_type incx = 1, incy = 1;
 
   79    local_result = dot_cublas(n, x.data(), incx, y.data(), incy);
 
   81    local_result = dot_ref(n, x, y);
 
   84  local_result = dot_ref(n, x, y);
 
   90template <
typename SizeType>
 
   91double dot_cublas(
const SizeType n, 
const double* x, 
int incx, 
const double* y,
 
   94  cublasdotspace.init();
 
   95  cublasdotspace.allocate<
double>(1);
 
   96  cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
 
   97             (
double*)cublasdotspace.data<
double>());
 
   99  checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<
double>(),
 
  100                             sizeof(
double), cudaMemcpyDeviceToHost));
 
  105template <
typename SizeType>
 
  106float dot_cublas(
const SizeType n, 
const float* x, 
int incx, 
const float* y,
 
  109  cublasdotspace.init();
 
  110  cublasdotspace.allocate<
float>(1);
 
  111  cublasDdot(cublasdotspace.handle(), n, x, incx, y, incy,
 
  112             (
float*)cublasdotspace.data<
float>());
 
  114  checkCudaErrors(cudaMemcpy(&lres, cublasdotspace.data<
float>(), 
sizeof(
float),
 
  115                             cudaMemcpyDeviceToHost));
 
  120template <
typename Vector1, 
typename Vector2>
 
  121typename Vector2::value_type dot_ref(
const typename Vector1::size_type n,
 
  122                                     const Vector1& x, 
const Vector2& y) {
 
  123  using size_type  = 
typename Vector1::size_type;
 
  124  using value_type = 
typename Vector2::value_type;
 
  127  cudotspace.allocate<value_type>(n);
 
  129  Kernels::dot_kernel_part1<256, value_type, size_type>
 
  130      <<<256, 256>>>(n, x.data(), y.data(), cudotspace.data<value_type>());
 
  131#if defined(DEBUG) || defined(MORPHEUS_DEBUG) 
  132  getLastCudaError(
"dot: Kernel execution failed");
 
  135  Kernels::dot_kernel_part2<256, value_type>
 
  136      <<<1, 256>>>(cudotspace.data<value_type>());
 
  137#if defined(DEBUG) || defined(MORPHEUS_DEBUG) 
  138  getLastCudaError(
"dot: Kernel execution failed");
 
  141  cudaMemcpy(&lres, cudotspace.data<value_type>(), 
sizeof(value_type),
 
  142             cudaMemcpyDeviceToHost);
 
Generic Morpheus interfaces.
Definition: dummy.cpp:24