24#ifndef MORPHEUS_HIP_UTILS_HPP
25#define MORPHEUS_HIP_UTILS_HPP
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_HIP)
30#include <impl/Morpheus_Utils.hpp>
34#include "hip/hip_runtime.h"
// Integer constants shared by the HIP utility code.
// NOTE(review): 65535 presumably mirrors a per-dimension grid-size limit — confirm against the target devices.
40const int MAX_BLOCK_DIM_SIZE = 65535;
// NOTE(review): 64 presumably corresponds to the wavefront width of the targeted AMD GPUs — confirm.
41const int WARP_SIZE = 64;
// Returns the symbolic name of a HIP error value by forwarding it to hipGetErrorName().
// NOTE(review): `T` is not declared in the visible lines; a template header presumably
// precedes this definition, and its closing brace is also not shown — confirm.
44static const char *_hipGetErrorEnum(T error) {
45 return hipGetErrorName(error);
// Writes a diagnostic for a HIP result to stderr.
//   result - value being reported; printed both numerically (after a cast to unsigned int)
//            and by name (via _hipGetErrorEnum).
//   func   - text printed in quotes at the end of the message; the checkHIPErrors macro
//            passes the stringified call expression here.
//   file   - source file name printed in the message.
// NOTE(review): the `line` value used in the fprintf, the condition that guards the report,
// and whatever follows the fprintf are not in the visible lines — confirm against the full file.
// NOTE(review): the format uses "%d" for the error code while the argument is cast to
// unsigned int; "%u" would match the cast.
49void check_hip(T result,
char const *
const func,
const char *
const file,
52 fprintf(stderr,
"HIP error at %s:%d code=%d(%s) \"%s\" \n", file, line,
53 static_cast<unsigned int>(result), _hipGetErrorEnum(result), func);
// Convenience wrapper around check_hip: forwards the value of `val`, the source text of
// `val` (via the # stringification operator), and the file/line of the macro's use site.
58#define checkHIPErrors(val) check_hip((val), #val, __FILE__, __LINE__)
// Convenience wrapper around __getLastHIPError: forwards `msg` together with the
// file/line of the macro's use site.
61#define getLastHIPError(msg) __getLastHIPError(msg, __FILE__, __LINE__)
// Queries hipGetLastError() and, when the result differs from hipSuccess, formats a
// message from the caller's file, line and errorMessage plus the numeric error code and
// its hipGetErrorString() description.
//   errorMessage - caller-supplied text included in the message.
//   file         - source file name included in the message.
// NOTE(review): the `line` parameter, the opening of the print call, the remainder of the
// format string and anything after the print are not in the visible lines — confirm.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; renaming would need a matching change in getLastHIPError.
63inline void __getLastHIPError(
const char *errorMessage,
const char *file,
65 hipError_t err = hipGetLastError();
67 if (hipSuccess != err) {
69 "%s(%i) : getLastHIPError() HIP error :"
71 file, line, errorMessage,
static_cast<int>(err),
72 hipGetErrorString(err));
// Returns the per-multiprocessor active-block count reported by
// hipOccupancyMaxActiveBlocksPerMultiprocessor for `kernel`, multiplied by the
// multiprocessor count of the device returned by hipGetDevice().
//   kernel             - kernel handle forwarded to the occupancy query.
//   CTA_SIZE           - block size; narrowed to int for the occupancy query.
//   dynamic_smem_bytes - forwarded unchanged to the occupancy query.
// NOTE(review): the declarations of MAX_BLOCKS, device and prop, and the closing brace,
// are not in the visible lines.
77template <
typename KernelFunction>
78size_t max_active_blocks(KernelFunction kernel,
const size_t CTA_SIZE,
79 const size_t dynamic_smem_bytes) {
// NOTE(review): as shown, the status returned by this call is not passed through
// checkHIPErrors, unlike the two HIP calls below.
81 hipOccupancyMaxActiveBlocksPerMultiprocessor(
82 &MAX_BLOCKS, kernel, (
int)CTA_SIZE, dynamic_smem_bytes);
// Fetch the active device id and its properties to obtain multiProcessorCount.
86 checkHIPErrors(hipGetDevice(&device));
87 checkHIPErrors(hipGetDeviceProperties(&prop, device));
89 return (
size_t)MAX_BLOCKS * prop.multiProcessorCount;
// Chooses a thread count and block count for processing `n` items, writing the results
// to `threads` and `blocks`.
//   n          - number of items to cover.
//   maxBlocks  - upper bound applied to `blocks` by the final min().
//   maxThreads - thread count used once n reaches maxThreads * 2.
//   blocks     - output reference receiving the block count.
// Only instantiable for integral IndexType (std::enable_if / std::is_integral constraint).
// NOTE(review): the declaration of `threads` (presumably another IndexType& output), the
// default value of the enable_if parameter, the declaration of `device`, the opening of
// the second print call and the rest of both `if` bodies are not in the visible lines.
// nextPow2 and min are not defined in the visible lines either.
96template <
typename IndexType>
97void getNumBlocksAndThreads(
98 IndexType n, IndexType maxBlocks, IndexType maxThreads, IndexType &blocks,
100 typename std::enable_if<std::is_integral<IndexType>::value>::type * =
103 hipDeviceProp_t prop;
// Query the active device's limits (maxGridSize, maxThreadsPerBlock) used below.
105 checkHIPErrors(hipGetDevice(&device));
106 checkHIPErrors(hipGetDeviceProperties(&prop, device));
// For small n use nextPow2 of half of n (rounded up); otherwise use maxThreads.
108 threads = (n < maxThreads * 2) ? nextPow2((n + 1) / 2) : maxThreads;
// Ceiling division of n by threads * 2, i.e. sized as if each block covers 2 * threads items.
109 blocks = (n + (threads * 2 - 1)) / (threads * 2);
// Compare total threads against the device limit in float, which avoids integer
// overflow of the products at the cost of precision for very large values.
111 if ((
float)threads * blocks >
112 (float)prop.maxGridSize[0] * prop.maxThreadsPerBlock) {
113 printf(
"n is too large, please choose a smaller number!\n");
// NOTE(review): the message below uses "%d" for `blocks` and `threads`, which only
// matches when IndexType is int.
116 if (blocks > prop.maxGridSize[0]) {
118 "Grid size <%d> exceeds the device capability <%d>, set block size as "
119 "%d (original %d)\n",
120 blocks, prop.maxGridSize[0], threads * 2, threads);
// Cap the block count at the caller-supplied maximum.
126 blocks = min(maxBlocks, blocks);
Generic Morpheus interfaces.
Definition: dummy.cpp:24