Morpheus 1.0.0
Dynamic matrix type and algorithms for sparse matrices
Loading...
Searching...
No Matches
Morpheus_HIPUtils.hpp
1
24#ifndef MORPHEUS_HIP_UTILS_HPP
25#define MORPHEUS_HIP_UTILS_HPP
26
27#include <Morpheus_Macros.hpp>
28#if defined(MORPHEUS_ENABLE_HIP)
29
30#include <impl/Morpheus_Utils.hpp>
31
32#include <stdio.h>
33#include <stdlib.h>
34#include "hip/hip_runtime.h"
35
36namespace Morpheus {
37namespace Impl {
38
// Maximum number of co-resident threads (wording kept from the original
// author).
// NOTE(review): the identifier reads like a per-dimension block-count limit
// rather than a thread count -- confirm against the call sites.
constexpr int MAX_BLOCK_DIM_SIZE = 65535;

// Number of threads treated as one warp by code that uses this header.
// NOTE(review): presumably the wavefront width of the targeted devices --
// confirm before using on hardware with a different width.
constexpr int WARP_SIZE = 64;
42
// Translates an error code into its symbolic name by forwarding it to
// hipGetErrorName().
//
// T      - type of the error code; must be accepted by hipGetErrorName().
// error  - the error code to translate.
// Returns the string produced by hipGetErrorName() for `error`.
template <typename T>
static const char *_hipGetErrorEnum(T error) {
  const char *const name = hipGetErrorName(error);
  return name;
}
47
// Terminates the process when a HIP call reports a failure.
//
// result - status returned by the call; any value that converts to `true`
//          is treated as an error.
// func   - text of the expression that produced `result` (for the message).
// file   - source file of the call site.
// line   - source line of the call site.
//
// On error a diagnostic is written to stderr and the process exits with
// EXIT_FAILURE; otherwise the function returns without side effects.
template <typename T>
void check_hip(T result, char const *const func, const char *const file,
               int const line) {
  // Success path: nothing to report.
  if (!result) {
    return;
  }

  const unsigned int code  = static_cast<unsigned int>(result);
  const char *const symbol = _hipGetErrorEnum(result);

  fprintf(stderr, "HIP error at %s:%d code=%d(%s) \"%s\" \n", file, line, code,
          symbol, func);
  exit(EXIT_FAILURE);
}

// Evaluates `val` once and hands it to check_hip() together with the
// stringified expression and the location of the macro expansion.
#define checkHIPErrors(val) check_hip((val), #val, __FILE__, __LINE__)
59
60// This will output the proper error string when calling hipGetLastError
61#define getLastHIPError(msg) __getLastHIPError(msg, __FILE__, __LINE__)
62
63inline void __getLastHIPError(const char *errorMessage, const char *file,
64 const int line) {
65 hipError_t err = hipGetLastError();
66
67 if (hipSuccess != err) {
68 fprintf(stderr,
69 "%s(%i) : getLastHIPError() HIP error :"
70 " %s : (%d) %s.\n",
71 file, line, errorMessage, static_cast<int>(err),
72 hipGetErrorString(err));
73 exit(EXIT_FAILURE);
74 }
75}
76
77template <typename KernelFunction>
78size_t max_active_blocks(KernelFunction kernel, const size_t CTA_SIZE,
79 const size_t dynamic_smem_bytes) {
80 int MAX_BLOCKS;
81 hipOccupancyMaxActiveBlocksPerMultiprocessor(
82 &MAX_BLOCKS, kernel, (int)CTA_SIZE, dynamic_smem_bytes);
83
84 hipDeviceProp_t prop;
85 int device;
86 checkHIPErrors(hipGetDevice(&device));
87 checkHIPErrors(hipGetDeviceProperties(&prop, device));
88
89 return (size_t)MAX_BLOCKS * prop.multiProcessorCount;
90}
91
92// Compute the number of threads and blocks to use for the given reduction
93// kernel. We set threads / block to the minimum of maxThreads and n/2.
94// We observe the maximum specified number of blocks, because
95// each thread in that kernel can process a variable number of elements.
96template <typename IndexType>
97void getNumBlocksAndThreads(
98 IndexType n, IndexType maxBlocks, IndexType maxThreads, IndexType &blocks,
99 IndexType &threads,
100 typename std::enable_if<std::is_integral<IndexType>::value>::type * =
101 nullptr) {
102 // get device capability, to avoid block/grid size exceed the upper bound
103 hipDeviceProp_t prop;
104 int device;
105 checkHIPErrors(hipGetDevice(&device));
106 checkHIPErrors(hipGetDeviceProperties(&prop, device));
107
108 threads = (n < maxThreads * 2) ? nextPow2((n + 1) / 2) : maxThreads;
109 blocks = (n + (threads * 2 - 1)) / (threads * 2);
110
111 if ((float)threads * blocks >
112 (float)prop.maxGridSize[0] * prop.maxThreadsPerBlock) {
113 printf("n is too large, please choose a smaller number!\n");
114 }
115
116 if (blocks > prop.maxGridSize[0]) {
117 printf(
118 "Grid size <%d> exceeds the device capability <%d>, set block size as "
119 "%d (original %d)\n",
120 blocks, prop.maxGridSize[0], threads * 2, threads);
121
122 blocks /= 2;
123 threads *= 2;
124 }
125
126 blocks = min(maxBlocks, blocks);
127}
128
129} // namespace Impl
130} // namespace Morpheus
131
132#endif // MORPHEUS_ENABLE_HIP
133#endif // MORPHEUS_HIP_UTILS_HPP
Generic Morpheus interfaces.
Definition: dummy.cpp:24