Skip to content

Commit c9aa1dc

Browse files
committed
CUDA_CHECK to HIP_CHECK
Signed-off-by: Steven Hahn <hahnse@ornl.gov>
1 parent 9058ab0 commit c9aa1dc

2 files changed

Lines changed: 45 additions & 47 deletions

File tree

bin/cuda/gpu_benchmark.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include <cub/cub.cuh>
55

66
#include <chrono>
7-
#include <cstdlib> // For std::atoi
87
#include <iostream>
98

109
// The macro wraps any CUDA API call

bin/hip/gpu_benchmark.hip

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,25 @@
22
#include <hipcub/hipcub.hpp>
33

44
#include <chrono>
5-
#include <cstdlib> // For std::atoi
65
#include <iostream>
76

8-
// The macro wraps any CUDA API call
9-
#define CUDA_CHECK(ans) \
10-
{ gpuAssert((ans), __FILE__, __LINE__); }
11-
12-
inline void gpuAssert(hipError_t code, const char *file, int line, bool abort = true) {
13-
if (code != hipSuccess) {
14-
fprintf(stderr, "GPUassert: %s %s %d\n", hipGetErrorString(code), file, line);
15-
if (abort)
16-
exit(code);
17-
}
7+
#define HIP_CHECK(expression) \
8+
{ \
9+
const hipError_t status = expression; \
10+
if(status != hipSuccess){ \
11+
std::cerr << "HIP error " \
12+
<< status << ": " \
13+
<< hipGetErrorString(status) \
14+
<< " at " << __FILE__ << ":" \
15+
<< __LINE__ << std::endl; \
16+
} \
1817
}
1918

2019
float getElapsedTime(const hipEvent_t &gpu_start, hipEvent_t &gpu_stop) {
2120
float gpu_elapsed_time;
22-
CUDA_CHECK(hipEventRecord(gpu_stop, 0));
23-
CUDA_CHECK(hipEventSynchronize(gpu_stop));
24-
CUDA_CHECK(hipEventElapsedTime(&gpu_elapsed_time, gpu_start, gpu_stop));
21+
HIP_CHECK(hipEventRecord(gpu_stop, 0));
22+
HIP_CHECK(hipEventSynchronize(gpu_stop));
23+
HIP_CHECK(hipEventElapsedTime(&gpu_elapsed_time, gpu_start, gpu_stop));
2524
return gpu_elapsed_time / 1000.0f;
2625
}
2726

@@ -32,15 +31,15 @@ void runBenchmark(long max_work) {
3231

3332
unsigned long long int *d_count;
3433
hiprandState *d_state;
35-
CUDA_CHECK(hipMalloc((void **)&d_count, 256 * sizeof(unsigned long long int)));
36-
CUDA_CHECK(hipMalloc((void **)&d_state, n * sizeof(hiprandState)));
37-
CUDA_CHECK(hipMemset(d_count, 0, 256 * sizeof(unsigned long long int)));
34+
HIP_CHECK(hipMalloc((void **)&d_count, 256 * sizeof(unsigned long long int)));
35+
HIP_CHECK(hipMalloc((void **)&d_state, n * sizeof(hiprandState)));
36+
HIP_CHECK(hipMemset(d_count, 0, 256 * sizeof(unsigned long long int)));
3837

3938
// set up timing stuff
4039
hipEvent_t gpu_start, gpu_stop;
41-
CUDA_CHECK(hipEventCreate(&gpu_start));
42-
CUDA_CHECK(hipEventCreate(&gpu_stop));
43-
CUDA_CHECK(hipEventRecord(gpu_start, 0));
40+
HIP_CHECK(hipEventCreate(&gpu_start));
41+
HIP_CHECK(hipEventCreate(&gpu_stop));
42+
HIP_CHECK(hipEventRecord(gpu_start, 0));
4443

4544
// set kernel
4645
dim3 gridSize = 256;
@@ -49,38 +48,38 @@ void runBenchmark(long max_work) {
4948

5049
// monte carlo kernel
5150
monte_carlo_kernel<<<gridSize, blockSize>>>(d_state, d_count, m);
52-
CUDA_CHECK(hipDeviceSynchronize());
51+
HIP_CHECK(hipDeviceSynchronize());
5352

5453
// Allocate device output array
5554
unsigned long long int *d_out = nullptr;
56-
CUDA_CHECK(hipMalloc((void **)&d_out, sizeof(unsigned long long int)));
55+
HIP_CHECK(hipMalloc((void **)&d_out, sizeof(unsigned long long int)));
5756

5857
// Request and allocate temporary storage
5958
void *d_temp_storage = nullptr;
6059
size_t temp_storage_bytes = 0;
6160
hipcub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_count, d_out, 256);
62-
CUDA_CHECK(hipMalloc((void **)&d_temp_storage, temp_storage_bytes));
61+
HIP_CHECK(hipMalloc((void **)&d_temp_storage, temp_storage_bytes));
6362

6463
// Run
6564
hipcub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_count, d_out, 256);
6665

6766
float gpu_elapsed_time = getElapsedTime(gpu_start, gpu_stop);
68-
CUDA_CHECK(hipEventDestroy(gpu_start));
69-
CUDA_CHECK(hipEventDestroy(gpu_stop));
67+
HIP_CHECK(hipEventDestroy(gpu_start));
68+
HIP_CHECK(hipEventDestroy(gpu_stop));
7069

7170
// copy results back to the host
7271
unsigned long long int h_count = 0;
73-
CUDA_CHECK(hipMemcpy(&h_count, d_out, sizeof(unsigned long long int), hipMemcpyDeviceToHost));
72+
HIP_CHECK(hipMemcpy(&h_count, d_out, sizeof(unsigned long long int), hipMemcpyDeviceToHost));
7473

7574
// display results and timings for gpu
7675
float pi = h_count * 4.0 / (n * m);
7776
std::cout << "Approximate pi calculated on GPU is: " << pi << " and calculation took " << gpu_elapsed_time << "s\n";
7877
std::cout << "Benchmark completed!" << std::endl;
7978

80-
CUDA_CHECK(hipFree(d_count));
81-
CUDA_CHECK(hipFree(d_state));
82-
CUDA_CHECK(hipFree(d_out));
83-
CUDA_CHECK(hipFree(d_temp_storage));
79+
HIP_CHECK(hipFree(d_count));
80+
HIP_CHECK(hipFree(d_state));
81+
HIP_CHECK(hipFree(d_out));
82+
HIP_CHECK(hipFree(d_temp_storage));
8483
}
8584

8685
// Function to run the GPU benchmark for a specified time
@@ -92,15 +91,15 @@ void runBenchmarkTime(long max_work, int runtime_in_seconds) {
9291
// allocate memory
9392
unsigned long long int *d_count;
9493
hiprandState *d_state;
95-
CUDA_CHECK(hipMalloc((void **)&d_count, 256 * sizeof(unsigned long long int)));
96-
CUDA_CHECK(hipMalloc((void **)&d_state, n * sizeof(hiprandState)));
97-
CUDA_CHECK(hipMemset(d_count, 0, 256 * sizeof(unsigned long long int)));
94+
HIP_CHECK(hipMalloc((void **)&d_count, 256 * sizeof(unsigned long long int)));
95+
HIP_CHECK(hipMalloc((void **)&d_state, n * sizeof(hiprandState)));
96+
HIP_CHECK(hipMemset(d_count, 0, 256 * sizeof(unsigned long long int)));
9897

9998
// set up timing stuff
10099
hipEvent_t gpu_start, gpu_stop;
101-
CUDA_CHECK(hipEventCreate(&gpu_start));
102-
CUDA_CHECK(hipEventCreate(&gpu_stop));
103-
CUDA_CHECK(hipEventRecord(gpu_start, 0));
100+
HIP_CHECK(hipEventCreate(&gpu_start));
101+
HIP_CHECK(hipEventCreate(&gpu_stop));
102+
HIP_CHECK(hipEventRecord(gpu_start, 0));
104103

105104
// set kernel
106105
dim3 gridSize = 256;
@@ -112,40 +111,40 @@ void runBenchmarkTime(long max_work, int runtime_in_seconds) {
112111
// Run the workload loop until the specified runtime is reached
113112
while (getElapsedTime(gpu_start, gpu_stop) < runtime_in_seconds) {
114113
monte_carlo_kernel<<<gridSize, blockSize>>>(d_state, d_count, m);
115-
CUDA_CHECK(hipDeviceSynchronize()); // Ensure the kernel has finished executing
114+
HIP_CHECK(hipDeviceSynchronize()); // Ensure the kernel has finished executing
116115
iteration++;
117116
}
118117

119118
// copy results back to the host
120119
// Allocate device output array
121120
unsigned long long int *d_out = nullptr;
122-
CUDA_CHECK(hipMalloc((void **)&d_out, sizeof(unsigned long long int)));
121+
HIP_CHECK(hipMalloc((void **)&d_out, sizeof(unsigned long long int)));
123122

124123
// Request and allocate temporary storage
125124
void *d_temp_storage = nullptr;
126125
size_t temp_storage_bytes = 0;
127126
hipcub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_count, d_out, 256);
128-
CUDA_CHECK(hipMalloc((void **)&d_temp_storage, temp_storage_bytes));
127+
HIP_CHECK(hipMalloc((void **)&d_temp_storage, temp_storage_bytes));
129128

130129
// Run
131130
hipcub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_count, d_out, 256);
132131

133132
float gpu_elapsed_time = getElapsedTime(gpu_start, gpu_stop);
134-
CUDA_CHECK(hipEventDestroy(gpu_start));
135-
CUDA_CHECK(hipEventDestroy(gpu_stop));
133+
HIP_CHECK(hipEventDestroy(gpu_start));
134+
HIP_CHECK(hipEventDestroy(gpu_stop));
136135

137136
// copy results back to the host
138137
unsigned long long int h_count = 0;
139-
CUDA_CHECK(hipMemcpy(&h_count, d_out, sizeof(unsigned long long int), hipMemcpyDeviceToHost));
138+
HIP_CHECK(hipMemcpy(&h_count, d_out, sizeof(unsigned long long int), hipMemcpyDeviceToHost));
140139

141140
// display results and timings for gpu
142141
float pi = h_count * 4.0 / (n * m) / iteration;
143142
std::cout << "Approximate pi calculated on GPU is: " << pi << " and calculation took " << gpu_elapsed_time << "s\n";
144143

145-
CUDA_CHECK(hipFree(d_count));
146-
CUDA_CHECK(hipFree(d_state));
147-
CUDA_CHECK(hipFree(d_out));
148-
CUDA_CHECK(hipFree(d_temp_storage));
144+
HIP_CHECK(hipFree(d_count));
145+
HIP_CHECK(hipFree(d_state));
146+
HIP_CHECK(hipFree(d_out));
147+
HIP_CHECK(hipFree(d_temp_storage));
149148
}
150149

151150
int main(int argc, char *argv[]) {

0 commit comments

Comments
 (0)