I believe a minimal reproducible example will help illustrate my observation:
I’m interested in building host libraries with CPU and GPU capabilities.
Let’s assume that I want a timer for measuring host and/or device execution times.
This library has the following structure:
.
├── library
│ ├── common.h
│ ├── library.cpp
│ └── library.h
├── CMakeLists.txt
└── main.cpp
where common.h
reads
#pragma once

// Standard headers needed by DEV_CHECK itself (std::cerr, std::endl, std::exit),
// so that this header is self-contained regardless of include order.
#include <cstdlib>
#include <iostream>

// Backend selection: every dev* name below maps to the HIP runtime API when
// HIP_ENABLED is defined, and to the CUDA runtime API otherwise.
#ifdef HIP_ENABLED
#include <hip/hip_runtime.h>
#define devError_t hipError_t
#define devSuccess hipSuccess
#define devGetErrorString hipGetErrorString
#define devEvent_t hipEvent_t
#define devEventCreate hipEventCreate
#define devEventRecord hipEventRecord
#define devEventSynchronize hipEventSynchronize
#define devEventElapsedTime hipEventElapsedTime
#define devEventDestroy hipEventDestroy
#else
#include <cuda_runtime.h>
#define devError_t cudaError_t
#define devSuccess cudaSuccess
#define devGetErrorString cudaGetErrorString
#define devEvent_t cudaEvent_t
#define devEventCreate cudaEventCreate
#define devEventRecord cudaEventRecord
#define devEventSynchronize cudaEventSynchronize
#define devEventElapsedTime cudaEventElapsedTime
#define devEventDestroy cudaEventDestroy
#endif

// Exit status used by DEV_CHECK when a runtime call fails.
constexpr int error_exit_code = -1;

// DEV_CHECK(call): evaluates `call` exactly once; if the result is not
// devSuccess, prints the runtime's error string together with the file and
// line of the call site to std::cerr and terminates the process with
// error_exit_code.
//
// The body is wrapped in do { ... } while (0) so that `DEV_CHECK(x);` behaves
// as a single statement (safe inside an unbraced if/else). The argument is
// parenthesized, and the local status variable has a macro-specific name so
// it cannot shadow a variable referenced by the checked expression.
#define DEV_CHECK(condition)                                                         \
    do {                                                                             \
        const devError_t dev_check_status_ = (condition);                            \
        if (dev_check_status_ != devSuccess)                                         \
        {                                                                            \
            std::cerr << "An error encountered: \""                                  \
                      << devGetErrorString(dev_check_status_)                        \
                      << "\" at " << __FILE__ << ':' << __LINE__ << std::endl;       \
            std::exit(error_exit_code);                                              \
        }                                                                            \
    } while (0)
my library.h
reads
#pragma once
// My "super" library for measuring time on GPU and CPU
#include <chrono>
#include <type_traits>

#include "common.h"
// Owns a pair of GPU events (`start` / `end`) used to time work on the device.
//
// The constructor creates both events and the destructor destroys them (see
// library.cpp). Because the class owns raw event handles, copying is disabled:
// an implicit copy would duplicate the handles and each one would then be
// destroyed twice.
class DeviceEvent {
public:
    DeviceEvent();
    ~DeviceEvent();

    // Non-copyable: the event handles have a single owner.
    DeviceEvent(const DeviceEvent&) = delete;
    DeviceEvent& operator=(const DeviceEvent&) = delete;

    // Records the `start` event.
    void record();
    // Records the `end` event and synchronizes on it.
    void stop();
    // Returns the time between `start` and `end`, in seconds.
    float elapsed();

private:
    devEvent_t start;
    devEvent_t end;
};
// Host-side stopwatch exposing the same record()/stop()/elapsed() interface
// as DeviceEvent.
//
// Uses std::chrono::steady_clock: it is guaranteed monotonic, whereas
// high_resolution_clock may alias a wall clock that can be adjusted between
// record() and stop(), yielding wrong or even negative intervals.
class HostEvent {
public:
    HostEvent() = default;
    ~HostEvent() = default;

    // Marks the beginning of the timed region.
    void record() {
        start = clock_type::now();
    }

    // Marks the end of the timed region.
    void stop() {
        end = clock_type::now();
    }

    // Returns the time between the last record() and the last stop(), in
    // seconds. Returns 0 if neither has been called yet.
    float elapsed() const {
        const std::chrono::duration<float> seconds = end - start;
        return seconds.count();
    }

private:
    using clock_type = std::chrono::steady_clock;

    clock_type::time_point start{};
    clock_type::time_point end{};
};
template <bool IS_ON_DEV = false>
class timer {
public:
timer() : event() {}
~timer() = default;
void tic() {
event.record();
}
float toc() {
event.stop();
return event.elapsed();
}
private:
std::conditional_t<IS_ON_DEV, DeviceEvent, HostEvent> event;
};
its companion source library.cpp
reads
#include "library.h"
#include "common.h"
// Creates the two device events (`start` and `end`) that bracket a timed region.
// Each call goes through DEV_CHECK, which prints the error and exits the
// process if the runtime does not return devSuccess.
DeviceEvent::DeviceEvent() {
DEV_CHECK(devEventCreate(&start));
DEV_CHECK(devEventCreate(&end));
}
// Destroys the `start` and `end` events created by the constructor.
// NOTE(review): DEV_CHECK calls std::exit on failure, so a failed destroy
// terminates the process from inside a destructor (and `end` is never
// destroyed if destroying `start` fails) — confirm this is intended.
DeviceEvent::~DeviceEvent() {
DEV_CHECK(devEventDestroy(start));
DEV_CHECK(devEventDestroy(end));
}
// Records the `start` event. The second argument (0) is the stream argument —
// presumably the default stream; verify against the runtime documentation.
void DeviceEvent::record() {
DEV_CHECK(devEventRecord(start, 0));
}
// Records the `end` event (same stream argument as record()) and then calls
// devEventSynchronize on it — presumably blocking the host until the event has
// completed, so that elapsed() can be queried afterwards.
void DeviceEvent::stop() {
DEV_CHECK(devEventRecord(end, 0));
DEV_CHECK(devEventSynchronize(end));
}
// Queries the time between the `start` and `end` events and returns it in
// seconds (the runtime value, held in a milliseconds variable, is scaled by 1E-3).
float DeviceEvent::elapsed() {
    constexpr double seconds_per_millisecond = 1E-3;

    float elapsed_ms = 0.0f;
    DEV_CHECK(devEventElapsedTime(&elapsed_ms, start, end));

    // The product is computed in double and narrowed to float on return.
    return static_cast<float>(seconds_per_millisecond * elapsed_ms);
}
NOTE: I chose to define the DeviceEvent
members in the cpp
file just to have something to compile, so that this example is not a header-only library.
The main.cpp
of this example reads
#include "library.h"
#include <iostream>
int main() {
float time;
timer<false> hostTimer;
hostTimer.tic();
// Do some work on the host
time = hostTimer.toc();
std::cout << "Host time: " << time << std::endl;
timer<true> devTimer;
devTimer.tic();
// Do some work on the device
time = devTimer.toc();
std::cout << "Device time: " << time << std::endl;
return 0;
}
and lastly, to compile this example, my CMakeLists.txt
reads:
# Build script for the devEvent_library example: one shared library whose
# source is compiled as a GPU language (HIP or CUDA), plus two driver
# executables built from the same main.cpp, each registered as a CTest test.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
project(devEvent_library LANGUAGES CXX)
# The GPU language is a cache option (HIP by default, CUDA as the alternative)
# and is enabled on top of CXX.
set(BUILD_GPU_LANGUAGE "HIP" CACHE STRING "Switches between HIP and CUDA")
set_property(CACHE BUILD_GPU_LANGUAGE PROPERTY STRINGS "HIP" "CUDA")
enable_language(${BUILD_GPU_LANGUAGE})
# Require standard 17, without compiler extensions, for the selected GPU language.
set(CMAKE_${BUILD_GPU_LANGUAGE}_STANDARD 17)
set(CMAKE_${BUILD_GPU_LANGUAGE}_EXTENSIONS OFF)
set(CMAKE_${BUILD_GPU_LANGUAGE}_STANDARD_REQUIRED ON)
# Add the ROCm installation prefix to the find_package() search path.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")
# Find packages
# DEPENDENCIES holds the runtime target that the executables link against below.
if (BUILD_GPU_LANGUAGE STREQUAL "HIP")
find_package(hip REQUIRED)
set(DEPENDENCIES hip::host)
# common.h is read both when compiling library.cpp (as the GPU language) and
# when compiling main.cpp, so HIP_ENABLED is defined for every target created
# in this directory rather than for a single target.
add_compile_definitions(HIP_ENABLED)
else()
find_package(CUDAToolkit REQUIRED)
set(DEPENDENCIES CUDA::cudart)
endif()
# Create SHARED or STATIC library on the host.
# library.cpp keeps its .cpp extension, so it is explicitly tagged with the
# selected GPU language. The include directory is PUBLIC so that consumers can
# find library.h / common.h.
set(library_name Test)
add_library(${library_name} SHARED library/library.cpp)
target_include_directories(${library_name} PUBLIC library)
set_target_properties(${library_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_source_files_properties(library/library.cpp PROPERTIES LANGUAGE ${BUILD_GPU_LANGUAGE})
# Create a driver executable using ROCm's bundled version of clang.
# NOTE(review): main.cpp is not re-tagged with the GPU language (only
# library.cpp is), so this target's source presumably still goes through the
# CXX compiler and HIP_ARCHITECTURES may have no effect here — confirm.
set(PROJECT_NAME_clang ${PROJECT_NAME}_clang)
add_executable(${PROJECT_NAME_clang} main.cpp)
target_link_libraries(${PROJECT_NAME_clang} PRIVATE ${library_name} ${DEPENDENCIES})
set_target_properties(${PROJECT_NAME_clang} PROPERTIES HIP_ARCHITECTURES FALSE)
# Create a driver executable using the host c++ compiler.
# Same source and link line as above; only the linker language is forced to CXX.
set(PROJECT_NAME_cxx ${PROJECT_NAME}_cxx)
add_executable(${PROJECT_NAME_cxx} main.cpp)
target_link_libraries(${PROJECT_NAME_cxx} PRIVATE ${library_name} ${DEPENDENCIES})
set_target_properties(${PROJECT_NAME_cxx} PROPERTIES LINKER_LANGUAGE CXX)
# Create ctests:
# Each driver executable is run as its own test.
enable_testing()
add_test(NAME ${PROJECT_NAME_clang} COMMAND ${PROJECT_NAME_clang})
add_test(NAME ${PROJECT_NAME_cxx} COMMAND ${PROJECT_NAME_cxx})
If you follow this snippet, you’ll notice it can be compiled with either the CUDA
or the HIP
language.
- On an NVIDIA platform with
BUILD_GPU_LANGUAGE=CUDA
, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.3.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The CUDA compiler identification is NVIDIA 12.6.85
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: /usr/local/cuda-12.6/bin/nvcc - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
-- Found CUDAToolkit: /usr/local/cuda-12.6/targets/x86_64-linux/include (found version "12.6.85")
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
-- Configuring done (2.2s)
-- Generating done (0.0s)
-- Build files have been written to: /home/mdiaz/Depots/devLibrary/devEvent_library/build
$ make
[ 16%] Building CUDA object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking CUDA shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
[ 66%] Linking CXX executable devEvent_library_clang
[ 66%] Built target devEvent_library_clang
[ 83%] Building CXX object CMakeFiles/devEvent_library_cxx.dir/main.cpp.o
[100%] Linking CXX executable devEvent_library_cxx
[100%] Built target devEvent_library_cxx
$ ctest
Test project /home/mdiaz/Depots/devLibrary/devEvent_library/build
Start 1: devEvent_library_clang
1/2 Test #1: devEvent_library_clang ........... Passed 0.39 sec
Start 2: devEvent_library_cxx
2/2 Test #2: devEvent_library_cxx ............. Passed 0.25 sec
100% tests passed, 0 tests failed out of 2
Total Test time (real) = 0.64 sec
- On an AMD platform with
BUILD_GPU_LANGUAGE=HIP
, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.2.1
-- Cray Programming Environment 2.7.31.11 CXX
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /opt/cray/pe/craype/2.7.31.11/bin/CC - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The HIP compiler identification is Clang 17.0.0
-- Detecting HIP compiler ABI info
-- Detecting HIP compiler ABI info - done
-- Check for working HIP compiler: /opt/rocm-6.0.3/llvm/bin/clang++ - skipped
-- Detecting HIP compile features
-- Detecting HIP compile features - done
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
-- Configuring done (6.8s)
-- Generating done (0.1s)
-- Build files have been written to: /users/mdiazesc/Depots/devTest/devEvent_library/build
$ make
[ 16%] Building HIP object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking HIP shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
[ 66%] Linking CXX executable devEvent_library_clang
[ 66%] Built target devEvent_library_clang
[ 83%] Building CXX object CMakeFiles/devEvent_library_cxx.dir/main.cpp.o
[100%] Linking CXX executable devEvent_library_cxx
[100%] Built target devEvent_library_cxx
$ ctest
Test project /users/mdiazesc/Depots/devTest/devEvent_library/build
Start 1: devEvent_library_clang
1/2 Test #1: devEvent_library_clang ........... Passed 0.39 sec
Start 2: devEvent_library_cxx
2/2 Test #2: devEvent_library_cxx ............. Passed 0.25 sec
100% tests passed, 0 tests failed out of 2
- But on an NVIDIA platform with
BUILD_GPU_LANGUAGE=HIP
, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.3.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The HIP compiler identification is NVIDIA 12.6.85
-- Detecting HIP compiler ABI info
-- Detecting HIP compiler ABI info - done
-- Check for working HIP compiler: /usr/local/cuda-12.6/bin/nvcc - skipped
-- Detecting HIP compile features
-- Detecting HIP compile features - done
-- Configuring done (2.1s)
-- Generating done (0.0s)
-- Build files have been written to: /home/mdiaz/Depots/devLibrary/devEvent_library/build
$ make
[ 16%] Building HIP object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking HIP shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
In file included from /home/mdiaz/Depots/devLibrary/devEvent_library/library/library.h:6,
from /home/mdiaz/Depots/devLibrary/devEvent_library/main.cpp:1:
/home/mdiaz/Depots/devLibrary/devEvent_library_FAIL/library/common.h:16:10: fatal error: hip/hip_runtime.h: No such file or directory
16 | #include <hip/hip_runtime.h>
| ^~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [CMakeFiles/devEvent_library_clang.dir/build.make:76: CMakeFiles/devEvent_library_clang.dir/main.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:113: CMakeFiles/devEvent_library_clang.dir/all] Error 2
make: *** [Makefile:101: all] Error 2
So, am I doing something wrong (perhaps a true expert can enlighten me on this issue) … or do I need to report this to either the CMake team or the HIP/ROCm team?
Cheers !