hip::host not working for NVIDIA platforms

I just started porting a kaiju CUDA/C++17 project. To do this, I’m using a powerful workstation with several NVIDIA GPUs. Moreover, I’m using HIP 6.2.6 on top of CUDA Toolkit 12.6, and to play it safe with AMD, I’m using Ubuntu 24.04 as my OS. Installation of all packages went smoothly.

While porting, I faced several setbacks during the linking steps with CMake, to the point of almost resorting to building the entire project by hand with make.

Once I had access to an AMD platform (a PC with ROCm 6.0.1 and a “gfx90a” GPU architecture), I managed to confirm my suspicion: the hip::host module is not working on my NVIDIA platform the way the HIP documentation describes in “Consuming the HIP API in C++ code”.

Has anyone faced a similar issue?

I believe a minimal reproducible-example would help to illustrate my observation:

I’m interested in building host libraries with CPU and GPU capabilities.
Let’s assume that I want a timer for measuring host and/or device executions.
This library has the following structure:

.
├── library
│   ├── common.h
│   ├── library.cpp
│   └── library.h
├── CMakeLists.txt
└── main.cpp

where common.h reads

#pragma once

// Backend alias layer: the backend-neutral dev* names below expand to either
// the HIP or the CUDA runtime API, selected by the HIP_ENABLED definition.
#ifdef HIP_ENABLED
// HIP backend: every dev* name maps to its hip* counterpart.
#include <hip/hip_runtime.h>
#define devError_t hipError_t
#define devSuccess hipSuccess
#define devGetErrorString hipGetErrorString
#define devEvent_t hipEvent_t
#define devEventCreate hipEventCreate
#define devEventRecord hipEventRecord
#define devEventSynchronize hipEventSynchronize
#define devEventElapsedTime hipEventElapsedTime
#define devEventDestroy hipEventDestroy
#else
// CUDA backend (used whenever HIP_ENABLED is not defined): every dev* name
// maps to its cuda* counterpart.
#include <cuda_runtime.h>
#define devError_t cudaError_t
#define devSuccess cudaSuccess
#define devGetErrorString cudaGetErrorString
#define devEvent_t cudaEvent_t
#define devEventCreate cudaEventCreate
#define devEventRecord cudaEventRecord
#define devEventSynchronize cudaEventSynchronize
#define devEventElapsedTime cudaEventElapsedTime
#define devEventDestroy cudaEventDestroy
#endif

// Process exit status used by DEV_CHECK when a runtime call fails.
constexpr int error_exit_code = -1;

// DEV_CHECK(call): evaluates `call` exactly once. If the result differs from
// devSuccess, prints the backend error string together with the file and line
// of the call site to std::cerr, then terminates via
// std::exit(error_exit_code).
//
// The body is wrapped in do { ... } while (0) so that `DEV_CHECK(x);` behaves
// as a single statement, including inside an unbraced if/else. The argument is
// parenthesized, and the local uses an unlikely name so that it cannot collide
// with identifiers appearing in `call`.
//
// NOTE: the macro uses std::cerr and std::exit; the translation unit that
// expands it must have <iostream> and <cstdlib> in scope.
#define DEV_CHECK(condition)                                                \
do {                                                                        \
    const devError_t dev_check_status_ = (condition);                       \
    if (dev_check_status_ != devSuccess)                                    \
    {                                                                       \
        std::cerr << "An error encountered: \""                             \
            << devGetErrorString(dev_check_status_)                         \
            << "\" at " << __FILE__ << ':' << __LINE__ << std::endl;        \
        std::exit(error_exit_code);                                         \
    }                                                                       \
} while (0)

my library.h reads

#pragma once

// My "super" library for measuring time on GPU and CPU

#include "common.h"

// Interval timer backed by a pair of device runtime events (devEvent_t is the
// backend-neutral alias from common.h). The member functions are defined in
// library.cpp.
class DeviceEvent {
  public:
    DeviceEvent();
    ~DeviceEvent();

    // The object owns two event handles that its destructor destroys. A copy
    // would destroy the same handles a second time, so copying is disabled.
    DeviceEvent(const DeviceEvent&) = delete;
    DeviceEvent& operator=(const DeviceEvent&) = delete;

    // Marks the beginning of the timed interval.
    void record();
    // Marks the end of the timed interval.
    void stop();
    // Returns the time measured between record() and stop().
    float elapsed();

  private:
    devEvent_t start;  // event recorded by record()
    devEvent_t end;    // event recorded by stop()
};


// Interval timer for host code built on std::chrono.
//
// Uses std::chrono::steady_clock: it is guaranteed to be monotonic, whereas
// high_resolution_clock may be an alias of a clock that can be adjusted while
// an interval is being measured.
class HostEvent {
  public:
    HostEvent() = default;
    ~HostEvent() = default;

    // Stores the current time as the beginning of the interval.
    void record() {
      start = clock_type::now();
    }

    // Stores the current time as the end of the interval.
    void stop() {
      end = clock_type::now();
    }

    // Returns end - start in seconds (duration<float> defaults to a period of
    // one second). Does not modify the object, hence const.
    float elapsed() const {
      const std::chrono::duration<float> seconds = end - start;
      return seconds.count();
    }

  private:
    using clock_type = std::chrono::steady_clock;

    clock_type::time_point start;
    clock_type::time_point end;
};


template <bool IS_ON_DEV = false>
class timer {
  public:
    timer() : event() {}
    ~timer() = default;

    void tic() {
      event.record();
    }

    float toc() {
      event.stop();
      return event.elapsed();
    }

  private:
    std::conditional_t<IS_ON_DEV, DeviceEvent, HostEvent> event;
};

its companion source library.cpp reads

#include "library.h"
#include "common.h"

DeviceEvent::DeviceEvent() {
    DEV_CHECK(devEventCreate(&start));
    DEV_CHECK(devEventCreate(&end));
}
DeviceEvent::~DeviceEvent() {
    DEV_CHECK(devEventDestroy(start));
    DEV_CHECK(devEventDestroy(end));
}
void DeviceEvent::record() {
    DEV_CHECK(devEventRecord(start, 0));
}
void DeviceEvent::stop() {
    DEV_CHECK(devEventRecord(end, 0));
    DEV_CHECK(devEventSynchronize(end));
}
float DeviceEvent::elapsed() {
    float milliseconds;
    DEV_CHECK(devEventElapsedTime(&milliseconds, start, end));
    return (1E-3) * milliseconds;
}

NOTE: I chose to define the DeviceEvent members in the .cpp file just to have something to compile, so that this example is not a header-only library.

The main.cpp of this example reads

#include "library.h"
#include <iostream>

int main() {

    float time;

    timer<false> hostTimer;
    hostTimer.tic();
    // Do some work on the host
    time = hostTimer.toc();
    std::cout << "Host time: " << time << std::endl;


    timer<true> devTimer;
    devTimer.tic();
    // Do some work on the device
    time = devTimer.toc();
    std::cout << "Device time: " << time << std::endl;

    return 0;
}

and lastly, to compile this example, my CMakeLists.txt reads:

# Build script for the devEvent example: one shared library compiled with the
# selected GPU language (HIP or CUDA) and two driver executables built from
# main.cpp that link against it.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
project(devEvent_library LANGUAGES CXX)

# User-selectable GPU language; the STRINGS property restricts the choices
# offered by cmake-gui/ccmake to "HIP" and "CUDA".
set(BUILD_GPU_LANGUAGE "HIP" CACHE STRING "Switches between HIP and CUDA")
set_property(CACHE BUILD_GPU_LANGUAGE PROPERTY STRINGS "HIP" "CUDA")

# Enable the selected language and require C++17 without extensions for it.
enable_language(${BUILD_GPU_LANGUAGE})
set(CMAKE_${BUILD_GPU_LANGUAGE}_STANDARD 17)
set(CMAKE_${BUILD_GPU_LANGUAGE}_EXTENSIONS OFF)
set(CMAKE_${BUILD_GPU_LANGUAGE}_STANDARD_REQUIRED ON)

# Let find_package() search the ROCm installation prefix.
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")

# Find packages
# DEPENDENCIES holds the imported target that the driver executables link
# against to consume the runtime API from host-compiled sources.
if (BUILD_GPU_LANGUAGE STREQUAL "HIP")
  find_package(hip REQUIRED)
  set(DEPENDENCIES hip::host)
  # common.h can be read by either the GPU-language compiler or the host C++
  # compiler, so HIP_ENABLED is defined for every target in this directory.
  add_compile_definitions(HIP_ENABLED)
else()
  find_package(CUDAToolkit REQUIRED)
  set(DEPENDENCIES CUDA::cudart)
endif()

# Create SHARED or STATIC library on the host.
# library.cpp is compiled as ${BUILD_GPU_LANGUAGE} instead of CXX via the
# LANGUAGE source-file property; the include directory is PUBLIC so that
# consumers of the library see library.h and common.h.
set(library_name Test)
add_library(${library_name} SHARED library/library.cpp)
target_include_directories(${library_name} PUBLIC library)
set_target_properties(${library_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_source_files_properties(library/library.cpp PROPERTIES LANGUAGE ${BUILD_GPU_LANGUAGE})

# Create a driver executable using ROCm's bundled version of clang.
# NOTE(review): main.cpp keeps its default CXX language here, so the include
# path for the runtime header has to come from ${DEPENDENCIES} — confirm which
# compiler is intended for this target.
set(PROJECT_NAME_clang ${PROJECT_NAME}_clang)
add_executable(${PROJECT_NAME_clang} main.cpp)
target_link_libraries(${PROJECT_NAME_clang} PRIVATE ${library_name} ${DEPENDENCIES})
set_target_properties(${PROJECT_NAME_clang} PROPERTIES HIP_ARCHITECTURES FALSE)

# Create a driver executable using the host c++ compiler.
# Same sources and link line as above, with the linker language forced to CXX.
set(PROJECT_NAME_cxx ${PROJECT_NAME}_cxx)
add_executable(${PROJECT_NAME_cxx} main.cpp)
target_link_libraries(${PROJECT_NAME_cxx} PRIVATE ${library_name} ${DEPENDENCIES})
set_target_properties(${PROJECT_NAME_cxx} PROPERTIES LINKER_LANGUAGE CXX)

# Create ctests:
# Each driver executable is registered as a test that runs it once.
enable_testing()
add_test(NAME ${PROJECT_NAME_clang} COMMAND ${PROJECT_NAME_clang})
add_test(NAME ${PROJECT_NAME_cxx} COMMAND ${PROJECT_NAME_cxx})

If you follow this snippet, you’ll notice it can be compiled with either CUDA or HIP languages.

  • On an NVIDIA Platform with BUILD_GPU_LANGUAGE=CUDA, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.3.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The CUDA compiler identification is NVIDIA 12.6.85
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: /usr/local/cuda-12.6/bin/nvcc - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
-- Found CUDAToolkit: /usr/local/cuda-12.6/targets/x86_64-linux/include (found version "12.6.85") 
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE  
-- Configuring done (2.2s)
-- Generating done (0.0s)
-- Build files have been written to: /home/mdiaz/Depots/devLibrary/devEvent_library/build

$ make
[ 16%] Building CUDA object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking CUDA shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
[ 66%] Linking CXX executable devEvent_library_clang
[ 66%] Built target devEvent_library_clang
[ 83%] Building CXX object CMakeFiles/devEvent_library_cxx.dir/main.cpp.o
[100%] Linking CXX executable devEvent_library_cxx
[100%] Built target devEvent_library_cxx

$ ctest
Test project /home/mdiaz/Depots/devLibrary/devEvent_library/build
    Start 1: devEvent_library_clang
1/2 Test #1: devEvent_library_clang ...........   Passed    0.39 sec
    Start 2: devEvent_library_cxx
2/2 Test #2: devEvent_library_cxx .............   Passed    0.25 sec

100% tests passed, 0 tests failed out of 2

Total Test time (real) =   0.64 sec
  • On an AMD Platform with BUILD_GPU_LANGUAGE=HIP, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.2.1
-- Cray Programming Environment 2.7.31.11 CXX
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /opt/cray/pe/craype/2.7.31.11/bin/CC - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The HIP compiler identification is Clang 17.0.0
-- Detecting HIP compiler ABI info
-- Detecting HIP compiler ABI info - done
-- Check for working HIP compiler: /opt/rocm-6.0.3/llvm/bin/clang++ - skipped
-- Detecting HIP compile features
-- Detecting HIP compile features - done
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success
-- Found Threads: TRUE
-- Configuring done (6.8s)
-- Generating done (0.1s)
-- Build files have been written to: /users/mdiazesc/Depots/devTest/devEvent_library/build

$ make
[ 16%] Building HIP object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking HIP shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
[ 66%] Linking CXX executable devEvent_library_clang
[ 66%] Built target devEvent_library_clang
[ 83%] Building CXX object CMakeFiles/devEvent_library_cxx.dir/main.cpp.o
[100%] Linking CXX executable devEvent_library_cxx
[100%] Built target devEvent_library_cxx

$ ctest
Test project /users/mdiazesc/Depots/devTest/devEvent_library/build
    Start 1: devEvent_library_clang
1/2 Test #1: devEvent_library_clang ...........   Passed    0.39 sec
    Start 2: devEvent_library_cxx
2/2 Test #2: devEvent_library_cxx .............   Passed    0.25 sec

100% tests passed, 0 tests failed out of 2
  • But, on an NVIDIA Platform with BUILD_GPU_LANGUAGE=HIP, I obtain
$ cmake ..
-- The CXX compiler identification is GNU 13.3.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The HIP compiler identification is NVIDIA 12.6.85
-- Detecting HIP compiler ABI info
-- Detecting HIP compiler ABI info - done
-- Check for working HIP compiler: /usr/local/cuda-12.6/bin/nvcc - skipped
-- Detecting HIP compile features
-- Detecting HIP compile features - done
-- Configuring done (2.1s)
-- Generating done (0.0s)
-- Build files have been written to: /home/mdiaz/Depots/devLibrary/devEvent_library/build

$ make
[ 16%] Building HIP object CMakeFiles/Test.dir/library/library.cpp.o
[ 33%] Linking HIP shared library libTest.so
[ 33%] Built target Test
[ 50%] Building CXX object CMakeFiles/devEvent_library_clang.dir/main.cpp.o
In file included from /home/mdiaz/Depots/devLibrary/devEvent_library/library/library.h:6,
                 from /home/mdiaz/Depots/devLibrary/devEvent_library/main.cpp:1:
/home/mdiaz/Depots/devLibrary/devEvent_library_FAIL/library/common.h:16:10: fatal error: hip/hip_runtime.h: No such file or directory
   16 | #include <hip/hip_runtime.h>
      |          ^~~~~~~~~~~~~~~~~~~
compilation terminated.
make[2]: *** [CMakeFiles/devEvent_library_clang.dir/build.make:76: CMakeFiles/devEvent_library_clang.dir/main.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:113: CMakeFiles/devEvent_library_clang.dir/all] Error 2
make: *** [Makefile:101: all] Error 2

Thus, am I doing something wrong (and perhaps a TRUE expert can enlighten my understanding of this issue) … or do I need to make someone aware on either the CMake team or the HIP/ROCm team?

Cheers !

I can reproduce this in containers started from the rocm/dev-ubuntu-24.04:6.3.2 base image.

After configuring the posted sample project, I re-ran cmake . --trace-expand 2>log in the build tree.

On an AMD platform, the log shows the hip::host target getting built, and the /opt/rocm/include include directory added:

/opt/rocm/lib/cmake/hip/hip-targets.cmake(59):  add_library(hip::amdhip64 SHARED IMPORTED )
...
/opt/rocm/lib/cmake/hip/hip-targets.cmake(66):  add_library(hip::host INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(68):  set_target_properties(hip::host PROPERTIES INTERFACE_LINK_LIBRARIES hip::amdhip64 )
...
/opt/rocm/lib/cmake/hip/hip-config-amd.cmake(136):  set_target_properties(hip::amdhip64 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES /opt/rocm/include INTERFACE_SYSTEM_INCLUDE_DIRECTORIES /opt/rocm/include )

The hip::host target is created under an include() in the HIP cmake package:

$ grep "^include" /opt/rocm/lib/cmake/hip/hip-config-amd.cmake
include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )

On an NVIDIA platform, the log shows the hip::host target being created but not populated:

/opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake(22):  add_library(hip::host INTERFACE IMPORTED )
...

The hip::host target is created directly in the HIP cmake package, but not populated:

$ grep -v '^#' /opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake

add_library(hip::device INTERFACE IMPORTED)
add_library(hip::host INTERFACE IMPORTED)
add_library(hip::amdhip64 INTERFACE IMPORTED)

I suspect that the NVIDIA-backed implementation of HIP’s cmake package is incomplete.

1 Like

Thanks for your quick response!

I traced and expanded the variables generated on my project (on both platforms) just to re-confirm that this is also what is happening on my system(s).

Indeed, on my AMD Platform, I can observe :

/opt/rocm/lib/cmake/hip/hip-targets.cmake(59):  add_library(hip::amdhip64 SHARED IMPORTED )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(61):  set_target_properties(hip::amdhip64 PROPERTIES INTERFACE_COMPILE_DEFINITIONS USE_PROF_API=1 )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(66):  add_library(hip::host INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(68):  set_target_properties(hip::host PROPERTIES INTERFACE_LINK_LIBRARIES hip::amdhip64 )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(73):  add_library(hip::device INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-targets.cmake(75):  set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES hip::host )
...
/opt/rocm/lib/cmake/hip/hip-config-amd.cmake(86):  set_target_properties(hip::host PROPERTIES INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_AMD__=1 )
/opt/rocm/lib/cmake/hip/hip-config-amd.cmake(90):  set_target_properties(hip::amdhip64 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES /opt/rocm/include INTERFACE_SYSTEM_INCLUDE_DIRECTORIES /opt/rocm/include )
...

lots of work, … but on my NVIDIA platform, I only get :

...
/opt/rocm/lib/cmake/hip/hip-config.cmake(153):  include(/opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake )
/opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake(21):  add_library(hip::device INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake(22):  add_library(hip::host INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-config-nvidia.cmake(23):  add_library(hip::amdhip64 INTERFACE IMPORTED )
/opt/rocm/lib/cmake/hip/hip-config.cmake(158):  set(hip_LIBRARIES hip::host hip::device )
/opt/rocm/lib/cmake/hip/hip-config.cmake(159):  set(hip_LIBRARY hip::host;hip::device )
/opt/rocm/lib/cmake/hip/hip-config.cmake(161):  set(HIP_INCLUDE_DIR /opt/rocm/include )
/opt/rocm/lib/cmake/hip/hip-config.cmake(162):  set(HIP_INCLUDE_DIRS /opt/rocm/include )
/opt/rocm/lib/cmake/hip/hip-config.cmake(163):  set(HIP_LIB_INSTALL_DIR /opt/rocm/lib )
/opt/rocm/lib/cmake/hip/hip-config.cmake(164):  set(HIP_BIN_INSTALL_DIR /opt/rocm/bin )
/opt/rocm/lib/cmake/hip/hip-config.cmake(165):  set(HIP_LIBRARIES hip::host;hip::device )
/opt/rocm/lib/cmake/hip/hip-config.cmake(166):  set(HIP_LIBRARY hip::host;hip::device )
/opt/rocm/lib/cmake/hip/hip-config.cmake(167):  set(HIP_HIPCC_EXECUTABLE /opt/rocm/bin/hipcc )
/opt/rocm/lib/cmake/hip/hip-config.cmake(168):  set(HIP_HIPCONFIG_EXECUTABLE /opt/rocm/bin/hipconfig )

not really much is happening.
… I’ll post an issue on the hip project to inquire more.