enable_language(CUDA) -Xcompiler error

jclombardo · February 16, 2021, 4:59pm

Hi,

I can’t manage to generate a correct nvcc compile line with the CUDA language enabled, at least when many compiler definitions (coming from external dependencies) are present.

The -Xcompiler options are separated by spaces instead of commas and thus are interpreted by nvcc itself.

My project’s cmake_minimum_required is 3.10
I’ve tested with cmake version ranging from 3.14.5 to latest self compiled 3.20.20210216-g6e2e906
I’ve also tested nvcc from cuda 9.2, 10.2 and 11.1
I’ve also tried Ninja and Make generators.
No luck so far…

An example:

[4/450] Building CUDA object src/PMH4/Base/CMakeFiles/PMH4Base.dir/CudaTools.cu.o
FAILED: src/PMH4/Base/CMakeFiles/PMH4Base.dir/CudaTools.cu.o 
/usr/local/cuda-9.2/bin/nvcc -ccbin=/opt/gcc6/bin/g++ -DBUK_PLUGIN_PREFIX=\"plug\" -DFINTEGER=int -DGFLAGS_IS_A_DLL=0 -DGOOGLE_GLOG_DLL_DECL="" -DGOOGLE_GLOG_DLL_DECL_FOR_UNITTESTS="" -DHAVE_CUDA=1 -DPMH4Base_EXPORTS -DPMH4_USE_CUDA -DSO_VERSION=\"4.1\" -DUSE_OPENMP -DVERSION_STRING=\"4.1.662\" -I../src -I/user/jclombar/home/SnoopGPU92/./include -isystem=/user/jclombar/home/SnoopGPU92/3rdParty/include --generate-code=arch=compute_30,code=sm_30 --generate-code=arch=compute_32,code=sm_32 --generate-code=arch=compute_35,code=sm_35 --generate-code=arch=compute_50,code=sm_50 --generate-code=arch=compute_53,code=sm_53 --generate-code=arch=compute_60,code=sm_60 --generate-code=arch=compute_61,code=sm_61 --generate-code=arch=compute_62,code=sm_62 --generate-code=arch=compute_70,code=compute_70 --generate-code=arch=compute_70,code=sm_70 -O2 -g -DNDEBUG -Xcompiler=-fPIC   -msse4.2 -fPIC -fopenmp -Wall -Wextra -Wconversion -Wshadow -Wdouble-promotion -Wfloat-equal -Wno-unused-result -Werror -x cu -c ../src/PMH4/Base/CudaTools.cu -o src/PMH4/Base/CMakeFiles/PMH4Base.dir/CudaTools.cu.o
/usr/local/cuda-9.2/bin/nvcc -ccbin=/opt/gcc6/bin/g++ -DBUK_PLUGIN_PREFIX=\"plug\" -DFINTEGER=int -DGFLAGS_IS_A_DLL=0 -DGOOGLE_GLOG_DLL_DECL="" -DGOOGLE_GLOG_DLL_DECL_FOR_UNITTESTS="" -DHAVE_CUDA=1 -DPMH4Base_EXPORTS -DPMH4_USE_CUDA -DSO_VERSION=\"4.1\" -DUSE_OPENMP -DVERSION_STRING=\"4.1.662\" -I../src -I/user/jclombar/home/SnoopGPU92/./include -isystem=/user/jclombar/home/SnoopGPU92/3rdParty/include --generate-code=arch=compute_30,code=sm_30 --generate-code=arch=compute_32,code=sm_32 --generate-code=arch=compute_35,code=sm_35 --generate-code=arch=compute_50,code=sm_50 --generate-code=arch=compute_53,code=sm_53 --generate-code=arch=compute_60,code=sm_60 --generate-code=arch=compute_61,code=sm_61 --generate-code=arch=compute_62,code=sm_62 --generate-code=arch=compute_70,code=compute_70 --generate-code=arch=compute_70,code=sm_70 -O2 -g -DNDEBUG -Xcompiler=-fPIC   -msse4.2 -fPIC -fopenmp -Wall -Wextra -Wconversion -Wshadow -Wdouble-promotion -Wfloat-equal -Wno-unused-result -Werror -x cu -M ../src/PMH4/Base/CudaTools.cu -MT src/PMH4/Base/CMakeFiles/PMH4Base.dir/CudaTools.cu.o -o src/PMH4/Base/CMakeFiles/PMH4Base.dir/CudaTools.cu.o.d
nvcc fatal   : 'sse4.2': expected a number

The interesting part is -Xcompiler=-fPIC -msse4.2 -fPIC -fopenmp -Wall ... which should be -Xcompiler=-fPIC,-msse4.2,-fPIC,-fopenmp,-Wall,... to be fed to the ccbin compiler instead of nvcc.

I’ve been digging around but could not find any solution.
The “Working with targets” section from https://cliutils.gitlab.io/modern-cmake/chapters/packages/CUDA.html gives some tips, but I am not able to find which target/source property holds the -fPIC, -msse4.2, ... flags.

Any help would be appreciated !

ben.boeckel · February 16, 2021, 8:22pm

Cc: @robert.maynard

robert.maynard · February 17, 2021, 2:47pm

Unfortunately CMake doesn’t have a great way to trace which target is injecting these compile options onto the build line. Changing your CMake / CUDA version won’t help; instead you will need to search your CMake code for the -msse4.2 string and determine the source.

Using CMake 3.17+ and CUDA 10.2+ will make your life easier, as from that point forward CMake will automatically inject the -forward-unknown-to-host-compiler option when compiling with nvcc. This will fix most of your problems, except for -msse4.2, as -m is a valid option for nvcc and is currently clashing.

jclombardo · February 22, 2021, 3:16pm

Finally it works by applying the following function to my Cuda targets (exe, lib, tests)

# CUDA_CONVERT_FLAGS(<target>)
#
# Rewrites the COMPILE_OPTIONS property of <target> so that nvcc forwards the
# options to the host compiler instead of parsing them itself (an option such
# as `-msse4.2` otherwise makes nvcc fail with "'sse4.2': expected a number").
#
# After the call:
#   * CUDA sources receive one `-Xcompiler=<opt1>,<opt2>,...` argument.
#   * Sources of every other language (C, CXX, ...) keep the original options.
#
# Arguments:
#   EXISTING_TARGET - name of an already defined target.
#
# The function does nothing when HAVE_CUDA is false or when the target has no
# COMPILE_OPTIONS set. It is a fatal error to pass a name that is not a target.
#
# NOTE(review): the options are joined textually (";" -> ","). An option that
# itself contains ";" or "," (for example a generator expression carrying a
# list) will presumably not survive the conversion intact - confirm that no
# converted target relies on such options. Calling the function twice on the
# same target wraps the already converted value again, so call it once, after
# all compile options have been added.
#
# Example:
#   add_library(PMH4Base CudaTools.cu Other.cpp)
#   CUDA_CONVERT_FLAGS(PMH4Base)
function(CUDA_CONVERT_FLAGS EXISTING_TARGET)
    if(NOT HAVE_CUDA)
        return()
    endif()

    if(NOT TARGET "${EXISTING_TARGET}")
        message(FATAL_ERROR
            "CUDA_CONVERT_FLAGS: '${EXISTING_TARGET}' is not an existing target")
    endif()

    # Yields "old_flags-NOTFOUND" (a false value) when the property is unset.
    get_target_property(old_flags "${EXISTING_TARGET}" COMPILE_OPTIONS)
    if(NOT old_flags)
        return()
    endif()

    # nvcc expects the host compiler options as one comma separated list.
    # string(REPLACE) is used rather than list(JOIN) to stay usable with
    # CMake versions older than 3.12.
    string(REPLACE ";" "," CUDA_flags "${old_flags}")

    # CUDA_TOOLKIT_ROOT_DIR is not set in this function; when the caller's
    # scope provides it, the toolkit headers are also made visible to the
    # host compiler.
    if(CUDA_TOOLKIT_ROOT_DIR)
        string(APPEND CUDA_flags ",-I${CUDA_TOOLKIT_ROOT_DIR}/include")
    endif()

    # Gate on "not CUDA" rather than on CXX alone so that C (or any other
    # host language) sources in the target do not lose their options.
    set(host_options "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:${old_flags}>")
    set(cuda_options "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${CUDA_flags}>")

    # Both expressions are stored as a single property value; for any given
    # source language exactly one of them expands to a non-empty string.
    set_property(TARGET "${EXISTING_TARGET}" PROPERTY COMPILE_OPTIONS
        "${host_options}${cuda_options}"
        )
endfunction()

HAVE_CUDA is defined by

# Define HAVE_CUDA: True when CUDA is among the languages enabled so far
# (e.g. through enable_language(CUDA)), False otherwise. This must run after
# the languages have been enabled, since the property is read at this point.
get_property(lang_list GLOBAL PROPERTY ENABLED_LANGUAGES)
# The literal is quoted so that if() compares the string "CUDA" itself and
# does not dereference a variable that happens to be named CUDA.
if("CUDA" IN_LIST lang_list)
    set(HAVE_CUDA True)
else()
    set(HAVE_CUDA False)
endif()