CUDA 5.0 separate compilation of library with cmake

后端 未结 4 1101
情话喂你
情话喂你 2020-12-05 11:59

The build time of my CUDA library is increasing, so I thought that the separate compilation introduced in CUDA 5.0 might help me. I couldn't figure out how to achieve separat

4条回答
  •  心在旅途
    2020-12-05 12:24

    EDIT (2016-03-15): Yes, it is confirmed as a bug in FindCUDA: https://cmake.org/Bug/view.php?id=15157


    TL;DR: This seems to be a bug in FindCUDA, which makes objects lose info on external definitions before the final linking.

    The problem is that, even if separable compilation is enabled, a linking step is still performed for all the targets individually before the final linking.

    For instance, I have module.cu with:

    #include "module.h"
    #include <stdio.h>
    
    double arr[10] = {1,2,3,4,5,6,7,8,9,10};
    __constant__ double carr[10];
    
    void init_carr() {
      cudaMemcpyToSymbol(carr,arr,10*sizeof(double));
    }
    
    __global__ void pkernel() {
      printf("(pkernel) carr[%d]=%g\n",threadIdx.x,carr[threadIdx.x]);
    }
    
    void print_carr() {
      printf("in print_carr\n");
      pkernel<<<1,10>>>();
    }
    

    and module.h with:

    extern __constant__ double carr[10];
    extern double arr[10];
    
    void print_carr();
    void init_carr();
    

    and finally main.cu with:

    #include "module.h"
    
    #include <stdio.h>
    
    __global__ void kernel() {
      printf("(kernel) carr[%d]=%g\n",threadIdx.x,carr[threadIdx.x]);
    }
    
    
    int main(int argc, char *argv[]) {
      printf("arr: %g %g %g ..\n",arr[0],arr[1],arr[2]);
    
      kernel<<<1,10>>>();
      cudaDeviceSynchronize();
      print_carr();
      cudaDeviceSynchronize();
      init_carr();
      cudaDeviceSynchronize();
      kernel<<<1,10>>>();
      cudaDeviceSynchronize();
      print_carr();
      cudaDeviceSynchronize();
    
      return 0;
    }
    

    This then works fine with the following Makefile:

    NVCC=nvcc
    NVCCFLAGS=-arch=sm_20
    LIB=libmodule.a
    OBJS=module.o main.o
    PROG=extern
    
    $(PROG): main.o libmodule.a
        $(NVCC) $(NVCCFLAGS) -o $@ $^
    
    %.o: %.cu
        $(NVCC) $(NVCCFLAGS) -dc -c -o $@ $^
    
    $(LIB): module.o
        ar cr $@ $^
    
    clean:
        $(RM) $(PROG) $(OBJS) $(LIB)
    

    But then I try to use the following CMakeLists.txt:

    CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8)
    
    PROJECT(extern)
    
    FIND_PACKAGE(CUDA REQUIRED)
    SET(CUDA_SEPARABLE_COMPILATION ON)
    
    SITE_NAME(HOSTNAME)
    
    SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_20)
    
    cuda_add_library(module module.cu)
    
    CUDA_ADD_EXECUTABLE(extern main.cu)
    TARGET_LINK_LIBRARIES(extern module)
    

    When compiling with it, the following happens:

    $ cmake ..
    -- The C compiler identification is GNU 4.9.2
    ...
    $ make VERBOSE=1
    ...
    [ 25%] Building NVCC (Device) object CMakeFiles/module.dir//./module_generated_module.cu.o
    ...
    -- Generating <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o
    /usr/local/cuda/bin/nvcc <...>/module.cu -dc -o <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -arch=sm_20 -DNVCC -I/usr/local/cuda/include
    [ 50%] Building NVCC intermediate link file CMakeFiles/module.dir/./module_intermediate_link.o
    /usr/local/cuda/bin/nvcc -arch=sm_20 -m64 -ccbin "/usr/bin/cc" -dlink <...>/build/CMakeFiles/module.dir//./module_generated_module.cu.o -o <...>/build/CMakeFiles/module.dir/./module_intermediate_link.o
    ...
    /usr/bin/ar cr libmodule.a  CMakeFiles/module.dir/./module_generated_module.cu.o CMakeFiles/module.dir/./module_intermediate_link.o
    /usr/bin/ranlib libmodule.a
    ...
    [ 50%] Built target module
    [ 75%] Building NVCC (Device) object CMakeFiles/extern.dir//./extern_generated_main.cu.o
    ...
    -- Generating <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o
    /usr/local/cuda/bin/nvcc <...>/main.cu -dc -o <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -arch=sm_20 -DNVCC -I/usr/local/cuda/include -I/usr/local/cuda/include
    ...
    [100%] Building NVCC intermediate link file CMakeFiles/extern.dir/./extern_intermediate_link.o
    /usr/local/cuda/bin/nvcc -arch=sm_20 -m64 -ccbin "/usr/bin/cc" -dlink <...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o -o <...>/build/CMakeFiles/extern.dir/./extern_intermediate_link.o
    nvlink error   : Undefined reference to 'carr' in '<...>/build/CMakeFiles/extern.dir//./extern_generated_main.cu.o'
    

    Clearly, the problem lies in the nvcc -dlink obj.o -o obj_intermediate_link.o lines. That, I guess, is where the info on external definitions is lost. So the question is: is it possible to make CMake/FindCUDA not do this extra linking step?

    Otherwise, I would argue that this is a bug. Do you agree? I can file a bug report with CMake.

提交回复
热议问题