Bug#1006962: nvidia-cuda-toolkit: nvcc chokes on g++ 11.2's bits/std_function.h

Wed Mar 9 12:26:17 GMT 2022

Package: nvidia-cuda-toolkit
Version: 11.4.3-2
Severity: serious
Control: block 1003037 with -1

nvcc fails to compile bits/std_function.h from g++ 11.2:

$ echo '#include <functional>' | nvcc -ccbin g++-11 -x cu -c -
/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’:
  435 |         function(_Functor&& __f)
      |                                                                                                                                                 ^ 
/usr/include/c++/11/bits/std_function.h:435:145: note:         ‘_ArgTypes’
/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’:
  530 |         operator=(_Functor&& __f)
      |                                                                                                                                                  ^ 
/usr/include/c++/11/bits/std_function.h:530:146: note:         ‘_ArgTypes’

This is a regression relative to the header shipped with g++ 11.1
(11.2 includes a fix for a C++ standard library defect, LWG issue
  "2774. std::function construction vs assignment").

The offending code can be reduced to

===== nvcc-gcc112-failure.cu =====
template < typename >
class function ;
template < typename >
class _Function_handler ;
template < typename _Res , typename ... _ArgTypes >
class function < _Res ( _ArgTypes... ) >
{
        template < typename = void >
        using _Handler = _Function_handler < _Res ( _ArgTypes... ) > ;
        function
                () noexcept ( _Handler < > :: template _S_nothrow_init < > ) ;
} ;
=====

Running nvcc -v -x cu -c nvcc-gcc112-failure.cu yields these commands:

g++-11 -D__CUDA_ARCH__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=152 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=4 -include "cuda_runtime.h" -m64 "nvcc-gcc112-failure.cu" -o "/tmp/tmpxft_00007ddc_00000000-7_nvcc-gcc112-failure.cpp1.ii" 
cicc --c++17 --gnu_version=110200 --orig_src_file_name "nvcc-gcc112-failure.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "tmpxft_00007ddc_00000000-3_nvcc-gcc112-failure.fatbin.c" -tused --gen_module_id_file --module_id_file_name "/tmp/tmpxft_00007ddc_00000000-4_nvcc-gcc112-failure.module_id" --gen_c_file_name "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.c" --stub_file_name "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.stub.c" --gen_device_file_name "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.gpu"  "/tmp/tmpxft_00007ddc_00000000-7_nvcc-gcc112-failure.cpp1.ii" -o "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.ptx"
ptxas -arch=sm_52 -m64 "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.ptx"  -o "/tmp/tmpxft_00007ddc_00000000-8_nvcc-gcc112-failure.sm_52.cubin" 
fatbinary -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=/tmp/tmpxft_00007ddc_00000000-8_nvcc-gcc112-failure.sm_52.cubin" "--image3=kind=ptx,sm=52,file=/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.ptx" --embedded-fatbin="/tmp/tmpxft_00007ddc_00000000-3_nvcc-gcc112-failure.fatbin.c" 
rm -f /tmp/tmpxft_00007ddc_00000000-3_nvcc-gcc112-failure.fatbin
g++-11 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=152 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=4 -include "cuda_runtime.h" -m64 "nvcc-gcc112-failure.cu" -o "/tmp/tmpxft_00007ddc_00000000-5_nvcc-gcc112-failure.cpp4.ii" 
cudafe++ --c++17 --gnu_version=110200 --orig_src_file_name "nvcc-gcc112-failure.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.cpp" --stub_file_name "tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.stub.c" --module_id_file_name "/tmp/tmpxft_00007ddc_00000000-4_nvcc-gcc112-failure.module_id" "/tmp/tmpxft_00007ddc_00000000-5_nvcc-gcc112-failure.cpp4.ii" 
g++-11 -D__CUDA_ARCH__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "/tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.cpp" -o "nvcc-gcc112-failure.o" 

The last g++-11 call is the failing one:

nvcc-gcc112-failure.cu:10:28: error: parameter packs not expanded with ‘...’:
   10 |         function
      |                            ^                                                                 
nvcc-gcc112-failure.cu:10:28: note:         ‘_ArgTypes’

Inspecting /tmp/tmpxft_00007ddc_00000000-6_nvcc-gcc112-failure.cudafe1.cpp
(after rerunning the commands manually to preserve the temporary files)
shows that cudafe++ has rewritten the code to

=====
template< class > class function; 
template< class > class _Function_handler; 
template< class _Res, class ..._ArgTypes> 
class function< _Res (_ArgTypes ...)>  { 
template< class  = void> using _Handler = _Function_handler< _Res (_ArgTypes ...)> ; 
function() noexcept(&_Function_handler< _Res (_ArgTypes)> ::template _S_nothrow_init<>); 
}; 
=====

The actual rewriting change is

-noexcept(_Handler<>                         ::template _S_nothrow_init<>)
+noexcept(&_Function_handler<_Res(_ArgTypes)>::template _S_nothrow_init<>)

which drops the pack expansion '...' after _ArgTypes (and adds a leading '&'),
so g++ correctly rejects the rewritten code.

nvidia-cuda-toolkit 11.5.1 seems to have the same issue.

Andreas