Skip to content

Using cuda::std::inclusive_scan with std::vector fails to compile #8469

@oleksandr-pavlyk

Description

@oleksandr-pavlyk

Using the following function template:

// Builds two offset arrays from a list of segment sizes.
//
// sizes: per-segment sizes; received by value as a const copy.
// gap:   value substituted for every zero entry of `sizes` when building the
//        second offset array.
//
// Returns {offsets, offsets_with_gaps}. Each vector has sizes.size() + 1
// elements: element 0 is zero and elements 1..N are written by
// cuda::std::inclusive_scan over `sizes` (first vector) or over `sizes` with
// zero entries replaced by `gap` (second vector).
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>> 
make_in_out_offsets(const std::vector<OffsetT> sizes, OffsetT gap)
{
  std::vector<OffsetT> offsets;

  std::size_t segment_count = sizes.size();

  static constexpr OffsetT zero{0};

  // One extra slot so that offsets[0] can hold the leading zero.
  offsets.resize(segment_count + 1);
  offsets[0] = zero;

  // The scan output starts at index 1, after the leading zero. No binary
  // operation or initial value is passed, and the iterators are host-side
  // std::vector iterators.
  // NOTE(review): the reported #20014-D errors point into libcu++'s
  // inclusive_scan.h, so they are presumably triggered by this call and the
  // one further down -- confirm against the full compiler log.
  cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);

  // Copy of `sizes` in which every zero entry is replaced by `gap`.
  std::vector<OffsetT> sizes_with_gaps;
  sizes_with_gaps.resize(segment_count);
  for (std::size_t i = 0; i < segment_count; ++i)
  {
    const auto s       = sizes[i];
    sizes_with_gaps[i] = (s == 0) ? gap : s;
  }

  // Same layout as `offsets`: leading zero followed by the scan results.
  std::vector<OffsetT> offsets_with_gaps;
  offsets_with_gaps.resize(segment_count + 1);
  offsets_with_gaps[0] = zero;
  cuda::std::inclusive_scan(
    sizes_with_gaps.begin(),
    sizes_with_gaps.end(),
    offsets_with_gaps.begin() + 1);

  // Both vectors are copied into the returned tuple.
  return {offsets, offsets_with_gaps};
}

I am getting the following errors when compiling the code above with CUDA Toolkit (CTK) 12.0:

Compilation errors
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(51): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(53): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed

4 errors detected in the compilation of "/home/coder/cccl/cub/test/catch2_test_device_segmented_scan_multi_segment.cu".
ninja: build stopped: subcommand failed.

At the same time, the following standalone snippet, which is based on the code above, compiles fine with `nvcc inclusive_scan.cu --std=c++17 -arch=sm_86 -I libcudacxx/include/ -I cub/ -I thrust/`.

C++ snippet
// filename: inclusive_scan.cu
#include <cuda/std/numeric>

#include <iostream>
#include <tuple>
#include <vector>

// Builds two offset arrays from a list of segment sizes.
//
// sizes: per-segment sizes; received by value as a const copy.
// gap:   value substituted for every zero entry of `sizes` when building the
//        second offset array.
//
// Returns {offsets, offsets_with_gaps}. Each vector has sizes.size() + 1
// elements: element 0 is zero and elements 1..N are written by
// cuda::std::inclusive_scan over `sizes` (first vector) or over `sizes` with
// zero entries replaced by `gap` (second vector).
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>> make_in_out_offsets(const std::vector<OffsetT> sizes, OffsetT gap)
{
  std::vector<OffsetT> offsets;

  std::size_t segment_count = sizes.size();

  static constexpr OffsetT zero{0};

  // One extra slot so that offsets[0] can hold the leading zero.
  offsets.resize(segment_count + 1);
  offsets[0] = zero;

  // Disabled declaration of an explicit binary operation; neither scan call
  // below passes one.
  // cuda::std::plus<> plus_t{};

  // The scan output starts at index 1, after the leading zero. The iterators
  // are host-side std::vector iterators.
  cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);

  // Copy of `sizes` in which every zero entry is replaced by `gap`.
  std::vector<OffsetT> sizes_with_gaps;
  sizes_with_gaps.resize(segment_count);
  for (std::size_t i = 0; i < segment_count; ++i)
  {
    const auto s       = sizes[i];
    sizes_with_gaps[i] = (s == 0) ? gap : s;
  }

  // Same layout as `offsets`: leading zero followed by the scan results.
  std::vector<OffsetT> offsets_with_gaps;
  offsets_with_gaps.resize(segment_count + 1);
  offsets_with_gaps[0] = zero;
  cuda::std::inclusive_scan(sizes_with_gaps.begin(), sizes_with_gaps.end(), offsets_with_gaps.begin() + 1);

  // Both vectors are copied into the returned tuple.
  return {offsets, offsets_with_gaps};
}

// Driver for the snippet: calls make_in_out_offsets on eight unsigned sizes
// (three of them zero) with a gap of 4 and prints the second returned vector.
int main()
{
  std::vector<unsigned int> sizes = {2, 4, 0, 5, 1, 0, 0, 1};

  // `a` receives the plain offsets and is not used afterwards; `b` receives
  // the offsets computed with zero sizes replaced by the gap value 4u.
  auto [a, b] = make_in_out_offsets(sizes, 4u);

  // Print every element of `b` followed by a single space.
  // NOTE(review): assuming inclusive_scan produces running sums, the expected
  // output is "0 2 6 10 15 16 20 24 25 " -- confirm by running the snippet.
  for (auto el : b)
  {
    std::cout << el << " ";
  }

  std::cout << "\n";

  return 0;
}

Originally posted by @oleksandr-pavlyk in #6712 (comment)

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

Status

In Review

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions