Using the following code:
// Builds two offset arrays from a list of segment sizes.
//
// sizes: per-segment element counts.
// gap:   value substituted for every zero-sized segment when building the
//        second offset array.
//
// Returns a tuple {offsets, offsets_with_gaps}, each with sizes.size() + 1
// entries. Entry 0 is zero in both; the remaining entries are written by an
// inclusive scan over `sizes` (first array) and over `sizes` with zeros
// replaced by `gap` (second array).
//
// NOTE(review): the "in"/"out" naming suggests these are input and output
// segment offsets — confirm against the caller.
// NOTE(review): `sizes` is taken by value, so the vector is copied on every
// call; a const reference would avoid that — confirm before changing.
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>>
make_in_out_offsets(const std::vector<OffsetT> sizes, OffsetT gap)
{
std::vector<OffsetT> offsets;
std::size_t segment_count = sizes.size();
static constexpr OffsetT zero{0};
// One more offset than there are segments; slot 0 holds the starting offset.
offsets.resize(segment_count + 1);
offsets[0] = zero;
// Scan results are written starting at slot 1, after the leading zero.
// NOTE(review): the diagnostics reported for inclusive_scan.h presumably
// originate from the cuda::std::inclusive_scan calls in this function — confirm.
cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);
// Copy of `sizes` in which every zero-sized segment is given `gap` elements.
std::vector<OffsetT> sizes_with_gaps;
sizes_with_gaps.resize(segment_count);
for (std::size_t i = 0; i < segment_count; ++i)
{
const auto s = sizes[i];
sizes_with_gaps[i] = (s == 0) ? gap : s;
}
// Same layout as `offsets`, but scanned over the gap-adjusted sizes.
std::vector<OffsetT> offsets_with_gaps;
offsets_with_gaps.resize(segment_count + 1);
offsets_with_gaps[0] = zero;
cuda::std::inclusive_scan(
sizes_with_gaps.begin(),
sizes_with_gaps.end(),
offsets_with_gaps.begin() + 1);
return {offsets, offsets_with_gaps};
}
I am getting the following errors with CTK 12.0:
Compilation errors
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(51): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed
Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(52): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed
/home/coder/cccl/lib/cmake/libcudacxx/../../../libcudacxx/include/cuda/std/__numeric/inclusive_scan.h(53): error #20014-D: calling a __host__ function from a __host__ __device__ function is not allowed
4 errors detected in the compilation of "/home/coder/cccl/cub/test/catch2_test_device_segmented_scan_multi_segment.cu".
ninja: build stopped: subcommand failed.
At the same time, compiling the following snippet, which is based on the above code, with `nvcc inclusive_scan.cu --std=c++17 -arch=sm_86 -I libcudacxx/include/ -I cub/ -I thrust/` works fine.
C++ snippet
// filename: inclusive_scan.cu
#include <cuda/std/numeric>
#include <iostream>
#include <tuple>
#include <vector>
// Builds two offset arrays from a list of segment sizes.
//
// sizes: per-segment element counts.
// gap:   value substituted for every zero-sized segment when building the
//        second offset array.
//
// Returns a tuple {offsets, offsets_with_gaps}, each with sizes.size() + 1
// entries. Entry 0 is zero in both; the remaining entries are written by an
// inclusive scan over `sizes` (first array) and over `sizes` with zeros
// replaced by `gap` (second array).
//
// NOTE(review): `sizes` is taken by value, so the vector is copied on every
// call; a const reference would avoid that — confirm before changing.
template <typename OffsetT>
std::tuple<std::vector<OffsetT>, std::vector<OffsetT>> make_in_out_offsets(const std::vector<OffsetT> sizes, OffsetT gap)
{
std::vector<OffsetT> offsets;
std::size_t segment_count = sizes.size();
static constexpr OffsetT zero{0};
// One more offset than there are segments; slot 0 holds the starting offset.
offsets.resize(segment_count + 1);
offsets[0] = zero;
// cuda::std::plus<> plus_t{};
// Scan results are written starting at slot 1, after the leading zero.
cuda::std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);
// Copy of `sizes` in which every zero-sized segment is given `gap` elements.
std::vector<OffsetT> sizes_with_gaps;
sizes_with_gaps.resize(segment_count);
for (std::size_t i = 0; i < segment_count; ++i)
{
const auto s = sizes[i];
sizes_with_gaps[i] = (s == 0) ? gap : s;
}
// Same layout as `offsets`, but scanned over the gap-adjusted sizes.
std::vector<OffsetT> offsets_with_gaps;
offsets_with_gaps.resize(segment_count + 1);
offsets_with_gaps[0] = zero;
cuda::std::inclusive_scan(sizes_with_gaps.begin(), sizes_with_gaps.end(), offsets_with_gaps.begin() + 1);
return {offsets, offsets_with_gaps};
}
// Driver for the standalone reproducer: instantiates make_in_out_offsets with
// unsigned int sizes and a gap of 4, then prints the second returned vector
// (the gap-adjusted offsets) on a single line, each value followed by a space.
int main()
{
  const std::vector<unsigned int> segment_sizes{2, 4, 0, 5, 1, 0, 0, 1};

  // Only the second tuple element is printed; the first is computed but unused.
  const auto offset_pair      = make_in_out_offsets(segment_sizes, 4u);
  const auto& gapped_offsets  = std::get<1>(offset_pair);

  for (auto it = gapped_offsets.begin(); it != gapped_offsets.end(); ++it)
  {
    std::cout << *it << " ";
  }
  std::cout << "\n";

  return 0;
}
Originally posted by @oleksandr-pavlyk in #6712 (comment)
Using
I am getting the following errors with CTK 12.0:
Compilation errors
At the same time, compiling the following snippet, which is based on the above code, with
`nvcc inclusive_scan.cu --std=c++17 -arch=sm_86 -I libcudacxx/include/ -I cub/ -I thrust/` works fine.
C++ snippet
Originally posted by @oleksandr-pavlyk in #6712 (comment)