2

我对推力(cuda)很陌生,并且正在寻找具有挑战性的东西。


要简化的编辑问题)我有一个输入向量和一个地图:

 vector = [8,23,46,500,2,7,91,91]
    map = [1, 0, 4,  3,1,0, 5, 3]

我想扩展它并增加值成为:

new_vec = [8,46,47,48,49,500,501,502,2,91,92,93,94,95,91,92,93]
  • 我意识到thrust/examples/expand.cu 示例已经主要执行此操作,但我不知道如何通过地图计数有效地增加数据值。
  • 如果有人可以解释如何修改此示例以实现此目的,那将很有帮助。
4

1 回答 1

2

调整 Thrustexpand示例以用于exclusive_scan_by_key对其子序列中的每个输出元素进行排名,然后按该排名递增:

#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/gather.h>
#include <thrust/scan.h>
#include <thrust/fill.h>
#include <thrust/copy.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/functional.h>
#include <iterator>
#include <iostream>


template<typename Vector>
void print(const std::string& s, const Vector& v)
{
  typedef typename Vector::value_type T;

  std::cout << s;
  thrust::copy(v.begin(), v.end(), std::ostream_iterator<T>(std::cout, " "));
  std::cout << std::endl;
}


template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator>
void expand_and_increment(InputIterator1 first1,
                          InputIterator1 last1,
                          InputIterator2 first2,
                          OutputIterator output)
{
  typedef typename thrust::iterator_difference<InputIterator1>::type difference_type;

  difference_type input_size  = thrust::distance(first1, last1);
  difference_type output_size = thrust::reduce(first1, last1);

  // scan the counts to obtain output offsets for each input element
  thrust::device_vector<difference_type> output_offsets(input_size);
  thrust::exclusive_scan(first1, last1, output_offsets.begin()); 

  print("output_offsets ", output_offsets);

  // scatter the nonzero counts into their corresponding output positions
  thrust::device_vector<difference_type> output_indices(output_size);
  thrust::scatter_if
    (thrust::counting_iterator<difference_type>(0),
     thrust::counting_iterator<difference_type>(input_size),
     output_offsets.begin(),
     first1,
     output_indices.begin());

  // compute max-scan over the output indices, filling in the holes
  thrust::inclusive_scan
    (output_indices.begin(),
     output_indices.end(),
     output_indices.begin(),
     thrust::maximum<difference_type>());

  print("output_indices ", output_indices);

  // gather input values according to index array (output = first2[output_indices])
  OutputIterator output_end = output; thrust::advance(output_end, output_size);
  thrust::gather(output_indices.begin(),
                 output_indices.end(),
                 first2,
                 output);

  // rank output_indices
  thrust::device_vector<difference_type> ranks(output_size);
  thrust::exclusive_scan_by_key(output_indices.begin(), output_indices.end(),
                                thrust::make_constant_iterator<difference_type>(1),
                                ranks.begin());

  print("ranks ", ranks);

  // increment output by ranks
  thrust::transform(output, output + output_size, ranks.begin(), output, thrust::placeholders::_1 + thrust::placeholders::_2);
}


int main(void)
{
  int values[] = {8,23,46,500,2,7,91,91};
  int counts[] = {1, 0, 4,  3,1,0, 5, 3};

  size_t input_size  = sizeof(counts) / sizeof(int);
  size_t output_size = thrust::reduce(counts, counts + input_size);

  // copy inputs to device
  thrust::device_vector<int> d_counts(counts, counts + input_size);
  thrust::device_vector<int> d_values(values, values + input_size);
  thrust::device_vector<int> d_output(output_size);

  // expand values according to counts
  expand_and_increment(d_counts.begin(), d_counts.end(),
                       d_values.begin(),
                       d_output.begin());

  std::cout << "Expanding and incrementing values according to counts" << std::endl;
  print(" counts ", d_counts);
  print(" values ", d_values);
  print(" output ", d_output);

  return 0;
}

输出:

$ nvcc expand_and_increment.cu -run
output_offsets 0 1 1 5 8 9 9 14 
output_indices 0 2 2 2 2 3 3 3 4 6 6 6 6 6 7 7 7 
ranks 0 0 1 2 3 0 1 2 0 0 1 2 3 4 0 1 2 
Expanding and incrementing values according to counts
 counts 1 0 4 3 1 0 5 3 
 values 8 23 46 500 2 7 91 91 
 output 8 46 47 48 49 500 501 502 2 91 92 93 94 95 91 92 93 
于 2013-04-22T16:50:56.400 回答