我正在尝试把数据复制到 tile_static 内存中,供一个长时间运行的计算过程使用。在我看到的所有示例中,都是先声明一个 tile_static 数组,再由 tile(线程块)中的每个线程各自填充其中的数据,然后这些线程共享该数据。而我想做的只是把一部分数据复制到 tile_static 中,供单个线程自己使用。我不需要共享这些数据,但由于它们对长时间运行的线程非常重要,按我的理解,这样做会提高性能。我不确定这是否是正确的做法。我尝试编写的 tile_static 声明位于函数底部附近的 parallel_for_each 调用中,如下所示:
// Desired statement (repeated inside the kernel lambda below): initialize a tile_static route from the row of av_RouteSet selected by t_idx.global[0].
tile_static vector<int_2> route = av_RouteSet[t_idx.global[0]];
为了清楚起见,下面附上包含该语句的完整函数代码。
/// Sets up NumberOfTiles * TS * TS independent route computations on `accl`, one float
/// result per run.
///
/// For every run the kernel calls Tiled_InitializeRoute and Tiled_RandomizeRoute on a route,
/// hands it to Tiled_HeuristicRun together with the temperature, cooling and move-count
/// arguments, and stores the value of Tiled_TotalRouteDistance for that route in av_Results.
///
/// @param accl               accelerator_view passed to parallel_for_each.
/// @param CityLocations      city data; captured by value in the kernel and forwarded to
///                           Tiled_HeuristicRun and Tiled_TotalRouteDistance.
/// @param NumberOfTiles      multiplied by TS * TS to obtain the number of runs.
/// @param StartTemp          forwarded unchanged to Tiled_HeuristicRun.
/// @param EndTemp            forwarded unchanged to Tiled_HeuristicRun.
/// @param CoolingCoefficient forwarded unchanged to Tiled_HeuristicRun.
/// @param MovesPerTemp       forwarded unchanged to Tiled_HeuristicRun.
/// @return Declared as vector<float>.
///
/// NOTE(review): no return statement is visible in this function even though it is declared
/// to return vector<float>; presumably `Results` is meant to be returned (after an explicit
/// av_Results.synchronize()) — confirm.
vector<float> tiledTSPCompute(accelerator_view accl, city_set CityLocations, int NumberOfTiles,
float StartTemp, float EndTemp, float CoolingCoefficient, unsigned int MovesPerTemp){
// Side length of a tile; the kernel below is launched with TS x TS tiles.
static const int TS = 16;
// Total number of runs: NumberOfTiles tiles of TS * TS runs each.
int NumberOfRuns = NumberOfTiles * TS * TS;
// Host-side result buffer, one float per run, wrapped in a rank-1 array_view for the kernel.
vector<float> Results(NumberOfRuns);
array_view<float> av_Results(Results);
// Backing storage for all routes, viewed as a 2-D grid with one row per run.
// NOTE(review): sizeof(CityLocations.Cities) is the size of that member in bytes, which is
// not necessarily the number of cities — confirm the intended row length.
vector<int_2> RouteSet(sizeof(CityLocations.Cities) * NumberOfRuns);
array_view<int_2, 2> av_RouteSet(NumberOfRuns, sizeof(CityLocations.Cities), RouteSet);
// Rank-1 extent with one entry per run; used below to size the RNG collection.
concurrency::extent<1> e(NumberOfRuns);
// Random number generators covering extent e.
// NOTE(review): the meaning of the second argument (500) is not visible here — presumably
// a seed; confirm against tinymt_collection.
tinymt_collection<1> mtSet(e, 500);
// Launch the kernel over av_Results' extent, split into TS x TS tiles.
// NOTE(review): av_Results is declared above as array_view<float> (rank 1) while
// tile<TS, TS>() and tiled_index<TS, TS> are rank 2; C++ AMP documents the two-argument
// tile() for extent<2> only — confirm the ranks agree (e.g. tile<TS * TS>() together with
// tiled_index<TS * TS>).
concurrency::parallel_for_each(accl, av_Results.extent.tile<TS, TS>(), [=](tiled_index<TS, TS> t_idx)restrict(amp){
// Generator selected by this thread's global index.
// NOTE(review): mtSet is declared as tinymt_collection<1> while t_idx.global comes from a
// rank-2 tiled_index here — confirm the index rank matches.
auto& mt = mtSet[t_idx.global];
// What I would like to do: give the thread its own route, taken from the row of
// av_RouteSet selected by t_idx.global[0].
// NOTE(review): per the C++ AMP documentation, tile_static variables are shared by all
// threads of a tile, cannot have initializers and must be of an amp-compatible type, which
// std::vector is not — so this declaration is presumably rejected by the compiler as
// written. Data that only one thread uses normally does not need tile_static. Confirm.
tile_static vector<int_2> route = av_RouteSet[t_idx.global[0]];
// Initialize the route, randomize it using mt, then run the heuristic over it with the
// caller's temperature, cooling and move-count parameters.
Tiled_InitializeRoute(route);
Tiled_RandomizeRoute(route, mt);
Tiled_HeuristicRun(StartTemp, EndTemp, CoolingCoefficient, CityLocations, route, MovesPerTemp, mt);
// Store the route's total distance as this thread's result.
av_Results[t_idx.global] = Tiled_TotalRouteDistance(route, CityLocations);
});
};