2

我试图在蚁群优化问题上获得更好的性能。为此,我使用 openCL 并行运行更新信息素部分。我刚开始学习openCL,这是我开发的内核代码。尽管它比顺序版本运行得更快,但我仍然认为我可以使用它实现更高的性能,但我没有找到其他可以做的事情。有没有办法进一步改进这段代码?

PS:我只在 CPU 上测试了这段代码,因为我正在使用的计算机没有 GPU。

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

int calculateLengthOfTrail(__global int*, const int, __global int*, const int );
int edgeInTrail(const int ,const int , __global int* , const int , const int );
int indexOfCity(__global int*, const int, const int, const int);

__kernel void updatePheromones(
    __global double* pheromones, 
    __global int* ants, 
    __global int* distances, 
    __local double* pheromones_old,
    const int numCities, 
    const int numAnts,
    const double pheromoneDecreaseFactor,
    const double pheromoneIncreaseFactor
)
{
    int i = get_global_id(0);
     int k, j;

    if(i<numCities)
    {
        for(j = i +1; j<numCities; j++)
        {
          for (k = 0; k < numAnts; k++)
          {
            double size = calculateLengthOfTrail(ants,k, distances, numCities);
            double decrease = (1.0 - pheromoneDecreaseFactor) *   pheromones_old[i+numCities*j];
            double increase = 0.0;

            int edge = edgeInTrail(i, j, ants, k, numCities);

            if (edge== 1) 
               increase = (pheromoneIncreaseFactor / size);

            pheromones[i+numCities*j] = decrease + increase;

            if (pheromones[i+numCities*j] < 0.0001)
               pheromones[i +numCities*j] = 0.0001;
            else if (pheromones[i + numCities*j] > 100000.0)
              pheromones[i+numCities*j] = 100000.0;

            pheromones[j+numCities*i] = pheromones[i+numCities*j];

          }
        }
     }
}

int edgeInTrail(const int cityX, const int cityY, __global int* ants, const int row, const int numCities)
{

  int lastIndex = numCities - 1;
  int indexCity = indexOfCity(ants, row, cityX, numCities);

  if (indexCity == 0 && ants[1+numCities*row] == cityY) 
    return 1;
  else if (indexCity == 0 && ants[lastIndex+numCities*row] == cityY) 
        return 1;
  else if (indexCity == 0) 
    return 0;
  else if (indexCity == lastIndex && ants[(lastIndex-1)+numCities*row] == cityY)
        return 1;
  else if (indexCity == lastIndex && ants[row*numCities] == cityY) 
    return 1;
  else if (indexCity == lastIndex) 
    return 0;
  else if (ants[(indexCity-1)+numCities*row] == cityY)
        return 1;
  else if (ants[(indexCity+1)+numCities*row] == cityY) 
        return 1;
  else 
    return 0;
}                                             

int calculateLengthOfTrail(__global int* ants, const int row, __global int* distances, const int numCities)
{
    int sumDistance = 0;
    int i;

    for(i =0; i<numCities-1; i++)
         sumDistance += distances[ants[i+numCities*row]+numCities*ants[(i+1)+numCities*row]];

    return sumDistance;

}

int indexOfCity(__global int* ants, int row, int city, int numCities)
{
    int i;

    for(i =0; i<numCities; i++)
    {
        if(ants[i+numCities*row] == city)
            return i;
    }

    return -1;
}
4

0 回答 0