我想提升蚁群优化(ACO)算法的性能,为此使用 OpenCL 并行执行信息素更新部分。我刚开始学习 OpenCL,下面是我编写的内核代码。尽管它比顺序版本运行得更快,我仍然认为还有进一步提速的空间,只是不知道还能从哪里入手。有没有办法进一步改进这段代码?
附注:我只在 CPU 上测试过这段代码,因为我使用的计算机没有 GPU。
/* The kernel below works in double precision, so request the fp64 extension. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Forward declarations: the helpers are defined after the kernel that
 * calls them.
 */

/* Sums the distances between consecutive cities of trail `row`. */
int calculateLengthOfTrail(__global int* ants, const int row,
                           __global int* distances, const int numCities);

/* Returns 1 when cityX and cityY are neighbours on trail `row`, else 0. */
int edgeInTrail(const int cityX, const int cityY,
                __global int* ants, const int row, const int numCities);

/* Returns the position of `city` within trail `row`, or -1 if absent. */
int indexOfCity(__global int* ants, const int row,
                const int city, const int numCities);
/*
 * Pheromone update kernel: work-item i updates every entry (i, j) with
 * j > i of the numCities x numCities pheromone matrix and mirrors the
 * result into (j, i).
 *
 * pheromones              in/out matrix, element (i, j) at [i + numCities*j]
 * ants                    trails, city at position p of ant k at [p + numCities*k]
 * distances               distance table, read through calculateLengthOfTrail
 * pheromones_old          unused; kept only so that the host-side argument
 *                         indices stay valid (see below)
 * numCities               number of cities (matrix dimension, trail length)
 * numAnts                 number of trails stored in `ants`
 * pheromoneDecreaseFactor evaporation rate; (1 - factor) of the value is kept
 * pheromoneIncreaseFactor deposit numerator; an ant deposits factor / length
 *
 * Why pheromones_old is no longer read: it is a __local buffer that this
 * kernel never fills, and local memory cannot be initialised from the host,
 * so reading it yields indeterminate values. The current value is taken
 * from `pheromones` instead. This is race-free because the unordered pair
 * {i, j} is only ever touched by the work-item whose id is min(i, j).
 *
 * NOTE(review): evaporation is applied once per ant (the value decays
 * numAnts times per launch), which is what the per-ant loop computes when
 * it reads the running value. Textbook Ant System evaporates once per
 * iteration and then sums all deposits - confirm which one is intended.
 */
__kernel void updatePheromones(
    __global double* pheromones,
    __global int* ants,
    __global int* distances,
    __local double* pheromones_old,
    const int numCities,
    const int numAnts,
    const double pheromoneDecreaseFactor,
    const double pheromoneIncreaseFactor
)
{
    const double minPheromone = 0.0001;
    const double maxPheromone = 100000.0;

    const int i = get_global_id(0);

    (void)pheromones_old;

    /* Surplus work-items (global size rounded up) have nothing to do. */
    if (i >= numCities)
        return;

    const double keep = 1.0 - pheromoneDecreaseFactor;

    /*
     * Ant loop outermost: the trail length depends only on the ant, so it
     * is computed once per ant instead of once per (i, j, ant). Each (i, j)
     * entry still sees the ants in the same order 0..numAnts-1, so the
     * result per entry does not depend on this loop order.
     */
    for (int k = 0; k < numAnts; k++)
    {
        const double trailLength = calculateLengthOfTrail(ants, k, distances, numCities);
        const double deposit = pheromoneIncreaseFactor / trailLength;

        for (int j = i + 1; j < numCities; j++)
        {
            const int upper = i + numCities * j;
            const int lower = j + numCities * i;

            /* Evaporate the running value, then add this ant's deposit. */
            double tau = keep * pheromones[upper];
            if (edgeInTrail(i, j, ants, k, numCities) == 1)
                tau += deposit;

            /* Keep the value inside the allowed pheromone range. */
            if (tau < minPheromone)
                tau = minPheromone;
            else if (tau > maxPheromone)
                tau = maxPheromone;

            /* The matrix is kept symmetric. */
            pheromones[upper] = tau;
            pheromones[lower] = tau;
        }
    }
}
/*
 * Reports whether cityX and cityY are adjacent on the trail of ant `row`.
 *
 * The trail is treated as a closed tour: the first and the last position
 * are neighbours of each other.
 *
 * cityX, cityY  the two cities forming the edge
 * ants          trails, city at position p of ant `row` at [p + numCities*row]
 * row           index of the ant whose trail is inspected
 * numCities     number of positions in one trail
 *
 * Returns 1 if cityY sits directly before or after cityX, 0 otherwise
 * (including when cityX does not occur in the trail at all).
 */
int edgeInTrail(const int cityX, const int cityY, __global int* ants, const int row, const int numCities)
{
    const int rowStart = numCities * row;
    const int indexCity = indexOfCity(ants, row, cityX, numCities);

    /*
     * indexOfCity returns -1 when cityX is missing; without this guard the
     * neighbour lookups below would index before the start of the trail.
     */
    if (indexCity < 0)
        return 0;

    /* Wrap-around neighbours; numCities >= 1 here because a city was found. */
    const int previous = (indexCity + numCities - 1) % numCities;
    const int next = (indexCity + 1) % numCities;

    if (ants[rowStart + next] == cityY)
        return 1;
    if (ants[rowStart + previous] == cityY)
        return 1;
    return 0;
}
/*
 * Adds up the distances between consecutive cities on the trail of ant
 * `row`. The step from the last position back to the first one is not
 * included.
 *
 * ants       trails, city at position p of ant `row` at [p + numCities*row]
 * row        index of the ant whose trail is measured
 * distances  distance table, read at [from + numCities*to]
 * numCities  number of positions in one trail
 *
 * Returns the summed distance as an int (0 when the trail has fewer than
 * two positions).
 */
int calculateLengthOfTrail(__global int* ants, const int row, __global int* distances, const int numCities)
{
    const int rowStart = numCities * row;
    int total = 0;
    int step = 1;

    while (step < numCities)
    {
        const int from = ants[rowStart + step - 1];
        const int to = ants[rowStart + step];
        total += distances[from + numCities * to];
        step++;
    }

    return total;
}
/*
 * Linear search for `city` in the trail of ant `row`.
 *
 * ants       trails, city at position p of ant `row` at [p + numCities*row]
 * row        index of the ant whose trail is searched
 * city       city id to look for
 * numCities  number of positions in one trail
 *
 * Returns the first position holding `city`, or -1 if it is not present.
 */
int indexOfCity(__global int* ants, int row, int city, int numCities)
{
    const int rowStart = numCities * row;
    int found = -1;
    int position = 0;

    while (found < 0 && position < numCities)
    {
        if (ants[rowStart + position] == city)
            found = position;
        position++;
    }

    return found;
}