I am using Nvidia Nsight to debug my code and I noticed that my distance kernel is sometimes not launching. The code attempts to find the distance between two polygons. It does this by going through each segment of each polygon and finding the distance between the two segments. The following is my code:
__device__ double point_segment_distance(double px, double py, double x1, double y1, double x2, double y2)
{
double dx = x2 - x1;
double dy = y2 - y1;
//
//if (dx < 0.01 && dy < 0.01)
//{
// return hypot(px - x1, py - y1);
//}
//double t = ((px - x1) * dx + (py - y1) * dy) / (dx * dx + dy * dy);
//if (t < 0)
//{
// dx = px - x1;
// dy = py - y1;
//}
//else if (t > 1)
//{
// dx = px - x2;
// dy = py - y2;
//}
//else
//{
// double near_x = x1 + t * dx;
// double near_y = y1 + t * dy;
// dx = px - near_x;
// dy = py - near_y;
//}
//return hypot(dx, dy);
return 10.0;
}
__device__ bool segments_intersect(double x11, double y11, double x12, double y12, double x21, double y21, double x22, double y22)
{
double dx1 = x12 - x11;
double dy1 = y12 - y11;
double dx2 = x22 - x21;
double dy2 = y22 - y21;
double delta = dx2 * dy1 - dy2 * dx1;
if (delta < 0.01)
{
return false;
}
double s = (dx1 * (y21 - y11) + dy1 * (x11 - x21)) / delta;
double t = (dx2 * (y11 - y21) + dy2 * (x21 - x11)) / (-delta);
return (0 <= s && s <= 1 && 0 <= t && t <= 1);
}
__device__ double segments_distance(double x11, double y11, double x12, double y12, double x21, double y21, double x22, double y22)
{
if (segments_intersect(x11, y11, x12, y12, x21, y21, x22, y22))
{
return 0.0;
}
double minimumDist = 999999;
double tempDist = point_segment_distance(x11, y11, x21, y21, x22, y22);
if (tempDist < minimumDist)
{
minimumDist = tempDist;
}
tempDist = point_segment_distance(x12, y12, x21, y21, x22, y22);
if (tempDist < minimumDist)
{
minimumDist = tempDist;
}
tempDist = point_segment_distance(x21, y21, x11, y11, x12, y12);
if (tempDist < minimumDist)
{
minimumDist = tempDist;
}
tempDist = point_segment_distance(x22, y22, x11, y11, x12, y12);
if (tempDist < minimumDist)
{
minimumDist = tempDist;
}
return minimumDist;
}
__global__ void distance(double *x0, double *y0, double *x1, double *y1, double *dist, int *length0, int *length1, int *numDone)
{
int numComp = threadIdx.x + blockDim.x*blockIdx.x + *numDone;
int index = threadIdx.x + blockDim.x*blockIdx.x;
dist[index] = 99999;
if (numComp < ((*length0)*(*length1)))
{
int spot0 = numComp%(*length0);
int spot1 = numComp/(*length0);
dist[index] = segments_distance(x0[spot0], y0[spot0], x0[(spot0+1)%(*length0)], y0[(spot0+1)%(*length0)], x1[spot1], y1[spot1], x1[(spot1+1)%(*length1)], y1[(spot1+1)%(*length1)]);
}
}
void gpuDistance(double *x0, double *y0, double *x1, double *y1)
{
...
distance<<<165, 1024>>>(dev_x0, dev_y0, dev_x1, dev_y1, dev_dist, dev_length0, dev_length1, dev_numDone);
...
}
I commented out much of point_segment_distance in order to help me locate the error. This will not launch the distance kernel. I know this because I am using Nsight Cuda Debugging and it doesn't hit my breakpoints.
However, if I comment the line "double dy = y2 - y1;" in point_segment_distance the distance kernel will launch. How is this possible? Why would creating one more double cause the kernel to not launch. Is there a limit to the number of doubles that may be created on the GPU. I have a Tesla 2075. I am aware of the local memory limit of 512kb. However, looking at my code I can't imagine that I'm anywhere near that limit. Thanks for any help!