c# - 用 C# 实现 Hoey Shamos 算法

Question

好的，我现在从我当前的算法中获得了正确的信息！但是，要检查 700,000 个多边形，这实在是太慢了！上一个问题已修复（我的 Line2D intersectsWith 方法不正确）

现在的问题是找出瓶颈所在！该算法的复杂度应为 O(n log n)，因此它本应快得多。我的 intersectsWith 方法看起来已经不能再快了，但我还是把它的代码贴出来，以防我错了。

编辑：添加了 IComparable 接口

我的读取线段交点的方法。为了便于阅读，省略了一些代码。

    /// <summary>
    /// A 2D line segment running from (X1, Y1) to (X2, Y2).
    /// </summary>
    /// <remarks>
    /// The coordinate members (X1, Y1, X2, Y2 and the getX1()/getY1()/getX2()/getY2()
    /// accessors) are used below but were left out of this listing for brevity.
    /// </remarks>
    public class Line2D : IComparable
    {
        // Coordinate members omitted in this listing.

        public Line2D(XYPoints p1, XYPoints p2)
        {
            // Body omitted in this listing.
        }

        /// <summary>
        /// Tests whether this segment and <paramref name="comparedLine"/> cross or touch.
        /// </summary>
        /// <param name="comparedLine">The segment to test against.</param>
        /// <returns>
        /// <c>true</c> when both segment parameters of the intersection lie in [0, 1];
        /// <c>false</c> otherwise, including for parallel (or collinear) segments and for
        /// segments that are chained end-to-start.
        /// </returns>
        public bool intersectsLine(Line2D comparedLine)
        {
            // Segments that merely continue one another (the end of one is the start
            // of the other) are deliberately not reported as intersecting.
            if (X2 == comparedLine.X1 && Y2 == comparedLine.Y1)
            {
                return false;
            }

            if (X1 == comparedLine.X2 && Y1 == comparedLine.Y2)
            {
                return false;
            }

            double firstDx = X2 - X1;
            double firstDy = Y2 - Y1;
            double secondDx = comparedLine.X2 - comparedLine.X1;
            double secondDy = comparedLine.Y2 - comparedLine.Y1;

            // Cross product of the two direction vectors; zero means the segments are
            // parallel, so there is no single intersection point to solve for.
            double denominator = firstDx * secondDy - secondDx * firstDy;
            if (denominator == 0)
            {
                return false;
            }

            double offsetX = X1 - comparedLine.X1;
            double offsetY = Y1 - comparedLine.Y1;

            // s and t are the positions of the intersection along each segment.
            double s = (firstDx * offsetY - firstDy * offsetX) / denominator;
            double t = (secondDx * offsetY - secondDy * offsetX) / denominator;

            return s >= 0 && s <= 1 && t >= 0 && t <= 1;
        }

        /// <summary>
        /// Orders segments by their Y1 value, breaking ties by Y2.
        /// </summary>
        /// <param name="obj">The other <see cref="Line2D"/>; <c>null</c> sorts first.</param>
        /// <returns>-1, 0 or 1 following the usual <see cref="IComparable"/> contract.</returns>
        int IComparable.CompareTo(object obj)
        {
            // By convention every instance compares greater than null.
            if (obj == null)
            {
                return 1;
            }

            Line2D other = (Line2D)obj;

            // Primary key: Y1 against Y1. (The earlier code compared Y1 against the
            // other segment's Y2 in the "greater" branch, which made the ordering
            // inconsistent.)
            if (getY1() < other.getY1())
            {
                return -1;
            }

            if (getY1() > other.getY1())
            {
                return 1;
            }

            // Secondary key: Y2.
            if (getY2() < other.getY2())
            {
                return -1;
            }

            if (getY2() > other.getY2())
            {
                return 1;
            }

            return 0;
        }
    }

下面是我的算法实现的主要部分。我知道 List 对这个算法来说并不是最快的，但是我需要索引！

// Sweep over all events in ascending Y order, keeping the currently active
// segments in `sweepline` (ordered by getY1()). Returns true as soon as two
// neighbouring segments on the sweep line intersect.

 List<AlgEvent> SortedList = events.OrderBy(o => o.getY()).ToList();
 List<Line2D> sweepline = new List<Line2D>();

 for (var g = 0; g < SortedList.Count; g++)
 {
     Line2D nl = SortedList[g].line;

     if (SortedList[g].isStart)
     {
         // Insert the segment BEFORE looking for its neighbours. Looking it up
         // first always yields index -1, so no neighbour was ever tested.
         //
         // The list is already ordered by getY1(), so a binary search for the
         // upper bound gives the same position as appending and re-sorting with
         // a stable sort, without sorting the whole list on every event.
         var key = nl.getY1();
         int lo = 0;
         int hi = sweepline.Count;
         while (lo < hi)
         {
             int mid = lo + (hi - lo) / 2;
             if (sweepline[mid].getY1() <= key)
             {
                 lo = mid + 1;
             }
             else
             {
                 hi = mid;
             }
         }

         int index = lo;
         sweepline.Insert(index, nl);

         // Neighbour above: bounds are checked explicitly instead of catching
         // ArgumentOutOfRangeException, which is very slow in a hot loop.
         if (index + 1 < sweepline.Count && sweepline[index + 1].intersectsLine(nl))
         {
             return true;
         }

         // Neighbour below.
         if (index > 0 && sweepline[index - 1].intersectsLine(nl))
         {
             return true;
         }
     }
     else
     {
         // End event: the two segments around nl become neighbours once nl is gone.
         int index = sweepline.IndexOf(nl);
         if (index < 0)
         {
             // nl is not on the sweep line; nothing to remove or test.
             continue;
         }

         Line2D above = index + 1 < sweepline.Count ? sweepline[index + 1] : null;
         Line2D below = index > 0 ? sweepline[index - 1] : null;

         // Removing by index keeps the list ordered, so no re-sort is needed.
         sweepline.RemoveAt(index);

         if (above != null && below != null && above.intersectsLine(below))
         {
             return true;
         }
     }
 }



   } // end numofparts for-loop

   return false;

============================================

更新（9 月 12 日）：改用了 C5 的 TreeSet，并让我的类实现了 IComparable，结果反而更慢了？我仍然在对它做索引访问，不知道这是否有影响？

http://www.itu.dk/research/c5/

使用 TreeSet 的代码：

// Sweep over the sorted events, keeping the active segments in a C5 TreeSet
// ordered by Line2D's comparison.
// NOTE(review): this variant returns false when an intersection is found, whereas
// the List-based variant returns true — confirm which the enclosing method expects.
TreeSet<Line2D> sweepline = new TreeSet<Line2D>();
for (var g = 0; g < SortedList.Count; g++)
{
    Line2D nl = SortedList[g].line;

    if (SortedList[g].isStart)
    {
        // Insert BEFORE looking for neighbours: IndexOf on an item that is not in
        // the set returns a negative value, so the neighbour lookups used to fail.
        // Add() leaves the set unchanged if an item comparing equal is already
        // present; IndexOf then reports that item's position.
        sweepline.Add(nl);
        int index = sweepline.IndexOf(nl);

        if (index >= 0)
        {
            // Explicit bounds checks replace catching IndexOutOfRangeException.
            if (index + 1 < sweepline.Count && sweepline[index + 1].intersectsLine(nl))
            {
                return false;
            }

            if (index > 0 && sweepline[index - 1].intersectsLine(nl))
            {
                return false;
            }
        }
    }
    else
    {
        // End event: the segments around nl become neighbours once nl is removed.
        int index = sweepline.IndexOf(nl);
        if (index < 0)
        {
            // nl is not on the sweep line; nothing to remove or test.
            continue;
        }

        Line2D above = index + 1 < sweepline.Count ? sweepline[index + 1] : null;
        Line2D below = index > 0 ? sweepline[index - 1] : null;

        sweepline.Remove(nl);

        if (above != null && below != null && above.intersectsLine(below))
        {
            return false;
        }
    }
}

score 3 · Accepted Answer

首先，关于线交点：你不需要实际的交点，只知道它们是否相交。有关执行此操作的算法，请参见http://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/。

关于List实施：

在您使用 List 的实现中，您对扫描线调用 IndexOf 来查找 nl。这会从头到尾线性搜索整个扫描线，参见 List(T).IndexOf。如果改用 BinarySearch 方法，搜索速度应该会大大加快。

List 的文档中有一节叫做“性能注意事项”，其中建议使用实现了 IEquatable<T> 和 IComparable<T> 的值类型。所以，您的 Line2D 或许应该是一个 struct，并实现这些接口。

如果您遵循该建议，则从扫描线检索端点应该是 O(log n)，这足以满足您的目的，并且应该更有效地使用内存。

List 的插入和删除是 O(n)，因为底层数组的元素需要在内存中移动。SortedSet 的插入和删除速度更快，但我不太清楚如何在 O(log n) 内找到某个元素的相邻元素。有人知道吗？（另请参阅“为什么 SortedSet<T>.GetViewBetween 不是 O(log N)？”）

无论如何，C5TreeSet应该解决这个问题。

我在用户指南中查找了 IndexOf 和 [i] 的性能，它们都被列为 O(log n)，所以这不应该是问题所在。改为调用专门用于在扫描线上查找相邻元素的方法（即 Successor 和 Predecessor）可能会稍快一些，但它们同样是 O(log n)，因此提速不会超过一个常数因子。

所以

[...]
try
{
    // Successor throws NoSuchItemException when nl has no neighbour above it.
    bool crossesAbove = sweepline.Successor(nl).intersectsLine(nl);
    if (crossesAbove)
    {
        return false;
    }
}
catch (NoSuchItemException)
{
    // No segment above nl on the sweep line, so there is nothing to test.
}
[...]

我不喜欢 C5 没有提供不抛出异常的对应方法，因为抛出异常的开销非常大。您的扫描线通常会比较满，所以我猜找不到相邻线段的情况很少见，此时调用 Successor 是最高效的方式。或者，您也可以像现在一样继续调用 IndexOf，但在读取 [index + 1] 之前先检查 index 是否等于 Count 减一，从而完全避免抛出异常：

[...]
int index = sweepline.IndexOf(nl);
// IndexOf returns a negative value when nl is not in the sweepline; without the
// lower-bound test that value would pass the check and index the wrong element
// (or throw). The upper-bound test skips the topmost segment, which has no
// neighbour above it.
if (index >= 0 && index < sweepline.Count - 1)
{
    Line2D above = sweepline[index + 1];
    if (above.intersectsLine(nl))
    {
        return false;
    }
}
[...]

手册的第二章描述了 C5 集合的相等和比较。在这里，您至少必须实现 IEquatable<T> 和 IComparable<T>！

另一个想法：您提到给算法输入了 700000 条线段。能否先对例如 1000、2500、5000、10000 条线段分别计时，看看在线段互不相交的情况下，算法的耗时是如何随规模增长的？

关于如何比较扫描线上的线：

您需要为 Sweepline TreeSet 上的 Line2D 找到某种自然排序，因为 CompareTo 方法要求您将一个 Line2D 与另一个进行比较。其中一个 Line2D 已经位于 Sweepline TreeSet 中，另一个刚刚遇到并正在添加中。

我认为您的扫描线从下到上运行：

List<AlgEvent> SortedList = events.OrderBy(o => o.getY()).ToList();

穿过线段的扫描线

因此，假设段 S1 在事件 1 中添加到 TreeSet，我们希望将它与现在在事件 2 中添加的 S2 进行比较。

线段可能在某个点相交，这会改变顺序，但算法会在插入它们后立即检查，在上面和下面的检查中。想一想，也许最好将其称为左右检查。

无论如何..所以最简单的方法是比较两条线段的底部端点。左边小，右边大。但是，我们需要查看扫描线位置的排序，从那时起它们可能已经改变了位置，如图所示。

所以我们需要比较 S2 的下端点和 S1 上的红点，看看它是在那个点的左边还是右边。它位于左侧，因此 S2 被认为小于 S1。

通常它比这更简单：如果所有 S1 都位于 S2 下端点的左侧，则 S1 小于 S2。如果所有 S1 都位于 S2 的下端点的右侧，则 S1 大于 S2。

我认为您要找的是该接口的类型安全（泛型）版本：

public class Line2D : IComparable<Line2D>

假设有两个属性BottomY，两个 Y 值中的最低者，以及BottomX，最低端点的 X 值，这是一个经过一定程度测试的尝试：

/// <summary>
/// Orders two segments by their left-to-right position on the sweep line at the
/// height of the more recently added (higher-bottomed) segment.
/// </summary>
/// <param name="other">The segment to compare with; <c>null</c> sorts first.</param>
/// <returns>
/// A negative value when this segment lies to the left of <paramref name="other"/>,
/// a positive value when it lies to the right, and 0 when the order is undetermined.
/// </returns>
int IComparable<Line2D>.CompareTo(Line2D other)
{
    // By convention every instance compares greater than null.
    if (other == null)
    {
        return 1;
    }

    if( BottomY < other.BottomY )
    {
        // The other segment starts higher, so it is the one being added: let it
        // do the comparison and invert the answer. CompareTo is an explicit
        // interface implementation, so it must be called through the interface.
        return -((IComparable<Line2D>)other).CompareTo(this);
    }

    // we're the segment being added to the sweepline
    if( BottomX >= other.X1 && BottomX >= other.X2 )
    {
        return 1;
    }
    if( BottomX <= other.X1 && BottomX <= other.X2 )
    {
        return -1;
    }

    double otherDy = other.Y2 - other.Y1;
    if( otherDy == 0 )
    {
        // Scary edge case: horizontal line that we intersect with. Return 0?
        return 0;
    }

    // The X coordinate where the other segment crosses the sweepline is
    //   redX = other.X1 + (BottomY - other.Y1) * (other.X2 - other.X1) / otherDy
    // and the result is BottomX.CompareTo(redX). To avoid the division both sides
    // are multiplied by otherDy, which reverses the inequality when otherDy is
    // negative (other stored top-to-bottom) — hence the sign correction below.
    int order = ((BottomX - other.X1) * otherDy).CompareTo(
        (BottomY - other.Y1) * (other.X2 - other.X1));

    return otherDy < 0 ? -order : order;
}

score 1 · Accepted Answer

[原答案]

我不是C#用户，但这应该会加快速度。

减少堆垃圾
不要两次计算同一件事
如果可以的话，避免所有子调用（删除函数）

代码：

/// <summary>
/// Tests whether this segment and <paramref name="comparedLine"/> cross or touch,
/// computing every shared sub-expression only once.
/// </summary>
/// <param name="comparedLine">
/// The segment to test against. (Classes are passed by reference in C#, so no
/// C++-style <c>const &amp;</c> qualifier is needed.)
/// </param>
/// <returns>
/// <c>true</c> when the intersection lies within both segments; <c>false</c>
/// otherwise, including for parallel segments and segments chained end-to-start.
/// </returns>
public bool intersectsLine(Line2D comparedLine)
{
    // Segments that merely continue one another are not reported as intersecting.
    if (X2 == comparedLine.X1 && Y2 == comparedLine.Y1)
    {
        return false;
    }

    if (X1 == comparedLine.X2 && Y1 == comparedLine.Y2)
    {
        return false;
    }

    double dx1 = X2 - X1;
    double dy1 = Y2 - Y1;
    double dx2 = comparedLine.X2 - comparedLine.X1;
    double dy2 = comparedLine.Y2 - comparedLine.Y1;

    // Zero cross product: the segments are parallel, there is nothing to solve.
    double denominator = (dx1 * dy2) - (dx2 * dy1);
    if (denominator == 0)
    {
        return false;
    }

    double ax = X1 - comparedLine.X1;
    double ay = Y1 - comparedLine.Y1;

    // One division, reused for both parameters.
    double b = 1.0 / denominator;
    double s = ((dx1 * ay) - (dy1 * ax)) * b;
    double t = ((dx2 * ay) - (dy2 * ax)) * b;

    return s >= 0 && s <= 1 && t >= 0 && t <= 1;
}

对于您的其余代码，添加时间测量以找出究竟是什么减慢了速度。我的猜测是列表管理......不必要的重新分配会大大减慢速度。

[编辑1]

在对随机线数据进行一些研究后，我得出以下结论：

如果贯穿整个区域的长线段太多，那么任何优化都不起作用
如果短线段占多数，那么任何一种优化都能带来加速
蛮力法为 T((N*N-N)/2)，仍然是 O(N*N)；估计处理 700K 条线段大约需要 35 小时（不可用）
带区域细分的优化蛮力法为 T((((N/M)^2)-N)/2) − 优化，约为 O((N/M)^2)，其中：
- N 是单个区域内线段数的最大值
- M 是每个轴上的区域划分数。其思路是只检查穿过同一区域的线段（把数据集区域划分成 M*M 个正方形/矩形）。对于 700K 条线段，最好细分为 16x16 个区域。实测耗时：
  
  32K 条线段 0.540s；64K 条线段 1.950s；128K 条线段 7.000s；256K 条线段 27.514s
估计 700K 条线段的运行时间为 3.7 分钟（线段的最大长度约为整个区域的 10%）。我认为这比您的 19 分钟要好。
使用多 CPU/内核可以实现另一个加速

该算法完全可以并行化：使用 4 个 CPU/核心时为 3.7min/4 -> 56s，或者您也可以将其移植到 GPU……

我的优化蛮力算法与区域细分 O((((N/M)^2)-N)/2) - 优化

获取使用区域大小(xmin,xmax,ymin,ymax) O(N)
选择细分M 我为我的随机数据集32K-256K线尝试的最好的方法是M=16
循环遍历所有细分区域（均匀划分的数据集区域）

创建穿过实际细分区域的线列表并检查该列表中所有线的交点。如果不想要重复的交叉点，则丢弃当前区域之外的所有交叉点

我的代码（我正在使用BDS2006 C++和我自己的列表，因此您需要移植它以与您的代码兼容）

// Finds all pairwise intersections of the segments in view.lin and stores the
// intersection points in view.pnt.
//
// The bounding area of all segments is split into M x M equally sized zones;
// for each zone only the segments whose bounding rectangle overlaps that zone
// are tested against each other (brute force inside the zone).
//
// M ... number of zone subdivisions per axis
void Twin_GLView2D::main_intersect(int M=16)
{
int ia,ib,i,j,N;
double zero=1e-6;               // tolerance when matching the two computed points
glview2D::_lin *l;
glview2D::_pnt p;

struct _line
    {
    double bx0,by0,bx1,by1;     // bounding rectangle
    double x0,y0,dx,dy;         // precomputed params
    } *lin,*a,*b;

struct _siz
    {
    double bx0,bx1,by0,by1;     // zone bounding rectangle
    } sz,bz;
List<_line*> zone;

// load and precompute lines
N=view.lin.num;
lin=new _line[N];
if (lin==NULL) return;
for (a=lin,l=view.lin.dat,ia=0;ia<N;ia++,a++,l++)
    {
    // line ... bounding rectangle (min/max per axis) and start point + direction
    if (l->p0.p[0]<=l->p1.p[0]) { a->bx0=l->p0.p[0]; a->bx1=l->p1.p[0]; }
    else                        { a->bx0=l->p1.p[0]; a->bx1=l->p0.p[0]; }
    if (l->p0.p[1]<=l->p1.p[1]) { a->by0=l->p0.p[1]; a->by1=l->p1.p[1]; }
    else                        { a->by0=l->p1.p[1]; a->by1=l->p0.p[1]; }
    a->x0=l->p0.p[0]; a->dx=l->p1.p[0]-l->p0.p[0];
    a->y0=l->p0.p[1]; a->dy=l->p1.p[1]-l->p0.p[1];
    // global image size for zone subdivision
    if (!ia)
        {
        sz.bx0=l->p0.p[0];
        sz.by0=l->p0.p[1];
        sz.bx1=sz.bx0;
        sz.by1=sz.by0;
        }
    if (sz.bx0>l->p0.p[0]) sz.bx0=l->p0.p[0];
    if (sz.bx1<l->p0.p[0]) sz.bx1=l->p0.p[0];
    if (sz.by0>l->p0.p[1]) sz.by0=l->p0.p[1];
    if (sz.by1<l->p0.p[1]) sz.by1=l->p0.p[1];
    if (sz.bx0>l->p1.p[0]) sz.bx0=l->p1.p[0];
    if (sz.bx1<l->p1.p[0]) sz.bx1=l->p1.p[0];
    if (sz.by0>l->p1.p[1]) sz.by0=l->p1.p[1];
    if (sz.by1<l->p1.p[1]) sz.by1=l->p1.p[1];
    }
// process lines by zonal subdivision
zone.allocate(N);
view.pnt.num=0; view.pnt.allocate(view.lin.num);
// from here on sz.bx1,sz.by1 hold the size of one zone, not the global maximum
sz.bx1-=sz.bx0; sz.bx1/=double(M);
sz.by1-=sz.by0; sz.by1/=double(M);
for (bz.by0=sz.by0,bz.by1=sz.by0+sz.by1,i=0;i<M;i++,bz.by0+=sz.by1,bz.by1+=sz.by1)
for (bz.bx0=sz.bx0,bz.bx1=sz.bx0+sz.bx1,j=0;j<M;j++,bz.bx0+=sz.bx1,bz.bx1+=sz.bx1)
    {
    // create list of lines for actual zone only
    zone.num=0;         // clear zone list
    for (a=lin,ia=   0;ia<N;ia++,a++)
     if ((a->bx0<=bz.bx1)&&(a->bx1>=bz.bx0))
      if ((a->by0<=bz.by1)&&(a->by1>=bz.by0))
       zone.add(a); // add line to zone list
    // check for intersection within zone only
    // O((((N/M)^2)-N)/2) - optimizations
    // a,b are fetched inside the loop bodies so zone.dat[] is never read at
    // index zone.num (one past the last used item)
    for (ia=0;ia<zone.num;ia++)
        {
        a=zone.dat[ia];
        for (ib=ia+1;ib<zone.num;ib++)
            {
            b=zone.dat[ib];
            // discard lines with non intersecting bound rectangles
            if (a->bx1<b->bx0) continue;
            if (a->bx0>b->bx1) continue;
            if (a->by1<b->by0) continue;
            if (a->by0>b->by1) continue;
            // 2D lines a,b intersect ?
            double x0,y0,x1,y1,t0,t1;
            // compute intersection: t1 is the parameter along b, t0 along a
            t1=divide(a->dx*(a->y0-b->y0)+a->dy*(b->x0-a->x0),(a->dx*b->dy)-(b->dx*a->dy));
            x1=b->x0+(b->dx*t1);
            y1=b->y0+(b->dy*t1);
            // use the larger component of a's direction as divisor
            if (fabs(a->dx)>=fabs(a->dy)) t0=divide(b->x0-a->x0+(b->dx*t1),a->dx);
            else                          t0=divide(b->y0-a->y0+(b->dy*t1),a->dy);
            x0=a->x0+(a->dx*t0);
            y0=a->y0+(a->dy*t0);
            // check if intersection exists
            if (fabs(x1-x0)>zero) continue;
            if (fabs(y1-y0)>zero) continue;
            if ((t0<0.0)||(t0>1.0)) continue;
            if ((t1<0.0)||(t1>1.0)) continue;
            // if yes add point
            p.p[0]=x0;
            p.p[1]=y0;
            p.p[2]=0.0;
            // do not add points out of zone (removes almost all duplicate points)
            if (x0<bz.bx0) continue;
            if (x0>bz.bx1) continue;
            if (y0<bz.by0) continue;
            if (y0>bz.by1) continue;
            view.pnt.add(p);
            }
        }
    }
view.redraw=true;
delete[] lin;   // allocated with new[], so it must be released with delete[]
}

笔记：

List<T> x; 相当于具有“无限”大小的 T x[]
- x.num; 是 x[] 的实际大小，以 T 的个数计，而不是字节数！有效索引为 <0,x.num-1>
- x.add(q); 把 q 添加到列表末尾
- x.num=0; 清空列表
- x.allocate(N); 为列表预先分配 N 个元素的空间，以避免重新分配
输入的 List<> 是 view.lin……每条线包含点 p0、p1，每个点都有 double p[2]……即 x、y
输出的 List<> 是 view.pnt……每个点包含 double p[2]……即 x、y

[编辑2]

另外我发现，上述算法在 M=12+(N>>15) 时性能最佳

M是每个轴的细分区域数
N是要检查的行数

c# - 用 C# 实现 Hoey Shamos 算法

2 回答 2

Related

Reference