Implementing Hoey Shamos algorithm with C#

问题

Okay, I am now getting the correct information from my current algorithm! However, with 700,000 polygons to check, it\'s just way too slow! The previous issue is fixed (My Line2D intersectsWith method was incorrect)

Now it\'s a matter of identifying my bottleneck! This algorithm is suppose to be O(nlog-n) so it should be much quicker. My intersectsWith method looks like it can\'t get any faster, however I will post its code, in case I\'m wrong

EDIT: Added IComparable interface

My method for reading line segment intersections. Some code has been omitted for readability.

    public class Line2D : IComparable
    {


    public Line2D(XYPoints p1, XYPoints p2)
    {

    }

    public bool intersectsLine(Line2D comparedLine)
    {

        if ((X2 == comparedLine.X1) && (Y2 == comparedLine.Y1)) return false;
        if ((X1 == comparedLine.X2) && (Y1 == comparedLine.Y2)) return false;

        if (X2 == comparedLine.X1 && Y2 == comparedLine.Y1)
        {
            return false;
        }

        if (X1 == comparedLine.X2 && Y1 == comparedLine.Y2)
        {
            return false;
        }
        double firstLineSlopeX, firstLineSlopeY, secondLineSlopeX, secondLineSlopeY;

        firstLineSlopeX = X2 - X1;
        firstLineSlopeY = Y2 - Y1;

        secondLineSlopeX = comparedLine.getX2() - comparedLine.getX1();
        secondLineSlopeY = comparedLine.getY2() - comparedLine.getY1();

        double s, t;
        s = (-firstLineSlopeY * (X1 - comparedLine.getX1()) + firstLineSlopeX * (getY1() - comparedLine.getY1())) / (-secondLineSlopeX * firstLineSlopeY + firstLineSlopeX * secondLineSlopeY);
        t = (secondLineSlopeX * (getY1() - comparedLine.getY1()) - secondLineSlopeY * (getX1() - comparedLine.getX1())) / (-secondLineSlopeX * firstLineSlopeY + firstLineSlopeX * secondLineSlopeY);

        if (s >= 0 && s <= 1 && t >= 0 && t <= 1)
        {
            return true;
        }

        return false; // No collision 
    }

    int IComparable.CompareTo(object obj)
    {

        //return Y1.GetHashCode();
        Line2D o1 = this;
        Line2D o2 = (Line2D)obj;
        if (o1.getY1() < o2.getY1())
        {
            return -1;
        }
        else if (o1.getY1() > o2.getY2())
        {
            return 1;
        }
        else
        {
            if (o1.getY2() < o2.getY2())
            {
                return -1;
            }
            else if (o1.getY2() > o2.getY2())
            {
                return 1;
            }
            else
            {
                return 0;
            }
        } 
    }


}

The bulk of my algorithm implementation, I realize a List isn\'t the fastest for an algorithm, however I need indexing!:

//Create a new list, sort by Y values.

 List<AlgEvent> SortedList = events.OrderBy(o => o.getY()).ToList();                
 List<Line2D> sweepline = new List<Line2D>();

 for (var g = 0; g < SortedList.Count; g++)
 {
 if (SortedList[g].isStart)
 {
    Line2D nl = SortedList[g].line;
    Line2D above;
    /* Start generating above */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        if (index == -1)
        {
            above = null;
        }
        else
        {
            above = sweepline[index + 1];
        }


    }
    catch (ArgumentOutOfRangeException)
    {
        above = null;
    }
    /* End generating above */
    if (above != null)
    {
        if (above.intersectsLine(nl))
        {
            return true;
        }
    }
    Line2D below;
    /* Start generating below */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        below = sweepline[index - 1];

    }
    catch (ArgumentOutOfRangeException)
    {

        below = null;

    }
    /* End generating below */
    if (below != null)
    {
        if (below.intersectsLine(nl))
        {
            return true;
        }
    }
    sweepline.Add(nl);
    sweepline = sweepline.OrderBy(o => o.getY1()).ToList();

}
else
{
    Line2D nl = SortedList[g].line;
    Line2D above;
    Line2D below;
    /* Start generating above */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        Console.Out.WriteLine(\"index:\" + index);
        //add 1 to get above line
        above = sweepline[index + 1];

    }
    catch (ArgumentOutOfRangeException)
    {

        above = null;

    }
    /* End generating above */
    /* Start generating below */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        below = sweepline[index - 1];

    }
    catch (ArgumentOutOfRangeException)
    {

        below = null;

    }
    /* End generating below */
    sweepline = sweepline.OrderBy(o => o.getY1()).ToList();
    sweepline.Remove(nl);
    if (above != null && below != null)
    {
        if (above.intersectsLine(below))
        {
            return true;
        }
    }
}
Console.WriteLine(\"\");
  }



   } // end numofparts for-loop

   return false;

============================================

UPDATE: September 12th: Implemented the TreeSet from C5, implemented IComparable to my classes, and slowed it down even more? I am still indexing it if that matters?

http://www.itu.dk/research/c5/

Code using TreeSet:

TreeSet<Line2D> sweepline = new TreeSet<Line2D>();
for (var g = 0; g < SortedList.Count; g++)
{
if (SortedList[g].isStart)
{
    Line2D nl = SortedList[g].line;
    Line2D above;
    /* Start generating above */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        above = sweepline[index + 1];

    }
    catch (IndexOutOfRangeException)
    {

        above = null;

    }
    /* End generating above */
    if (above != null)
    {
        if (above.intersectsLine(nl))
        {
            return false;
        }
    }
    Line2D below;
    /* Start generating below */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        below = sweepline[index - 1];

    }
    catch (IndexOutOfRangeException)
    {

        below = null;

    }
    /* End generating below */
    if (below != null)
    {
        if (below.intersectsLine(nl))
        {
            return false;
        }
    }
    sweepline.Add(nl);
    //sweepline = sweepline.OrderBy(o => o.getY1()).ToList();

}
else
{
    Line2D nl = SortedList[g].line;
    Line2D above;
    Line2D below;
    /* Start generating above */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //Console.Out.WriteLine(\"index:\" + index);
        //add 1 to get above line
        above = sweepline[index + 1];

    }
    catch (IndexOutOfRangeException)
    {

        above = null;

    }
    /* End generating above */
    /* Start generating below */
    try
    {
        //grab index in sweepline
        int index = sweepline.IndexOf(nl);
        //add 1 to get above line
        below = sweepline[index - 1];

    }
    catch (IndexOutOfRangeException)
    {

        below = null;

    }
    /* End generating below */
    //sweepline = sweepline.OrderBy(o => o.getY1()).ToList();
    sweepline.Remove(nl);
    if (above != null && below != null)
    {
        if (above.intersectsLine(below))
        {
            return false;
        }
    }
}
//Console.WriteLine(\"\");

}

回答1:

First, regarding the line intersection: you do not need the actual point of intersection, only to know if they intersect. See http://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/ for an algorithm that does just that.

About the List implementation:

In your implementation using Lists, you call indexOf on the sweepline to find nl. This searches the sweepline from start to end. See List(T).IndexOf. If you were to use the BinarySearch method, that ought to speed up the search considerably.

List's documentation has a paragraph called performance considerations. They urge you to use a value type that implements IEquatable<T> and IComparable<T>. So, your Line2D should probably be a struct and implement these interfaces.

If you follow that advice, retrieval of the endpoint from the sweepline should be O(log n), which is sufficient for your purpose, and memory should be used more efficiently.

Insertion and removal are O(n) for Lists, cause the underlying array needs to be moved around in memory. A SortedSet has faster insertion and removal, but I don't quite see how to find an item's neighbors in O(log n) there. Anyone? (See also Why SortedSet<T>.GetViewBetween isn't O(log N)?)

Anyways, the C5 TreeSet should solve this.

I looked up the performance of IndexOf and [i] in the user guide and they're both listed as O(log n). So that is not supposed to be the issue. It's still probably somewhat faster, but no more than a fixed factor, to call the specific methods for finding the neighbors on the sweepline, i.e. Successor and Predecessor, which are also O(log n).

[...]
try 
{
    Line2D above = sweepline.Successor(nl);
    if (above.intersectsLine(nl))
    {
        return false;
    }
}
catch (NoSuchItemException ignore) { }
[...]

I don't like that they do not have a method that doesn't throw the exception, since throwing exceptions is very expensive. Your sweep line will be pretty full generally so my best guess is that failure to find one will be rare and calling Successor is the most efficient way. Alternatively, you could keep calling IndexOf like you do now, but check if it equals Count minus one before retrieving [index + 1], and prevent the exception from being thrown at all:

[...]
int index = sweepline.IndexOf(nl);
if( index < sweepline.Count-1 )
{
    Line2D above = sweepline[index + 1];
    if (above.intersectsLine(nl))
    {
        return false;
    }
}
[...]

Chapter two of the manual describes equality and comparison for C5 collections. Here, too, at the very least you must implement IEquatable<T> and IComparable<T>!

One more thought: you report feeding the algorithm 700000 lines. Could you start with timing for example 1000, 2500, 5000, 10000 lines and seeing how the algorithm scales for cases where they do not intersect?

On how to compare the lines on the sweepline:

You need to find some sort of natural ordering for the Line2Ds on the Sweepline TreeSet, since the CompareTo method asks you to compare one Line2D to another. One of the Line2Ds already sits in the Sweepline TreeSet, the other has just been encountered and is being added.

Your sweepline runs from bottom to top, I think:

List<AlgEvent> SortedList = events.OrderBy(o => o.getY()).ToList();

So let's say segment S1 got added to the TreeSet at event 1, and we wish to compare it to S2, which is being added at event 2, right now.

The line segments could possibly intersect at some point, which would change the ordering, but the algorithm will check for this right after inserting them, in the above and below checks. Which would perhaps better be called the left and right checks, come to think of it.

Anyways.. so the easiest would be to compare the bottom endpoints of both line segments. To the left is smaller, to the right is bigger. However, we need to look at the ordering at the position of the sweepline and they may have changed positions since then, like in the picture.

So we need to compare the bottom endpoint of S2 to the red point on S1, and see if it lies to the left or to the right of that point. It lies to the left so S2 is considered smaller than S1.

Usually it's simpler than this: If all of S1 lies to the left of S2's bottom endpoint, S1 is smaller than S2. If all of S1 lies to the right of S2's bottom endpoint, S1 is larger than S2.

I think you're looking for the typesafer version of the interface:

public class Line2D : IComparable<Line2D>

Assuming two properties BottomY, the lowest of the two Y values, and BottomX, the X value of the lowest endpoint, a somewhat tested attempt:

int IComparable<Line2D>.CompareTo(Line2D other)
{
    if( BottomY < other.BottomY )
    {
        return -other.CompareTo(this);
    }

    // we're the segment being added to the sweepline
    if( BottomX >= other.X1 && BottomX >= other.X2 )
    {
        return 1;
    }
    if( BottomX <= other.X1 && BottomX <= other.X2 )
    {
        return -1;
    }

    if( other.Y2 == other.Y1 )
    {
        // Scary edge case: horizontal line that we intersect with. Return 0?
        return 0;
    }

    // calculate the X coordinate of the intersection of the other segment
    // with the sweepline
    // double redX = other.X1 + 
    //    (BottomY - other.Y1) * (other.X2 - other.X1) / (other.Y2 - other.Y1);
    //
    // return BottomX.CompareTo(redX);

    // But more efficient, and more along the lines of the orientation comparison:
    return Comparer<Double>.Default.Compare(
        (BottomX - other.X1) * (other.Y2 - other.Y1),
        (BottomY - other.Y1) * (other.X2 - other.X1) );

}

回答2:

[original answer]

I am not C# user, but this should speeds things up a little.

less heap trashing
do not compute the same thing twice
avoid all sub calls if you can (get functions removed)

code:

public bool intersectsLine(const Line2D &comparedLine)
    {
    if ((X2==comparedLine.X1)&&(Y2==comparedLine.Y1)) return false;
    if ((X1==comparedLine.X2)&&(Y1==comparedLine.Y2)) return false;
    double dx1,dy1,dx2,dy2;
    dx1 = X2 - X1;
    dy1 = Y2 - Y1;
    dx2 = comparedLine.X2 - comparedLine.X1;
    dy2 = comparedLine.Y2 - comparedLine.Y1;
    double s,t,ax,ay,b;
    ax=X1-comparedLine.X1;
    ay=Y1-comparedLine.Y1;
    b=1.0/(-(dx2*dy1)+(dx1*dy2));
    s = (-(dy1*ax)+(dx1*ay))*b;
    t = ( (dx2*ay)-(dy2*ax))*b;
    if ((s>=0)&&(s<=1)&&(t>=0)&&(t<=1)) return true;
    return false; // No collision
    }

for the rest of your code, add time measurements to find what exactly slow things down. My guess is on List management ... unnecessary reallocations can slow things considerably.

[edit1]

After some research on random line data I concluded this:

if too many lines are across the entire area than no optimizations are valid
if there are more small lines than there is more speedup for any optimizations
brute force is T((N*N-N)/2) which is still O(N*N) estimate around 35 hours for 700K lines to be processed (unusable)
optimized brute force with area subdivision is T((((N/M)^2)-N)/2) - optimizations ~O((N/M)^2) where
- N is max of area lines number
- M is number of area divisions per any axis idea is to check only lines crossing some region (divide dataset area to M*M squares/rectangles). For 700K lines is best subdivision to 16x16 areas. Measured times:
  
  0.540s per 32K lines 1.950s per 64K lines 7.000s per 128K lines 27.514s per 256K lines
estimated run time is 3.7 min per 700K lines (for lines at max ~10% length of whole area). I think this is better than yours 19 minutes.
another speed up is possible with use of multi CPU/core

algorithm is fully parallel-isable and for 4 CPU/Cores 3.7min/4 -> 56s or you can port it to GPU ...

My optimized brute force algorithm with area subdivision O((((N/M)^2)-N)/2) - optimizations

get the used area size (xmin,xmax,ymin,ymax) O(N)
select subdivision M the best I try for my random datasets 32K-256K lines was M=16
cycle through all subdivision area (evenly divided dataset area)

create list of lines crossing actual subdivision area and check intersection for all lines in that list. If do not want duplicate intersections then discard all intersection outside current area

my code (I am using BDS2006 C++ and my own lists so you need to port it to be compatible with your code)

void Twin_GLView2D::main_intersect(int M=16)
{
int ia,ib,i,j,N;
double zero=1e-6;
glview2D::_lin *l;
glview2D::_pnt p;

struct _line
    {
    double bx0,by0,bx1,by1;     // bounding rectangle
    double x0,y0,dx,dy;         // precomputed params
    } *lin,*a,*b;

struct _siz
    {
    double bx0,bx1,by0,by1;     // zone bounding rectangle
    } sz,bz;
List<_line*> zone;

// load and precompute lines
N=view.lin.num;
lin=new _line[N];
if (lin==NULL) return;
for (a=lin,l=view.lin.dat,ia=0;ia<N;ia++,a++,l++)
    {
    // line ...
    if (l->p0.p[0]<=l->p1.p[0]) { a->bx0=l->p0.p[0]; a->bx1=l->p1.p[0]; }
    else                        { a->bx0=l->p1.p[0]; a->bx1=l->p0.p[0]; }
    if (l->p0.p[1]<=l->p1.p[1]) { a->by0=l->p0.p[1]; a->by1=l->p1.p[1]; }
    else                        { a->by0=l->p1.p[1]; a->by1=l->p0.p[1]; }
    a->x0=l->p0.p[0]; a->dx=l->p1.p[0]-l->p0.p[0];
    a->y0=l->p0.p[1]; a->dy=l->p1.p[1]-l->p0.p[1];
    // global image size for zone subdivision
    if (!ia)
        {
        sz.bx0=l->p0.p[0];
        sz.by0=l->p0.p[1];
        sz.bx1=sz.bx0;
        sz.by1=sz.by0;
        }
    if (sz.bx0>l->p0.p[0]) sz.bx0=l->p0.p[0];
    if (sz.bx1<l->p0.p[0]) sz.bx1=l->p0.p[0];
    if (sz.by0>l->p0.p[1]) sz.by0=l->p0.p[1];
    if (sz.by1<l->p0.p[1]) sz.by1=l->p0.p[1];
    if (sz.bx0>l->p1.p[0]) sz.bx0=l->p1.p[0];
    if (sz.bx1<l->p1.p[0]) sz.bx1=l->p1.p[0];
    if (sz.by0>l->p1.p[1]) sz.by0=l->p1.p[1];
    if (sz.by1<l->p1.p[1]) sz.by1=l->p1.p[1];
    }
// process lines by zonal subdivision
zone.allocate(N);
view.pnt.num=0; view.pnt.allocate(view.lin.num);
sz.bx1-=sz.bx0; sz.bx1/=double(M);
sz.by1-=sz.by0; sz.by1/=double(M);
for (bz.by0=sz.by0,bz.by1=sz.by0+sz.by1,i=0;i<M;i++,bz.by0+=sz.by1,bz.by1+=sz.by1)
for (bz.bx0=sz.bx0,bz.bx1=sz.bx0+sz.bx1,j=0;j<M;j++,bz.bx0+=sz.bx1,bz.bx1+=sz.bx1)
    {
    // create list of lines for actual zone only
    zone.num=0;         // clear zone list
    for (a=lin,ia=   0;ia<N;ia++,a++)
     if ((a->bx0<=bz.bx1)&&(a->bx1>=bz.bx0))
      if ((a->by0<=bz.by1)&&(a->by1>=bz.by0))
       zone.add(a); // add line to zone list
    // check for intersection within zone only
    // O((((N/M)^2)-N)/2) - optimizations
    for (ia=   0,a=zone.dat[ia];ia<zone.num;ia++,a=zone.dat[ia])
    for (ib=ia+1,b=zone.dat[ib];ib<zone.num;ib++,b=zone.dat[ib])
        {
        // discart lines with non intersecting bound rectangles
        if (a->bx1<b->bx0) continue;
        if (a->bx0>b->bx1) continue;
        if (a->by1<b->by0) continue;
        if (a->by0>b->by1) continue;
        // 2D lines a,b intersect ?
        double x0,y0,x1,y1,t0,t1;
        // compute intersection
        t1=divide(a->dx*(a->y0-b->y0)+a->dy*(b->x0-a->x0),(a->dx*b->dy)-(b->dx*a->dy));
        x1=b->x0+(b->dx*t1);
        y1=b->y0+(b->dy*t1);
        if (fabs(a->dx)>=fabs(a->dy)) t0=divide(b->x0-a->x0+(b->dx*t1),a->dx);
        else                          t0=divide(b->y0-a->y0+(b->dy*t1),a->dy);
        x0=a->x0+(a->dx*t0);
        y0=a->y0+(a->dy*t0);
        // check if intersection exists
        if (fabs(x1-x0)>zero) continue;
        if (fabs(y1-y0)>zero) continue;
        if ((t0<0.0)||(t0>1.0)) continue;
        if ((t1<0.0)||(t1>1.0)) continue;
        // if yes add point
        p.p[0]=x0;
        p.p[1]=y0;
        p.p[2]=0.0;
        // do not add points out of zone (allmost all duplicit points removal)
        if (x0<bz.bx0) continue;
        if (x0>bz.bx1) continue;
        if (y0<bz.by0) continue;
        if (y0>bz.by1) continue;
        view.pnt.add(p);
        }
    }
view.redraw=true;
delete lin;
}

Notes:

List<T> x; is the same as T x[] with 'unlimited' size
- x.num; is actual size of x[] in Ts not Bytes !!! index = <0,x.num-1>
- x.add(q); adds q to the list at the end
- x.num=0; clears the list
- x.allocate(N); allocate space for N items in list to avoid relocations
input List<> is view.lin ... contains points p0,p1 each have double p[2] ... x,y
output List<> is view.pnt ... contains double p[2] ... x,y

[Edit2]

In addition I found out that the best performance of above algorithm is when M=12+(N>>15)

M is subdivision areas count per axis
N is number of lines to check

来源：https://stackoverflow.com/questions/18512815/implementing-hoey-shamos-algorithm-with-c-sharp

标签

algorithm

computational-geometry

arcgis