Fast intersection of sets: C++ vs C#

后端 未结 13 1664
野性不改
野性不改 2020-12-28 10:17

On my machine (Quad core, 8gb ram), running Vista x64 Business, with Visual Studio 2008 SP1, I am trying to intersect two sets of numbers very quickly.

I've impleme…

13条回答
  •  盖世英雄少女心
    2020-12-28 10:52

    By the way, if you have large sorted sets, std::set_intersection is not the fastest algorithm. std::set_intersection takes up to 2*(m+n)-1 comparisons, but algorithms like the one from Baeza-Yates can be faster. Here m and n are the sizes of the smaller and the larger set, respectively. For small m, Baeza-Yates is O(m * log(n)), while for n = alpha * m it is O(n). The basic idea is to do a kind of two-way binary search.

    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.7899&rep=rep1&type=pdf

    Experimental Analysis of a Fast Intersection Algorithm for Sorted Sequences Ricardo Baeza-Yates and Alejandro Salinger

    OR

    R. Baeza-Yates. A Fast Set Intersection Algorithm for Sorted Sequences. In Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching (CPM 2004), Springer LNCS 3109, pp 400-408, Istanbul, Turkey, July 2004.

    Below is an explanation and an implementation by Erik Frey; using a binary probe, he reports significantly faster results than std::set_intersection. I have not tried his code yet.
    http://fawx.com/

    1. Pick the median element, A, in the smaller set.
    2. Search for its insertion-position element, B, in the larger set.
    3. If A and B are equal, append the element to the result.
    4. Repeat steps 1-3 on the non-empty subsets on either side of elements A and B.

    ;

    /* * baeza_intersect */ template< template class Probe, class RandomAccessIterator, class OutputIterator> void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1, RandomAccessIterator begin2, RandomAccessIterator end2, OutputIterator out) { RandomAccessIterator probe1, probe2;

    if ( (end1 - begin1) < ( end2 - begin2 ) ) { if ( begin1 == end1 ) return; probe1 = begin1 + ( ( end1 - begin1 ) >> 1 ); probe2 = lower_bound< Probe >( begin2, end2, *probe1 ); baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left if (! (probe2 == end2 || *probe1 < *probe2 )) *out++ = *probe2++; baeza_intersect< Probe >(++probe1, end1, probe2, end2, out); // intersect right } else { if ( begin2 == end2 ) return; probe2 = begin2 + ( ( end2 - begin2 ) >> 1 ); probe1 = lower_bound< Probe >( begin1, end1, *probe2 ); baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left if (! (probe1 == end1 || *probe2 < *probe1 )) *out++ = *probe1++; baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out); // intersect right } }

    /* * with a comparator */ template< template class Probe, class RandomAccessIterator, class OutputIterator, class Comparator > void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1, RandomAccessIterator begin2, RandomAccessIterator end2, OutputIterator out, Comparator cmp) { RandomAccessIterator probe1, probe2;

      if ( (end1 - begin1) < ( end2 - begin2 ) )
      {
        if ( begin1 == end1 )
          return;
        probe1 = begin1 + ( ( end1 - begin1 ) >> 1 );
        probe2 = lower_bound< Probe >( begin2, end2, *probe1, cmp );
        baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
        if (! (probe2 == end2 || cmp( *probe1, *probe2 ) ))
          *out++ = *probe2++;
        baeza_intersect< Probe >(++probe1, end1, probe2, end2, out, cmp); // intersect right
      }
      else
      {
        if ( begin2 == end2 )
          return;
        probe2 = begin2 + ( ( end2 - begin2 ) >> 1 );
        probe1 = lower_bound< Probe >( begin1, end1, *probe2, cmp );
        baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
        if (! (probe1 == end1 || cmp( *probe2, *probe1 ) ))
          *out++ = *probe1++;
        baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out, cmp); // intersect right
      }
    }
    

    // probe.hpp

    /**
     * binary probe: pick the next element to inspect by choosing the halfway
     * point between begin and end.
     *
     * The searched-for value is accepted only so that every probe shares the
     * same call signature; this probe does not look at it.
     * The call operator is const because the functor holds no state, which
     * lets it be invoked through const objects and references as well.
     */
    template< class RandomAccessIterator, class T >
    struct binary_probe
    {
      RandomAccessIterator operator()(RandomAccessIterator begin,
                                      RandomAccessIterator end,
                                      const T & /* value */) const
      {
        return begin + ( (end - begin) >> 1 );
      }
    };

    /**
     * lower_bound: like the STL's lower_bound, but the position inspected at
     * each step is chosen by a pluggable probe.
     * Note the appearance of the rare template template parameter!
     *
     * Probe   class template; Probe< RandomAccessIterator, T > is
     *         default-constructed and called as probe(begin, end, value). It
     *         must return an iterator inside [begin, end): that position is
     *         dereferenced, and the range only shrinks if it lies inside it.
     * begin, end
     *         range to search; expected to be sorted ascending by operator<.
     * value   value to locate.
     *
     * Returns the first position in [begin, end) whose element is not less
     * than `value`, or `end` if every element is less.
     */
    template< template< class, class > class Probe, class RandomAccessIterator, class T >
    RandomAccessIterator lower_bound(RandomAccessIterator begin, RandomAccessIterator end,
                                     const T & value)
    {
      Probe< RandomAccessIterator, T > pfunc; // probe functor

      while ( begin < end )
      {
        RandomAccessIterator pit = pfunc(begin, end, value);
        if ( *pit < value )
          begin = pit + 1; // everything up to and including pit is too small
        else
          end = pit;       // pit itself may still be the answer
      }
      return begin;
    }

    /**
     * lower_bound, this time with a comparator: the position inspected at
     * each step is chosen by a pluggable probe, and elements are ordered by
     * `cmp` instead of operator<.
     *
     * Probe   class template; Probe< RandomAccessIterator, T > is
     *         default-constructed and called as probe(begin, end, value). It
     *         must return an iterator inside [begin, end): that position is
     *         dereferenced, and the range only shrinks if it lies inside it.
     * begin, end
     *         range to search; expected to be sorted according to `cmp`.
     * value   value to locate.
     * cmp     callable as cmp(element, value); true when the element is
     *         ordered before the value.
     *
     * Returns the first position in [begin, end) whose element is not ordered
     * before `value`, or `end` if every element is.
     */
    template< template< class, class > class Probe, class RandomAccessIterator, class T,
              class Comparator >
    RandomAccessIterator lower_bound(RandomAccessIterator begin, RandomAccessIterator end,
                                     const T & value, Comparator cmp)
    {
      Probe< RandomAccessIterator, T > pfunc; // probe functor

      while ( begin < end )
      {
        RandomAccessIterator pit = pfunc(begin, end, value);
        if ( cmp(*pit, value) )
          begin = pit + 1; // everything up to and including pit is ordered before value
        else
          end = pit;       // pit itself may still be the answer
      }
      return begin;
    }

提交回复
热议问题