Fast intersection of sets: C++ vs C#

后端 未结 13 1639
野性不改
野性不改 2020-12-28 10:17

On my machine (quad core, 8 GB RAM), running Vista x64 Business with Visual Studio 2008 SP1, I am trying to intersect two sets of numbers very quickly.

I've implemented…

13条回答
  •  情歌与酒
    2020-12-28 10:55

    Latest benchmark:

    Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
    Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
    Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms
    

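    I think the 504 vs. 495 difference happens because set2 contains a few duplicate values: in this run the two hash-based versions counted every matching element of set2, while set_intersection reports each common value only once.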

    Code:
    
    // MapPerformance.cpp : Defines the entry point for the console application.
    //
    
    #include "stdafx.h"

    // NOTE(review): the header names on these lines were lost when the page was
    // extracted; the list below is reconstructed from the names the code uses.
    // Confirm against the original project before relying on it.
    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>
    #include <ctime>
    #include <hash_map>
    #include <iostream>
    #include <iterator>
    #include <unordered_set>
    #include <vector>

    #include <boost/unordered_map.hpp>

    #include "timer.h"
    
    using namespace std;
    using namespace stdext;
    using namespace boost;
    using namespace tr1;
    
    
    // Counts the distinct values of set2 that also occur in set1, using a hash set.
    //
    // set1 is loaded into an unordered_set; every element of set2 is then looked up
    // in it. A value that matches is removed from the hash set, so a value repeated
    // in set2 is counted only once and the result agrees with runSetIntersection().
    //
    // Returns the number of common values (0 when either input is empty).
    int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2)
    {
        unordered_set<int> unmatched( set1.begin(), set1.end() );

        int intersectionSize = 0;

        for ( vector<int>::const_iterator it = set2.begin(), last = set2.end(); it != last; ++it )
        {
            // erase(key) returns how many elements were removed: 1 on the first hit,
            // 0 if the value was never in set1 or has already been counted.
            if ( unmatched.erase( *it ) != 0 )
            {
                ++intersectionSize;
            }
        }

        return intersectionSize;
    }
    
    // Counts the distinct values of set2 that also occur in set1, using a hash map.
    //
    // Every value of set1 is stored as a key with the marker 1. When a value of set2
    // is found its marker is switched to 2, so a value repeated in set2 is counted
    // only once and the result agrees with runSetIntersection().
    //
    // Returns the number of common values (0 when either input is empty).
    int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
    {
        // Alternative containers to benchmark here: hash_map<int, int>, map<int, int>.
        unordered_map<int, int> theMap;

        // Populate the map with set1; 1 means "present, not matched yet".
        for ( vector<int>::const_iterator it = set1.begin(), last = set1.end(); it != last; ++it )
        {
            theMap[*it] = 1;
        }

        int intersectionSize = 0;

        for ( vector<int>::const_iterator it = set2.begin(), last = set2.end(); it != last; ++it )
        {
            unordered_map<int, int>::iterator foundValue = theMap.find( *it );

            if ( foundValue != theMap.end() && foundValue->second == 1 )
            {
                // Update through the iterator already in hand rather than hashing
                // the key a second time with operator[].
                foundValue->second = 2;

                ++intersectionSize;
            }
        }

        return intersectionSize;
    }
    
    // Counts the distinct values common to both inputs using sort + set_intersection.
    //
    // The inputs may be in any order and are not modified: sorted working copies are
    // made on every call, so the copy and sort cost is included in each call.
    //
    // Returns the number of common values (0 when either input is empty).
    int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted)
    {
        // Copy-construct the working vectors directly instead of zero-filling
        // them first and overwriting the contents afterwards.
        vector<int> set1( set1_unsorted );
        vector<int> set2( set2_unsorted );

        // set_intersection requires both ranges to be sorted.
        sort( set1.begin(), set1.end() );
        sort( set2.begin(), set2.end() );

        // The result can never hold more elements than the smaller input.
        vector<int> intersection;
        intersection.reserve( min( set1.size(), set2.size() ) );

        set_intersection( set1.begin(), set1.end(), set2.begin(), set2.end(), back_inserter( intersection ) );

        // set_intersection pairs up duplicates that occur in both inputs; collapse
        // them so that every common value is counted exactly once.
        intersection.erase( unique( intersection.begin(), intersection.end() ), intersection.end() );

        return static_cast<int>( intersection.size() );
    }
    
    // Generates the benchmark data and appends it to the two output vectors.
    //
    // set1 receives 100,000 consecutive values starting at 1,000,000,000, in random
    // order. set2 receives 1,000 random values (duplicates possible) drawn from a
    // range twice as wide, so roughly half of them are expected to occur in set1.
    //
    // Reseeds rand() from the current time on every call.
    void createSets( vector<int>& set1, vector<int>& set2 )
    {
        const int base      = 1000000000;     // offset added to every generated value
        const int set1Count = 100000;
        const int set2Count = 1000;
        const int set2Range = 2 * set1Count;  // twice set1's span => about half intersect

        srand( static_cast<unsigned int>( time(NULL) ) );

        set1.reserve( set1.size() + set1Count );
        set2.reserve( set2.size() + set2Count );

        for ( int i = 0; i < set1Count; i++ )
        {
            set1.push_back( base + i );
        }

        // Scale rand() from [0, RAND_MAX] onto [0, set2Range].
        const double ratio = static_cast<double>( set2Range ) / RAND_MAX;

        for ( int i = 0; i < set2Count; i++ )
        {
            const int offset = static_cast<int>( rand() * ratio ) + 1;

            set2.push_back( base + offset );
        }

        // Make sure set1 is in random order (not sorted). A plain Fisher-Yates loop is
        // used because random_shuffle was removed in C++17 and std::shuffle needs
        // C++11's <random>; this form builds with either.
        for ( vector<int>::size_type i = set1.size(); i > 1; --i )
        {
            // RAND_MAX may be as small as 32767, which is less than set1.size(), so
            // two rand() calls are combined to be able to reach every index.
            const unsigned long r = static_cast<unsigned long>( rand() ) * ( static_cast<unsigned long>( RAND_MAX ) + 1UL )
                                  + static_cast<unsigned long>( rand() );

            swap( set1[i - 1], set1[r % i] );
        }
    }
    
    int _tmain(int argc, _TCHAR* argv[])
    {
        int intersectionSize = 0;
    
        vector set1, set2; 
        createSets( set1, set2 );
    
        Timer timer;
        for ( int i = 0; i < 1000; i++ )
        {
            intersectionSize = runIntersectionTest(set1, set2);
        }
        timer.Stop();
    
        cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
    
        timer.Reset();
        for ( int i = 0; i < 1000; i++ )
        {
            intersectionSize = runSetIntersection(set1,set2);
        }
        timer.Stop();
    
        cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
    
        timer.Reset();
        for ( int i = 0; i < 1000; i++ )
        {
            intersectionSize = runIntersectionTest2(set1,set2);
        }
        timer.Stop();
    
        cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
    
        getchar();
    
        return 0;
    }
    

提交回复
热议问题