On my machine (Quad core, 8gb ram), running Vista x64 Business, with Visual Studio 2008 SP1, I am trying to intersect two sets of numbers very quickly.
I\'ve impleme
Latest benchmark:
Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms
I think the 504 - 495 difference happens because there are a couple dupe values.
Code:
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
using namespace tr1;
int runIntersectionTest2(const vector& set1, const vector& set2)
{
// hash_map theMap;
// map theMap;
unordered_set theSet;
theSet.insert( set1.begin(), set1.end() );
int intersectionSize = 0;
vector::const_iterator set2_end = set2.end();
for ( vector::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
if ( theSet.find(*iterator) != theSet.end() )
{
intersectionSize++;
}
}
return intersectionSize;
}
int runIntersectionTest(const vector& set1, const vector& set2)
{
// hash_map theMap;
// map theMap;
unordered_map theMap;
vector::const_iterator set1_end = set1.end();
// Now intersect the two sets by populating the map
for ( vector::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
vector::const_iterator set2_end = set2.end();
for ( vector::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
int value = *iterator;
unordered_map::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(const vector& set1_unsorted, const vector& set2_unsorted)
{
// Create two vectors
std::vector set1(set1_unsorted.size());
std::vector set2(set2_unsorted.size());
// Copy the unsorted data into them
std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());
// Sort the data
sort(set1.begin(),set1.end());
sort(set2.begin(),set2.end());
vector intersection;
intersection.reserve(1000);
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));
return intersection.size();
}
void createSets( vector& set1, vector& set2 )
{
srand ( time(NULL) );
set1.reserve(100000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Try to get half of our values intersecting
float ratio = 200000.0f / RAND_MAX;
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() * ratio + 1;
int value = 1000000000 + random;
set2.push_back(value);
}
// Make sure set1 is in random order (not sorted)
random_shuffle(set1.begin(),set1.end());
}
int _tmain(int argc, _TCHAR* argv[])
{
int intersectionSize = 0;
vector set1, set2;
createSets( set1, set2 );
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runSetIntersection(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest2(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}