在我的机器(四核,8gb 内存)上,运行 Vista x64 Business 和 Visual Studio 2008 SP1,我试图非常快速地交叉两组数字。
我在 C++ 中实现了两种方法,在 C# 中实现了一种。到目前为止,C# 方法更快,我想改进 C++ 方法,使其比 C# 更快,我希望 C++ 可以做到这一点。
这是 C# 输出:(发布版本)
Found the intersection 1000 times, in 4741.407 ms
这是初始 C++ 输出,用于两种不同的方法(发布 x64 构建):
Found the intersection (using unordered_map) 1000 times, in 21580.7ms
Found the intersection (using set_intersection) 1000 times, in 22366.6ms
这是最新的 C++ 输出,用于三种方法(发布 x64 构建):
最新基准:
Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms
因此,set_intersection 方法现在比 C# 慢约 2 倍,但比最初的 C++ 方法快 2 倍。
最新的 C++ 代码:
Code:
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <unordered_set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
using namespace tr1;
int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_set<int> theSet;
theSet.insert( set1.begin(), set1.end() );
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
if ( theSet.find(*iterator) != theSet.end() )
{
intersectionSize++;
}
}
return intersectionSize;
}
int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
vector<int>::const_iterator set1_end = set1.end();
// Now intersect the two sets by populating the map
for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted)
{
// Create two vectors
std::vector<int> set1(set1_unsorted.size());
std::vector<int> set2(set2_unsorted.size());
// Copy the unsorted data into them
std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());
// Sort the data
sort(set1.begin(),set1.end());
sort(set2.begin(),set2.end());
vector<int> intersection;
intersection.reserve(1000);
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));
return intersection.size();
}
void createSets( vector<int>& set1, vector<int>& set2 )
{
srand ( time(NULL) );
set1.reserve(100000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Try to get half of our values intersecting
float ratio = 200000.0f / RAND_MAX;
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() * ratio + 1;
int value = 1000000000 + random;
set2.push_back(value);
}
// Make sure set1 is in random order (not sorted)
random_shuffle(set1.begin(),set1.end());
}
int _tmain(int argc, _TCHAR* argv[])
{
int intersectionSize = 0;
vector<int> set1, set2;
createSets( set1, set2 );
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runSetIntersection(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest2(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
C#代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace DictionaryPerformance
{
class Program
{
static void Main(string[] args)
{
List<int> set1 = new List<int>(100000);
List<int> set2 = new List<int>(1000);
// Create 100,000 values for set1
for (int i = 0; i < 100000; i++)
{
int value = 1000000000 + i;
set1.Add(value);
}
Random random = new Random(DateTime.Now.Millisecond);
// Create 1,000 values for set2
for (int i = 0; i < 1000; i++)
{
int value = 1000000000 + (random.Next() % 200000 + 1);
set2.Add(value);
}
long start = System.Diagnostics.Stopwatch.GetTimestamp();
for (int i = 0; i < 1000; i++)
{
runIntersectionTest(set1,set2);
}
long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start;
Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency));
Console.ReadKey();
}
static int runIntersectionTest(List<int> set1, List<int> set2)
{
Dictionary<int,int> theMap = new Dictionary<int,int>(100000);
// Now intersect the two sets by populating the map
foreach( int value in set1 )
{
theMap[value] = 1;
}
int intersectionSize = 0;
foreach ( int value in set2 )
{
int count;
if ( theMap.TryGetValue(value, out count ) )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
}
}
C++ 代码:
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
int runIntersectionTest(vector<int> set1, vector<int> set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
// Now intersect the two sets by populating the map
for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(set<int> set1, set<int> set2)
{
set<int> intersection;
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));
return intersection.size();
}
int _tmain(int argc, _TCHAR* argv[])
{
srand ( time(NULL) );
vector<int> set1;
vector<int> set2;
set1.reserve(10000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set2.push_back(value);
}
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
set<int> set21;
set<int> set22;
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set21.insert(value);
}
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() % 200000 + 1;
random *= 10;
int value = 1000000000 + random;
set22.insert(value);
}
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
runSetIntersection(set21,set22);
}
timer.Stop();
cout << "Found the intersection (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}
好的,这是最新的,有一些变化:
- C++ 集现在已正确设置,因此它们有 50% 的交集(如 C#)
- Set1 被打乱所以它没有排序, set2 已经没有排序
- set_intersection 实现现在使用向量,并首先对它们进行排序
C++(发布,x64)结果:
Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1ms
Found the intersection of 494 values (using set_intersection) 1000 times, in 10317ms
所以它比 C# 慢 2 倍。@Jalf:你得到了一些非常快的数字,我在这里做错了吗?
C++ 代码:
// MapPerformance.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <boost\unordered\unordered_map.hpp>
#include "timer.h"
using namespace std;
using namespace stdext;
using namespace boost;
int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
// hash_map<int,int> theMap;
// map<int,int> theMap;
unordered_map<int,int> theMap;
vector<int>::const_iterator set1_end = set1.end();
// Now intersect the two sets by populating the map
for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
{
int value = *iterator;
theMap[value] = 1;
}
int intersectionSize = 0;
vector<int>::const_iterator set2_end = set2.end();
for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
{
int value = *iterator;
unordered_map<int,int>::iterator foundValue = theMap.find(value);
if ( foundValue != theMap.end() )
{
theMap[value] = 2;
intersectionSize++;
}
}
return intersectionSize;
}
int runSetIntersection(const vector<int> set1_unsorted, const vector<int> set2_unsorted)
{
// Create two vectors
std::vector<int> set1(set1_unsorted.size());
std::vector<int> set2(set2_unsorted.size());
// Copy the unsorted data into them
std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());
// Sort the data
sort(set1.begin(),set1.end());
sort(set2.begin(),set2.end());
vector<int> intersection;
intersection.reserve(1000);
set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));
return intersection.size();
}
void createSets( vector<int>& set1, vector<int>& set2 )
{
srand ( time(NULL) );
set1.reserve(100000);
set2.reserve(1000);
// Create 100,000 values for set1
for ( int i = 0; i < 100000; i++ )
{
int value = 1000000000 + i;
set1.push_back(value);
}
// Try to get half of our values intersecting
float ratio = 200000.0f / RAND_MAX;
// Create 1,000 values for set2
for ( int i = 0; i < 1000; i++ )
{
int random = rand() * ratio + 1;
int value = 1000000000 + random;
set2.push_back(value);
}
// Make sure set1 is in random order (not sorted)
random_shuffle(set1.begin(),set1.end());
}
int _tmain(int argc, _TCHAR* argv[])
{
int intersectionSize = 0;
vector<int> set1, set2;
createSets( set1, set2 );
Timer timer;
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runIntersectionTest(set1, set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
for ( int i = 0; i < 1000; i++ )
{
intersectionSize = runSetIntersection(set1,set2);
}
timer.Stop();
cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}