我在 C# 中经常使用 HashSet 和 Dictionary,发现它们非常快......
我试过使用 std::map 和 std::hash_map 并且发现它们比较慢。这听起来像预期的行为吗?我在使用 std::hash_map 时可能做错了什么?
或者,有没有更好的 C++ 哈希容器?
我要做哈希的是 int32 值,通常大约有 100,000 个。
更新:我分别用 C# 和 C++ 写了一个可复现的示例(repro)。它会运行两次测试:在 C# 中分别耗时 19 毫秒和 13 毫秒,而在 C++ 中大约需要 11,000 毫秒。我的 C++ 代码一定有什么问题 :)
(两者都是以 Release(发布)配置构建并运行的控制台应用程序)
C# 输出:
Found 511 values in the intersection, in 19 ms
Found 508 values in the intersection, in 13 ms
C++ 输出:
Found 308 values in the intersection, in 11764.7ms
Found 316 values in the intersection, in 11742.8ms
C++ 输出(使用 stdext::hash_map 而不是 std::map)
Found 300 values in the intersection, in 383.552ms
Found 306 values in the intersection, in 2277.02ms
C++ 输出(使用 stdext::hash_map,x64 版本)
Found 292 values in the intersection, in 1037.67ms
Found 302 values in the intersection, in 3663.71ms
备注:
- 在 C++ 中,Set2 并没有按我期望的方式被填充:我希望它与 Set1 有 50% 的交集(就像在 C# 中一样),但不知为何,我不得不把随机数乘以 10,才能让这两个集合哪怕只是部分不相交
C#:
/// <summary>
/// Runs the intersection benchmark twice and prints, for each trial, the number of
/// matches found and the elapsed wall-clock time in milliseconds. Blocks on a key
/// press before returning.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    for (int trial = 0; trial < 2; trial++)
    {
        // DateTime.Now.Millisecond is only the 0-999 millisecond *component* of the
        // current time, so subtracting two readings gives a wrong (even negative)
        // duration whenever a second boundary is crossed. Stopwatch measures real
        // elapsed time.
        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
        int intersectionSize = runIntersectionTest();
        stopwatch.Stop();

        Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, stopwatch.ElapsedMilliseconds));
    }

    Console.ReadKey();
}
/// <summary>
/// Builds a list of 100,000 consecutive ints and a list of 1,000 random ints drawn
/// from a range twice as wide, loads the first list into a Dictionary, and counts
/// how many entries of the second list are present as keys.
/// </summary>
/// <returns>
/// The number of entries of the second list found in the dictionary (duplicates in
/// the second list are counted each time they occur).
/// </returns>
static int runIntersectionTest()
{
    const int baseValue = 1000000000;
    Random rng = new Random(DateTime.Now.Millisecond);

    // First set: baseValue, baseValue + 1, ..., baseValue + 99,999.
    List<int> first = new List<int>();
    for (int n = 0; n < 100000; n++)
    {
        first.Add(baseValue + n);
    }

    // Second set: 1,000 random values in baseValue + 1 .. baseValue + 200,000.
    List<int> second = new List<int>();
    for (int n = 0; n < 1000; n++)
    {
        second.Add(baseValue + (rng.Next() % 200000 + 1));
    }

    // Every member of the first set becomes a key mapped to 1.
    Dictionary<int,int> lookup = new Dictionary<int,int>();
    foreach (int key in first)
    {
        lookup[key] = 1;
    }

    // Probe with the second set; each hit is counted and its entry re-marked as 2.
    int hits = 0;
    foreach (int candidate in second)
    {
        if (!lookup.ContainsKey(candidate))
        {
            continue;
        }
        hits++;
        lookup[candidate] = 2;
    }
    return hits;
}
C++:
// Builds set1 (100,000 consecutive ints) and set2 (1,000 random ints drawn from a
// range twice as wide, so about half of them are expected to fall inside set1),
// loads set1 into a std::map and counts how many entries of set2 are present as keys.
//
// Returns the number of set2 entries found in the map (a value that occurs twice in
// set2 is counted twice). Uses rand(), so the result depends on the caller's srand().
int runIntersectionTest()
{
    const int kBase      = 1000000000;
    const int kSet1Size  = 100000;
    const int kSet2Size  = 1000;
    const int kSet2Range = 200000;

    // Create 100,000 values for set1
    std::vector<int> set1;
    set1.reserve(kSet1Size);
    for ( int i = 0; i < kSet1Size; ++i )
    {
        set1.push_back(kBase + i);
    }

    // Create 1,000 values for set2
    std::vector<int> set2;
    set2.reserve(kSet2Size);
    for ( int i = 0; i < kSet2Size; ++i )
    {
        // rand() only yields values in [0, RAND_MAX], and RAND_MAX may be as small as
        // 32767, so "rand() % 200000" can never reach the upper part of the range
        // (an earlier version multiplied the result by 10 to work around this, which
        // skewed the overlap away from the intended 50%). Scaling rand() to [0, 1)
        // first spreads the values over the whole 1..200000 range on any platform.
        const int offset = static_cast<int>(rand() / (RAND_MAX + 1.0) * kSet2Range) + 1;
        set2.push_back(kBase + offset);
    }

    // Now intersect the two sets by populating the map
    std::map<int,int> theMap;
    for ( std::vector<int>::const_iterator it = set1.begin(); it != set1.end(); ++it )
    {
        theMap[*it] = 1;
    }

    int intersectionSize = 0;
    for ( std::vector<int>::const_iterator it = set2.begin(); it != set2.end(); ++it )
    {
        std::map<int,int>::iterator foundValue = theMap.find(*it);
        if ( foundValue != theMap.end() )
        {
            // Write through the iterator we already have instead of looking the key
            // up a second time with operator[].
            foundValue->second = 2;
            ++intersectionSize;
        }
    }
    return intersectionSize;
}
int _tmain(int argc, _TCHAR* argv[])
{
srand ( time(NULL) );
Timer timer;
int intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
timer.Reset();
intersectionSize = runIntersectionTest();
timer.Stop();
cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;
getchar();
return 0;
}