散列:......我看到的一个缺点是每个向量的每个组件都至少被触摸一次。这似乎已经太多了。
在最坏的情况下,你怎么能比较两个向量而不至少看一次呢?不,真的,如果你有 1,1,1 和 2,2,2 比较/匹配立即结束。但是如果你有 1,2,3 和 1,2,3 呢?
无论如何,这是解决问题的一种方法。实施肯定可以改进。
#include <iostream>
#include <map>
#include <vector>
#include <list>
#include <cstdint>
#include <cstdlib>
#include <ctime>
using namespace std;
const int TotalVectorCount = 1000000;
const int UniqueVectorCount = 30000;
const int VectorLength = 30;
typedef vector<int> Vector;
typedef unsigned long long uint64;
void GenerateRandomVector(Vector& v)
{
v.reserve(VectorLength);
// generate 30 values from -100 to +100
for (int i = 0; i < VectorLength; i++)
v.push_back(rand() % 201 - 100);
}
bool IdenticalVectors(const Vector& v1, const Vector& v2)
{
for (int i = 0; i < VectorLength; i++)
if (v1[i] != v2[i])
return false;
return true;
}
// this lets us do "cout << Vector"
ostream& operator<<(ostream& os, const Vector& v)
{
for (int i = 0; i < VectorLength; i++)
os << v[i] << ' ';
return os;
}
uint64 HashVector(const Vector& v)
{
// this is probably a bad hash function,
// but it seems to work nonetheless
uint64 h = 0x7FFFFFFFFFFFFFE7;
for (int i = 0; i < VectorLength; i++)
h = h * 0xFFFFFFFFFFFFFFC5 + v[i];
return h & 0xFFFFFFFFFFFFFFFF;
}
Vector UniqueTestVectors[UniqueVectorCount];
void GenerateUniqueTestVectors()
{
map<uint64,char> m;
for (int i = 0; i < UniqueVectorCount; i++)
{
for (;;)
{
GenerateRandomVector(UniqueTestVectors[i]);
uint64 h = HashVector(UniqueTestVectors[i]);
map<uint64,char>::iterator it = m.find(h);
if (it == m.end())
{
m[h] = 0;
break;
}
}
}
}
bool GetNextVector(Vector& v)
{
static int count = 0;
v = UniqueTestVectors[count % UniqueVectorCount];
return ++count <= TotalVectorCount;
}
int main()
{
srand(time(0));
cout << "Generating " << UniqueVectorCount << " unique random vectors..."
<< endl;
GenerateUniqueTestVectors();
#if 0
for (int i = 0; i < UniqueVectorCount; i++)
cout << UniqueTestVectors[i] << endl;
#endif
cout << "Generating " << TotalVectorCount << " random vectors with only "
<< UniqueVectorCount << " unique..." << endl;
map<uint64,list<Vector>> TheBigHashTable;
int uniqCnt = 0;
int totCnt = 0;
Vector v;
while (GetNextVector(v))
{
totCnt++;
uint64 h = HashVector(v);
map<uint64,list<Vector>>::iterator it = TheBigHashTable.find(h);
if (it == TheBigHashTable.end())
{
// seeing vector with this hash (h) for the first time,
// insert it into the hash table
list<Vector> l;
l.push_back(v);
TheBigHashTable[h] = l;
uniqCnt++;
}
else
{
// we've seen vectors with this hash (h) before,
// let's see if we've already hashed this vector
list<Vector>::iterator it;
bool exists = false;
for (it = TheBigHashTable[h].begin();
it != TheBigHashTable[h].end();
it++)
{
if (IdenticalVectors(*it, v))
{
// we've hashed this vector before
exists = true;
break;
}
}
if (!exists)
{
// we haven't hashed this vector before,
// let's do it now
TheBigHashTable[h].push_back(v);
uniqCnt++;
}
}
}
#if 0
cout << "Unique vectors found:" << endl;
map<uint64,list<Vector>>::iterator it;
for (it = TheBigHashTable.begin();
it != TheBigHashTable.end();
it++)
{
list<Vector>::iterator it2;
for (it2 = it->second.begin();
it2 != it->second.end();
it2++)
cout << *it2 << endl;
}
#endif
cout << "Hashed " << uniqCnt << " unique vectors out of " << totCnt << " total" << endl;
return 0;
}
使用 12848 kB RAM 在 1.12 秒内输出 ( ideone ):
Generating 30000 unique random vectors...
Generating 1000000 random vectors with only 30000 unique...
Hashed 30000 unique vectors out of 1000000 total
现在,与更少和更短的唯一向量相同,因此可以在控制台中打印它们:
使用 3040 kB 的 RAM 在 0.14 秒内输出(ideone ):
Generating 10 unique random vectors...
-45 75 1 -71 -83 97 10 -18 89 -10
-11 60 18 -54 -90 77 19 -90 -7 -31
-15 -65 -47 88 25 -56 4 39 -20 39
-64 -14 -37 -13 15 -70 -66 -75 12 73
-35 -99 32 83 98 -8 59 16 2 -98
86 37 -63 -62 24 62 -68 78 -50 -38
17 -64 48 80 -26 -87 61 8 -62 -28
-70 -47 -27 62 86 -29 -97 44 37 -45
-4 -28 92 -17 -40 -35 -56 -58 -57 -55
5 10 -19 -48 -61 5 -35 100 -88 -47
Generating 1000000 random vectors with only 10 unique...
Unique vectors found:
86 37 -63 -62 24 62 -68 78 -50 -38
17 -64 48 80 -26 -87 61 8 -62 -28
5 10 -19 -48 -61 5 -35 100 -88 -47
-4 -28 92 -17 -40 -35 -56 -58 -57 -55
-11 60 18 -54 -90 77 19 -90 -7 -31
-15 -65 -47 88 25 -56 4 39 -20 39
-35 -99 32 83 98 -8 59 16 2 -98
-45 75 1 -71 -83 97 10 -18 89 -10
-64 -14 -37 -13 15 -70 -66 -75 12 73
-70 -47 -27 62 86 -29 -97 44 37 -45
Hashed 10 unique vectors out of 1000000 total