对于非常小的集合,差异将可以忽略不计。在您的范围的低端(500k 项),如果您进行大量查找,您将开始看到差异。二进制搜索将是 O(log n),而哈希查找将是 O(1),摊销。这与真正的常数不同,但您仍然必须拥有一个非常糟糕的哈希函数才能获得比二分搜索更差的性能。
(当我说“可怕的哈希”时,我的意思是:
hashCode()
{
return 0;
}
是的,它本身非常快,但会导致您的哈希映射成为链表。)
ialashkevich使用数组和字典编写了一些 C# 代码来比较这两种方法,但它使用 Long 值作为键。我想测试在查找过程中实际执行哈希函数的东西,所以我修改了那个代码。我将其更改为使用字符串值,并将填充和查找部分重构为它们自己的方法,以便在分析器中更容易看到。我还保留了使用 Long 值的代码,作为比较点。最后,我摆脱了自定义的二进制搜索功能,并使用了Array
课堂上的那个。
这是代码:
class Program
{
private const long capacity = 10_000_000;
private static void Main(string[] args)
{
testLongValues();
Console.WriteLine();
testStringValues();
Console.ReadLine();
}
private static void testStringValues()
{
Dictionary<String, String> dict = new Dictionary<String, String>();
String[] arr = new String[capacity];
Stopwatch stopwatch = new Stopwatch();
Console.WriteLine("" + capacity + " String values...");
stopwatch.Start();
populateStringArray(arr);
stopwatch.Stop();
Console.WriteLine("Populate String Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
populateStringDictionary(dict, arr);
stopwatch.Stop();
Console.WriteLine("Populate String Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
Array.Sort(arr);
stopwatch.Stop();
Console.WriteLine("Sort String Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchStringDictionary(dict, arr);
stopwatch.Stop();
Console.WriteLine("Search String Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchStringArray(arr);
stopwatch.Stop();
Console.WriteLine("Search String Array: " + stopwatch.ElapsedMilliseconds);
}
/* Populate an array with random values. */
private static void populateStringArray(String[] arr)
{
for (long i = 0; i < capacity; i++)
{
arr[i] = generateRandomString(20) + i; // concatenate i to guarantee uniqueness
}
}
/* Populate a dictionary with values from an array. */
private static void populateStringDictionary(Dictionary<String, String> dict, String[] arr)
{
for (long i = 0; i < capacity; i++)
{
dict.Add(arr[i], arr[i]);
}
}
/* Search a Dictionary for each value in an array. */
private static void searchStringDictionary(Dictionary<String, String> dict, String[] arr)
{
for (long i = 0; i < capacity; i++)
{
String value = dict[arr[i]];
}
}
/* Do a binary search for each value in an array. */
private static void searchStringArray(String[] arr)
{
for (long i = 0; i < capacity; i++)
{
int index = Array.BinarySearch(arr, arr[i]);
}
}
private static void testLongValues()
{
Dictionary<long, long> dict = new Dictionary<long, long>(Int16.MaxValue);
long[] arr = new long[capacity];
Stopwatch stopwatch = new Stopwatch();
Console.WriteLine("" + capacity + " Long values...");
stopwatch.Start();
populateLongDictionary(dict);
stopwatch.Stop();
Console.WriteLine("Populate Long Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
populateLongArray(arr);
stopwatch.Stop();
Console.WriteLine("Populate Long Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchLongDictionary(dict);
stopwatch.Stop();
Console.WriteLine("Search Long Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchLongArray(arr);
stopwatch.Stop();
Console.WriteLine("Search Long Array: " + stopwatch.ElapsedMilliseconds);
}
/* Populate an array with long values. */
private static void populateLongArray(long[] arr)
{
for (long i = 0; i < capacity; i++)
{
arr[i] = i;
}
}
/* Populate a dictionary with long key/value pairs. */
private static void populateLongDictionary(Dictionary<long, long> dict)
{
for (long i = 0; i < capacity; i++)
{
dict.Add(i, i);
}
}
/* Search a Dictionary for each value in a range. */
private static void searchLongDictionary(Dictionary<long, long> dict)
{
for (long i = 0; i < capacity; i++)
{
long value = dict[i];
}
}
/* Do a binary search for each value in an array. */
private static void searchLongArray(long[] arr)
{
for (long i = 0; i < capacity; i++)
{
int index = Array.BinarySearch(arr, arr[i]);
}
}
/**
* Generate a random string of a given length.
* Implementation from https://stackoverflow.com/a/1344258/1288
*/
private static String generateRandomString(int length)
{
var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
var stringChars = new char[length];
var random = new Random();
for (int i = 0; i < stringChars.Length; i++)
{
stringChars[i] = chars[random.Next(chars.Length)];
}
return new String(stringChars);
}
}
以下是几种不同大小的集合的结果。(时间以毫秒为单位。)
500000 长值...
填充长字典:26
填充长数组:2
搜索长字典:9
搜索长数组:80
500000 个字符串值...
填充字符串数组:1237
填充字符串字典:46
排序字符串数组:1755
搜索字符串字典:27
搜索字符串数组:1569
1000000 长值...
填充长字典:58
填充长数组:5
搜索长字典:23
搜索长数组:136
1000000 个字符串值...
填充字符串数组:2070
填充字符串字典:121
排序字符串数组:3579
搜索字符串字典:58
搜索字符串数组:3267
3000000 长值...
填充长字典:207
填充长数组:14
搜索长字典:75
搜索长数组:435
3000000 字符串值...
填充字符串数组:5553
填充字符串字典:449
排序字符串数组:11695
搜索字符串字典:194
搜索字符串数组:10594
10000000 长值...
填充长字典:521
填充长数组:47
搜索长字典:202
搜索长数组:1181
10000000 字符串值...
填充字符串数组:18119
填充字符串字典:1088
排序字符串数组:28174
搜索字符串字典:747
搜索字符串数组:26503
为了比较,这是程序最后一次运行的分析器输出(1000 万条记录和查找)。我强调了相关的功能。他们非常同意上面的秒表计时指标。
您可以看到字典查找比二分查找快得多,并且(正如预期的那样)集合越大,差异越明显。所以,如果你有一个合理的散列函数(相当快,几乎没有冲突),散列查找应该胜过二进制搜索在这个范围内的集合。