23

我们的应用程序使用了大量的字典,这些字典具有不经常更改的多级查找。我们正在研究将一些使用字典进行大量查找的关键代码转换为使用替代结构来实现 - 更快的查找,内存/gc 的轻量级。这使我们比较了各种可用的字典/库 -

Dictionary( System.Collections.Generics.Dictionary-SCGD), ImmutableDictionary, C5.HashDictionary, FSharpMap.

使用各种项目计数运行以下程序 - 100、1000、10000、100000 - 表明 Dictionary 在大多数范围内仍然是赢家。第一行表示集合中的项目。MS/Ticks 将随机执行 x 次查找所花费的时间(代码如下)。

项目 - 100
SCGD - 0 MS - 50 Ticks
C5 - 1 MS - 1767 Ticks
Imm - 4 MS - 5951 Ticks
FS - 0 MS - 240 Ticks

项目 - 1000
SCGD - 0 MS - 230 Ticks
C5 - 0 MS - 496 Ticks
Imm - 0 MS - 1046 Ticks
FS - 1 MS - 1870 Ticks

项目 - 10000
SCGD - 3 MS - 4722 Ticks
C5 - 4 MS - 6370 Ticks
Imm - 9 MS - 13119 Ticks
FS - 15 MS - 22050 Ticks

项目 - 100000
SCGD - 62 MS - 89276 Ticks
C5 - 72 MS - 103658 Ticks
Imm - 172 MS - 246247 Ticks
FS - 249 MS - 356176 Ticks

这是否意味着,我们已经在使用最快且无需更改?我曾假设不可变结构应该位于表的顶部,但事实并非如此。我们是在做错误的比较还是我错过了什么?一直在坚持这个问题,但觉得最好问一下。任何链接或注释或任何引用都会很好。

完整的测试代码 -

namespace CollectionsTest
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Immutable;
    using System.Diagnostics;
    using System.Linq;
    using System.Text;
    using System.Runtime;
    using Microsoft.FSharp.Collections;

    /// <summary>
    /// 
    /// </summary>
    class Program
    {
        static Program()
        {
            ProfileOptimization.SetProfileRoot(@".\Jit");
            ProfileOptimization.StartProfile("Startup.Profile");
        }

        /// <summary>
        /// Mains the specified args.
        /// </summary>
        /// <param name="args">The args.</param>
        static void Main(string[] args)
        {
            // INIT TEST DATA ------------------------------------------------------------------------------------------------

            foreach (int MAXITEMS in new int[] { 100, 1000, 10000, 100000 })
            {
                Console.WriteLine("\n# - {0}", MAXITEMS);

                List<string> accessIndex = new List<string>(MAXITEMS);
                List<KeyValuePair<string, object>> listofkvps = new List<KeyValuePair<string, object>>();
                List<Tuple<string, object>> listoftuples = new List<Tuple<string, object>>();
                for (int i = 0; i < MAXITEMS; i++)
                {
                    listoftuples.Add(new Tuple<string, object>(i.ToString(), i));
                    listofkvps.Add(new KeyValuePair<string, object>(i.ToString(), i));
                    accessIndex.Add(i.ToString());
                }

                // Randomize for lookups
                Random r = new Random(Environment.TickCount);
                List<string> randomIndexesList = new List<string>(MAXITEMS);
                while (accessIndex.Count > 0)
                {
                    int index = r.Next(accessIndex.Count);
                    string value = accessIndex[index];
                    accessIndex.RemoveAt(index);

                    randomIndexesList.Add(value);
                }

                // Convert to array for best perf
                string[] randomIndexes = randomIndexesList.ToArray();

                // LOAD ------------------------------------------------------------------------------------------------

                // IMMU
                ImmutableDictionary<string, object> idictionary = ImmutableDictionary.Create<string, object>(listofkvps);
                //Console.WriteLine(idictionary.Count);

                // SCGD
                Dictionary<string, object> dictionary = new Dictionary<string, object>();
                for (int i = 0; i < MAXITEMS; i++)
                {
                    dictionary.Add(i.ToString(), i);
                }
                //Console.WriteLine(dictionary.Count);

                // C5
                C5.HashDictionary<string, object> c5dictionary = new C5.HashDictionary<string, object>();
                for (int i = 0; i < MAXITEMS; i++)
                {
                    c5dictionary.Add(i.ToString(), i);
                }
                //Console.WriteLine(c5dictionary.Count);
                // how to change to readonly?

                // F#
                FSharpMap<string, object> fdictionary = new FSharpMap<string, object>(listoftuples);
                //Console.WriteLine(fdictionary.Count);

                // TEST ------------------------------------------------------------------------------------------------
                Stopwatch sw = Stopwatch.StartNew();
                for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
                {
                    string i = randomIndexes[index];
                    object value;
                    dictionary.TryGetValue(i, out value);
                }
                sw.Stop();
                Console.WriteLine("SCGD - {0,10} MS - {1,10} Ticks", sw.ElapsedMilliseconds, sw.ElapsedTicks);

                Stopwatch c5sw = Stopwatch.StartNew();
                for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
                {
                    string key = randomIndexes[index];
                    object value;
                    c5dictionary.Find(ref key, out value);
                }
                c5sw.Stop();
                Console.WriteLine("C5   - {0,10} MS - {1,10} Ticks", c5sw.ElapsedMilliseconds, c5sw.ElapsedTicks);

                Stopwatch isw = Stopwatch.StartNew();
                for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
                {
                    string i = randomIndexes[index];
                    object value;
                    idictionary.TryGetValue(i, out value);
                }
                isw.Stop();
                Console.WriteLine("Imm  - {0,10} MS - {1,10} Ticks", isw.ElapsedMilliseconds, isw.ElapsedTicks);


                Stopwatch fsw = Stopwatch.StartNew();
                for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
                {
                    string i = randomIndexes[index];
                    fdictionary.TryFind(i);
                }
                fsw.Stop();
                Console.WriteLine("FS   - {0,10} MS - {1,10} Ticks", fsw.ElapsedMilliseconds, fsw.ElapsedTicks);
            }
        }
    }
}
4

5 回答 5

17

您认为不可变字典允许更快查找的假设是错误的,因为几乎所有不可变集合设法避免在“修改”时复制整个结构的方式是将数据存储在树中,并且仅在“修改”时复制一些节点,共享所有其他节点。并且树访问通常比通过索引访问平面数组要慢,就像可变表亲所做的那样。

我根据您的代码比较了、和的单线程读取性能。Dictionary<,>ConcurrentDictionary<,>ImmutableDictionary<,>

热身后 30 次运行的平均结果:

各种字典实现的读取性能

为了了解写入性能,我还运行了一个测试,在字典中增加了 50 个条目。再次,30 次运行的平均结果,预热后

各种字典实现的写入性能

经测试

  • .net 4.5.1
  • 微软 Bcl 不可变 1.0.34.0

NB 应该注意的是,在许多现实生活中的多线程应用程序中,不可变字典的速度要快得多和/或允许更高级别的并发,否则这些应用程序将不得不求助于缓慢或容易出错的技术,如防御性复制、锁定等, 为了应对线程的可变性。如果您需要快照能力,例如乐观并发,MVCC,则尤其如此。

顺便说一句,如果您按原样运行示例代码,则至少不可变字典的值在正常(长时间运行)应用程序中将是非常不典型的,因为由于某种原因,不可变字典显然需要时间来预热。性能上的差异是巨大的。看看前 3 次运行的输出:

Items    Dict   Conc   Immu
===========================
   100   1.90   1.00 361.81
  1000   1.07   1.00   4.33
 10000   1.24   1.00   1.74
100000   1.00   1.33   2.71
---------------------------
   100   1.06   1.00   2.56
  1000   1.03   1.00   4.34
 10000   1.00   1.06   3.54
100000   1.00   1.17   2.76
---------------------------
   100   1.06   1.00   2.50
  1000   1.66   1.00   4.16
 10000   1.00   1.02   3.67
100000   1.00   1.26   3.13

您的问题是关于(冻结字典的)读取性能,但不可变集合的树特征在写入性能中表现类似:

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
                       Mutable (amortized)  Mutable (worst)  Immutable 
───────────────────────────────────────────────────────────────────────
 Stack.Push            O(1)                 O(n)             O(1)      
 Queue.Enqueue         O(1)                 O(n)             O(1)      
 List.Add              O(1)                 O(n)             O(log n)  
 HashSet.Add           O(1)                 O(n)             O(log n)  
 SortedSet.Add         O(log n)             O(n)             O(log n)  
 Dictionary.Add        O(1)                 O(n)             O(log n)  
 SortedDictionary.Add  O(log n)             O(n log n)       O(log n)  
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
于 2015-04-02T01:04:31.150 回答
16

标准字典已经很好地优化了。当您进行查找时,它真正做的就是计算所提供密钥的哈希(其速度取决于密钥的类型及其实现方式GetHashCode),对哈希值进行模运算以找到正确的存储桶,然后它遍历桶的内容,直到找到正确的值(其速度取决于GetHashCode函数的质量,因此如果桶平衡良好且不包含太多项目,以及Equals方法的速度)类型)。

总而言之,它对查找的作用不大,所以我认为您无法找到速度明显更快的通用数据结构。但是,根据您计划使用字典的方式,您可以构建更专业的解决方案。例如,我需要一个非常快速的查找,其中键是一个类型。我没有使用字典dictionary[typeof(T)],而是创建了一个像这样的泛型类:

class ValueStore<T> 
{
  public static T Value;
}

所以我可以ValueStore<T>.Value用几乎为零的查找开销来做。

您是否可以做类似的事情(以及是否值得)真的取决于您的用例;结构将容纳多少项,读取和写入的频率,是否需要线程安全,写入速度有多重要等。例如,如果写入速度根本不重要,但是否需要线程安全,您将需要执行写时复制,其中数据结构永远不会被写入,而是被复制,以牺牲写入速度为代价确保线程安全和无锁(因此:快速)读取。专门化它可以在写入时重新排序结构以优化它,使哈希桶不包含超过 N 个项目。

PS:如果您真的非常渴望速度但无法构建更专业的数据结构,那么您可能会从复制Dictionary<TKey,TValue>和删除各种健全性检查(空检查等)和虚拟/接口方法调用中获得少量收益。但是,如果那样的话,我怀疑这会给你带来超过 20% 的收益。

于 2013-05-19T03:23:29.240 回答
7

F# map 结构被实现为二叉树,因此实际上不是字典。如此处所述,标准的 .net 字典是您将获得的最快的。

于 2013-05-18T19:42:12.887 回答
0

这里还有一些基准。

.NET 4.5 版本 1.3.1System.Collections.Immutable包,64 位。

DictionaryImmutableDictionary

BuilderBenchmark elapsed time
 Mutable   : Avg= 15.213, Stdev  4.591 [ms] (   10.2x)
 Immutable : Avg=155.883, Stdev 15.145 [ms]
BuilderBenchmark per op
 Mutable   : Avg=  0.152, Stdev  0.046 [us] (   10.2x)
 Immutable : Avg=  1.559, Stdev  0.151 [us]
SetItemBenchmark elapsed time
 Mutable   : Avg= 13.100, Stdev  2.975 [ms] (   30.4x)
 Immutable : Avg=397.932, Stdev 18.551 [ms]
SetItemBenchmark per op
 Mutable   : Avg=  0.131, Stdev  0.030 [us] (   30.4x)
 Immutable : Avg=  3.979, Stdev  0.186 [us]
LookupBenchmark elapsed time
 Mutable   : Avg=  9.439, Stdev  0.942 [ms] (    3.6x)
 Immutable : Avg= 34.250, Stdev  3.457 [ms]
LookupBenchmark per op
 Mutable   : Avg=  0.094, Stdev  0.009 [us] (    3.6x)
 Immutable : Avg=  0.343, Stdev  0.035 [us]

DictionaryImmutableSortedDictionary

BuilderBenchmark elapsed time
 Mutable   : Avg= 13.654, Stdev  5.124 [ms] (   34.5x)
 Immutable : Avg=471.574, Stdev 20.719 [ms]
BuilderBenchmark per op
 Mutable   : Avg=  0.137, Stdev  0.051 [us] (   34.5x)
 Immutable : Avg=  4.716, Stdev  0.207 [us]
SetItemBenchmark elapsed time
 Mutable   : Avg= 11.838, Stdev  0.530 [ms] (   37.6x)
 Immutable : Avg=444.964, Stdev 11.125 [ms]
SetItemBenchmark per op
 Mutable   : Avg=  0.118, Stdev  0.005 [us] (   37.6x)
 Immutable : Avg=  4.450, Stdev  0.111 [us]
LookupBenchmark elapsed time
 Mutable   : Avg=  9.354, Stdev  0.542 [ms] (    4.4x)
 Immutable : Avg= 40.988, Stdev  3.242 [ms]
LookupBenchmark per op
 Mutable   : Avg=  0.094, Stdev  0.005 [us] (    4.4x)
 Immutable : Avg=  0.410, Stdev  0.032 [us]

我想知道不可变集合会慢多少。请注意,整个运行是针对 100,000 次插入操作。我很高兴地报告查找性能下降仅为 4 倍,而插入性能下降为 10 倍,仍然相当不错。ImmutableDictionary显然优于ImmutableSortedDictionary除非您绝对需要排序的数据结构。

关于写时复制行为的旁注。这些持久性数据结构比任何简单的写入时复制都要快几个数量级。这是我正在使用的。CAS使用比较和交换指令提交更改(使用数据竞争检测)也很容易。

附件

程序.cs:

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;

using ImmutableDictionary = System.Collections.Immutable.ImmutableDictionary; // select implementation to benchmark here

namespace DictPerf
{
    class Program
    {
        static string alphaNum = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

        static string NextString(Random r, char[] buf)
        {
            int i = 0, len = r.Next(buf.Length) + 1;
            for (; i < len; i++)
            {
                buf[i] = alphaNum[r.Next(alphaNum.Length)];
            }
            return new string(buf, 0, len);
        }

        static HashSet<string> strings = new HashSet<string>();

        private static void Seed()
        {
            var r = new Random();
            var buf = new char[64];
            for (int i = 0; i < 100000; i++)
            {
                strings.Add(NextString(r, buf));
            }
        }

        static void Main(string[] args)
        {
            Seed();

            Benchmark(RunDictionaryBuilderBenchmark, RunImmutableDictionaryBuilderBenchmark, "BuilderBenchmark");
            Benchmark(RunDictionarySetItemBenchmark, RunImmutableDictionarySetItemBenchmark, "SetItemBenchmark");
            Benchmark(RunDictionaryLookupBenchmark, RunImmutableDictionaryLookupBenchmark, "LookupBenchmark");
        }

        private static string Stats(IEnumerable<double> source)
        {
            var avg = source.Average();
            var variance = source.Select(val => (val - avg) * (val - avg)).Sum();
            var stdev = Math.Sqrt(variance / (source.Count() - 1));
            return $"Avg={avg,7:0.000}, Stdev{stdev,7:0.000}";
        }

        private static void Benchmark(Action<ICollection<string>, Stopwatch> benchmark1, Action<ICollection<string>, Stopwatch> benchmark2, string benchmarkName)
        {
            var xs = new List<double>();
            var ys = new List<double>();

            var sw = Stopwatch.StartNew();
            for (int i = 0; i < 10; i++)
            {
                sw.Restart();
                benchmark1(strings, sw);
                xs.Add(sw.Elapsed.TotalMilliseconds);
                sw.Restart();
                benchmark2(strings, sw);
                ys.Add(sw.Elapsed.TotalMilliseconds);
            }

            var x = xs.Average();
            var y = ys.Average();
            var a = xs.Select(v => v / 100).Average();
            var b = ys.Select(v => v / 100).Average();

            Console.WriteLine($"{benchmarkName} elapsed time");
            Console.WriteLine($" Mutable   : {Stats(xs)} [ms] ({y / x,7:0.0}x)");
            Console.WriteLine($" Immutable : {Stats(ys)} [ms]");
            Console.WriteLine($"{benchmarkName} per op");
            Console.WriteLine($" Mutable   : {Stats(xs.Select(v => v / 100))} [us] ({b / a,7:0.0}x)");
            Console.WriteLine($" Immutable : {Stats(ys.Select(v => v / 100))} [us]");
        }

        private static void RunDictionaryBuilderBenchmark(ICollection<string> strings, Stopwatch sw)
        {
            var d = new Dictionary<string, object>();
            foreach (var s in strings)
            {
                d[s] = null;
            }
        }

        private static void RunImmutableDictionaryBuilderBenchmark(ICollection<string> strings, Stopwatch sw)
        {
            var d = ImmutableDictionary.CreateBuilder<string, object>();
            foreach (var s in strings)
            {
                d[s] = null;
            }
            d.ToImmutableDictionary();
        }

        private static void RunDictionarySetItemBenchmark(ICollection<string> strings, Stopwatch sw)
        {
            var d = new Dictionary<string, object>();
            foreach (var s in strings)
            {
                d[s] = null;
            }
        }

        private static void RunImmutableDictionarySetItemBenchmark(ICollection<string> strings, Stopwatch sw)
        {
            var d = ImmutableDictionary.Create<string, object>();
            foreach (var s in strings)
            {
                d = d.SetItem(s, null);
            }
        }

        private static void RunDictionaryLookupBenchmark(ICollection<string> strings, Stopwatch timer)
        {
            timer.Stop();

            var d = new Dictionary<string, object>();
            foreach (var s in strings)
            {
                d[s] = null;
            }

            timer.Start();

            foreach (var s in strings)
            {
                object v;
                d.TryGetValue(s, out v);
            }
        }

        private static void RunImmutableDictionaryLookupBenchmark(ICollection<string> strings, Stopwatch timer)
        {
            timer.Stop();

            var d = ImmutableDictionary.CreateBuilder<string, object>();
            foreach (var s in strings)
            {
                d[s] = null;
            }
            var x = d.ToImmutableDictionary();

            timer.Start();

            foreach (var s in strings)
            {
                object v;
                x.TryGetValue(s, out v);
            }
        }
    }
}
于 2017-03-12T16:22:50.677 回答
0

使用 .NET 3.1,它们看起来非常相似。

|    Method | MAXITEMS |      Mean |     Error |    StdDev |
|---------- |--------- |----------:|----------:|----------:|
|       imm |       10 | 0.9921 ns | 0.0630 ns | 0.1837 ns |
|       scg |       10 | 0.9699 ns | 0.0571 ns | 0.1683 ns |
| scgsorted |       10 | 1.0136 ns | 0.0577 ns | 0.1684 ns |
|       imm |      100 | 1.5296 ns | 0.1153 ns | 0.3327 ns |
|       scg |      100 | 1.3151 ns | 0.0694 ns | 0.1990 ns |
| scgsorted |      100 | 1.4516 ns | 0.0855 ns | 0.2426 ns |
|       imm |     1000 | 0.8514 ns | 0.0905 ns | 0.2582 ns |
|       scg |     1000 | 1.0898 ns | 0.0552 ns | 0.1416 ns |
| scgsorted |     1000 | 1.0302 ns | 0.0533 ns | 0.1001 ns |
|       imm |    10000 | 1.0280 ns | 0.0530 ns | 0.1175 ns |
|       scg |    10000 | 0.9929 ns | 0.0523 ns | 0.1253 ns |
| scgsorted |    10000 | 1.0531 ns | 0.0534 ns | 0.1248 ns |

测试源代码:

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

namespace CollectionsTest
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Immutable;
    using System.Runtime;

    /// <summary>
    ///
    /// </summary>
    public class Program
    {
        private static ImmutableDictionary<string, object> idictionary;
        private static Dictionary<string, object> dictionary;
        private static SortedDictionary<string, object> sorteddictionary;
        private static string[] randomIndexes;

        [Params(10, 100, 1000, 10000)] public  int MAXITEMS { get; set; }

        public Program()
        {
            Console.WriteLine("\n# - {0}", MAXITEMS);

            List<string> accessIndex = new List<string>(MAXITEMS);
            List<KeyValuePair<string, object>> listofkvps = new List<KeyValuePair<string, object>>();
            List<Tuple<string, object>> listoftuples = new List<Tuple<string, object>>();
            for (int i = 0; i < MAXITEMS; i++)
            {
                listoftuples.Add(new Tuple<string, object>(i.ToString(), i));
                listofkvps.Add(new KeyValuePair<string, object>(i.ToString(), i));
                accessIndex.Add(i.ToString());
            }

            // Randomize for lookups
            Random r = new Random(Environment.TickCount);
            List<string> randomIndexesList = new List<string>(MAXITEMS);
            while (accessIndex.Count > 0)
            {
                int index = r.Next(accessIndex.Count);
                string value = accessIndex[index];
                accessIndex.RemoveAt(index);

                randomIndexesList.Add(value);
            }

            // Convert to array for best perf
            randomIndexes = randomIndexesList.ToArray();

            // LOAD ------------------------------------------------------------------------------------------------

            // IMMU
            idictionary = listofkvps.ToImmutableDictionary();
            //Console.WriteLine(idictionary.Count);

            // SCGD
            dictionary = new Dictionary<string, object>();
            for (int i = 0; i < MAXITEMS; i++)
            {
                dictionary.Add(i.ToString(), i);
            }

            sorteddictionary = new SortedDictionary<string, object>();
            for (int i = 0; i < MAXITEMS; i++)
            {
                sorteddictionary.Add(i.ToString(), i);
            }
            //Console.WriteLine(dictionary.Count);


            //scg(randomIndexes, dictionary);


            //imm(randomIndexes, idictionary);
        }

        /// <summary>
        /// Mains the specified args.
        /// </summary>
        /// <param name="args">The args.</param>
        public static void Main(string[] args)
        {
            // INIT TEST DATA ------------------------------------------------------------------------------------------------



            Console.WriteLine(BenchmarkRunner.Run<Program>());
        }

        [Benchmark]
        public   void imm()
        {
            for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
            {
                string i = randomIndexes[index];
                object value;
                idictionary.TryGetValue(i, out value);
            }
        }

        [Benchmark]
        public   void scg()
        {
            // TEST ------------------------------------------------------------------------------------------------
            for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
            {
                string i = randomIndexes[index];
                object value;
                dictionary.TryGetValue(i, out value);
            }
        }

        [Benchmark]
        public   void scgsorted()
        {
            // TEST ------------------------------------------------------------------------------------------------
            for (int index = 0, indexMax = randomIndexes.Length; index < indexMax; index++)
            {
                string i = randomIndexes[index];
                object value;
                sorteddictionary.TryGetValue(i, out value);
            }
        }
    }
}
于 2020-08-16T15:05:17.770 回答