77

我一直在寻找 . 和 . 中可用的方法之间的性能基准Contains测试。我只是出于好奇才想知道这一点,因为我总是对这些感到困惑。关于 SO 的许多问题描述了这些方法的定义,例如:ExistsAnyList<T>

  1. LINQ Ring:Any() 与 Contains() 的巨大集合
  2. Linq .Any VS .Exists - 有什么区别?
  3. LINQ 扩展方法 - Any() vs. Where() vs. Exists()

所以我决定自己做。我将其添加为答案。任何对结果的更多见解都是最受欢迎的。我还对数组进行了此基准测试以查看结果

4

3 回答 3

83

最快的方法是使用HashSet. 对于ContainsaHashSet是 O(1)。

我拿了你的代码并添加了一个基准,HashSet<int> 性能成本HashSet<int> set = new HashSet<int>(list);几乎为零。

代码

void Main()
{
    ContainsExistsAnyVeryShort();
    
    ContainsExistsAnyShort();

    ContainsExistsAny();
}

private static void ContainsExistsAny()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("********* ContainsExistsAny ***********");
    Console.WriteLine("***************************************");

    List<int> list = new List<int>(6000000);
    Random random = new Random();
    for (int i = 0; i < 6000000; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");

}

private static void ContainsExistsAnyShort()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("***** ContainsExistsAnyShortRange *****");
    Console.WriteLine("***************************************");

    List<int> list = new List<int>(2000);
    Random random = new Random();
    for (int i = 0; i < 2000; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");

}

private static void ContainsExistsAnyVeryShort()
{
    Console.WriteLine("*******************************************");
    Console.WriteLine("***** ContainsExistsAnyVeryShortRange *****");
    Console.WriteLine("*******************************************");

    List<int> list = new List<int>(10);
    Random random = new Random();
    for (int i = 0; i < 10; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedTicks} ticks");

}

private static void find(List<int> list, int[] arr, HashSet<int> set, Func<string, Stopwatch, string> format)
{
    Random random = new Random();
    int[] find = new int[10000];
    for (int i = 0; i < 10000; i++)
    {
        find[i] = random.Next(6000000);
    }

    Stopwatch watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Contains", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Exists(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Exists", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Any(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Any", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        arr.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Contains", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        Array.Exists(arr, a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Exists", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        Array.IndexOf(arr, find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/IndexOf", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        arr.Any(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Any", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        set.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("HashSet/Contains", watch));
}

结果

*******************************************
***** ContainsExistsAnyVeryShortRange *****
*******************************************
List/Contains: 1067 ticks
List/Exists: 2884 ticks
List/Any: 10520 ticks
Array/Contains: 1880 ticks
Array/Exists: 5526 ticks
Array/IndexOf: 601 ticks
Array/Any: 13295 ticks
HashSet/Contains: 6629 ticks
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 4ms
List/Exists: 28ms
List/Any: 138ms
Array/Contains: 6ms
Array/Exists: 34ms
Array/IndexOf: 3ms
Array/Any: 96ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 11504ms
List/Exists: 57084ms
List/Any: 257659ms
Array/Contains: 11643ms
Array/Exists: 52477ms
Array/IndexOf: 11741ms
Array/Any: 194184ms
HashSet/Contains: 3ms

编辑 (2021-08-25)

我为非常短的集合(10 项)添加了比较,还添加了Array.Containsand Array.IndexOf。您可以看到,Array.IndexOf对于如此小的范围,这是最快的。也就是说,因为正如@lucky-brian 所说,这里的 n 太小了,所以for-loop 的性能要好于稍微复杂的搜索算法。但是我仍然建议尽可能使用HashSet<T>,因为它更好地反映了仅具有唯一值的意图,并且小集合的差异低于 1ms。

于 2016-01-21T10:08:04.490 回答
78

根据文档:

List.Exists(对象方法)

确定 List(T) 是否包含与指定谓词定义的条件匹配的元素。

IEnumerable.Any(扩展方法)

确定序列的任何元素是否满足条件。

List.Contains(对象方法)

确定元素是否在列表中。

基准测试:

代码:

    static void Main(string[] args)
    {
        ContainsExistsAnyShort();

        ContainsExistsAny();
    }
    
    private static void ContainsExistsAny()
    {
        Console.WriteLine("***************************************");
        Console.WriteLine("********* ContainsExistsAny ***********");
        Console.WriteLine("***************************************");

        List<int> list = new List<int>(6000000);
        Random random = new Random();
        for (int i = 0; i < 6000000; i++)
        {
            list.Add(random.Next(6000000));
        }
        int[] arr = list.ToArray();

        find(list, arr);
    }

    private static void ContainsExistsAnyShort()
    {
        Console.WriteLine("***************************************");
        Console.WriteLine("***** ContainsExistsAnyShortRange *****");
        Console.WriteLine("***************************************");

        List<int> list = new List<int>(2000);
        Random random = new Random();
        for (int i = 0; i < 2000; i++)
        {
            list.Add(random.Next(6000000));
        }
        int[] arr = list.ToArray();

        find(list, arr);
    }

    private static void find(List<int> list, int[] arr)
    {
        Random random = new Random();
        int[] find = new int[10000];
        for (int i = 0; i < 10000; i++)
        {
            find[i] = random.Next(6000000);
        }

        Stopwatch watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Contains(find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Contains: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Exists(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Exists: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Any(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Any: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            arr.Contains(find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("Array/Contains: {0:N0}ms", watch.ElapsedMilliseconds);

        Console.WriteLine("Arrays do not have Exists");

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            arr.Any(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("Array/Any: {0:N0}ms", watch.ElapsedMilliseconds);
    }

结果

***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 96ms
List/Exists: 146ms
List/Any: 381ms
Array/Contains: 34ms
Arrays do not have Exists
Array/Any: 410ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 257,996ms
List/Exists: 379,951ms
List/Any: 884,853ms
Array/Contains: 72,486ms
Arrays do not have Exists
Array/Any: 1,013,303ms
于 2013-09-06T07:10:14.707 回答
4

值得一提的是,这种比较有点不公平,因为Array类不拥有该Contains()方法。它使用扩展方法IEnumerable<T>通过顺序Enumerator,因此它没有针对Array实例进行优化。另一方面,HashSet<T>它自己的实现针对所有尺寸进行了全面优化。

为了公平比较,您可以使用int Array.IndexOf()Array实例实现的静态方法,即使它使用的for循环比Enumerator.

使用公平比较算法,最多 5 个元素的小集合的性能HashSet<T>.Contains()Array.IndexOf()

于 2018-02-14T17:53:02.783 回答