1

i have a collection trends with about 30kk elements. When i am trying execute following code in linqpad

trends.Take(count).Dump();

it works ok.

But if i add sort:

 trends.OrderByDescending(x => x.Item2).Take(count).Dump();

I get System.OutOfMemoryException

What i am doing wrong?

4

3 回答 3

3

OrderByDescending (or OrderBy) materializes the whole sequence when you try to fetch the first element - it has to, as otherwise you can't possibly know the first element. It has to make a copy of the sequence (typically just a bunch of references, of course) in order to sort, so if the original sequence is an in-memory collection, you end up with two copies of it. Presumably you don't have enough memory for that.

于 2012-06-23T19:13:48.423 回答
3

You don't have to sort the whole collection just take top count elements from it. Here is a solution for this https://codereview.stackexchange.com/a/9777/11651.

The key point from this answer is It doesn't require all items to be kept in memory(for sorting)

Again from comments of the answer in the link:

The idea is: You can find the Max(or Min) item of a List in O(n) time. if you extend this idea to m item(5 in the question), you can get top(or buttom) m items faster then sorting the list(just in one pass on the list + the cost of keeping 5 sorted items)

于 2012-06-23T19:26:30.280 回答
1

Here is another extension method that may work better than the original LINQ (e.g. it shouldn't blow up for a small number of selected items). Like L.B.'s solution it should be O(n) and doesn't keep all items in memory:

public static class Enumerables
{
    public static IEnumerable<T> TopN<T, TV>(this IEnumerable<T> value, Func<T, TV> selector, Int32 count, IComparer<TV> comparer)
    {
        var qCount = 0;
        var queue = new SortedList<TV, List<T>>(count, comparer);
        foreach (var val in value)
        {
            var currTv = selector(val);
            if (qCount >= count && comparer.Compare(currTv, queue.Keys[0]) <= 0) continue;
            if (qCount == count)
            {
                var list = queue.Values[0];
                if (list.Count == 1)
                    queue.RemoveAt(0);
                else
                    list.RemoveAt(0);
                qCount--;
            }
            if (queue.ContainsKey(currTv))
                queue[currTv].Add(val);
            else
                queue.Add(currTv, new List<T> {val});
            qCount++;
        }
        return queue.SelectMany(kvp => kvp.Value);
    }

    public static IEnumerable<T> TopN<T, TV>(this IEnumerable<T> value, Func<T, TV> selector, Int32 count)
    {
        return value.TopN(selector, count, Comparer<TV>.Default);
    }

    public static IEnumerable<T> BottomN<T, TV>(this IEnumerable<T> value, Func<T, TV> selector, Int32 count, IComparer<TV> comparer)
    {
        return value.TopN(selector, count, new ReverseComparer<TV>(comparer));
    }

    public static IEnumerable<T> BottomN<T, TV>(this IEnumerable<T> value, Func<T, TV> selector, Int32 count)
    {
        return value.BottomN(selector, count, Comparer<TV>.Default);
    }
}

// Helper class
public class ReverseComparer<T> : IComparer<T>
{
    private readonly IComparer<T> _comparer;

    public int Compare(T x, T y)
    {
        return -1*_comparer.Compare(x, y);
    }

    public ReverseComparer()
        : this(Comparer<T>.Default)
    { }

    public ReverseComparer(IComparer<T> comparer)
    {
        if (comparer == null) throw new ArgumentNullException("comparer");
        _comparer = comparer;
    }
}

And some tests:

[TestFixture]
public class EnumerablesTests
{
    [Test]
    public void TestTopN()
    {
        var input = new[] { 1, 2, 8, 3, 6 };
        var output = input.TopN(n => n, 3).ToList();
        Assert.AreEqual(3, output.Count);
        Assert.IsTrue(output.Contains(8));
        Assert.IsTrue(output.Contains(6));
        Assert.IsTrue(output.Contains(3));
    }

    [Test]
    public void TestBottomN()
    {
        var input = new[] { 1, 2, 8, 3, 6 };
        var output = input.BottomN(n => n, 3).ToList();
        Assert.AreEqual(3, output.Count);
        Assert.IsTrue(output.Contains(1));
        Assert.IsTrue(output.Contains(2));
        Assert.IsTrue(output.Contains(3));
    }

    [Test]
    public void TestTopNDupes()
    {
        var input = new[] { 1, 2, 8, 8, 3, 6 };
        var output = input.TopN(n => n, 3).ToList();
        Assert.AreEqual(3, output.Count);
        Assert.IsTrue(output.Contains(8));
        Assert.IsTrue(output.Contains(6));
        Assert.IsFalse(output.Contains(3));
    }

    [Test]
    public void TestBottomNDupes()
    {
        var input = new[] { 1, 1, 2, 8, 3, 6 };
        var output = input.BottomN(n => n, 3).ToList();
        Assert.AreEqual(3, output.Count);
        Assert.IsTrue(output.Contains(1));
        Assert.IsTrue(output.Contains(2));
        Assert.IsFalse(output.Contains(3));
    }
}
于 2012-06-23T20:05:55.940 回答