3

(我目前仅限于 .NET 4.0)

我有一种情况,我想尽可能并行处理项目,必须保持顺序,并且可以随时添加项目,直到按下“停止”。

项目可能会“爆发”进来,所以队列可能会完全排空,会有一个暂停,然后大量项目会再次进入。

我希望结果一旦完成就可以使用。

这是一个简化的示例:

/// <summary>
/// Demo from the question: integers are pushed into a <see cref="BlockingCollection{T}"/>,
/// processed by an ordered, non-buffered PLINQ query, and printed by a background task
/// as the query yields them. Pressing 's' stops adding; any other key adds one more item.
/// </summary>
class Program
{
    /// <summary>
    /// Stands in for real work: logs, spins a busy loop, logs again and returns the item unchanged.
    /// </summary>
    private static int Process(int item)
    {
        Console.WriteLine("Working on " + item);

        //simulate work
        int counter = 0;
        for (int spin = 0; spin <= 90000000; ++spin)
        {
            ++counter;
        }

        Console.WriteLine("Finished " + item);

        return item;
    }

    static void Main(string[] args)
    {
        var itemsQueue = new BlockingCollection<int>();
        var random = new Random();

        // The query is only built here; it is enumerated by the background task below.
        ParallelQuery<int> results = itemsQueue
            .GetConsumingEnumerable()
            .AsParallel()
            .AsOrdered()
            .WithMergeOptions(ParallelMergeOptions.NotBuffered)
            .Select(item => Process(item));

        var completion = new TaskCompletionSource<bool>();

        // Print every result as the query yields it, then signal the main thread.
        Task.Factory.StartNew(() =>
        {
            foreach (int finished in results)
            {
                Console.WriteLine("Result Available: " + finished);
            }

            completion.SetResult(true);
        });

        int iterations = random.Next(5, 50);
        Console.WriteLine("------- iterations: " + iterations + "-------");

        // Initial burst of items 1..iterations.
        for (int seed = 1; seed <= iterations; ++seed)
        {
            itemsQueue.Add(seed);
        }

        // Every key press other than 's' enqueues one more item; 's' ends the input phase.
        while (Console.ReadKey().KeyChar != 's')
        {
            ++iterations;

            Console.WriteLine("adding: " + iterations);
            itemsQueue.Add(iterations);
        }

        itemsQueue.CompleteAdding();

        // Block until the background task has printed every result.
        completion.Task.Wait();

        Console.WriteLine("Done!");
        Console.ReadKey();
        itemsQueue.Dispose();
    }
}

如上面的示例所示,通常的情况是:除最后几个结果之外,其余结果都会陆续变为可用(我不能 100% 确定,但滞留未输出的结果数量可能与机器的核心数大致相关);直到调用 itemsQueue.CompleteAdding();(在示例中即按下“s”键)之后,剩余的结果才最终变为可用。

尽管我指定了.WithMergeOptions(ParallelMergeOptions.NotBuffered),为什么结果没有立即可用,我怎样才能使它们立即可用?

4

1 回答 1

2

请注意,如果您可以调用实例方法 BlockingCollection.CompleteAdding(),这个问题就不成问题 - 调用它会使所有结果都完成输出。

简答

另一方面,如果您需要保持顺序,需要结果尽快可用,而又没有机会调用 BlockingCollection.CompleteAdding(),那么在可能的情况下,最好以非并行的方式消费队列中的项目,而把每个单独任务的处理并行化。

例如

  /// <summary>
  /// Variant of the demo that reads the queue sequentially (not parallel, but suitable for
  /// monitoring the queue) and puts the parallelism inside the work done for each item.
  /// </summary>
  class Program
  {
    /// <summary>
    /// Processes a single item; the simulated work itself is a parallel query.
    /// </summary>
    private static int Process(int item)
    {
        Console.WriteLine("Working on " + item);

        //Focus your parallelization efforts on the work of
        //the individual task
        //E.g, simulated:
        int length = 90000000 - (10 * (item % 3));
        double work = Enumerable.Range(0, length)
            .AsParallel()
            .Select(w => w + 1)
            .Average();

        Console.WriteLine("Finished " + item);

        return item;
    }

    static void Main(string[] args)
    {
        var itemsQueue = new BlockingCollection<int>();
        var random = new Random();

        // Plain (sequential) LINQ over the consuming enumerable; enumerated by the task below.
        IEnumerable<int> results = itemsQueue
            .GetConsumingEnumerable()
            .Select(item => Process(item));

        var completion = new TaskCompletionSource<bool>();

        // Print every result as it is produced, then signal the main thread.
        Task.Factory.StartNew(() =>
        {
            foreach (int finished in results)
            {
                Console.WriteLine("Result Available: " + finished);
            }

            completion.SetResult(true);
        });

        int iterations = random.Next(5, 50);
        Console.WriteLine("------- iterations: " + iterations + "-------");

        // Initial burst of items 1..iterations.
        for (int seed = 1; seed <= iterations; ++seed)
        {
            itemsQueue.Add(seed);
        }

        // Every key press other than 's' enqueues one more item; 's' ends the input phase.
        while (Console.ReadKey().KeyChar != 's')
        {
            ++iterations;

            Console.WriteLine("adding: " + iterations);
            itemsQueue.Add(iterations);
        }

        itemsQueue.CompleteAdding();

        // Block until the background task has printed every result.
        completion.Task.Wait();

        Console.WriteLine("Done!");
        Console.ReadKey();
        itemsQueue.Dispose();
    }
  }

更长的答案

似乎 BlockingCollection 与 AsOrdered() 之间存在某种特殊的相互作用。

似乎只要分区中的某个枚举器发生阻塞,AsOrdered 就会停止处理任务。

默认分区器通常按大于 1 的块来处理项目 - 而阻塞队列会一直阻塞,直到该块被填满(或因调用 CompleteAdding 而结束)。

但是,即使块大小为 1,问题也不会完全消失。

为了观察这一点,您可以实现自己的分区器,有时就能看到这种行为。(请注意,如果您指定 .WithDegreeOfParallelism(1),等待结果出现的问题就会消失 - 但并行度为 1 当然就违背了并行的初衷!)

例如

/// <summary>
/// An orderable partitioner over a <see cref="BlockingCollection{T}"/> that hands out items
/// one at a time, tagging each item with a consecutive order key (0, 1, 2, ...) in the
/// order the items are taken from the collection.
/// </summary>
/// <typeparam name="T">Type of the items in the collection.</typeparam>
public class ImmediateOrderedPartitioner<T> : OrderablePartitioner<T>
{
    private readonly IEnumerable<T> _consumingEnumerable;

    // Shared by all partitions: holds the next order key and is also the lock that makes
    // "take one item + assign its key" atomic across partitions.
    private readonly Ordering _ordering = new Ordering();

    /// <summary>
    /// Creates a partitioner that consumes <paramref name="collection"/>.
    /// </summary>
    /// <param name="collection">The collection to consume items from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public ImmediateOrderedPartitioner(BlockingCollection<T> collection)
        // keysOrderedInEachPartition: true  - each partition receives increasing keys.
        // keysOrderedAcrossPartitions: false - items are handed to whichever partition asks
        //                                      next, so partition 0 may hold a larger key
        //                                      than partition 1.
        // keysNormalized: true               - keys are consecutive starting at 0.
        : base(true, false, true)
    {
        if (collection == null)
        {
            throw new ArgumentNullException("collection");
        }

        _consumingEnumerable = collection.GetConsumingEnumerable();
    }

    /// <summary>Mutable counter shared between all partition enumerators.</summary>
    private sealed class Ordering
    {
        // Key that will be assigned to the next item taken from the collection.
        public long Next;
    }

    /// <summary>
    /// Enumerator for a single partition. Each partition has its own instance (and thus its
    /// own <see cref="Current"/>), while the source and the key counter are shared.
    /// </summary>
    private sealed class PartitionEnumerator : IEnumerator<KeyValuePair<long, T>>
    {
        private readonly IEnumerable<T> _source;

        private readonly Ordering _ordering;

        private KeyValuePair<long, T> _current;

        private bool _hasItem;

        public PartitionEnumerator(IEnumerable<T> consumingEnumerable, Ordering ordering)
        {
            _source = consumingEnumerable;
            _ordering = ordering;
        }

        /// <summary>The item fetched by the last successful <see cref="MoveNext"/>.</summary>
        /// <exception cref="InvalidOperationException">
        /// <see cref="MoveNext"/> has not been called yet, or its last call returned false.
        /// </exception>
        public KeyValuePair<long, T> Current
        {
            get
            {
                if (!_hasItem)
                {
                    throw new InvalidOperationException();
                }

                return _current;
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Takes exactly one item from the shared source (blocking while it is empty and
        /// adding is not completed) and assigns it the next order key.
        /// </summary>
        /// <returns>true if an item was taken; false once the source is completed and empty.</returns>
        public bool MoveNext()
        {
            // The lock is the shared Ordering instance, so taking an item and assigning its
            // key happen as one step no matter which partition is asking.
            lock (_ordering)
            {
                bool gotItem = false;

                // The predicate only runs if an item was actually produced, which lets us
                // tell "got default(T)" apart from "source finished".
                T next = _source.Take(1).FirstOrDefault(item => { gotItem = true; return true; });

                if (gotItem)
                {
                    // Exactly one increment per item keeps the keys consecutive.
                    _current = new KeyValuePair<long, T>(_ordering.Next, next);
                    ++_ordering.Next;
                }

                _hasItem = gotItem;

                return gotItem;
            }
        }

        public void Reset()
        {
            throw new NotSupportedException();
        }

        public void Dispose()
        {
            // Nothing to release: the enumerator holds no resources of its own.
        }
    }

    /// <summary>
    /// Creates <paramref name="partitionCount"/> partitions that all draw from the same
    /// underlying collection and share one key counter.
    /// </summary>
    /// <param name="partitionCount">Number of partitions to create.</param>
    /// <returns>One enumerator per partition.</returns>
    public override IList<IEnumerator<KeyValuePair<long, T>>> GetOrderablePartitions(int partitionCount)
    {
        var result = new List<IEnumerator<KeyValuePair<long, T>>>();

        // One enumerator per partition so that each partition's Current cannot be overwritten
        // by another partition's MoveNext; ordering stays consistent because all of them lock
        // on the same shared Ordering instance.
        for (int i = 0; i < partitionCount; ++i)
        {
            result.Add(new PartitionEnumerator(_consumingEnumerable, _ordering));
        }

        return result;
    }

    /// <summary>Dynamic partitioning is not offered by this partitioner.</summary>
    public override bool SupportsDynamicPartitions
    {
        get { return false; }
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override IEnumerable<T> GetDynamicPartitions()
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override IEnumerable<KeyValuePair<long, T>> GetOrderableDynamicPartitions()
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override IList<IEnumerator<T>> GetPartitions(int partitionCount)
    {
        throw new NotImplementedException();
    }
}

/// <summary>
/// Demo that runs the ordered, non-buffered PLINQ query over an
/// <c>ImmediateOrderedPartitioner</c> instead of the collection's consuming enumerable.
/// Starts with a single item; pressing 's' stops adding, any other key adds one more item.
/// </summary>
class Program
{
    /// <summary>
    /// Stands in for real work: logs, spins a busy loop, logs again and returns the item unchanged.
    /// </summary>
    private static int Process(int item)
    {
        Console.WriteLine("Working on " + item);

        int counter = 0;
        for (int spin = 0; spin <= 90000000; ++spin)
        {
            ++counter;
        }

        Console.WriteLine("Finished " + item);

        return item;
    }

    static void Main(string[] args)
    {
        var itemsQueue = new BlockingCollection<int>();
        var partitioner = new ImmediateOrderedPartitioner<int>(itemsQueue);

        // Only referenced by the commented-out random.Next(...) below.
        var random = new Random();

        // The query is only built here; it is enumerated by the background task below.
        ParallelQuery<int> results = partitioner
            .AsParallel()
            .AsOrdered()
            .WithMergeOptions(ParallelMergeOptions.NotBuffered)
            //.WithDegreeOfParallelism(1)
            .Select(item => Process(item));

        var completion = new TaskCompletionSource<bool>();

        // Print every result as the query yields it, then signal the main thread.
        Task.Factory.StartNew(() =>
        {
            foreach (int finished in results)
            {
                Console.WriteLine("Result Available: " + finished);
            }

            completion.SetResult(true);
        });

        int iterations = 1; // random.Next(5, 50);
        Console.WriteLine("------- iterations: " + iterations + "-------");

        // Initial items 1..iterations.
        for (int seed = 1; seed <= iterations; ++seed)
        {
            itemsQueue.Add(seed);
        }

        // Every key press other than 's' enqueues one more item; 's' ends the input phase.
        while (Console.ReadKey().KeyChar != 's')
        {
            ++iterations;

            Console.WriteLine("adding: " + iterations);
            itemsQueue.Add(iterations);
        }

        itemsQueue.CompleteAdding();

        // Block until the background task has printed every result.
        completion.Task.Wait();

        Console.WriteLine("Done!");
        Console.ReadKey();
        itemsQueue.Dispose();
    }
}

替代方法:如果无法并行化单个任务(如“简答”中所建议的那样),并且问题中的其他约束都适用,那么您可以实现自己的队列类型,为每个项目启动一个任务 - 让任务并行库负责工作的调度,而由您自己同步结果的消费。

例如,如下所示(带有标准的“无保证”免责声明!)

/// <summary>
/// A queue entry that pairs an input with a result slot. The result counts as "ready" once
/// it differs from the "not finished" sentinel supplied at construction. An entry created
/// through the internal parameterless constructor is an end-of-queue marker and is always
/// reported as ready.
/// </summary>
/// <typeparam name="TInput">Type of the input value.</typeparam>
/// <typeparam name="TResult">Type of the result value.</typeparam>
public class QueuedItem<TInput, TResult>
{
    // Guards every read and write of _result.
    private readonly object _lockObject = new object();

    private readonly TInput _input;

    // Sentinel meaning "no result has been written yet".
    private readonly TResult _notfinished;

    private TResult _result;

    // True only for the end-of-queue marker.
    internal readonly bool IsEndQueue = false;

    /// <summary>Creates the end-of-queue marker.</summary>
    internal QueuedItem()
    {
        IsEndQueue = true;
    }

    /// <summary>Creates an entry for <paramref name="input"/> whose result is not yet written.</summary>
    /// <param name="input">The value to be processed.</param>
    /// <param name="notfinished">Sentinel value that marks the result as not yet available.</param>
    public QueuedItem(TInput input, TResult notfinished)
    {
        _input = input;
        _notfinished = notfinished;
        _result = notfinished;
    }

    /// <summary>The input value this entry was created for.</summary>
    public TInput Input
    {
        get { return _input; }
    }

    /// <summary>
    /// True when the stored result differs from the sentinel, or when this entry is the
    /// end-of-queue marker.
    /// </summary>
    public bool IsResultReady
    {
        get
        {
            lock (_lockObject)
            {
                bool hasValue = !object.Equals(_result, _notfinished);
                return hasValue || IsEndQueue;
            }
        }
    }

    /// <summary>Returns the stored result.</summary>
    /// <exception cref="InvalidOperationException">The result is not ready yet.</exception>
    public TResult ReadResult()
    {
        lock (_lockObject)
        {
            if (IsResultReady)
            {
                return _result;
            }

            throw new InvalidOperationException("Check IsResultReady before calling ReadResult()");
        }
    }

    /// <summary>Stores the result.</summary>
    /// <param name="value">The result to store.</param>
    /// <exception cref="InvalidOperationException">The entry is already reported as ready.</exception>
    public void WriteResult(TResult value)
    {
        lock (_lockObject)
        {
            if (!IsResultReady)
            {
                _result = value;
                return;
            }

            throw new InvalidOperationException("Result has already been written");
        }
    }
}


/// <summary>
/// A queue that starts a task for every added item (so items are processed in parallel by
/// the task scheduler) while results are handed out strictly in the order the items were
/// added, as soon as the result at the head of the queue is ready.
/// </summary>
/// <remarks>
/// If the processor returns without calling <c>WriteResult</c> on its argument (or throws
/// before doing so), that item never becomes ready and no later result is handed out.
/// </remarks>
/// <typeparam name="TInput">Type of the items to process.</typeparam>
/// <typeparam name="TResult">Type of the results.</typeparam>
public class ParallelImmediateOrderedProcessingQueue<TInput, TResult>
{
    // Read lock: taken by Add (many adders may run concurrently).
    // Write lock: taken by CompleteAddingItems so it cannot interleave with an Add.
    private readonly ReaderWriterLockSlim _addLock = new ReaderWriterLockSlim();

    private readonly object _readingResultsLock = new object();

    private readonly ConcurrentQueue<QueuedItem<TInput, TResult>> _concurrentQueue = new ConcurrentQueue<QueuedItem<TInput, TResult>>();

    // Only read/written while holding _addLock.
    private bool _isFinishedAdding = false;

    private readonly TResult _notFinished;

    private readonly Action<QueuedItem<TInput, TResult>> _processor;

    // Completed when the end-of-queue marker has been consumed.
    private readonly TaskCompletionSource<bool> _completion = new TaskCompletionSource<bool>();

    /// <param name="notFinished">A value that indicates the result is not yet finished</param>
    /// <param name="processor">Must call WriteResult() on argument when finished.</param>
    /// <exception cref="ArgumentNullException"><paramref name="processor"/> is null.</exception>
    public ParallelImmediateOrderedProcessingQueue(TResult notFinished, Action<QueuedItem<TInput, TResult>> processor)
    {
        if (processor == null)
        {
            throw new ArgumentNullException("processor");
        }

        _notFinished = notFinished;
        _processor = processor;
    }

    /// <summary>
    /// Raised (on a task thread) when the item at the head of the queue has its result ready.
    /// Handlers are expected to call <see cref="ConsumeReadyResults"/>.
    /// </summary>
    public event Action ResultsReady = delegate { };

    // Raises ResultsReady only if the head of the queue is ready; a finished item further
    // back must wait for the items in front of it.
    private void SignalResult()
    {
        QueuedItem<TInput, TResult> item;
        if (_concurrentQueue.TryPeek(out item) && item.IsResultReady)
        {
            ResultsReady();
        }
    }

    /// <summary>
    /// Enqueues <paramref name="input"/> and starts a task that runs the processor on it.
    /// </summary>
    /// <param name="input">The item to process.</param>
    /// <exception cref="InvalidOperationException">
    /// <see cref="CompleteAddingItems"/> has already been called.
    /// </exception>
    public void Add(TInput input)
    {
        _addLock.EnterReadLock();
        try
        {
            if (_isFinishedAdding)
            {
                throw new InvalidOperationException("An attempt was made to add an item, but adding items was marked as completed");
            }

            var queuedItem = new QueuedItem<TInput, TResult>(input, _notFinished);

            // Enqueue before starting the task so queue order equals Add order.
            _concurrentQueue.Enqueue(queuedItem);

            Task.Factory.StartNew(() => { _processor(queuedItem); SignalResult(); });
        }
        finally
        {
            // Always release, even if enqueueing or starting the task throws.
            _addLock.ExitReadLock();
        }
    }

    /// <summary>
    /// Dequeues and yields, in Add order, every result that is ready at the head of the
    /// queue, stopping at the first item whose result is not ready yet. Consuming the
    /// end-of-queue marker completes <see cref="WaitForCompletion"/>.
    /// </summary>
    /// <returns>The ready results, in the order their inputs were added.</returns>
    public IEnumerable<TResult> ConsumeReadyResults()
    {
        //lock necessary to preserve ordering
        lock (_readingResultsLock)
        {
            QueuedItem<TInput, TResult> queuedItem;

            while (_concurrentQueue.TryPeek(out queuedItem) && queuedItem.IsResultReady)
            {
                if (!_concurrentQueue.TryDequeue(out queuedItem))
                    throw new ApplicationException("this shouldn't happen");

                if (queuedItem.IsEndQueue)
                {
                    // TrySetResult: completing more than once must not throw out of the iterator.
                    _completion.TrySetResult(true);
                }
                else
                {
                    yield return queuedItem.ReadResult();
                }
            }
        }
    }

    /// <summary>
    /// Marks the queue as complete: further <see cref="Add"/> calls throw, and an
    /// end-of-queue marker is enqueued behind all pending items. Calling this more than
    /// once has no additional effect.
    /// </summary>
    public void CompleteAddingItems()
    {
        _addLock.EnterWriteLock();
        try
        {
            if (_isFinishedAdding)
            {
                // Already completed; a second marker would be redundant.
                return;
            }

            _isFinishedAdding = true;

            var queueCompletion = new QueuedItem<TInput, TResult>();

            _concurrentQueue.Enqueue(queueCompletion);

            // The marker is always "ready"; signal in case it is already at the head.
            Task.Factory.StartNew(() => { SignalResult(); });
        }
        finally
        {
            _addLock.ExitWriteLock();
        }
    }

    /// <summary>
    /// Blocks until the end-of-queue marker has been consumed by <see cref="ConsumeReadyResults"/>.
    /// </summary>
    public void WaitForCompletion()
    {
        _completion.Task.Wait();
    }
}

/// <summary>
/// Demo driving <c>ParallelImmediateOrderedProcessingQueue</c>: items are processed in
/// parallel and results are printed whenever the queue raises <c>ResultsReady</c>.
/// Pressing 's' stops adding; any other key adds one more item.
/// </summary>
class Program
{
    /// <summary>
    /// Processor callback: simulates work for the item's input and writes the input back
    /// as the result.
    /// </summary>
    private static void Process(QueuedItem<int, int> qi)
    {
        Console.WriteLine("Working on " + qi.Input);

        //simulate work
        int counter = 0;
        int maxBusy = 90000000 - (10 * (qi.Input % 3));
        for (int spin = 0; spin <= maxBusy; ++spin)
        {
            ++counter;
        }

        Console.WriteLine("Finished " + qi.Input);

        qi.WriteResult(qi.Input);
    }

    static void Main(string[] args)
    {
        // Sentinel meaning "no result written yet".
        const int notFinished = int.MinValue;

        var processingQueue = new ParallelImmediateOrderedProcessingQueue<int, int>(notFinished, Process);

        // Each notification starts a task that prints whatever results are ready.
        processingQueue.ResultsReady += () =>
        {
            Task.Factory.StartNew(() =>
            {
                foreach (int ready in processingQueue.ConsumeReadyResults())
                {
                    Console.WriteLine("Results Available: " + ready);
                }
            });
        };

        int iterations = new Random().Next(5, 50);
        Console.WriteLine("------- iterations: " + iterations + "-------");

        // Initial burst of items 1..iterations.
        for (int seed = 1; seed <= iterations; ++seed)
        {
            processingQueue.Add(seed);
        }

        // Every key press other than 's' enqueues one more item; 's' ends the input phase.
        while (Console.ReadKey().KeyChar != 's')
        {
            ++iterations;

            Console.WriteLine("adding: " + iterations);
            processingQueue.Add(iterations);
        }

        processingQueue.CompleteAddingItems();
        processingQueue.WaitForCompletion();

        Console.WriteLine("Done!");
        Console.ReadKey();
    }
}
于 2013-01-30T10:27:31.673 回答