2

我在 TPL Dataflow 中有一个 BatchBlock(批处理块),并且有几个目标块链接到该 BatchBlock。但是,目标块的数量会动态变化,因此批次的大小也需要随之变化。问题是批次大小必须在初始化 BatchBlock 时指定,我找不到之后再调整它的方法。有什么办法可以解决这个问题吗?唯一的办法是否就是取消链接(释放所有指向 BatchBlock 的链接以及 BatchBlock 本身),用新的批次大小重新初始化 BatchBlock,然后再重新链接?我可以这样做,但如何确保旧批次和新批次的数据不会混在一起呢?

例如,如果我有 2 个 TransformBlock(转换块)向 BatchBlock 输送数据,现在又增加了一个 TransformBlock,并且想要把批次大小增加到 3,我如何确保在增大批次大小之前,所有先前的批次都已处理完毕,从而保证行为同步?关键在于所有转换块都会收到完全相同的数据项,而这些转换块的输出应当这样分批:只有对应同一个输入的输出才会被放进同一个批次。

这是我想要的示例:

用于转换块的恒定整数流:1,2,3,[批量大小增加的点],4,5,...

假设转换块原样输出它们收到的数据,例如 1 => 1

所以 BatchBlock 的输出应该是这样的:[1,1], [2,2], [3,3], [批次大小改变], [4,4,4], [5,5,5], ...

这是我当前的代码:

/// <summary>
/// Benchmark harness for a small TPL Dataflow pipeline: a broadcast block feeds every
/// <see cref="CoreLogic"/> producer, a non-greedy batch block groups the producers' outputs,
/// and a transform block merges each batch into a list of <see cref="FinalObject"/>.
/// </summary>
public class Test
{
    private Stopwatch watch;

    private readonly BroadcastBlock<List<InputObject>> tempBCB;
    private readonly BatchBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> batchBlock;
    private readonly TransformBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>[], List<FinalObject>> transformBlock;
    private readonly ActionBlock<List<FinalObject>> justToFlushTransformBlock;

    // Every producer that feeds batchBlock. The batch size is derived from this array so the
    // number of producers and the batch size cannot drift apart.
    private readonly CoreLogic[] cores;

    /// <summary>
    /// Builds all blocks and links them: broadcast -> producers -> batch -> merge -> sink.
    /// </summary>
    public Test()
    {
        tempBCB = new BroadcastBlock<List<InputObject>>(input => input);

        cores = new[] { new CoreLogic(), new CoreLogic() };

        // Batch size == number of producers; the intent is one output per producer in each batch.
        batchBlock = new BatchBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>>(
            cores.Length,
            new GroupingDataflowBlockOptions { Greedy = false });

        transformBlock = new TransformBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>[], List<FinalObject>>(
            array => MergeBatch(array),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded });

        justToFlushTransformBlock = new ActionBlock<List<FinalObject>>(list =>
        {
            // Intentionally empty: this block only exists to accept items from the
            // transformBlock output queue.
        });

        foreach (CoreLogic core in cores)
        {
            tempBCB.LinkTo(core.transformBlock, new DataflowLinkOptions { PropagateCompletion = true });

            // Completion is deliberately not propagated on this link: batchBlock has several
            // upstream producers and is completed by Start() once all of them have finished.
            core.transformBlock.LinkTo(batchBlock);
        }

        batchBlock.LinkTo(transformBlock, new DataflowLinkOptions { PropagateCompletion = true });

        transformBlock.LinkTo(justToFlushTransformBlock, new DataflowLinkOptions { PropagateCompletion = true });
    }

    /// <summary>
    /// Posts 30 input lists of growing size (10000 * j elements) into the pipeline, blocks until
    /// the merge stage has completed, prints the elapsed time and then waits for a console line.
    /// </summary>
    /// <exception cref="AggregateException">
    /// Thrown by the blocking wait when a block in the pipeline faulted.
    /// </exception>
    public void Start()
    {
        const int numberChunks = 30;

        watch = Stopwatch.StartNew();

        for (int j = 1; j <= numberChunks; j++)
        {
            int collectionSize = 10000 * j;

            List<InputObject> collection = new List<InputObject>(collectionSize);
            for (int i = 0; i < collectionSize; i++)
            {
                collection.Add(new InputObject(i));
            }

            tempBCB.Post(collection);
        }

        tempBCB.Complete();

        Task[] producerCompletions = new Task[cores.Length];
        for (int i = 0; i < cores.Length; i++)
        {
            producerCompletions[i] = cores[i].transformBlock.Completion;
        }

        Task.WhenAll(producerCompletions).ContinueWith(finished =>
        {
            if (finished.IsFaulted)
            {
                // Forward the producers' failure instead of completing normally, so the fault
                // reaches transformBlock through the completion-propagating link and surfaces
                // in the Wait() below rather than being silently dropped.
                ((IDataflowBlock)batchBlock).Fault(finished.Exception);
            }
            else
            {
                batchBlock.Complete();
            }
        });

        transformBlock.Completion.Wait();

        watch.Stop();

        Console.WriteLine("Elapsed time (in milliseconds): " + watch.ElapsedMilliseconds);
        Console.ReadLine();
    }

    /// <summary>
    /// Merges one batch into a list of <see cref="FinalObject"/>: one entry per input object of
    /// the first tuple, with every tuple's intermediate objects appended by dictionary key.
    /// </summary>
    /// <param name="array">The batch emitted by the batch block.</param>
    /// <returns>The merged list, indexed like the input list of the first tuple.</returns>
    private static List<FinalObject> MergeBatch(Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>[] array)
    {
        // The input list of the first tuple is used for the whole batch.
        List<InputObject> inputObjects = array[0].Item1;
        List<FinalObject> ret = inputObjects.ConvertAll(x => new FinalObject(x));

        foreach (var tuple in array)
        {
            // The dictionary key is used as the index of the FinalObject the value belongs to.
            foreach (var entry in tuple.Item2)
            {
                ret[entry.Key].outputList.Add(entry.Value);
            }
        }

        return ret;
    }
}

/// <summary>
/// One producer stage of the pipeline: wraps a transform block that pairs each incoming input
/// list with a freshly generated dictionary of <see cref="IntermediateObject"/> instances.
/// </summary>
public class CoreLogic
{
    /// <summary>The dataflow block that performs the transformation; link sources and targets to it.</summary>
    public TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> transformBlock;

    /// <summary>Creates the transform block.</summary>
    public CoreLogic()
    {
        const int numberIntermediateObjects = 10000;

        transformBlock = new TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>>(input =>
        {
            // `input` does not influence the generated values; it is only passed through in the
            // returned tuple. The point is to generate a collection of IntermediateObject.

            // The final size is known up front, so allocate it once instead of growing repeatedly.
            var ret = new Dictionary<int, IntermediateObject>(numberIntermediateObjects);
            for (int i = 0; i < numberIntermediateObjects; i++)
            {
                ret.Add(i, new IntermediateObject(i));
            }

            return Tuple.Create(input, ret);
        });
    }
}

/// <summary>
/// Cloneable holder of a single integer that can only be set through the constructor.
/// </summary>
public class InputObject : ICloneable
{
    /// <summary>Stores <paramref name="value"/> in <see cref="value1"/>.</summary>
    public InputObject(int value)
    {
        value1 = value;
    }

    /// <summary>The integer supplied at construction time.</summary>
    public int value1 { get; private set; }

    /// <summary>Returns a strongly typed shallow (memberwise) copy of this instance.</summary>
    public InputObject Clone()
    {
        object copy = MemberwiseClone();
        return (InputObject)copy;
    }

    /// <summary>Untyped <see cref="ICloneable"/> entry point; forwards to <see cref="Clone"/>.</summary>
    object ICloneable.Clone()
    {
        return Clone();
    }
}

/// <summary>
/// Holder of a single integer that can only be set through the constructor.
/// </summary>
public class IntermediateObject
{
    /// <summary>Stores <paramref name="value"/> in <see cref="value1"/>.</summary>
    public IntermediateObject(int value)
    {
        value1 = value;
    }

    /// <summary>The integer supplied at construction time.</summary>
    public int value1 { get; private set; }
}

/// <summary>
/// Pairs one <see cref="InputObject"/> with the list of <see cref="IntermediateObject"/>
/// instances collected for it.
/// </summary>
public class FinalObject
{
    /// <summary>Intermediate objects collected for <see cref="input"/>; starts out empty.</summary>
    public List<IntermediateObject> outputList = new List<IntermediateObject>();

    /// <summary>Creates a result entry for <paramref name="input"/> with an empty output list.</summary>
    public FinalObject(InputObject input)
    {
        this.input = input;
    }

    /// <summary>The input object this result belongs to.</summary>
    public InputObject input { get; private set; }
}

/// <summary>
/// Helpers for copying collections of <see cref="ICloneable"/> values.
/// </summary>
public static class Cloning
{
    /// <summary>
    /// Creates a new list whose elements are clones of the elements of <paramref name="original"/>,
    /// in the same order.
    /// </summary>
    /// <typeparam name="TValue">Element type; must implement <see cref="ICloneable"/>.</typeparam>
    /// <param name="original">The list to copy. Must not be null; null elements are allowed.</param>
    /// <returns>A new list with one cloned element (or null) per element of the original.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
    public static List<TValue> CloneListCloneValues<TValue>(List<TValue> original) where TValue : ICloneable
    {
        if (original == null)
        {
            throw new ArgumentNullException("original");
        }

        List<TValue> ret = new List<TValue>(original.Count);

        foreach (TValue entry in original)
        {
            // A null element has nothing to clone; keep the slot as null so positions line up
            // with the original instead of failing with a NullReferenceException.
            ret.Add(entry == null ? default(TValue) : (TValue)entry.Clone());
        }

        return ret;
    }
}
4

0 回答 0