我一直在使用 TPL Dataflow,但遇到了一个我无法解决的问题:
我有以下架构:
BroadcastBlock<List<Object1>>
-> 2 个不同的TransformBlock<List<Object1>, Tuple<int, List<Object1>>>
-> 都链接到TransformManyBlock<Tuple<int, List<Object1>>, Object2>
我在链末端的 TransformManyBlock 中切换两种 lambda 表达式:(a) 包含对流入的元组执行操作的代码,(b) 完全不包含任何代码。
在各个 TransformBlock 中,我测量从第一项到达开始、到 TransformBlock.Completion 表明该块已完成为止所经过的时间(BroadcastBlock 链接到这些 TransformBlock 时,PropagateCompletion 设置为 true)。
我无法理解的是,为什么 (b) 情况下 TransformBlock 的完成速度比 (a) 快 5-6 倍。这完全违背了 TDF 的设计意图。TransformBlock 中的项目已经被传递给了 TransformManyBlock,因此 TransformManyBlock 对这些项目所做的任何处理都不应该影响 TransformBlock 何时完成。我想不出 TransformManyBlock 中发生的事情会对前面的 TransformBlock 产生影响的任何理由。
谁能解释这个奇怪的现象?
这是一些显示差异的代码。运行代码时,请确保更改以下两行:
tfb1.transformBlock.LinkTo(transformManyBlock);
tfb2.transformBlock.LinkTo(transformManyBlock);
至:
tfb1.transformBlock.LinkTo(transformManyBlockEmpty);
tfb2.transformBlock.LinkTo(transformManyBlockEmpty);
以便观察前面的 TransformBlock 在运行时间上的差异。
/// <summary>
/// Console entry point for the first repro: constructs the pipeline and runs it.
/// </summary>
class Program
{
    /// <summary>Creates a <c>Test</c> instance and starts it.</summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        new Test().Start();
    }
}
/// <summary>
/// Builds and drives the repro pipeline:
/// one BroadcastBlock feeding two TransformBlocks, which feed a single
/// TransformManyBlock (either the "with code" or the "empty" variant),
/// which in turn feeds an ActionBlock.
/// </summary>
class Test
{
    private readonly BroadcastBlock<List<Object1>> broadCastBlock;
    private readonly TransformBlockClass tfb1;
    private readonly TransformBlockClass tfb2;
    private readonly TransformManyBlock<Tuple<int, List<Object1>>, Object2> transformManyBlock;
    private readonly TransformManyBlock<Tuple<int, List<Object1>>, Object2> transformManyBlockEmpty;
    private readonly ActionBlock<Object2> actionBlock;

    /// <summary>
    /// Creates all blocks, links them and wires up completion handling.
    /// </summary>
    public Test()
    {
        broadCastBlock = new BroadcastBlock<List<Object1>>(list => list);
        tfb1 = new TransformBlockClass();
        tfb2 = new TransformBlockClass();

        // Variant (a): burns CPU for every incoming tuple and emits nothing.
        transformManyBlock = new TransformManyBlock<Tuple<int, List<Object1>>, Object2>(
            newTuple =>
            {
                // Intentional busy work; the computed value is deliberately discarded.
                for (int counter = 1; counter <= 10000000; counter++)
                {
                    double result = Math.Sqrt(counter + 1.0);
                }
                return new Object2[0];
            });

        // Variant (b): does no work at all and emits nothing.
        transformManyBlockEmpty = new TransformManyBlock<Tuple<int, List<Object1>>, Object2>(
            tuple =>
            {
                return new Object2[0];
            });

        actionBlock = new ActionBlock<Object2>(item =>
        {
            // flush transformManyBlock
        });

        // linking
        DataflowLinkOptions propagate = new DataflowLinkOptions { PropagateCompletion = true };
        broadCastBlock.LinkTo(tfb1.transformBlock, propagate);
        broadCastBlock.LinkTo(tfb2.transformBlock, propagate);

        //link either to ->transformManyBlock or -> transformManyBlockEmpty
        tfb1.transformBlock.LinkTo(transformManyBlock);
        tfb2.transformBlock.LinkTo(transformManyBlock);

        // Both TransformManyBlocks share one ActionBlock. Linking each of them with
        // PropagateCompletion = true would complete the ActionBlock as soon as the
        // FIRST source finishes, making it decline anything the other source still
        // emits. The links therefore do not propagate completion; the ActionBlock
        // is completed (or faulted) below once BOTH sources are done.
        transformManyBlock.LinkTo(actionBlock);
        transformManyBlockEmpty.LinkTo(actionBlock);

        // completion
        // Two sources feed each TransformManyBlock, so completion is signalled
        // manually after both TransformBlocks have completed.
        Task.WhenAll(tfb1.transformBlock.Completion
            , tfb2.transformBlock.Completion)
            .ContinueWith(_ =>
            {
                transformManyBlockEmpty.Complete();
                transformManyBlock.Complete();
            });

        Task.WhenAll(transformManyBlock.Completion
            , transformManyBlockEmpty.Completion)
            .ContinueWith(upstream =>
            {
                if (upstream.IsFaulted)
                {
                    // Mirror what PropagateCompletion would do for a faulted source.
                    ((IDataflowBlock)actionBlock).Fault(upstream.Exception);
                }
                else
                {
                    actionBlock.Complete();
                }
            });

        transformManyBlock.Completion.ContinueWith(_ =>
        {
            Console.WriteLine("TransformManyBlock (with code) completed");
        });
        transformManyBlockEmpty.Completion.ContinueWith(_ =>
        {
            Console.WriteLine("TransformManyBlock (empty) completed");
        });
    }

    /// <summary>
    /// Posts <c>numberBlocks</c> freshly built lists of <c>collectionSize</c>
    /// <c>Object1</c> instances to the broadcast block, marks it complete and
    /// then waits for a line of console input so the pipeline can finish.
    /// </summary>
    public void Start()
    {
        const int numberBlocks = 100;
        const int collectionSize = 300000;

        //send collection numberBlock-times
        for (int i = 0; i < numberBlocks; i++)
        {
            List<Object1> list = new List<Object1>();
            for (int j = 0; j < collectionSize; j++)
            {
                list.Add(new Object1(j));
            }
            broadCastBlock.Post(list);
        }

        //mark broadCastBlock complete
        broadCastBlock.Complete();
        Console.WriteLine("Core routine finished");
        Console.ReadLine();
    }
}
/// <summary>
/// Owns a TransformBlock that pairs every incoming list with a running index,
/// and prints the time elapsed between the first processed item and the
/// completion of that block.
/// </summary>
class TransformBlockClass
{
    private Stopwatch watch;
    private bool isStarted;
    private int currentIndex;

    /// <summary>The wrapped block; callers link to and from it directly.</summary>
    public TransformBlock<List<Object1>, Tuple<int, List<Object1>>> transformBlock;

    /// <summary>
    /// Creates the stopwatch and the block, and registers the continuation
    /// that reports the elapsed time once the block completes.
    /// </summary>
    public TransformBlockClass()
    {
        watch = new Stopwatch();
        isStarted = false;

        transformBlock = new TransformBlock<List<Object1>, Tuple<int, List<Object1>>>(
            items => Stamp(items));

        transformBlock.Completion.ContinueWith(_ => ReportElapsed());
    }

    // Starts the stopwatch on the first item, then tags the list with the next index.
    private Tuple<int, List<Object1>> Stamp(List<Object1> items)
    {
        if (isStarted == false)
        {
            watch.Start();
            isStarted = true;
        }

        int index = currentIndex;
        currentIndex = index + 1;
        return Tuple.Create(index, items);
    }

    // Stops the stopwatch and writes the measured duration to the console.
    private void ReportElapsed()
    {
        watch.Stop();
        Console.WriteLine("TransformBlock : Time elapsed in ms: "
            + watch.ElapsedMilliseconds);
    }
}
/// <summary>
/// Reference-type payload holding a single integer that is fixed at construction.
/// </summary>
class Object1
{
    private int _val;

    /// <summary>The integer supplied to the constructor.</summary>
    public int val
    {
        get { return _val; }
        private set { _val = value; }
    }

    /// <summary>Creates an instance carrying <paramref name="val"/>.</summary>
    /// <param name="val">Value to store.</param>
    public Object1(int val)
    {
        this.val = val;
    }
}
/// <summary>
/// Output item type of the TransformManyBlocks: an integer together with a
/// list of <c>Object1</c> instances, both fixed at construction.
/// </summary>
class Object2
{
    private int _value;
    private List<Object1> _collection;

    /// <summary>The integer supplied to the constructor.</summary>
    public int value
    {
        get { return _value; }
        private set { _value = value; }
    }

    /// <summary>The list supplied to the constructor (stored by reference, not copied).</summary>
    public List<Object1> collection
    {
        get { return _collection; }
        private set { _collection = value; }
    }

    /// <summary>Creates an instance from the given integer and list.</summary>
    /// <param name="value">Integer to store.</param>
    /// <param name="collection">List to store.</param>
    public Object2(int value, List<Object1> collection)
    {
        this.value = value;
        this.collection = collection;
    }
}
*编辑:我发布了另一个代码片段,这次使用值类型的集合,结果无法重现我在上面的代码中观察到的问题。会不会是因为传递引用类型并同时对它们进行操作(即使是在不同的数据流块中)会造成阻塞并导致争用?*
/// <summary>
/// Console entry point for the second (value-type) repro.
/// </summary>
class Program
{
    /// <summary>Creates a <c>Test</c> instance and starts it.</summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        new Test().Start();
    }
}
/// <summary>
/// Second repro, using lists of <c>int</c>: one BroadcastBlock feeds four
/// pass-through TransformBlocks; two of them feed a TransformManyBlock that
/// sleeps per item, the other two feed one that does not. Each
/// TransformManyBlock drains into its own ActionBlock.
/// </summary>
class Test
{
    private BroadcastBlock<List<int>> broadCastBlock;
    private TransformBlock<List<int>, List<int>> tfb11;
    private TransformBlock<List<int>, List<int>> tfb12;
    private TransformBlock<List<int>, List<int>> tfb21;
    private TransformBlock<List<int>, List<int>> tfb22;
    private TransformManyBlock<List<int>, List<int>> transformManyBlock1;
    private TransformManyBlock<List<int>, List<int>> transformManyBlock2;
    private ActionBlock<List<int>> actionBlock1;
    private ActionBlock<List<int>> actionBlock2;

    /// <summary>
    /// Creates all blocks, links them and registers the completion continuations.
    /// </summary>
    public Test()
    {
        broadCastBlock = new BroadcastBlock<List<int>>(item => item);

        tfb11 = CreatePassThrough();
        tfb12 = CreatePassThrough();
        tfb21 = CreatePassThrough();
        tfb22 = CreatePassThrough();

        transformManyBlock1 = new TransformManyBlock<List<int>, List<int>>(item =>
        {
            Thread.Sleep(100);
            // The Thread.Sleep(100) may be replaced with actual work without
            // changing the results, which shows that the issue at hand is
            // unrelated to starvation of threads.
            return new List<int>[1] { item };
        });
        transformManyBlock2 = new TransformManyBlock<List<int>, List<int>>(
            item => new List<int>[1] { item });

        // Both action blocks only drain their TransformManyBlock.
        actionBlock1 = new ActionBlock<List<int>>(item => { });
        actionBlock2 = new ActionBlock<List<int>>(item => { });

        // linking
        var propagate = new DataflowLinkOptions { PropagateCompletion = true };
        foreach (var passThrough in new[] { tfb11, tfb12, tfb21, tfb22 })
        {
            broadCastBlock.LinkTo(passThrough, propagate);
        }

        tfb11.LinkTo(transformManyBlock1);
        tfb12.LinkTo(transformManyBlock1);
        tfb21.LinkTo(transformManyBlock2);
        tfb22.LinkTo(transformManyBlock2);

        transformManyBlock1.LinkTo(actionBlock1, propagate);
        transformManyBlock2.LinkTo(actionBlock2, propagate);

        // completion: each TransformManyBlock is completed by hand once both of
        // its upstream TransformBlocks have completed.
        Task.WhenAll(tfb11.Completion, tfb12.Completion).ContinueWith(_ =>
        {
            Console.WriteLine("TransformBlocks 11 and 12 completed");
            transformManyBlock1.Complete();
        });
        Task.WhenAll(tfb21.Completion, tfb22.Completion).ContinueWith(_ =>
        {
            Console.WriteLine("TransformBlocks 21 and 22 completed");
            transformManyBlock2.Complete();
        });

        transformManyBlock1.Completion.ContinueWith(
            _ => Console.WriteLine("TransformManyBlock (from tfb11 and tfb12) finished"));
        transformManyBlock2.Completion.ContinueWith(
            _ => Console.WriteLine("TransformManyBlock (from tfb21 and tfb22) finished"));
    }

    /// <summary>
    /// Posts <c>numberBlocks</c> freshly built lists of <c>collectionSize</c>
    /// integers to the broadcast block, marks it complete and then waits for a
    /// line of console input.
    /// </summary>
    public void Start()
    {
        const int numberBlocks = 100;
        const int collectionSize = 300000;

        // send a new collection numberBlocks times
        int posted = 0;
        while (posted < numberBlocks)
        {
            broadCastBlock.Post(BuildList(collectionSize));
            posted++;
        }

        // mark broadCastBlock complete
        broadCastBlock.Complete();
        Console.WriteLine("Core routine finished");
        Console.ReadLine();
    }

    // Creates a TransformBlock that returns its input unchanged.
    private static TransformBlock<List<int>, List<int>> CreatePassThrough()
    {
        return new TransformBlock<List<int>, List<int>>(item => item);
    }

    // Builds a new list containing the integers 0 .. size - 1 in ascending order.
    private static List<int> BuildList(int size)
    {
        List<int> numbers = new List<int>();
        for (int n = 0; n < size; n++)
        {
            numbers.Add(n);
        }
        return numbers;
    }
}