假设您有一堆用于将记录创建/插入到一堆不同的数据库表中的操作。您有一些记录可以插入而不依赖于任何其他插入的输出。你有一些需要等待另一件事完成。还有其他人需要等待许多事情完成,这些事情可能在流程中的不同时间完成。
你如何编写一个算法来对依赖树中的动作进行排序和分块,以便对插入/数据库动作进行最佳批处理?通过最佳批处理,我的意思是如果您可以一次将 10 条记录插入同一个表中,那么就这样做。任何时候可以批量插入,都应该尽量减少数据库调用/插入的数量。
这是示例代码中的一个片段,我使用一个简单的数据结构来捕获所有需要的信息,并与一个虚假的动作序列一起使用。
...
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
{ action: 'create', table: 'tb4', set: 'key13' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
...
请注意,我们的“依赖节点项”有 4 个可能的属性:
action
:在我们的情况下,这始终是“创造”,但将来可能是其他事情。table
:要插入的表名。set
:要添加到动作依赖树中共享全局“范围”的变量的名称,因此其他动作可以将其作为输入读取。input
:动作的输入,在我们的例子中都是“绑定”输入(但也可以是文字值,但这太容易了)。对于绑定输入,它从存储在依赖树的共享范围内的记录中读取一些属性/列值。
鉴于此,应该有可能以某种方式构建一个简单的算法,将动作分成可以并行和批处理的子集。例如,我们下面的代码最终会是这样的结构(我手动创建了这个输出,所以虽然我认为我做对了,但可能会出现错误。哦,请注意,虽然表格是编号的,但这并不意味着顺序对他们来说,只是简单的名字选择):
// this is "close to" the desired result, because
// there might be a slightly different sort applied
// to this when implemented, but the chunking pattern
// and grouping of elements should be exactly like This
// (I am pretty sure, I manually did this
// so there could be small mistakes, but I doubt it)
const closeToDesiredResult = [
[
[
{ action: 'create', table: 'tb1', set: 'key1' },
{ action: 'create', table: 'tb1', set: 'key21' },
],
[
{ action: 'create', table: 'tb2', set: 'key2' },
{ action: 'create', table: 'tb2', set: 'key3' },
{ action: 'create', table: 'tb2', set: 'key23' },
],
[
{ action: 'create', table: 'tb4', set: 'key6' },
{ action: 'create', table: 'tb4', set: 'key8' },
{ action: 'create', table: 'tb4', set: 'key13' },
],
[
{ action: 'create', table: 'tb3', set: 'key5' },
{ action: 'create', table: 'tb3', set: 'key7' },
{ action: 'create', table: 'tb3', set: 'key9' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb3', set: 'key24' },
],
[
{ action: 'create', table: 'tb6', set: 'key17' },
],
[
{ action: 'create', table: 'tb5', set: 'key16' },
]
],
[
[
{ action: 'create', table: 'tb1', set: 'key4', input: {
x: { type: 'binding', path: ['key2', 'baz'] }
} },
],
[
{ action: 'create', table: 'tb3', set: 'key10', input: {
y: { type: 'binding', path: ['key6', 'foo'] },
z: { type: 'binding', path: ['key1', 'bar'] }
} },
],
[
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
]
],
[
[
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
],
[
{ action: 'create', table: 'tb4', set: 'key11', input: {
a: { type: 'binding', path: ['key10', 'z'] },
b: { type: 'binding', path: ['key1', 'bar'] }
} },
],
[
{ action: 'create', table: 'tb6', set: 'key18', input: {
m: { type: 'binding', path: ['key4', 'x'] },
} },
{ action: 'create', table: 'tb6', set: 'key19', input: {
m: { type: 'binding', path: ['key4', 'x'] },
n: { type: 'binding', path: ['key13', 'a'] },
} },
]
],
[
[
{ action: 'create', table: 'tb2', set: 'key22', input: {
w: { type: 'binding', path: ['key18', 'm'] },
x: { type: 'binding', path: ['key17', 'm'] },
} },
],
[
{ action: 'create', table: 'tb6', set: 'key20', input: {
m: { type: 'binding', path: ['key18', 'm'] },
n: { type: 'binding', path: ['key17', 'm'] },
} },
]
]
]
注意结果数组中有 4 个顶级块。这些是主要步骤。然后在每一步中,所有的东西都是按表分组的,所以它们都可以并行运行,并且在每个表组内,它们都可以批量插入。繁荣。
您将如何实现这一点,我的大脑似乎很难掌握?
const actionTree = generateActionTree()
const chunkedActionTree = chunkDependencyTree(actionTree)
function chunkDependencyTree(list) {
const independentOnesMapByTableName = {}
list.forEach(node => {
// easy case
if (!node.input) {
const group = independentOnesMapByTableName[node.table]
= independentOnesMapByTableName[node.table] ?? []
group.push(node)
} else {
// I am at a loss for words...
}
})
}
function generateActionTree() {
// this would be constructed through a bunch of real-world
// functions, queuing up all the actions
// and pointing outputs to inputs.
return [
{ action: 'create', table: 'tb1', set: 'key1' },
{ action: 'create', table: 'tb2', set: 'key2' },
{ action: 'create', table: 'tb2', set: 'key3' },
{ action: 'create', table: 'tb3', set: 'key5' },
{ action: 'create', table: 'tb4', set: 'key6' },
{ action: 'create', table: 'tb3', set: 'key7' },
{ action: 'create', table: 'tb4', set: 'key8' },
{ action: 'create', table: 'tb3', set: 'key9' },
{ action: 'create', table: 'tb3', set: 'key10', input: {
y: { type: 'binding', path: ['key6', 'foo'] },
z: { type: 'binding', path: ['key1', 'bar'] }
} },
{ action: 'create', table: 'tb1', set: 'key4', input: {
x: { type: 'binding', path: ['key2', 'baz'] }
} },
{ action: 'create', table: 'tb4', set: 'key11', input: {
a: { type: 'binding', path: ['key10', 'z'] },
b: { type: 'binding', path: ['key1', 'bar'] }
} },
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
{ action: 'create', table: 'tb4', set: 'key13' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
{ action: 'create', table: 'tb5', set: 'key16' },
{ action: 'create', table: 'tb6', set: 'key17' },
{ action: 'create', table: 'tb6', set: 'key18', input: {
m: { type: 'binding', path: ['key4', 'x'] },
} },
{ action: 'create', table: 'tb6', set: 'key19', input: {
m: { type: 'binding', path: ['key4', 'x'] },
n: { type: 'binding', path: ['key13', 'a'] },
} },
{ action: 'create', table: 'tb6', set: 'key20', input: {
m: { type: 'binding', path: ['key18', 'm'] },
n: { type: 'binding', path: ['key17', 'm'] },
} },
{ action: 'create', table: 'tb1', set: 'key21' },
{ action: 'create', table: 'tb2', set: 'key22', input: {
w: { type: 'binding', path: ['key18', 'm'] },
x: { type: 'binding', path: ['key17', 'm'] },
} },
{ action: 'create', table: 'tb2', set: 'key23' },
{ action: 'create', table: 'tb3', set: 'key24' },
]
}
我认为这大致是拓扑排序,但不太确定如何将其应用于这种特定情况。