尽管匈牙利算法会起作用,但在您的情况下可以使用更简单的算法。您的隐含成本矩阵是一种特殊形式的对称矩阵:
ABS(SUBJ.match_field-CTRL.match_field)
因此,您可以相对容易地证明,在按的值排序的最优赋值{SUBJ i , CTRL j }中也将被排序。SUBJ.match_field
CTRL.match_field
证明:考虑一个未由排序的赋值{SUBJ i , CTRL j }。那么你至少有一个反转,即一对赋值{SUBJ i1 , CTRL j1 }和{SUBJ i2 , CTRL j2 }这样SUBJ.match_field
CTRL.match_field
SUBJ.match_field
i1 < SUBJ.match_field
i2,但是
CTRL.match_field
j1 > CTRL.match_field
j2
然后你可以用非倒置的一对替换倒置的对
{SUBJ i1 , CTRL j2 }和{SUBJ i2 , CTRL j1 }
SUBJ.match_field
对于( i1 , i2 ) 和CTRL.match_field
( j1 , j2 )的所有六个相对布局,成本小于或等于反向分配的成本(链接到 Wolfram Alpha)。:证明
有了这个观察,很容易证明下面的动态规划算法提出了最优分配:
- 复制
N
每个主题;订购match_field
- 按顺序控制
match_field
- 准备一个空数组
assignments
大小N * subject.SIZE
- 通过for
mem
memoization准备一个空的 2D数组;将所有元素设置为N * subject.SIZE
control.SIZE
-1
Recursive_Assign
在下面的伪代码中定义的调用
- 该
assignments
表现在包含在、 包含 和、 不包含之间位置N
的每个主题的分配。i
N*i
N*(i+1)
FUNCTION Recursive_Assign
// subjects contains each original subj repeated N times
PARAM subjects : array of int[subjectLength]
PARAM controls: array of int[controlLength]
PARAM mem : array of int[subjectLength,controlLength]
PARAM sp : int // current subject position
PARAM cp : int // current control position
PARAM assign : array of int[subjectLength]
BEGIN
IF sp == subjects.Length THEN RETURN 0 ENDIF
IF mem[sp, cp] > 0 THEN RETURN mem[sp, cp] ENDIF
int res = ABS(subjects[sp] - controls[cp])
+ Recursive_Assign(subjects, controls, mem, sp + 1, cp + 1, assign)
assign[sp] = cp
IF cp+1+subjects.Length-sp < controls.Length THEN
int alt = Recursive_Assign(subjects, controls, mem, sp, cp + 1, assign)
IF alt < res THEN
res = alt
ELSE
assign[sp] = cp
ENDIF
ENDIF
RETURN (mem[sp, cp] = res)
END
这是在 ideone 上使用 C# 实现的上述伪代码。
该算法已准备好在 SQL 中重写为基于集合的算法。试图将其融入原始问题设置(按位置分组并制作主题的多个副本)会给已经相当复杂的过程增加不必要的复杂层,所以我将通过使用表来简化一些事情SQL Server 的值参数。我不确定 DB2 是否提供类似的功能,但如果没有,您应该能够将它们替换为临时表。
下面的存储过程几乎是将上述伪代码直接转录为 SQL Server 的存储过程语法:
CREATE TYPE SubjTableType AS TABLE (row int, id int, match_field int)
CREATE TYPE ControlTableType AS TABLE (row int, id int, match_field int)
CREATE PROCEDURE RecAssign (
@subjects SubjTableType READONLY
, @controls ControlTableType READONLY
, @sp int
, @cp int
, @subjCount int
, @ctrlCount int
) AS BEGIN
IF @sp = @subjCount BEGIN
RETURN 0
END
IF 1 = (SELECT COUNT(1) FROM #MemoTable WHERE sRow=@sp AND cRow=@cp) BEGIN
RETURN (SELECT best FROM #MemoTable WHERE sRow=@sp AND cRow=@cp)
END
DECLARE @res int, @spNext int, @cpNext int, @prelim int, @alt int, @diff int, @sId int, @cId int
SET @spNext = @sp + 1
SET @cpNext = @cp + 1
SET @sId = (SELECT id FROM @subjects WHERE row = @sp)
SET @cId = (SELECT id FROM @controls WHERE row = @cp)
EXEC @prelim = RecAssign @subjects=@subjects, @controls=@controls, @sp=@spNext, @cp=@cpNext, @subjCount=@subjCount, @ctrlCount=@ctrlCount
SET @diff = ABS((SELECT match_field FROM @subjects WHERE row=@sp)-(SELECT match_field FROM @controls WHERE row=@cp))
SET @res = @prelim + @diff
IF 1 = (SELECT COUNT(1) FROM #Assignments WHERE sRow=@sp) BEGIN
UPDATE #Assignments SET cId=@cId, sId=@sId, diff=@diff WHERE sRow=@sp
END
ELSE BEGIN
INSERT INTO #Assignments(sRow, sId, cId, diff) VALUES (@sp, @sId, @cId, @diff)
END
IF @cp+1+@subjCount-@sp < @ctrlCount BEGIN
EXEC @alt = RecAssign @subjects=@subjects, @controls=@controls, @sp=@sp, @cp=@cpNext, @subjCount=@subjCount, @ctrlCount=@ctrlCount
IF @alt < @res BEGIN
SET @res = @alt
END
ELSE BEGIN
UPDATE #Assignments SET cId=@cId, sId=@sId, diff=@diff WHERE sRow=@sp
END
END
INSERT INTO #MemoTable (sRow, cRow, best) VALUES (@sp, @cp, @res)
RETURN @res
END
以下是调用此存储过程的方法:
-- The procedure uses a temporary table for memoization:
CREATE TABLE #MemoTable (sRow int, cRow int, best int)
-- The procedure returns a table with assignments:
CREATE TABLE #Assignments (sRow int, sId int, cId int, diff int)
DECLARE @subj as SubjTableType
INSERT INTO @SUBJ (row, id, match_field) SELECT ROW_NUMBER() OVER(ORDER BY match_field ASC)-1 AS row, id, match_field FROM subjects
DECLARE @ctrl as ControlTableType
INSERT INTO @ctrl (row, id, match_field) SELECT ROW_NUMBER() OVER(ORDER BY match_field ASC)-1 AS row, id, match_field FROM controls
DECLARE @subjCount int
SET @subjCount = (SELECT COUNT(1) FROM subjects)
DECLARE @ctrlCount int
SET @ctrlCount = (SELECT COUNT(1) FROM controls)
DECLARE @best int
EXEC @best = RecAssign
@subjects=@subj
, @controls=@ctrl
, @sp=0
, @cp=0
, @subjCount=@subjCount
, @ctrlCount=@ctrlCount
SELECT @best
SELECT sId, cId, diff FROM #Assignments
上面的调用假定subjects
和controls
都已按位置过滤,并且在进行调用之前已将 的N
副本subjects
插入到表值参数(或 DB2 的临时表)中。
这是sqlfiddle 上的运行演示。