我正在使用一组日期间隔,其中每个间隔都有一个版本号,新间隔将经常与旧间隔重叠,甚至是它们的子集。根据这些数据,我需要计算一组新的间隔,以显示每个时间点的最新版本号。这个问题有基于集合的解决方案吗?
这是一个插图:
Interval 1: 11111111111111111111111
Interval 2: 2222222222
Interval 3: 33333333333333
Interval 4: 444444444
Interval 5: 555555555
Result : 11333333333333331155555555544
这是我正在使用的数据示例:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 1/1/2011 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 12/31/2012 6
1 10/1/2012 11/1/2012 8
...以及所需的输出:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 10/1/2010 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2011 10/1/2012 6
1 10/1/2012 11/1/2012 8 << note how version 8 supersedes version 6
1 11/1/2012 12/31/2012 6 << version 6 is split into two records
我还没有找到任何其他关于这个问题的例子,我的谷歌搜索只找到了识别间隙和岛屿或覆盖集的查询。
我想我有一个迭代解决方案(SQL Server 2008)。它从结果集中间隔的临时表开始,并通过插入具有特殊版本号的记录来定义我们想要覆盖的范围的起点和终点。然后,它反复识别结果集间隔之间的间隙,并尝试用原始数据集中的最新记录填充它们,直到没有更多的间隙或没有更多的记录要添加:
GO
-- Create data set and results table
CREATE TABLE #Data (
groupId INT
,startDate DATE
,endDate DATE
,versionId INT
)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2007-12-22', '2008-12-22', 8)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2008-12-22', '2009-12-22', 9)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2009-12-22', '2010-12-22', 10)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2010-12-22', '2011-12-22', 11)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-01-01', '2011-11-30', 500)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-12-22', '2012-12-22', 12)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 13)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 14)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 17)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 19)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-01-01', '2011-01-01', 1)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-10-01', '2011-07-05', 2)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2011-07-05', '2012-08-13', 3)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-08-13', '2012-12-31', 6)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-10-01', '2012-11-01', 8)
CREATE TABLE #Results (
groupId VARCHAR(10)
,startDate DATE
,endDate DATE
,versionId BIGINT
)
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999
INSERT #Results
SELECT DISTINCT
groupId
,CASE WHEN MIN(startDate) < @startDate THEN MIN(startDate) ELSE @startDate END
,CASE WHEN MIN(startDate) < @startDate THEN @startDate ELSE MIN(startDate) END
,@placeholderId
FROM #data
GROUP BY groupId
UNION ALL
SELECT DISTINCT
groupId
,CASE WHEN MAX(endDate) < @endDate THEN MAX(endDate) ELSE @endDate END
,CASE WHEN MAX(endDate) < @endDate THEN @endDate ELSE MAX(endDate) END
,@placeholderId
FROM #data
GROUP BY groupId
GO
-- Fill gaps in results table
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20111231'
SET @placeholderId = 999999999999999
DECLARE @counter INT
SET @counter = 0
WHILE @counter < 10
BEGIN
SET @counter = @counter + 1;
WITH Gaps AS (
SELECT
gs.groupId
,gs.startDate
,MIN(ge.endDate) as endDate
,ROW_NUMBER() OVER (ORDER BY gs.groupId, gs.startDate) as gapId
FROM (
SELECT groupId, endDate as startDate
FROM #Results r1
WHERE NOT EXISTS (
SELECT *
FROM #Results r2
WHERE r2.groupId = r1.groupId
AND r2.versionId <> r1.versionId
AND r2.startDate <= r1.endDate
AND r2.endDate > r1.endDate
)
AND NOT (endDate >= @endDate AND versionId = @placeholderId)
) gs
INNER JOIN (
SELECT groupId, startDate as endDate
FROM #Results r1
WHERE NOT EXISTS (
SELECT *
FROM #Results r2
WHERE r2.groupId = r1.groupId
AND r2.versionId <> r1.versionId
AND r2.endDate >= r1.startDate
AND r2.startDate < r1.startDate
)
AND NOT (startDate <= @startDate AND versionId = @placeholderId)
) ge
ON ge.groupId = gs.groupId
AND ge.endDate >= gs.startDate
GROUP BY gs.groupId, gs.startDate
)
INSERT #Results (
groupId
,startDate
,endDate
,versionId
)
SELECT
d.groupId
,CASE WHEN d.startDate < g.startDate THEN g.startDate ELSE d.startDate END
,CASE WHEN d.endDate > g.endDate THEN g.endDate ELSE d.endDate END
,d.versionId
FROM #Data d
INNER JOIN Gaps g
ON g.groupId = d.groupId
AND g.startDate <= d.endDate
AND g.endDate >= d.startDate
INNER JOIN (
SELECT
d.groupId
,gapId
,MAX(d.versionId) as versionId
FROM #Data d
INNER JOIN Gaps g
ON g.groupId = d.groupId
AND g.startDate <= d.endDate
AND g.endDate >= d.startDate
WHERE d.versionId < (
SELECT MIN(versionId)
FROM #Results r
WHERE r.groupId = d.groupId
AND (r.startDate = g.endDate OR r.endDate = g.startDate)
)
AND NOT EXISTS (
SELECT *
FROM #Data dsup
WHERE dsup.groupId = d.groupId
AND dsup.versionId > d.versionId
AND dsup.startDate <= d.startDate
AND dsup.endDate >= d.endDate
)
GROUP BY
d.groupId
,g.gapId
) mg
ON mg.groupId = g.groupId
AND mg.gapId = g.gapId
AND mg.versionId = d.versionId
END
SELECT *
FROM #Results
WHERE versionId <> @placeholderId
order by groupId, startDate
基于集合的解决方案会更有用,但我一直在努力寻找。有任何想法吗?