这是我过去的做法。这是一个两步过程:
- 构建候选连续周期集
- 如果存在相互重叠的周期,则只保留其中最长的一个,删除其余的。
下面的脚本演示了具体做法。您也许可以用一个[庞大而丑陋的]查询完成它,但尝试那样做会让我头疼。我使用临时表,因为这样调试起来容易得多。
-----------------------------------------------------------------------
-- Input fixture: one row per (id, day).
-- The drop is guarded so the script runs without error both on a fresh
-- session (table does not exist yet) and when re-run in the same session.
-----------------------------------------------------------------------
if object_id('tempdb..#source') is not null
    drop table #source
go
create table #source
(
    id    int      not null ,
    dtCol datetime not null ,
    -----------------------------------------------------------------------
    -- ASSUMPTION 1: Each date must be unique for a given ID value.
    -----------------------------------------------------------------------
    unique clustered ( id , dtCol ) ,
    -----------------------------------------------------------------------
    -- ASSUMPTION 2: The datetime column only represents a day.
    -- The value of the time component is always 00:00:00.000
    -- (round-tripping through style 112, 'yyyymmdd', strips any time part,
    -- so the check only passes when there was none to begin with).
    -----------------------------------------------------------------------
    check ( dtCol = convert(datetime,convert(varchar(8),dtCol,112),112) )
)
go
--
-- Sample data. Date literals use the 'yyyymmdd' form, which SQL Server
-- parses the same way regardless of the session's LANGUAGE / DATEFORMAT
-- settings, and every insert names its target columns explicitly.
--
-- Contiguous runs present in this data:
--   id 1      : Jan 1        | Jan 4 - Jan 5
--   id 2      : Jan 1 - Jan 3 | Jan 5
--   id 3      : Jan 1
--   id 4      : Jan 1 - Jan 4
--
insert #source ( id , dtCol ) values ( 1 , '20110101' )
insert #source ( id , dtCol ) values ( 1 , '20110104' )
insert #source ( id , dtCol ) values ( 1 , '20110105' )
insert #source ( id , dtCol ) values ( 2 , '20110101' )
insert #source ( id , dtCol ) values ( 2 , '20110102' )
insert #source ( id , dtCol ) values ( 2 , '20110103' )
insert #source ( id , dtCol ) values ( 2 , '20110105' )
insert #source ( id , dtCol ) values ( 3 , '20110101' )
insert #source ( id , dtCol ) values ( 4 , '20110101' )
insert #source ( id , dtCol ) values ( 4 , '20110102' )
insert #source ( id , dtCol ) values ( 4 , '20110103' )
insert #source ( id , dtCol ) values ( 4 , '20110104' )
go
--
--   id 200236 : Jan 2 - Jan 3 | Jan 5 - Jan 13 | Jan 15 - Jan 17
--
insert #source ( id , dtCol ) values ( 200236 , '20110102' )
insert #source ( id , dtCol ) values ( 200236 , '20110103' )
insert #source ( id , dtCol ) values ( 200236 , '20110105' )
insert #source ( id , dtCol ) values ( 200236 , '20110106' )
insert #source ( id , dtCol ) values ( 200236 , '20110107' )
insert #source ( id , dtCol ) values ( 200236 , '20110108' )
insert #source ( id , dtCol ) values ( 200236 , '20110109' )
insert #source ( id , dtCol ) values ( 200236 , '20110110' )
insert #source ( id , dtCol ) values ( 200236 , '20110111' )
insert #source ( id , dtCol ) values ( 200236 , '20110112' )
insert #source ( id , dtCol ) values ( 200236 , '20110113' )
insert #source ( id , dtCol ) values ( 200236 , '20110115' )
insert #source ( id , dtCol ) values ( 200236 , '20110116' )
insert #source ( id , dtCol ) values ( 200236 , '20110117' )
go
-----------------------------------------------------------------------
-- Work table: one row per candidate contiguous date range for an id.
-- The drop is guarded so the script runs without error both on a fresh
-- session and when re-run in the same session.
-----------------------------------------------------------------------
if object_id('tempdb..#candidate_range') is not null
    drop table #candidate_range
go
create table #candidate_range
(
    rowId  int      not null identity(1,1) ,
    id     int      not null ,
    dtFrom datetime not null ,            -- first day of the range (inclusive)
    dtThru datetime not null ,            -- last day of the range (inclusive)
    -- number of days covered; both endpoints count, hence the 1+
    length as 1+datediff(day,dtFrom,dtThru) ,
    -- identifier case matches the column declaration so the DDL also
    -- compiles when tempdb uses a case-sensitive collation
    primary key nonclustered ( rowId ) ,
    unique clustered ( id , dtFrom , dtThru )
)
go
--
-- Seed the candidate range table with the set of all possible contiguous
-- ranges for each id.
--
-- Every (from, thru) pair of dates belonging to the same id is considered,
-- including from = thru (a one-day range). A pair is contiguous when the
-- number of calendar days it spans equals the number of #source rows that
-- fall inside it; because (id, dtCol) is unique and dtCol carries no time
-- part, equality means no day in the span is missing.
--
-- The ORDER BY only determines the order in which identity values (rowId)
-- are handed out.
--
insert #candidate_range ( id , dtFrom , dtThru )
select tFrom.id    as id ,
       tFrom.dtCol as dtFrom ,
       tThru.dtCol as dtThru
from #source tFrom
inner join #source tThru
        on tThru.id    =  tFrom.id
       and tThru.dtCol >= tFrom.dtCol
where 1+datediff(day,tFrom.dtCol,tThru.dtCol) = ( select count(*)
                                                  from #source t
                                                  where t.id = tFrom.id
                                                    and t.dtCol between tFrom.dtCol and tThru.dtCol
                                                )
order by tFrom.id ,
         tFrom.dtCol ,
         tThru.dtCol
go
--
-- Compare the table to itself. If we find overlapping periods,
-- we'll keep the longest such period and delete the shorter overlapping periods.
--
-- A row is deleted when some other range for the same id is strictly longer
-- and overlaps it (the two inclusive intervals share at least one day).
-- The strict length comparison already prevents a row from matching itself,
-- so no separate rowId inequality is needed.
--
delete shorter
from #candidate_range longer
inner join #candidate_range shorter
        on shorter.id     =  longer.id
       and shorter.length <  longer.length
       and shorter.dtFrom <= longer.dtThru
       and shorter.dtThru >= longer.dtFrom
go
这就是它的全部内容。