在任何支持 ROW_NUMBER() 的数据库中,仅用纯 SQL(无需存储过程或用户自定义函数)就完全可以实现。下面是一个 SQL Server 2008 实现(附 SQL Fiddle 演示)。
-- Collapse the dates in t into per-id ranges of same-day or next-day dates,
-- using only ROW_NUMBER() and self-joins (no LAG()/LEAD() needed).
--
-- Two-row helper table: joining a date to it lets a single date produce both
-- a START row and a STOP row when that date forms a one-day range by itself.
WITH events AS (
    SELECT 'start' AS event
    UNION ALL
    SELECT 'stop' AS event
),
-- Number each id's dates in ascending order (partitioned by id) so that
-- neighbouring dates can be found by joining on adjacent row numbers.
sorted_dates AS (
    SELECT
        id,
        dt,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt) AS sorted_rownum
    FROM t
),
-- Keep only the dates that open or close a range, and renumber them so that
-- every START row is immediately followed by its STOP row.
-- Ordering by e.event as the tie-breaker puts 'start' ahead of 'stop' when one
-- date is both the start and the end of a range.
pruned_dates AS (
    SELECT
        cur.id,
        e.event,
        cur.dt,
        ROW_NUMBER() OVER (
            PARTITION BY cur.id
            ORDER BY cur.sorted_rownum, e.event
        ) AS pruned_rownum
    FROM sorted_dates AS cur
    -- The immediately preceding row, matched only when it falls on the same
    -- day or one day earlier.
    LEFT JOIN sorted_dates AS prev
        ON prev.id = cur.id
        AND prev.sorted_rownum = cur.sorted_rownum - 1
        AND DATEDIFF(day, prev.dt, cur.dt) <= 1
    -- The immediately following row, matched only when it falls on the same
    -- day or one day later.
    LEFT JOIN sorted_dates AS nxt
        ON nxt.id = cur.id
        AND nxt.sorted_rownum = cur.sorted_rownum + 1
        AND DATEDIFF(day, cur.dt, nxt.dt) <= 1
    -- No close predecessor -> this date is a START.
    -- No close successor   -> this date is a STOP.
    -- A date with both neighbours matches neither event and is dropped by the
    -- inner join; a date with neither neighbour matches both and yields 2 rows.
    INNER JOIN events AS e
        ON (prev.id IS NULL AND e.event = 'start')
        OR (nxt.id IS NULL AND e.event = 'stop')
)
-- Pair each START row with the row numbered right after it (its STOP row)
-- and report the range.
SELECT
    range_start.id,
    range_start.dt AS from_date,
    range_stop.dt AS to_date
FROM pruned_dates AS range_start
INNER JOIN pruned_dates AS range_stop
    ON range_stop.id = range_start.id
    AND range_stop.pruned_rownum = range_start.pruned_rownum + 1
WHERE range_start.event = 'start';
在支持 LEAD() 和 LAG() 的数据库中,解决方案更简单、也更高效。下面是 SQL Server 2012 实现(附 SQL Fiddle 演示)。
-- Collapse the dates in t into per-id ranges of same-day or next-day dates,
-- using LAG()/LEAD() instead of self-joins.
--
-- Two-row helper table: joining a date to it lets a single date produce both
-- a START row and a STOP row when that date forms a one-day range by itself.
WITH events AS (
    SELECT 'start' AS event
    UNION ALL
    SELECT 'stop' AS event
),
-- Attach to each date the previous and next date within the same id.
-- When there is no previous/next row, a far-away sentinel date is substituted
-- so that the "> 1 day" tests below classify the row as a range boundary.
-- The sentinels are written as YYYYMMDD, which SQL Server interprets the same
-- way under every DATEFORMAT / language setting; the MM/DD/YYYY form is
-- setting-dependent ('12/31/9999' is rejected when DATEFORMAT is dmy).
-- NOTE(review): assumes dt's type can hold both sentinel values and that real
-- data lies more than one day inside them -- confirm against t's definition.
dates AS (
    SELECT
        id,
        dt,
        LAG(dt, 1, '19000101') OVER (PARTITION BY id ORDER BY dt) AS prev_dt,
        LEAD(dt, 1, '99991231') OVER (PARTITION BY id ORDER BY dt) AS next_dt
    FROM t
),
-- Discard dates whose previous AND next dates are both within one day; they
-- are interior to a range. Tag every remaining date as START and/or STOP
-- (a date that is both becomes two rows), then use LEAD() to fetch the dt of
-- the next remaining row as to_date.
-- Ordering by event as the tie-breaker puts 'start' ahead of 'stop' when both
-- rows share the same dt.
-- At this point rows tagged 'stop' carry a meaningless to_date (NULL or the
-- next range's start); they must stay here so that LEAD() on the 'start' rows
-- sees the matching STOP row, and are removed by the final filter.
pruned_dates AS (
    SELECT
        d.id,
        e.event,
        d.dt AS from_date,
        LEAD(d.dt, 1) OVER (PARTITION BY d.id ORDER BY d.dt, e.event) AS to_date
    FROM dates AS d
    INNER JOIN events AS e
        ON (e.event = 'start' AND DATEDIFF(day, d.prev_dt, d.dt) > 1)
        OR (e.event = 'stop' AND DATEDIFF(day, d.dt, d.next_dt) > 1)
)
-- Keep only the rows that have a START in from_date and its STOP in to_date.
SELECT
    id,
    from_date,
    to_date
FROM pruned_dates
WHERE event = 'start'