问题
I've a table, which describes work slices of a business working calendar: (date format is 24 hours format)
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 02:00 | 2012/07/21 04:00
2 | 2012/07/21 03:00 | 2012/07/21 10:00
3 | 2012/07/21 06:00 | 2012/07/21 17:00
4 | 2012/07/21 18:00 | 2012/07/21 19:00
Now, I like to merge the date ranges (within a given start and end date) like this:
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 02:00 | 2012/07/21 17:00
2 | 2012/07/21 18:00 | 2012/07/21 19:00
Is there a way to do this with SQL97 standard? If so, what is with other operations (e.g. if I want to to an invered merge, the result should be
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 00:00 | 2012/07/21 02:00
2 | 2012/07/21 19:00 | 2012/07/22 00:00
回答1:
Here's an example using SQL Server syntax. First it determines the "heads", or rows that have no previous overlapping rows. To determine the last "child" of a "head", it looks for the last row that is smaller than the next "head". Here's the SQL:
-- Merge overlapping ranges of YourTable into islands (SQL Server syntax).
--
-- heads: every range whose StartDate is not strictly inside a range that
-- starts earlier, numbered in StartDate order. Comparisons are strict, so a
-- range that merely touches the previous one (StartDate = previous EndDate)
-- still counts as a head.
-- Only StartDate is projected: if the table has its own PK column (as in the
-- question's sample), "row_number() ... as PK, *" would give the CTE two
-- columns named PK, which SQL Server rejects.
; with heads as
(
    select  row_number() over (order by head.StartDate) as PK
    ,       head.StartDate
    from    YourTable head
    where   not exists
            (
            select  *
            from    YourTable prev
            where   prev.StartDate < head.StartDate
                    and head.StartDate < prev.EndDate
            )
)
-- For each head, take the latest EndDate among the rows that start at or
-- after this head and before the next head (nh); the last head has no nh.
select  row_number() over (order by h.StartDate) as PK
,       h.StartDate
,       max(yt.EndDate) as EndDate
from    heads h
left join
        heads nh
on      nh.PK = h.PK + 1
left join
        YourTable yt
on      h.StartDate <= yt.StartDate
        and (yt.StartDate < nh.StartDate or nh.StartDate is null)
group by
        h.StartDate
Live example at SQL Fiddle.
回答2:
This should do the trick in most SQL-92 supporting DBMSes. No advanced SQL Syntax here.
The performance may not be so good because it has to join the same table 4 times. If using DBMS-specific syntax is an option you'll probably be able to get much better performance.
-- Merge overlapping ranges of dbo.Dates using only SQL-92 constructs
-- (correlated subqueries; no CTEs or window functions).
--
-- Outer query: a row opens an island when its StartDate is not strictly
-- inside any other range.
-- Scalar subquery: the island's end is the earliest "closing" EndDate at or
-- after the row's own EndDate, where an EndDate closes an island when it is
-- not strictly inside any other range.
--
-- The previous predicates asked whether another range's boundary fell inside
-- the current row, which mis-handled a range fully contained in another
-- (e.g. 01:00-10:00 and 03:00-07:00 produced 03:00-07:00).
-- Comparisons are strict, so ranges that merely touch are kept separate.
-- DISTINCT is required because several rows can share the same island start.
SELECT DISTINCT
    D.StartDate,
    (
        SELECT Min(E.EndDate)
        FROM dbo.Dates E
        WHERE
            E.EndDate >= D.EndDate
            AND NOT EXISTS (
                SELECT *
                FROM dbo.Dates E2
                WHERE
                    E2.StartDate < E.EndDate
                    AND E.EndDate < E2.EndDate
            )
    ) EndDate
FROM
    dbo.Dates D
WHERE
    NOT EXISTS (
        SELECT *
        FROM dbo.Dates D2
        WHERE
            D2.StartDate < D.StartDate
            AND D.StartDate < D2.EndDate
    );
See a Sql Fiddle for this same query working in several different RDBMSes:
- SQL Server 2008
- Oracle 11g R2
- PostGre 8.3.20
Update
Here's a new query that still doesn't do recursion, and only scans the table once. It does have two sorts, which are the most expensive part of the query (88% of the cost in this sample with just a few rows). However, do not underestimate the benefit of doing fewer reads, and not having to join... sometimes queries like this can kick major butt.
-- Merge overlapping (or touching) ranges in a single scan, without recursion
-- or self-joins (SQL Server 2012+ window functions).
WITH Data AS (
    -- Inline sample data; replace with the real table.
    SELECT
        StartDate = Convert(datetime, StartDate),
        EndDate = Convert(datetime, EndDate)
    FROM (VALUES
        ('02:00', '04:00'), ('03:00', '10:00'), (' 09:00', '12:00'), (' 11:00', '17:00'), (' 18:00', '19:00')
    ) D (StartDate, EndDate)
), LeadLag AS (
    SELECT
        -- Latest end among ALL earlier rows, not just the previous one, so a
        -- short range nested inside a long one does not open a false island.
        -- The sentinel covers the first row, which has no predecessor.
        PrevEndDate = Coalesce(
            Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
            Convert(datetime2, '00010101')
        ),
        -- Latest end seen up to and including this row.
        RunningEndDate = Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
        -- Start of the following row; the sentinel covers the last row.
        NextStartDate = Coalesce(Lead(StartDate) OVER (ORDER BY StartDate, EndDate), Convert(datetime2, '99991231')),
        *
    FROM Data
), Dates AS (
    -- Keep only island boundaries: StartDate is set on rows that open an
    -- island, EndDate on rows after which the island closes.
    SELECT
        X.*
    FROM
        LeadLag
        CROSS APPLY (
            SELECT
                StartDate = CASE WHEN PrevEndDate < StartDate THEN StartDate ELSE NULL END,
                EndDate = CASE WHEN RunningEndDate < NextStartDate THEN RunningEndDate ELSE NULL END
        ) X
    WHERE
        X.StartDate IS NOT NULL
        OR X.EndDate IS NOT NULL
), Final AS (
    -- Pair each island start with the first island end at or after it.
    -- Rows are ordered by their position on the timeline (start rows before
    -- end rows on a tie); ordering by EndDate alone would sort every
    -- NULL-end start row first and hand them all the smallest end.
    SELECT
        StartDate,
        EndDate = Min(EndDate) OVER (
            ORDER BY Coalesce(StartDate, EndDate), CASE WHEN StartDate IS NULL THEN 1 ELSE 0 END
            ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
        )
    FROM Dates
)
SELECT
    StartDate,
    EndDate
FROM Final
WHERE StartDate IS NOT NULL
;
回答3:
This is my solution.
-- Recreate the scratch table holding the sample ranges.
-- AutoId gives every row a unique key so that rows can be told apart later.
IF OBJECT_ID('tempdb..#tblDates') IS NOT NULL
    DROP TABLE #tblDates

CREATE TABLE #tblDates
(
    AutoId    INT IDENTITY,
    StartDate DATE,
    EndDate   DATE
)

-- First cluster: four overlapping November ranges.
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-08')
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-07', '2014-11-10')
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-06', '2014-11-12')
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-15')
-- Second cluster: three chained December ranges.
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-10', '2014-12-13')
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-12', '2014-12-15')
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-14', '2014-12-16')
-- Optional step 1: collapse exact duplicates, keeping the row with the
-- highest AutoId for each (StartDate, EndDate) pair.
DELETE FROM #tblDates
WHERE AutoId NOT IN (SELECT MAX(AutoId) FROM #tblDates GROUP BY StartDate, EndDate);

-- Optional step 2: among rows sharing a StartDate, keep only those with the
-- latest EndDate.
DELETE d1
FROM #tblDates d1
INNER JOIN (
    SELECT x.StartDate, MAX(x.EndDate) AS MAXEndDate
    FROM #tblDates x
    GROUP BY x.StartDate
) d2
    ON d2.StartDate = d1.StartDate
    AND d2.MAXEndDate <> d1.EndDate;

-- Optional step 3: among rows sharing an EndDate, keep only those with the
-- earliest StartDate.
DELETE d1
FROM #tblDates d1
INNER JOIN (
    SELECT x.EndDate, MIN(x.StartDate) AS MINStartDate
    FROM #tblDates x
    GROUP BY x.EndDate
) d2
    ON d2.EndDate = d1.EndDate
    AND d2.MINStartDate <> d1.StartDate;

-- Optional step 4: remove every range (c) that lies entirely inside another
-- range (p).
DELETE c
FROM #tblDates p
INNER JOIN #tblDates c
    ON c.AutoId <> p.AutoId
    AND c.StartDate BETWEEN p.StartDate AND p.EndDate
    AND c.EndDate BETWEEN p.StartDate AND p.EndDate
    -- Exact duplicates contain each other; delete only the later copy so that
    -- one survives even when step 1 has been skipped.
    AND (c.StartDate <> p.StartDate OR c.EndDate <> p.EndDate OR c.AutoId > p.AutoId);
-- Merge overlapping (or touching) ranges of #tblDates with a recursive CTE.
;WITH Ranges AS
(
    -- Anchor: ranges whose StartDate is not covered by any range that starts
    -- earlier; these open the merged ranges.
    SELECT s.StartDate, s.EndDate
    FROM #tblDates s
    WHERE NOT EXISTS (
        SELECT *
        FROM #tblDates a
        WHERE a.AutoId <> s.AutoId
            AND s.StartDate BETWEEN a.StartDate AND a.EndDate
            AND s.StartDate <> a.StartDate
    )
    UNION ALL
    -- Recursive step: whenever the current end falls inside a range that ends
    -- later, extend the merged range to that later end.
    SELECT r.StartDate, d.EndDate
    FROM Ranges r
    INNER JOIN #tblDates d
        ON r.EndDate <> d.EndDate
        AND r.EndDate BETWEEN d.StartDate AND d.EndDate
)
-- Each start may have been extended along several paths; keep the furthest end.
SELECT StartDate, MAX(EndDate) AS EndDate
FROM Ranges
GROUP BY StartDate
-- Lift the default limit of 100 recursion levels so long chains of
-- overlapping ranges do not abort the statement. Every step strictly
-- increases EndDate, so the recursion always terminates.
OPTION (MAXRECURSION 0);
回答4:
Based on ErikE response :
-- Recreate the demo table of periods.
IF OBJECT_ID('dbo.Periods') IS NOT NULL
    DROP TABLE Periods
GO

CREATE TABLE Periods
(
    StartDate DATE NOT NULL,
    EndDate   DATE NOT NULL
)
GO

-- Sample rows: a nested pair (1980), an overlapping chain (2000) and a
-- one-day period (2021).
INSERT INTO Periods (StartDate, EndDate)
VALUES
    ('1980-01-01', '1980-01-10'),
    ('1980-01-03', '1980-01-07'),
    ('2000-01-01', '2000-01-10'),
    ('2000-01-05', '2000-01-30'),
    ('2000-01-12', '2000-01-20'),
    ('2021-01-01', '2021-01-01')
GO
-- Merge overlapping (or touching) periods using window functions only
-- (SQL Server 2012+).
; WITH LeadLag AS (
    SELECT
        -- Position of the row in (StartDate, EndDate) order; the EndDate
        -- tie-break keeps the numbering deterministic for equal starts.
        rownum = Row_Number() OVER (ORDER BY StartDate, EndDate),
        -- Latest end among ALL earlier rows. Looking only at the previous row
        -- would open a false group after a short period nested in a long one
        -- (e.g. 1-10, 2-3, 4-5). The sentinel covers the first row.
        PrevEndDate = Coalesce(
            Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
            Convert(datetime2, '0001-01-01')
        ),
        p.*
    FROM Periods p
), Dates AS (
    -- Keep StartDate only on rows that open a new group; others get NULL.
    SELECT
        StartDate = CASE WHEN PrevEndDate < StartDate THEN StartDate ELSE NULL END,
        EndDate,
        rownum
    FROM LeadLag
), startGrouping AS (
    -- Carry the most recent group start forward onto the rows that follow it.
    SELECT
        StartDate = Max(StartDate) OVER (ORDER BY rownum ROWS UNBOUNDED PRECEDING),
        EndDate,
        rownum
    FROM Dates
), groups AS (
    -- Rank the rows of each group so that the latest EndDate comes first.
    SELECT
        StartDate,
        EndDate,
        rownum,
        ingroupRownum = Row_Number() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
    FROM startGrouping
)
SELECT
    StartDate,
    EndDate
FROM groups
WHERE ingroupRownum = 1
来源:https://stackoverflow.com/questions/11653255/sql-merge-date-ranges