With the following data
create table #ph (product int, [date] date, price int)
insert into #ph select 1, \'20120101\', 1
insert into #ph select 1, \'20120102
I'm new to this forum so hope my contribution is helpful.
If you really don't want to use a CTE (although I think that's probably the best approach), you can get a solution using set-based code. You will need to test the performance of this code!
I have added an extra temp table so that I can use a unique identifier for each record, but I suspect you will already have this column in your source table. So here's the temp table.
-- Build #phwithId: a copy of #ph in which every row carries SaleId, a sequence
-- number that restarts at 1 for each product and follows the date order.
--
-- OBJECT_ID resolves the temp table name for the current session only. A LIKE
-- search on tempdb.sys.tables can also match other sessions' tables (or tables
-- whose names merely start with the same text), which would make the DROP fail.
IF OBJECT_ID('tempdb..#phwithId') IS NOT NULL
    DROP TABLE #phwithId

CREATE TABLE #phwithId
(
    SaleId INT
  , ProductID INT
  , Price Money
  , SaleDate Date
)

-- Explicit column list so the insert does not depend on column order.
-- The source is the temp table #ph created earlier (not a permanent table ph).
INSERT INTO #phwithId (SaleId, ProductID, Price, SaleDate)
SELECT
    ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS SaleId
  , product
  , price
  , [date]
FROM #ph
Now the main body of the Select statement
-- Collapse consecutive rows with the same price into one row per price bucket:
-- (productId, date_from, date_to, Price).
--
-- da1 finds the first row of every bucket (previous row has a different price
-- or does not exist); da2 finds the last row of every bucket (next row has a
-- different price or does not exist). Both are numbered per product, and the
-- n-th start is paired with the n-th end.
--
-- SaleId restarts at 1 for every product, so every join must also match on
-- ProductId; otherwise rows of different products are compared and paired.
SELECT
    da1.ProductId AS productId
  , da1.date_from
  , da2.date_to
  , da1.Price
FROM
(
    SELECT
        dfr.ProductId
      , ROW_NUMBER() OVER (PARTITION BY dfr.ProductId ORDER BY dfr.ChangeDate) AS rowno1
      , dfr.ChangeDate AS date_from
      , dfr.Price
    FROM
    (
        -- Bucket starts: sl2 is the previous row of the same product.
        SELECT
            sl1.ProductId AS ProductId
          , sl1.SaleDate AS ChangeDate
          , sl1.Price
        FROM
            #phwithId sl1
        LEFT JOIN
            #phwithId sl2
            ON  sl1.ProductId = sl2.ProductId
            AND sl1.SaleId = sl2.SaleId + 1
        WHERE
            sl1.Price <> sl2.Price OR sl2.Price IS NULL
    ) dfr
) da1
LEFT JOIN
(
    SELECT
        dto.ProductId
      , ROW_NUMBER() OVER (PARTITION BY dto.ProductId ORDER BY dto.ChangeDate) AS rowno2
      , dto.ChangeDate AS date_to
    FROM
    (
        -- Bucket ends: sl3 is the next row of the same product.
        SELECT
            sl1.ProductId
          , sl1.SaleDate AS ChangeDate
        FROM
            #phwithId sl1
        LEFT JOIN
            #phwithId sl3
            ON  sl1.ProductId = sl3.ProductId
            AND sl1.SaleId = sl3.SaleId - 1
        WHERE
            sl1.Price <> sl3.Price OR sl3.Price IS NULL
    ) dto
) da2
    ON  da1.ProductId = da2.ProductId
    AND da1.rowno1 = da2.rowno2
By joining the data source to itself offset by one record (+ or -), we can identify where the price buckets change, and then it's just a matter of getting the start and end dates for each bucket back into a single record.
All a bit fiddly, and I'm not sure it's going to give better performance, but I enjoyed the challenge.
There's a (more or less) known technique of solving this kind of problem, involving two ROW_NUMBER() calls, like this:
-- For each product, the difference between the row number over all rows and
-- the row number within the same price stays constant across a run of
-- consecutive rows with that price. Grouping by (product, price, grp)
-- therefore yields one row per run, with its first and last date.
WITH numbered AS (
    SELECT
        ph.product,
        ph.[date],
        ph.price,
        ROW_NUMBER() OVER (PARTITION BY ph.product ORDER BY ph.[date])
      - ROW_NUMBER() OVER (PARTITION BY ph.product, ph.price ORDER BY ph.[date]) AS grp
    FROM #ph AS ph
)
SELECT
    n.product,
    MIN(n.[date]) AS date_from,
    MAX(n.[date]) AS date_to,
    n.price
FROM numbered AS n
GROUP BY
    n.product,
    n.price,
    n.grp
ORDER BY
    n.product,
    MIN(n.[date])
Output:
product date_from date_to price
------- ---------- ------------- -----
1 2012-01-01 2012-01-04 1
1 2012-01-05 2012-01-08 2
1 2012-01-09 2012-01-12 1
-- One row per run of consecutive equal prices, per product:
--   marked          flags each row whose price differs from the previous row
--                   of the same product (the first row is always flagged);
--   marked_as_group turns the flags into a running count, so every row of a
--                   run shares the same price_change_group.
WITH marked AS (
    SELECT
        product,
        [date],
        price,
        CASE
            -- LAG without a default returns NULL on the first row of a product;
            -- the comparison is then not true and the row falls into ELSE 1.
            -- This avoids an implicit string-to-number conversion of a ''
            -- default.
            WHEN LAG(price) OVER (PARTITION BY product ORDER BY [date]) = price
                THEN 0
            ELSE 1
        END AS is_price_change
    FROM #ph
),
marked_as_group AS (
    SELECT
        m.product,
        m.[date],
        m.price,
        SUM(m.is_price_change) OVER (
            PARTITION BY m.product
            ORDER BY m.[date]
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS price_change_group
    FROM marked m
)
SELECT
    product,
    date_from = MIN([date]),
    date_to = MAX([date]),
    price = MIN(price)   -- every row of a group has the same price
FROM marked_as_group
GROUP BY
    product,
    price_change_group
ORDER BY
    product,
    date_to
One solution I have come up with which is relatively "clean" is:
-- Number the rows of each product in date order, then keep only the rows whose
-- immediate successor (same product) does not have the same price. For each
-- kept row, date_to is the date of the next row of that product, or NULL when
-- there is no later row.
;WITH cte_sort (product, [date], price, [row]) AS
(
    SELECT
        product,
        [date],
        price,
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS [row]
    FROM #ph
)
SELECT
    cur.product,
    cur.[date] AS date_from,
    nxt.[date] AS date_to,
    cur.price
FROM cte_sort AS cur
OUTER APPLY
(
    -- date of the nearest later row of the same product
    SELECT TOP 1 later.[date]
    FROM cte_sort AS later
    WHERE later.product = cur.product
      AND later.[row] > cur.[row]
    ORDER BY later.[row]
) AS nxt
WHERE NOT EXISTS
(
    -- an immediately following row with an unchanged price
    SELECT 1
    FROM cte_sort AS same_price
    WHERE same_price.product = cur.product
      AND same_price.[row] = cur.[row] + 1
      AND same_price.price = cur.price
)
ORDER BY cur.[date]
I used a CTE with row_number because, unlike an approach based on functions such as dateadd, it does not break when some dates are missing. You obviously only need the outer apply if you want to have the date_to column (which I do).
This solution does solve my problem, I am however having a slight issue getting it to perform as quickly as I'd like on my table of 5 million rows.
-- Returns the column names of the given table as a single string, in
-- ordinal_position order, each name followed by one space.
-- Returns NULL when information_schema.columns has no row for @table_name.
--
-- @table_name: table name to look up (matched on table_name only, no schema
--              filter). Declared as nvarchar(128) so that names longer than
--              55 characters are not silently truncated.
Create function [dbo].[AF_TableColumns](@table_name nvarchar(128))
returns nvarchar(4000) as
begin
    declare @str nvarchar(4000)

    -- Concatenating with "select @str = ... + @str ... order by" is not a
    -- guaranteed behaviour in SQL Server; FOR XML PATH('') concatenates the
    -- rows in the stated ORDER BY. The TYPE directive plus .value() returns
    -- the text without XML entity escaping.
    select @str =
    (
        select cast(rtrim(ltrim(column_name)) as nvarchar(500)) + N' '
        from information_schema.columns
        where table_name = @table_name
        group by table_name, column_name, ordinal_position
        order by ordinal_position
        for xml path(''), type
    ).value('.', 'nvarchar(max)')

    return @str
end
--select dbo.AF_TableColumns('YourTable') Select * from YourTable