Group data by the change of grouping column value in order

后端未结

关注

 5  881

With the following data

create table #ph (product int, [date] date, price int)
insert into #ph select 1, \'20120101\', 1
insert into #ph select 1, \'20120102


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  面向向阳花        
                
              
                            
                2020-12-14 04:01
              
            
            
                                                                       
I'm new to this forum so hope my contribution is helpful.

If you really don't want to use a CTE (although I think that's probably the best approach), you can get a solution using set-based code. You will need to test the performance of this code!

I have added an extra temp table so that I can use a unique identifier for each record, but I suspect you will already have this column in your source table. So here's the temp table.

    -- Working copy of #ph with a per-product sequence number (SaleId) that
    -- follows date order, so that neighbouring rows can be paired by SaleId +/- 1.
    -- NOTE: SaleId restarts at 1 for every product, so it is unique only
    -- together with ProductID.

    -- OBJECT_ID resolves the temp table of the current session; a LIKE search
    -- on tempdb.sys.tables can also match another session's #phwithId.
    IF OBJECT_ID('tempdb..#phwithId') IS NOT NULL
        DROP TABLE #phwithId;

    CREATE TABLE #phwithId
    (
        SaleId INT
        , ProductID INT
        , Price Money
        , SaleDate Date
    );

    -- Explicit column list keeps the insert correct if the table definition
    -- is ever reordered. The source is the temp table #ph, not a permanent "ph".
    INSERT INTO #phwithId (SaleId, ProductID, Price, SaleDate)
    SELECT
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS SaleId
        , product
        , price
        , [date]
    FROM #ph;


Now the main body of the Select statement

    -- Collapses consecutive rows of #phwithId that share a price into one row
    -- per run: (ProductId, date_from, date_to, Price).
    --
    -- da1 holds the first row of every run (price differs from the previous
    -- row of the same product, or there is no previous row).
    -- da2 holds the last row of every run (price differs from the next row of
    -- the same product, or there is no next row).
    -- Both sets are numbered per product in date order, so the n-th start of a
    -- product pairs with the n-th end of the same product.
    --
    -- SaleId restarts at 1 for each product, so every self-join and the final
    -- join must also match on ProductId; otherwise rows of different products
    -- are compared with each other.
    SELECT
        da1.ProductId
        , da1.date_from
        , da2.date_to
        , da1.Price
    FROM
        (
            SELECT
                dfr.ProductId
                , ROW_NUMBER() OVER (PARTITION BY dfr.ProductId ORDER BY dfr.ChangeDate) AS rowno1
                , dfr.ChangeDate AS date_from
                , dfr.Price
            FROM
                (
                    -- sl2 is the row immediately before sl1 for the same product.
                    SELECT
                        sl1.ProductId AS ProductId
                        , sl1.SaleDate AS ChangeDate
                        , sl1.Price
                    FROM
                        #phwithId sl1
                    LEFT JOIN
                        #phwithId sl2
                        ON sl1.ProductId = sl2.ProductId
                        AND sl1.SaleId = sl2.SaleId + 1
                    WHERE
                        sl1.Price <> sl2.Price OR sl2.Price IS NULL
                ) dfr
        ) da1
    LEFT JOIN
        (
            SELECT
                dto.ProductId
                , ROW_NUMBER() OVER (PARTITION BY dto.ProductId ORDER BY dto.ChangeDate) AS rowno2
                , dto.ChangeDate AS date_to
            FROM
                (
                    -- sl3 is the row immediately after sl1 for the same product.
                    SELECT
                        sl1.ProductId
                        , sl1.SaleDate AS ChangeDate
                    FROM
                        #phwithId sl1
                    LEFT JOIN
                        #phwithId sl3
                        ON sl1.ProductId = sl3.ProductId
                        AND sl1.SaleId = sl3.SaleId - 1
                    WHERE
                        sl1.Price <> sl3.Price OR sl3.Price IS NULL
                ) dto
        ) da2
        ON da1.ProductId = da2.ProductId
        AND da1.rowno1 = da2.rowno2


By joining the data source to itself offset by 1 record (+ or -), we can identify when the price buckets change, and then it's just a matter of getting the start and end dates for the buckets back into a single record.

All a bit fiddly, and I'm not sure it's going to give better performance, but I enjoyed the challenge.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  耶瑟儿～        
                
              
                            
                2020-12-14 04:07
              
            
            
                                                                       
There's a (more or less) known technique of solving this kind of problem, involving two ROW_NUMBER() calls, like this:

-- One output row per run of consecutive dates with the same price, per product.
-- run_key is the difference between a row's position among all rows of the
-- product and its position among the rows of the product with the same price
-- (both in date order). Rows are then grouped by product, price and run_key,
-- and each group reports its earliest and latest date.
SELECT
  runs.product,
  MIN(runs.[date]) AS date_from,
  MAX(runs.[date]) AS date_to,
  runs.price
FROM (
  SELECT
    src.product,
    src.[date],
    src.price,
    ROW_NUMBER() OVER (PARTITION BY src.product ORDER BY src.[date])
      - ROW_NUMBER() OVER (PARTITION BY src.product, src.price ORDER BY src.[date]) AS run_key
  FROM #ph AS src
) AS runs
GROUP BY
  runs.product,
  runs.price,
  runs.run_key
ORDER BY
  runs.product,
  MIN(runs.[date])


Output:

product  date_from   date_to        price 
-------  ----------  -------------  ----- 
1        2012-01-01  2012-01-04     1     
1        2012-01-05  2012-01-08     2     
1        2012-01-09  2012-01-12     1     

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  星月不相逢        
                
              
                            
                2020-12-14 04:07
              
            
            
                                                                       
-- One output row per run of consecutive dates with the same price, per product.
-- Step 1 (marked): flag each row with 1 when its price differs from the
--   previous row of the same product (in date order), otherwise 0.
-- Step 2 (marked_as_group): a running total of those flags gives every run
--   its own number within the product.
-- Step 3: group by product and run number and report the first and last date.
WITH marked AS (
  SELECT
    product,
    [date],
    price,
    CASE
      -- LAG returns NULL for the first row of a product; NULL = price is not
      -- true, so the first row falls through to ELSE and opens a run.
      WHEN LAG(price) OVER (PARTITION BY product ORDER BY [date]) = price
      THEN 0 ELSE 1
    END AS is_price_change
  FROM #ph
),
marked_as_group AS (
  SELECT
    m.product,
    m.[date],
    m.price,
    -- Explicit ROWS frame: the default RANGE frame would add up all rows
    -- that tie on [date] at once.
    SUM(m.is_price_change) OVER (
      PARTITION BY m.product
      ORDER BY m.[date]
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS price_change_group
  FROM marked AS m
)
SELECT
  product,
  date_from = MIN([date]),
  date_to   = MAX([date]),
  price     = MIN(price)  -- every row of a run has the same price
FROM marked_as_group
GROUP BY
  product,
  price_change_group
ORDER BY
  product,
  date_to

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  终归单人心        
                
              
                            
                2020-12-14 04:15
              
            
            
                                                                       
One solution I have come up with which is relatively "clean" is:

-- cte_sort numbers the rows of #ph per product in date order (seq).
-- The final query keeps a row only when the row directly after it (seq + 1,
-- same product) does not have the same price. For each kept row it returns:
--   date_from = the kept row's own date
--   date_to   = the date of the next row of that product, or NULL if none
;WITH cte_sort AS
(
    SELECT
        product,
        [date],
        price,
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS seq
    FROM #ph
)
SELECT
    cur.product,
    cur.[date] AS date_from,
    nxt.[date] AS date_to,
    cur.price
FROM cte_sort AS cur
OUTER APPLY
(
    -- Closest later row of the same product.
    SELECT TOP (1) later.[date]
    FROM cte_sort AS later
    WHERE later.product = cur.product
      AND later.seq > cur.seq
    ORDER BY later.seq
) AS nxt
WHERE NOT EXISTS
(
    -- An immediately following row with an unchanged price.
    SELECT 1
    FROM cte_sort AS same_price_next
    WHERE same_price_next.product = cur.product
      AND same_price_next.seq = cur.seq + 1
      AND same_price_next.price = cur.price
)
ORDER BY cur.[date]


I used a CTE with row_number because you then don't need to worry about whether any dates are missing if you use functions like dateadd.  You obviously only need the outer apply if you want to have the date_to column (which I do).

This solution does solve my problem; however, I am having a slight issue getting it to perform as quickly as I'd like on my table of 5 million rows.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  闹比i        
                
              
                            
                2020-12-14 04:24
              
            
            
                                                                       
-- Returns the column names of the given table as one string, in
-- ordinal_position order. Names are trimmed, separated by 9 spaces, and the
-- string ends with 12 spaces.
--
-- @table_name : table name to look up in information_schema.columns
--               (matched on table_name only, not on schema).
-- Returns     : nvarchar(4000); NULL when no column matches @table_name.
--
-- The list is built with an ordered FOR XML PATH subquery rather than
-- "SELECT @str = ... + @str ... ORDER BY", because SQL Server does not
-- guarantee the result of aggregating into a variable that way.
-- NOTE(review): xml data type methods require QUOTED_IDENTIFIER ON when the
-- function is created and when it is called; confirm the session settings.
Create function [dbo].[AF_TableColumns](@table_name nvarchar(55))
returns nvarchar(4000) as
begin
    declare @str nvarchar(4000)

    select @str =
        stuff(
            (
                -- Each name is prefixed with the 9-space separator.
                select '         ' + cast(rtrim(ltrim(column_name)) as nvarchar(500))
                from information_schema.columns
                where table_name = @table_name
                group by table_name, column_name, ordinal_position
                order by ordinal_position, column_name
                -- TYPE + .value() returns the text without XML entity escaping.
                for xml path(''), type
            ).value('.', 'nvarchar(max)'),
            1, 9, ''            -- drop the separator in front of the first name
        ) + '            '      -- 12 trailing spaces; stays NULL when no rows matched

    return @str
end

--select dbo.AF_TableColumns('YourTable') Select * from YourTable

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复