Is there a Linear Regression function in SQL Server?

前端 未结 8 1606
遇见更好的自我
遇见更好的自我 2020-12-12 17:19

Is there a Linear Regression function in SQL Server 2005/2008, similar to the Linear Regression functions in Oracle?

8条回答
  •  情书的邮戳
    2020-12-12 18:07

    I've actually written an SQL routine using Gram-Schmidt orthogonalization. It, as well as other machine learning and forecasting routines, is available at sqldatamine.blogspot.com

    At the suggestion of Brad Larson I've added the code here rather than just directing users to my blog. This produces the same results as the LINEST function in Excel. My primary source is The Elements of Statistical Learning (2008) by Hastie, Tibshirani and Friedman.

    -- Sample data set: one row per observation, holding three candidate
    -- predictors (area, rooms, odd) and the response (price).
    create table #rawdata
    (
        id    int,
        area  float,
        rooms float,
        odd   float,
        price float
    )

    -- Load all five observations in a single statement.
    insert into #rawdata (id, area, rooms, odd, price)
    select 1, 2201, 3, 1, 400
    union all select 2, 1600, 3, 0, 330
    union all select 3, 2400, 3, 1, 369
    union all select 4, 1416, 2, 1, 232
    union all select 5, 3000, 4, 0, 540
    
    -- Reshape the inputs into long form.
    -- #x has one row per (observation, predictor): xid = observation id,
    -- xn = predictor number, xv = predictor value. Predictor 0 is the
    -- constant 1 (the intercept column); 1 = rooms, 2 = area, 3 = odd.
    select design.xid, design.xn, design.xv
    into #x
    from
    (
        -- cast keeps xv a float, the type the union with the float columns yields
        select id as xid, 0 as xn, cast(1 as float) as xv from #rawdata
        union all
        select id, 1, rooms from #rawdata
        union all
        select id, 2, area from #rawdata
        union all
        select id, 3, odd from #rawdata
    ) as design

    -- #y has one row per observation: yid = observation id, yn fixed at 0,
    -- yv = the response (price).
    select id as yid, 0 as yn, price as yv
    into #y
    from #rawdata
    
    -- #z stores the orthogonalized (residual) columns in long form:
    -- zid = observation id, zn = column number, zv = value.
    create table #z
    (
        zid int,
        zn  int,
        zv  float
    )

    -- Column 0 is the intercept: a constant 1 for every observation.
    insert into #z (zid, zn, zv)
    select id, 0, 1
    from #rawdata

    -- #c: orthogonalization coefficient cv of x column cxn on z column czn.
    create table #c
    (
        cxn int,
        czn int,
        cv  float
    )

    -- #b: regression coefficient bv for predictor number bn.
    create table #b
    (
        bn int,
        bv float
    )
    
    
    -- @p is the number of the column (xn / zn) currently being processed.
    -- Column 0 is the intercept and is already present in #z, so start at 1.
    declare @p int
    set @p = 1


    -- Pass 1 (forward): Gram-Schmidt orthogonalization.
    -- For each predictor @p = 1 .. max(xn):
    --   1. regress every x column numbered @p or higher on the previous
    --      residual column z(@p-1) and store the coefficients in #c;
    --   2. build z(@p) = x(@p) - sum over earlier z columns of (cv * zv)
    --      and append it to #z.
    -- sum(zv*zv) is a denominator: a predictor that is an exact linear
    -- combination of earlier ones yields an all-zero residual column and
    -- therefore a zero divisor.
    while @p <= (select max(xn) from #x)
    begin
        insert into #c (cxn, czn, cv)
        select x.xn, z.zn, sum(x.xv * z.zv) / sum(z.zv * z.zv)
        from #x as x
        inner join #z as z
            on x.xid = z.zid
        where z.zn = @p - 1
          and x.xn > z.zn
        group by x.xn, z.zn

        insert into #z (zid, zn, zv)
        select z.zid, x.xn, x.xv - sum(c.cv * z.zv)
        from #x as x
        inner join #z as z
            on x.xid = z.zid
        inner join #c as c
            on c.czn = z.zn
           and c.cxn = x.xn
        where x.xn = @p
          and z.zn < x.xn
        group by z.zid, x.xn, x.xv

        set @p = @p + 1
    end

    -- Pass 2 (backward): estimate the regression coefficients.
    -- Walking @p from the last predictor down to 0, subtract from y the
    -- contribution of the coefficients already stored in #b, then regress
    -- what is left on the residual column z(@p).
    -- @p enters this loop as max(xn) + 1; that first iteration matches no
    -- z column and inserts nothing.
    while @p >= 0
    begin
        insert into #b (bn, bv)
        select z.zn, sum(r.yv * z.zv) / sum(z.zv * z.zv)
        from #z as z
        inner join
        (
            -- y minus the fit from coefficients estimated so far; the left
            -- join leaves bv NULL for predictors without a coefficient yet,
            -- and isnull() turns an all-NULL sum into 0.
            select y.yid, y.yv - isnull(sum(b.bv * x.xv), 0) as yv
            from #x as x
            inner join #y as y
                on x.xid = y.yid
            left join #b as b
                on x.xn = b.bn
            group by y.yid, y.yv
        ) as r
            on z.zid = r.yid
        where z.zn = @p
        group by z.zn

        set @p = @p - 1
    end
    
    -- Regression coefficients: bn = predictor number (0 is the intercept), bv = estimate
    select bn, bv from #b
    
    -- Actual vs. fitted value per observation, with the residual (err = actual - fit).
    -- The leading semicolon terminates the preceding statement, as a CTE requires.
    ;with fitted as
    (
        -- fitted value = sum over predictors of (value * coefficient)
        select x.xid, sum(x.xv * b.bv) as fit
        from #x as x
        inner join #b as b
            on x.xn = b.bn
        group by x.xid
    )
    select y.yid, y.yv, f.fit, y.yv - f.fit as err
    from #y as y
    inner join fitted as f
        on y.yid = f.xid
    
    -- R squared (coefficient of determination) of the fitted model:
    --   1 - SSE / SST, with SSE = sum(err^2) and SST = sum((yv - mean(yv))^2).
    -- SST is centered on the mean of y because the model includes an
    -- intercept; dividing by the raw sum(yv^2) instead overstates the fit.
    -- nullif() yields NULL instead of a divide-by-zero when every yv is
    -- identical (SST = 0).
    select 1 - sum(power(d.err, 2)) / nullif(sum(power(d.yv - m.y_mean, 2)), 0) as r_squared
    from
    (
        select y.yid, y.yv, f.fit, y.yv - f.fit as err
        from #y as y
        inner join
        (
            -- fitted value = sum over predictors of (value * coefficient)
            select x.xid, sum(x.xv * b.bv) as fit
            from #x as x
            inner join #b as b
                on x.xn = b.bn
            group by x.xid
        ) as f
            on y.yid = f.xid
    ) as d
    cross join
    (
        -- single-row derived table carrying the mean of the response
        select avg(yv) as y_mean from #y
    ) as m
    

提交回复
热议问题