Is there any linear regression function in SQL Server 2005/2008, similar to the linear regression functions in Oracle?
There are no linear regression functions in SQL Server. To calculate a simple linear regression (Y' = bX + A) between pairs of data points (x, y), including the correlation coefficient, the coefficient of determination (R^2) and the standard error of the estimate (standard deviation of the residuals), do the following:
For a table `regression_data` with numeric columns `x` and `y`:
-- Simple linear regression (y' = @slope * x + @intercept) over the (x, y)
-- pairs stored in regression_data.
--
-- Results are left in these variables:
--   @slope, @intercept          fitted line
--   @correlation_coefficient    Sxy / sqrt(Sxx * Syy)
--   @r_squared                  coefficient of determination
--   @standard_estimate_error    sqrt(sum of squared residuals / number of points)
--
-- Rows where x or y is NULL are skipped so that the point count and every sum
-- describe the same set of pairs. Degenerate input (no usable rows, all x
-- equal, or all y equal) leaves the affected results NULL instead of raising
-- a divide-by-zero error.
DECLARE
    @total_points            INT,
    @intercept               DECIMAL(38, 10),
    @slope                   DECIMAL(38, 10),
    @r_squared               DECIMAL(38, 10),
    @standard_estimate_error DECIMAL(38, 10),
    @correlation_coefficient DECIMAL(38, 10),
    @average_x               DECIMAL(38, 10),
    @average_y               DECIMAL(38, 10),
    @sumX                    DECIMAL(38, 10),
    @sumY                    DECIMAL(38, 10),
    @sumXX                   DECIMAL(38, 10),
    @sumYY                   DECIMAL(38, 10),
    @sumXY                   DECIMAL(38, 10),
    @Sxx                     DECIMAL(38, 10),
    @Syy                     DECIMAL(38, 10),
    @Sxy                     DECIMAL(38, 10);

-- Single pass over the data for the count, means and raw sums.
-- x and y are cast to FLOAT inside the aggregates so that integer columns are
-- not averaged with integer truncation and the sums/products cannot overflow
-- the column's own integer type.
SELECT
    @total_points = COUNT(*),
    @average_x    = AVG(CAST(x AS FLOAT)),
    @average_y    = AVG(CAST(y AS FLOAT)),
    @sumX         = SUM(CAST(x AS FLOAT)),
    @sumY         = SUM(CAST(y AS FLOAT)),
    @sumXX        = SUM(CAST(x AS FLOAT) * CAST(x AS FLOAT)),
    @sumYY        = SUM(CAST(y AS FLOAT) * CAST(y AS FLOAT)),
    @sumXY        = SUM(CAST(x AS FLOAT) * CAST(y AS FLOAT))
FROM regression_data
WHERE x IS NOT NULL
  AND y IS NOT NULL;

-- Corrected sums of squares and cross products.
-- NULLIF turns a zero point count into NULL, which propagates through the
-- remaining calculations instead of failing.
SET @Sxx = @sumXX - (@sumX * @sumX) / NULLIF(@total_points, 0);
SET @Syy = @sumYY - (@sumY * @sumY) / NULLIF(@total_points, 0);
SET @Sxy = @sumXY - (@sumX * @sumY) / NULLIF(@total_points, 0);

-- Only defined when both x and y actually vary; the "> 0" test also keeps
-- SQRT away from zero, NULL, or a slightly negative value caused by rounding.
SET @correlation_coefficient =
    CASE
        WHEN @Sxx > 0 AND @Syy > 0 THEN @Sxy / SQRT(@Sxx * @Syy)
    END;

-- The denominator is zero when every x is identical (vertical line): no slope.
SET @slope =
    (@total_points * @sumXY - @sumX * @sumY)
    / NULLIF(@total_points * @sumXX - POWER(@sumX, 2), 0);

-- The fitted line passes through the point of means.
SET @intercept = @average_y - @slope * @average_x;

-- Explained variation divided by total variation (@Syy); undefined when y is
-- constant.
SET @r_squared =
    (@intercept * @sumY + @slope * @sumXY - POWER(@sumY, 2) / NULLIF(@total_points, 0))
    / NULLIF(@Syy, 0);

-- Standard error of the estimate: root of the mean squared residual.
-- NOTE: this divides by the number of points; use (@total_points - 2) instead
-- if the sample (degrees-of-freedom corrected) estimate is wanted.
SELECT
    @standard_estimate_error =
        SQRT(
            SUM(POWER(CAST(y AS FLOAT) - (@slope * CAST(x AS FLOAT) + @intercept), 2))
            / NULLIF(@total_points, 0)
        )
FROM regression_data
WHERE x IS NOT NULL
  AND y IS NOT NULL;