I am using the ols.py code downloaded from the SciPy Cookbook (the download link is in the first paragraph, on the bold "OLS"), but I need to understand rather than using ra
You should distinguish between two cases: (i) you just want to solve the equation; (ii) you also want statistical information about your model. You can do (i) with np.linalg.lstsq; for (ii), you are better off using statsmodels.
Below is an example with both solutions:
# The standard imports
import numpy as np
import pandas as pd
# For the statistic
from statsmodels.formula.api import ols
def generatedata():
    """Create noisy samples of the plane z = -5 + 3*x - 0.5*y.

    The plane is evaluated on a 101 x 101 grid spanning [-5, 5] in both
    directions, and standard-normal noise is added to every grid point.

    Returns
    -------
    tuple of three 1-D arrays
        The flattened x coordinates, y coordinates and noisy z values.
    """
    axis = np.linspace(-5, 5, 101)
    X, Y = np.meshgrid(axis, axis)

    # A fixed seed makes the noise, and therefore the fit, reproducible.
    np.random.seed(987654321)
    n_rows, n_cols = X.shape
    noise = np.random.randn(n_rows, n_cols)

    Z = -5 + 3*X - 0.5*Y + noise
    return tuple(grid.flatten() for grid in (X, Y, Z))
def regressionmodel(X,Y,Z):
    """Fit the multilinear model ``z ~ x + y`` and print its statistics.

    The data are placed in a pandas DataFrame and fitted with the
    statsmodels formula interface; the printed summary contains the fit,
    P-values, confidence intervals etc.

    Parameters
    ----------
    X, Y : 1-D array-like
        Values of the two explanatory variables.
    Z : 1-D array-like
        Values of the dependent variable, same length as ``X`` and ``Y``.

    Returns
    -------
    numpy.ndarray
        The fitted coefficients. For the seeded data produced by
        ``generatedata`` this should be
        array([-4.99754526, 3.00250049, -0.50514907]).
    """
    # Convert the data into a pandas DataFrame, as the formula API expects
    df = pd.DataFrame({'x': X, 'y': Y, 'z': Z})

    # Fit the model
    model = ols("z ~ x + y", df).fit()

    # Print the summary
    print(model.summary())

    # Use the public ``params`` attribute instead of the private
    # ``_results``; np.asarray keeps the return value a plain ndarray.
    return np.asarray(model.params)
def linearmodel(X,Y,Z):
    """Fit the plane ``z = c0 + c1*x + c2*y`` by linear least squares.

    Parameters
    ----------
    X, Y : 1-D array-like
        Coordinates of the sample points.
    Z : 1-D array-like
        Observed values at those points, same length as ``X`` and ``Y``.

    Returns
    -------
    numpy.ndarray
        The three coefficients ``[c0, c1, c2]`` (offset, x slope, y slope).
        They are also printed.
    """
    # Design matrix: a column of ones for the offset, then x and y
    M = np.vstack((np.ones(len(X)), X, Y)).T

    # rcond=None selects NumPy's current default cut-off explicitly; calling
    # lstsq without it raises a FutureWarning on NumPy 1.14 through 1.x.
    bestfit = np.linalg.lstsq(M, Z, rcond=None)[0]
    print('Best fit plane:', bestfit)
    return bestfit
if __name__ == '__main__':
    # Generate the sample data once and run both fits on it:
    # first the full statistical model, then the plain least-squares plane.
    data = generatedata()
    regressionmodel(*data)
    linearmodel(*data)