"""Fit and evaluate a linear regression on the Folds5x2_pp.csv data set.

The script loads the CSV, uses columns AT, V, AP and RH as features and PE
as the target, holds out 25% of the rows for testing, fits
``LinearRegression`` on the remainder and prints the fitted parameters
together with R^2, MSE and RMSE on the held-out rows.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model, datasets, metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Location of the input CSV (machine-specific absolute path).
DATA_PATH = 'C://Users//yangbaojie.ASPIRE//Desktop//python_learn//liner_regression//Folds5x2_pp.csv'

data = pd.read_csv(DATA_PATH)  # author's note: 9568 rows x 5 columns

# Double brackets keep both X and y as DataFrames, so y stays 2-D (n x 1)
# and the fitted coef_ / predictions are 2-D as well.
X = data[['AT', 'V', 'AP', 'RH']]  # author's note: 9568 x 4
y = data[['PE']]                   # author's note: 9568 x 1

# Random split with 25% of the rows held out for testing. The fixed
# random_state makes the same rows get selected on every run, so the
# experiment is reproducible.
# Author's notes on shapes: X_train 7176 x 4, X_test 2392 x 4,
# y_test 2392 x 1; y_train has the single PE column (7176 x 1).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

# Fit the model on the training rows only.
LR = LinearRegression()
LR.fit(X_train, y_train)

# Fitted parameters: the intercept and the per-feature weights.
print('LR.intercept:\n', LR.intercept_)
print('LR.coef:\n', LR.coef_)

# Coefficient of determination R^2 on the held-out rows. 1.0 is a perfect
# fit and larger is better; on unseen data the score can drop below 0 when
# the model does worse than always predicting the mean.
print('R^2:\n', LR.score(X_test, y_test))

# Predictions for the held-out rows.
y_predict = LR.predict(X_test)

# MSE is the mean of the squared differences between the held-out targets
# and the predictions; RMSE is its square root. Compute the MSE once and
# reuse it for both printouts.
mse = metrics.mean_squared_error(y_test, y_predict)
print ("MSE:", mse)
print('RMSE:', np.sqrt(mse))
LR.intercept:
 [ 447.06297099]
LR.coef:
 [[-1.97376045 -0.23229086 0.0693515 -0.15806957]]
R^2:
 0.931716257578
MSE: 20.0804012021
RMSE: 4.48111606657
# Show the first five predictions next to the first five held-out targets.
print('y_predict:', y_predict[:5])
print('y_test:\n', y_test[:5])

# Scatter the predictions against the true held-out values.
plt.scatter(y_test, y_predict, marker='*', c='b', alpha=0.5)
plt.xlabel('y_test')
plt.ylabel('y_predict')

# Dashed black reference line y = x spanning the range of the true values;
# points on this line are predicted exactly.
diagonal = [y_test.min(), y_test.max()]
plt.plot(diagonal, diagonal, 'k--', lw=4)
plt.show()
y_predict: [[ 459.32136845]
 [ 433.9320719 ]
 [ 474.84501331]
 [ 434.21338967]
 [ 452.56159683]]
y_test:
           PE
5014  458.92
6947  430.55
9230  473.85
4290  435.02
6477  456.44
from sklearn.model_selection import cross_val_predict
from sklearn import metrics

# Cross-validation with cv=5: the training set is divided into 5 equal
# parts; each part in turn serves as the validation set while the other 4
# are used for fitting, giving 5 train/validation combinations in total.
cross_predict = cross_val_predict(LR, X_train, y_train, cv=5)
print(cross_predict)  # author's note: 7176 x 1

# Compute the cross-validated MSE once and derive the RMSE from it.
cv_mse = metrics.mean_squared_error(y_train, cross_predict)
print ("MSE:", cv_mse)
print('RMSE:', np.sqrt(cv_mse))

# Scatter the cross-validated predictions against the training targets.
plt.scatter(y_train, cross_predict, c='b', alpha=0.5, marker='*')
plt.xlabel('y_train')
plt.ylabel('cross_predict')

# Dashed black reference line y = x spanning the range of the targets.
plt.plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], 'k--', lw=4)
plt.show()
文章来源: sklearn 线性回归实践