sklearn 线性回归实践

匿名 (未验证) 提交于 2019-12-03 00:41:02
import numpy as np
import pandas as pd
from sklearn import linear_model, datasets, metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model that predicts the PE column from the
# AT, V, AP and RH columns, then report its quality on a held-out test split.

# Load the data set (the author records its shape as 9568 x 5).
data = pd.read_csv('C://Users//yangbaojie.ASPIRE//Desktop//python_learn//liner_regression//Folds5x2_pp.csv')
# print(data)

# Feature matrix: the four predictor columns (author-recorded shape 9568 x 4).
X = data[['AT', 'V', 'AP', 'RH']]
# print(X.shape)

# Target: kept as a one-column DataFrame (author-recorded shape 9568 x 1),
# which is why the fitted intercept/coefficients print as nested arrays.
y = data[['PE']]
# print(y.shape)

# Random split with 25% of the rows held out for testing. A fixed random_state
# selects the same rows on every run, so the experiment is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)
# print(X_train)   # 7176 x 4
# print(X_test)    # 2392 x 4
# print(y_train)   # 7176 x 1
# print(y_test)    # 2392 x 1

# Fit the linear model on the training split.
LR = LinearRegression()
LR.fit(X_train, y_train)

# Learned parameters: the intercept and one weight per feature.
print('LR.intercept:\n', LR.intercept_)
print('LR.coef:\n', LR.coef_)

# Coefficient of determination R^2 on the test split. The best possible value
# is 1.0; it can be negative when the model fits worse than always predicting
# the mean of the target.
print('R^2:\n', LR.score(X_test, y_test))

# Predictions for the held-out rows.
y_predict = LR.predict(X_test)

# MSE: mean of the squared differences between test targets and predictions.
# Computed once and reused so the RMSE is derived from the very same value.
mse = metrics.mean_squared_error(y_test, y_predict)
print("MSE:", mse)
# RMSE: root mean squared error, i.e. the square root of the MSE.
print('RMSE:', np.sqrt(mse))
LR.intercept:
 [ 447.06297099]
LR.coef:
 [[-1.97376045 -0.23229086  0.0693515  -0.15806957]]
R^2:
 0.931716257578
MSE: 20.0804012021
RMSE: 4.48111606657
# Compare predictions with the true targets on the test split.
print('y_predict:', y_predict[0:5])    # first 5 predicted values
print('y_test:\n', y_test[0:5])        # first 5 true values

# Scatter of true (x axis) versus predicted (y axis) values.
plt.scatter(y_test, y_predict, c='b', alpha=0.5, marker='*')
plt.xlabel('y_test')
plt.ylabel('y_predict')

# Dashed y = x reference line spanning the observed target range; points close
# to this line are predictions close to the true value.
diagonal = [y_test.min(), y_test.max()]
plt.plot(diagonal, diagonal, 'k--', lw=4)
plt.show()
y_predict: [[ 459.32136845]
 [ 433.9320719 ]
 [ 474.84501331]
 [ 434.21338967]
 [ 452.56159683]]
y_test:
           PE
5014  458.92
6947  430.55
9230  473.85
4290  435.02
6477  456.44

from sklearn.model_selection import cross_val_predict
from sklearn import metrics

# Cross-validation on the training split. With cv=5 the training set is divided
# into 5 equal parts; each part in turn serves as the validation set while the
# other 4 are used for fitting, giving 5 train/validation combinations.
cross_predict = cross_val_predict(LR, X_train, y_train, cv=5)
print(cross_predict)
print(cross_predict.shape)   # (7176, 1) in the author's recorded run

# Error of the cross-validated predictions against the training targets.
# The MSE is computed once and reused for the RMSE.
cv_mse = metrics.mean_squared_error(y_train, cross_predict)
print("MSE:", cv_mse)
print('RMSE:', np.sqrt(cv_mse))

# Scatter of true (x axis) versus cross-validated predicted (y axis) values.
plt.scatter(y_train, cross_predict, c='b', alpha=0.5, marker='*')
plt.xlabel('y_train')
plt.ylabel('cross_predict')

# Dashed y = x reference line spanning the observed target range.
diagonal = [y_train.min(), y_train.max()]
plt.plot(diagonal, diagonal, 'k--', lw=4)
plt.show()
[[ 447.48775241]
 [ 433.0748175 ]
 [ 428.18449282]
 ...,
 [ 477.69395695]
 [ 461.26499278]
 [ 461.61132225]]
(7176, 1)
MSE: 21.0332779559
RMSE: 4.58620518031
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!