

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, confusion_matrix, roc_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
# Load the modelling table from the 'logist_model' sheet of the workbook.
# The path is written as a raw string so the backslashes of the Windows path
# are taken literally instead of being parsed as (invalid) escape sequences
# such as "\p" and "\l"; the resulting path value is unchanged.
data_lr = pd.read_excel(
    r'D:\python原始数据\logist_model.xlsx',
    sheet_name='logist_model',
)

# Quick sanity check of what was loaded: dimensions, then the first ten rows.
print(data_lr.shape)
print(data_lr.head(10))
# Split the table by position into a training part and a hold-out test part.
# Rows [0, 200) are used for training and rows [200, 291) for testing;
# columns 2..4 are taken as the features and column 5 as the target.
array = data_lr.values

train_rows = slice(0, 200)
test_rows = slice(200, 291)
feature_cols = slice(2, 5)
target_col = 5

X_train, Y_train = array[train_rows, feature_cols], array[train_rows, target_col]
X_test, Y_test = array[test_rows, feature_cols], array[test_rows, target_col]
# Fit a logistic-regression classifier (default settings) on the training
# split; `fit` returns the estimator itself, so it can be chained.
model = LogisticRegression().fit(X_train, Y_train)

# Report the learned parameters: intercept first, then the coefficients.
for caption, fitted_value in (("截距项", model.intercept_), ("系数", model.coef_)):
    print(caption, fitted_value)
# --- 10-fold cross-validation on the training split -------------------------
# Prints the mean score followed by the individual per-fold scores.
scores = cross_val_score(model, X_train, Y_train, cv=10)
print("准确率", np.mean(scores), scores)

# --- Confusion matrix on the hold-out split ---------------------------------
predicted = model.predict(X_test)
# Pin the label set to the classes the model was trained on, so the matrix
# always has one row/column per class even if a class is absent from both
# Y_test and the predictions (otherwise the DataFrame below would not fit).
matrix = confusion_matrix(Y_test, predicted, labels=model.classes_)
# Display names for the rows (true class) and columns (predicted class).
# NOTE(review): assumes the two target classes are 0 and 1 -- confirm.
classes = ['0', '1']
dataframe = pd.DataFrame(data=matrix, index=classes, columns=classes)
print(dataframe)

# --- ROC curve and AUC on the hold-out split --------------------------------
# Column 1 of predict_proba is the probability of the second entry of
# model.classes_, which is used here as the positive-class score.
predictions = model.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(Y_test, predictions[:, 1])
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, 'b', label=f'auc={roc_auc:.2f}')
plt.legend(loc='lower right')
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel("fpr")
plt.ylabel("tpr")
plt.show()

