参考:作者的Jupyter Notebook
Chapter 5 – Support Vector Machines
支持向量机(简称SVM)是一个功能强大并且全面的机器学习模型,它能够执行线性或非线性分类、回归,甚至是异常值检测任务。它是机器学习领域最受欢迎的模型之一,任何对机器学习感兴趣的人都应该在工具箱中配备一个。SVM特别适用于中小型复杂数据集的分类。
- 保存图片
from __future__ import division, print_function, unicode_literals import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import os np.random.seed(42) mpl.rc('axes', labelsize=14) mpl.rc('xtick', labelsize=12) mpl.rc('ytick', labelsize=12) # Where to save the figures PROJECT_ROOT_DIR = "images" CHAPTER_ID = "traininglinearmodels" def save_fig(fig_id, tight_layout=True): path = os.path.join(PROJECT_ROOT_DIR, CHAPTER_ID, fig_id + ".png") print("Saving figure", fig_id) if tight_layout: plt.tight_layout() plt.savefig(path, format='png', dpi=600)
线性SVM分类
- 加载鸢尾花数据集,缩放特征,然后训练一个线性SVM模型(使用LinearSVC类,C=0.1,用即将介绍的hinge损失函数)用来检测Virginica鸢尾花。
from sklearn.svm import SVC from sklearn import datasets iris = datasets.load_iris() X = iris["data"][:, (2, 3)] # petal length, petal width y = iris["target"] setosa_or_versicolor = (y == 0) | (y == 1) X = X[setosa_or_versicolor] y = y[setosa_or_versicolor] # SVM Classifier model svm_clf = SVC(kernel="linear", C=float("inf")) print(svm_clf.fit(X, y)) # Bad models x0 = np.linspace(0, 5.5, 200) pred_1 = 5*x0 - 20 pred_2 = x0 - 1.8 pred_3 = 0.1 * x0 + 0.5 def plot_svc_decision_boundary(svm_clf, xmin, xmax): w = svm_clf.coef_[0] b = svm_clf.intercept_[0] # At the decision boundary, w0*x0 + w1*x1 + b = 0 # => x1 = -w0/w1 * x0 - b/w1 x0 = np.linspace(xmin, xmax, 200) decision_boundary = -w[0]/w[1] * x0 - b/w[1] margin = 1/w[1] gutter_up = decision_boundary + margin gutter_down = decision_boundary - margin svs = svm_clf.support_vectors_ plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA') plt.plot(x0, decision_boundary, "k-", linewidth=2) plt.plot(x0, gutter_up, "k--", linewidth=2) plt.plot(x0, gutter_down, "k--", linewidth=2) plt.figure(figsize=(12,2.7)) plt.subplot(121) plt.plot(x0, pred_1, "g--", linewidth=2) plt.plot(x0, pred_2, "m-", linewidth=2) plt.plot(x0, pred_3, "r-", linewidth=2) plt.plot(X[:, 0][y==1], X[:, 1][y==1], "bs", label="Iris-Versicolor") plt.plot(X[:, 0][y==0], X[:, 1][y==0], "yo", label="Iris-Setosa") plt.xlabel("Petal length", fontsize=14) plt.ylabel("Petal width", fontsize=14) plt.legend(loc="upper left", fontsize=14) plt.axis([0, 5.5, 0, 2]) plt.subplot(122) plot_svc_decision_boundary(svm_clf, 0, 5.5) plt.plot(X[:, 0][y==1], X[:, 1][y==1], "bs") plt.plot(X[:, 0][y==0], X[:, 1][y==0], "yo") plt.xlabel("Petal length", fontsize=14) plt.axis([0, 5.5, 0, 2]) save_fig("large_margin_classification_plot较少间隔违例和大间隔对比") plt.show()
非线性SVM分类
-
通过添加特征使数据集线性可分离
X1D = np.linspace(-4, 4, 9).reshape(-1, 1) X2D = np.c_[X1D, X1D**2] y = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0]) ''' plt.figure(figsize=(11, 4)) plt.subplot(121) plt.grid(True, which='both') plt.axhline(y=0, color='k') plt.plot(X1D[:, 0][y==0], np.zeros(4), "bs") plt.plot(X1D[:, 0][y==1], np.zeros(5), "g^") plt.gca().get_yaxis().set_ticks([]) plt.xlabel(r"$x_1$", fontsize=20) plt.axis([-4.5, 4.5, -0.2, 0.2]) plt.subplot(122) plt.grid(True, which='both') plt.axhline(y=0, color='k') plt.axvline(x=0, color='k') plt.plot(X2D[:, 0][y==0], X2D[:, 1][y==0], "bs") plt.plot(X2D[:, 0][y==1], X2D[:, 1][y==1], "g^") plt.xlabel(r"$x_1$", fontsize=20) plt.ylabel(r"$x_2$", fontsize=20, rotation=0) plt.gca().get_yaxis().set_ticks([0, 4, 8, 12, 16]) plt.plot([-4.5, 4.5], [6.5, 6.5], "r--", linewidth=3) plt.axis([-4.5, 4.5, -1, 17]) plt.subplots_adjust(right=1) save_fig("higher_dimensions_plot", tight_layout=False) plt.show()
-
要使用Scikit-Learn实现这个想法,可以搭建一条流水线:一个PolynomialFeatures转换器,接着一个StandardScaler,然后是LinearSVC。我们用卫星数据集来测试一下
from sklearn.preprocessing import StandardScaler from sklearn.svm import LinearSVC from sklearn.datasets import make_moons X, y = make_moons(n_samples=100, noise=0.15, random_state=42) def plot_dataset(X, y, axes): plt.plot(X[:, 0][y==0], X[:, 1][y==0], "bs") plt.plot(X[:, 0][y==1], X[:, 1][y==1], "g^") plt.axis(axes) plt.grid(True, which='both') plt.xlabel(r"$x_1$", fontsize=20) plt.ylabel(r"$x_2$", fontsize=20, rotation=0) #plot_dataset(X, y, [-1.5, 2.5, -1, 1.5]) #plt.show() from sklearn.datasets import make_moons from sklearn.pipeline import Pipeline from sklearn.preprocessing import PolynomialFeatures polynomial_svm_clf = Pipeline([ ("poly_features", PolynomialFeatures(degree=3)), ("scaler", StandardScaler()), ("svm_clf", LinearSVC(C=10, loss="hinge", random_state=42)) ]) polynomial_svm_clf.fit(X, y) def plot_predictions(clf, axes): x0s = np.linspace(axes[0], axes[1], 100) x1s = np.linspace(axes[2], axes[3], 100) x0, x1 = np.meshgrid(x0s, x1s) X = np.c_[x0.ravel(), x1.ravel()] y_pred = clf.predict(X).reshape(x0.shape) y_decision = clf.decision_function(X).reshape(x0.shape) plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2) plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1) plot_predictions(polynomial_svm_clf, [-1.5, 2.5, -1, 1.5]) plot_dataset(X, y, [-1.5, 2.5, -1, 1.5]) save_fig("moons_polynomial_svc_plot") plt.show()
-
多项式核
from sklearn.svm import SVC poly_kernel_svm_clf = Pipeline([ ("scaler", StandardScaler()), ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5)) ]) poly_kernel_svm_clf.fit(X, y) #print(poly_kernel_svm_clf.fit(X, y)) poly100_kernel_svm_clf = Pipeline([ ("scaler", StandardScaler()), ("svm_clf", SVC(kernel="poly", degree=10, coef0=100, C=5)) ]) poly100_kernel_svm_clf.fit(X, y) #print(poly100_kernel_svm_clf.fit(X, y)) plt.figure(figsize=(11, 4)) plt.subplot(121) plot_predictions(poly_kernel_svm_clf, [-1.5, 2.5, -1, 1.5]) plot_dataset(X, y, [-1.5, 2.5, -1, 1.5]) plt.title(r"$d=3, r=1, C=5$", fontsize=18) plt.subplot(122) plot_predictions(poly100_kernel_svm_clf, [-1.5, 2.5, -1, 1.5]) plot_dataset(X, y, [-1.5, 2.5, -1, 1.5]) plt.title(r"$d=10, r=100, C=5$", fontsize=18) save_fig("moons_kernelized_polynomial_svc_plot") plt.show()
-
添加相似特征
def gaussian_rbf(x, landmark, gamma): return np.exp(-gamma * np.linalg.norm(x - landmark, axis=1)**2) gamma = 0.3 x1s = np.linspace(-4.5, 4.5, 200).reshape(-1, 1) x2s = gaussian_rbf(x1s, -2, gamma) x3s = gaussian_rbf(x1s, 1, gamma) XK = np.c_[gaussian_rbf(X1D, -2, gamma), gaussian_rbf(X1D, 1, gamma)] yk = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0]) plt.figure(figsize=(11, 4)) plt.subplot(121) plt.grid(True, which='both') plt.axhline(y=0, color='k') plt.scatter(x=[-2, 1], y=[0, 0], s=150, alpha=0.5, c="red") plt.plot(X1D[:, 0][yk==0], np.zeros(4), "bs") plt.plot(X1D[:, 0][yk==1], np.zeros(5), "g^") plt.plot(x1s, x2s, "g--") plt.plot(x1s, x3s, "b:") plt.gca().get_yaxis().set_ticks([0, 0.25, 0.5, 0.75, 1]) plt.xlabel(r"$x_1$", fontsize=20) plt.ylabel(r"Similarity", fontsize=14) plt.annotate(r'$\mathbf{x}$', xy=(X1D[3, 0], 0), xytext=(-0.5, 0.20), ha="center", arrowprops=dict(facecolor='black', shrink=0.1), fontsize=18, ) plt.text(-2, 0.9, "$x_2$", ha="center", fontsize=20) plt.text(1, 0.9, "$x_3$", ha="center", fontsize=20) plt.axis([-4.5, 4.5, -0.1, 1.1]) plt.subplot(122) plt.grid(True, which='both') plt.axhline(y=0, color='k') plt.axvline(x=0, color='k') plt.plot(XK[:, 0][yk==0], XK[:, 1][yk==0], "bs") plt.plot(XK[:, 0][yk==1], XK[:, 1][yk==1], "g^") plt.xlabel(r"$x_2$", fontsize=20) plt.ylabel(r"$x_3$ ", fontsize=20, rotation=0) plt.annotate(r'$\phi\left(\mathbf{x}\right)$', xy=(XK[3, 0], XK[3, 1]), xytext=(0.65, 0.50), ha="center", arrowprops=dict(facecolor='black', shrink=0.1), fontsize=18, ) plt.plot([-0.1, 1.1], [0.57, -0.1], "r--", linewidth=3) plt.axis([-0.1, 1.1, -0.1, 1.1]) plt.subplots_adjust(right=1) #save_fig("kernel_method_plot") #plt.show()
-
高斯RBF
x1_example = X1D[3, 0] for landmark in (-2, 1): k = gaussian_rbf(np.array([[x1_example]]), np.array([[landmark]]), gamma) print("Phi({}, {}) = {}".format(x1_example, landmark, k))
-
使用SVC类试试高斯RBF核
rbf_kernel_svm_clf = Pipeline([ ("scaler", StandardScaler()), ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001)) ]) rbf_kernel_svm_clf.fit(X, y) print(rbf_kernel_svm_clf.fit(X, y)) from sklearn.svm import SVC gamma1, gamma2 = 0.1, 5 C1, C2 = 0.001, 1000 hyperparams = (gamma1, C1), (gamma1, C2), (gamma2, C1), (gamma2, C2) svm_clfs = [] for gamma, C in hyperparams: rbf_kernel_svm_clf = Pipeline([ ("scaler", StandardScaler()), ("svm_clf", SVC(kernel="rbf", gamma=gamma, C=C)) ]) rbf_kernel_svm_clf.fit(X, y) svm_clfs.append(rbf_kernel_svm_clf) plt.figure(figsize=(11, 7)) for i, svm_clf in enumerate(svm_clfs): plt.subplot(221 + i) plot_predictions(svm_clf, [-1.5, 2.5, -1, 1.5]) plot_dataset(X, y, [-1.5, 2.5, -1, 1.5]) gamma, C = hyperparams[i] plt.title(r"$\gamma = {}, C = {}$".format(gamma, C), fontsize=16) #使用RBF核的SVM分类器 save_fig("moons_rbf_svc_plot") plt.show()
SVM回归
-
SVM回归
np.random.seed(42) m = 50 X = 2 * np.random.rand(m, 1) y = (4 + 3 * X + np.random.randn(m, 1)).ravel() from sklearn.svm import LinearSVR svm_reg = LinearSVR(epsilon=1.5, random_state=42) svm_reg.fit(X, y) svm_reg1 = LinearSVR(epsilon=1.5, random_state=42) svm_reg2 = LinearSVR(epsilon=0.5, random_state=42) svm_reg1.fit(X, y) svm_reg2.fit(X, y) def find_support_vectors(svm_reg, X, y): y_pred = svm_reg.predict(X) off_margin = (np.abs(y - y_pred) >= svm_reg.epsilon) return np.argwhere(off_margin) svm_reg1.support_ = find_support_vectors(svm_reg1, X, y) svm_reg2.support_ = find_support_vectors(svm_reg2, X, y) eps_x1 = 1 eps_y_pred = svm_reg1.predict([[eps_x1]]) def plot_svm_regression(svm_reg, X, y, axes): x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1) y_pred = svm_reg.predict(x1s) plt.plot(x1s, y_pred, "k-", linewidth=2, label=r"$\hat{y}$") plt.plot(x1s, y_pred + svm_reg.epsilon, "k--") plt.plot(x1s, y_pred - svm_reg.epsilon, "k--") plt.scatter(X[svm_reg.support_], y[svm_reg.support_], s=180, facecolors='#FFAAAA') plt.plot(X, y, "bo") plt.xlabel(r"$x_1$", fontsize=18) plt.legend(loc="upper left", fontsize=18) plt.axis(axes) plt.figure(figsize=(9, 4)) plt.subplot(121) plot_svm_regression(svm_reg1, X, y, [0, 2, 3, 11]) plt.title(r"$\epsilon = {}$".format(svm_reg1.epsilon), fontsize=18) plt.ylabel(r"$y$", fontsize=18, rotation=0) #plt.plot([eps_x1, eps_x1], [eps_y_pred, eps_y_pred - svm_reg1.epsilon], "k-", linewidth=2) plt.annotate( '', xy=(eps_x1, eps_y_pred), xycoords='data', xytext=(eps_x1, eps_y_pred - svm_reg1.epsilon), textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5} ) plt.text(0.91, 5.6, r"$\epsilon$", fontsize=20) plt.subplot(122) plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11]) plt.title(r"$\epsilon = {}$".format(svm_reg2.epsilon), fontsize=18) save_fig("svm_regression_plot使用二阶多项式核的SVM回归") plt.show()
-
使用二阶多项式核的SVM回归
np.random.seed(42) m = 100 X = 2 * np.random.rand(m, 1) - 1 y = (0.2 + 0.1 * X + 0.5 * X**2 + np.random.randn(m, 1)/10).ravel() #SVR类是SVC类的回归等价物,LinearSVR类也是LinearSVC类的回归等价物。LinearSVR与训练集的大小线性相关 #(跟LinearSVC一样),而SVR则在训练集变大时,变得很慢(SVC也是一样)。 from sklearn.svm import SVR svm_poly_reg1 = SVR(kernel="poly", degree=2, C=100, epsilon=0.1, gamma="auto") svm_poly_reg2 = SVR(kernel="poly", degree=2, C=0.01, epsilon=0.1, gamma="auto") svm_poly_reg1.fit(X, y) svm_poly_reg2.fit(X, y) plt.figure(figsize=(9, 4)) plt.subplot(121) plot_svm_regression(svm_poly_reg1, X, y, [-1, 1, 0, 1]) plt.title(r"$degree={}, C={}, \epsilon = {}$".format(svm_poly_reg1.degree, svm_poly_reg1.C, svm_poly_reg1.epsilon), fontsize=18) plt.ylabel(r"$y$", fontsize=18, rotation=0) plt.subplot(122) plot_svm_regression(svm_poly_reg2, X, y, [-1, 1, 0, 1]) plt.title(r"$degree={}, C={}, \epsilon = {}$".format(svm_poly_reg2.degree, svm_poly_reg2.C, svm_poly_reg2.epsilon), fontsize=18) save_fig("svm_with_polynomial_kernel_plot使用二阶多项式核的SVM回归") plt.show()
-
鸢尾花数据集的决策函数
scaler = StandardScaler() svm_clf1 = LinearSVC(C=1, loss="hinge", random_state=42) svm_clf2 = LinearSVC(C=100, loss="hinge", random_state=42) scaled_svm_clf1 = Pipeline([ ("scaler", scaler), ("linear_svc", svm_clf1), ]) scaled_svm_clf2 = Pipeline([ ("scaler", scaler), ("linear_svc", svm_clf2), ]) scaled_svm_clf1.fit(X, y) scaled_svm_clf2.fit(X, y) # Convert to unscaled parameters b1 = svm_clf1.decision_function([-scaler.mean_ / scaler.scale_]) b2 = svm_clf2.decision_function([-scaler.mean_ / scaler.scale_]) w1 = svm_clf1.coef_[0] / scaler.scale_ w2 = svm_clf2.coef_[0] / scaler.scale_ svm_clf1.intercept_ = np.array([b1]) svm_clf2.intercept_ = np.array([b2]) svm_clf1.coef_ = np.array([w1]) svm_clf2.coef_ = np.array([w2]) # Find support vectors (LinearSVC does not do this automatically) t = y * 2 - 1 support_vectors_idx1 = (t * (X.dot(w1) + b1) < 1).ravel() support_vectors_idx2 = (t * (X.dot(w2) + b2) < 1).ravel() svm_clf1.support_vectors_ = X[support_vectors_idx1] svm_clf2.support_vectors_ = X[support_vectors_idx2] from sklearn import datasets iris = datasets.load_iris() X = iris["data"][:, (2, 3)] # petal length, petal width y = (iris["target"] == 2).astype(np.float64) # Iris-Virginica from mpl_toolkits.mplot3d import Axes3D def plot_3D_decision_function(ax, w, b, x1_lim=[4, 6], x2_lim=[0.8, 2.8]): x1_in_bounds = (X[:, 0] > x1_lim[0]) & (X[:, 0] < x1_lim[1]) X_crop = X[x1_in_bounds] y_crop = y[x1_in_bounds] x1s = np.linspace(x1_lim[0], x1_lim[1], 20) x2s = np.linspace(x2_lim[0], x2_lim[1], 20) x1, x2 = np.meshgrid(x1s, x2s) xs = np.c_[x1.ravel(), x2.ravel()] df = (xs.dot(w) + b).reshape(x1.shape) m = 1 / np.linalg.norm(w) boundary_x2s = -x1s*(w[0]/w[1])-b/w[1] margin_x2s_1 = -x1s*(w[0]/w[1])-(b-1)/w[1] margin_x2s_2 = -x1s*(w[0]/w[1])-(b+1)/w[1] ax.plot_surface(x1s, x2, np.zeros_like(x1), color="b", alpha=0.2, cstride=100, rstride=100) ax.plot(x1s, boundary_x2s, 0, "k-", linewidth=2, label=r"$h=0$") ax.plot(x1s, margin_x2s_1, 0, "k--", linewidth=2, label=r"$h=\pm 1$") ax.plot(x1s, margin_x2s_2, 0, "k--", linewidth=2) ax.plot(X_crop[:, 0][y_crop==1], X_crop[:, 1][y_crop==1], 0, "g^") ax.plot_wireframe(x1, x2, df, alpha=0.3, color="k") ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, "bs") ax.axis(x1_lim + x2_lim) ax.text(4.5, 2.5, 3.8, "Decision function $h$", fontsize=15) ax.set_xlabel(r"Petal length", fontsize=15) ax.set_ylabel(r"Petal width", fontsize=15) ax.set_zlabel(r"$h = \mathbf{w}^T \mathbf{x} + b$", fontsize=18) ax.legend(loc="upper left", fontsize=16) fig = plt.figure(figsize=(11, 6)) ax1 = fig.add_subplot(111, projection='3d') plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0]) #save_fig("iris_3D_plot鸢尾花数据集的决策函数") #plt.show()
-
权重向量越小,间隔越大
def plot_2D_decision_function(w, b, ylabel=True, x1_lim=[-3, 3]): x1 = np.linspace(x1_lim[0], x1_lim[1], 200) y = w * x1 + b m = 1 / w plt.plot(x1, y) plt.plot(x1_lim, [1, 1], "k:") plt.plot(x1_lim, [-1, -1], "k:") plt.axhline(y=0, color='k') plt.axvline(x=0, color='k') plt.plot([m, m], [0, 1], "k--") plt.plot([-m, -m], [0, -1], "k--") plt.plot([-m, m], [0, 0], "k-o", linewidth=3) plt.axis(x1_lim + [-2, 2]) plt.xlabel(r"$x_1$", fontsize=16) if ylabel: plt.ylabel(r"$w_1 x_1$ ", rotation=0, fontsize=16) plt.title(r"$w_1 = {}$".format(w), fontsize=16) plt.figure(figsize=(12, 3.2)) plt.subplot(121) plot_2D_decision_function(1, 0) plt.subplot(122) plot_2D_decision_function(0.5, 0, ylabel=False) save_fig("small_w_large_margin_plot") plt.show() ''' from sklearn.svm import SVC from sklearn import datasets iris = datasets.load_iris() X = iris["data"][:, (2, 3)] # petal length, petal width y = (iris["target"] == 2).astype(np.float64) # Iris-Virginica svm_clf = SVC(kernel="linear", C=1) svm_clf.fit(X, y) svm_clf.predict([[5.3, 1.3]]) #print(svm_clf.predict([[5.3, 1.3]])) #Hinge loss t = np.linspace(-2, 4, 200) h = np.where(1 - t < 0, 0, 1 - t) # max(0, 1-t) plt.figure(figsize=(5,2.8)) plt.plot(t, h, "b-", linewidth=2, label="$max(0, 1 - t)$") plt.grid(True, which='both') plt.axhline(y=0, color='k') plt.axvline(x=0, color='k') plt.yticks(np.arange(-1, 2.5, 1)) plt.xlabel("$t$", fontsize=16) plt.axis([-2, 4, -1, 2.5]) plt.legend(loc="upper right", fontsize=16) save_fig("hinge_plot") plt.show()
来源:https://www.cnblogs.com/kissnow/p/12588546.html