Original source: https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/4.4-overfitting-and-underfitting.ipynb
Overfitting and underfitting
The process of fighting overfitting in this way is called regularization. Let's review some of the most common regularization techniques and apply them in practice to improve the movie-classification model from the previous chapter. First, we prepare the IMDB data as before:
```python
from keras.datasets import imdb
import numpy as np

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

def vectorize_sequences(sequences, dimension=10000):
    # Create an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set specific indices of results[i] to 1s
    return results

# Our vectorized training data
x_train = vectorize_sequences(train_data)
# Our vectorized test data
x_test = vectorize_sequences(test_data)

# Our vectorized labels
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
```
Fighting overfitting

Reducing the network's size

The simplest way to prevent overfitting is to reduce the size of the model, i.e. the number of learnable parameters (determined by the number of layers and the number of units per layer).
Let's try this on our movie-review classification network. Our original network looked like this:
```python
from keras import models
from keras import layers

original_model = models.Sequential()
original_model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
original_model.add(layers.Dense(16, activation='relu'))
original_model.add(layers.Dense(1, activation='sigmoid'))

original_model.compile(optimizer='rmsprop',
                       loss='binary_crossentropy',
                       metrics=['acc'])
```
Now let's try replacing it with this smaller network:

```python
smaller_model = models.Sequential()
smaller_model.add(layers.Dense(4, activation='relu', input_shape=(10000,)))
smaller_model.add(layers.Dense(4, activation='relu'))
smaller_model.add(layers.Dense(1, activation='sigmoid'))

smaller_model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['acc'])
```
Let's train both networks with the same settings:

```python
original_hist = original_model.fit(x_train, y_train,
                                   epochs=20,
                                   batch_size=512,
                                   validation_data=(x_test, y_test))

smaller_model_hist = smaller_model.fit(x_train, y_train,
                                       epochs=20,
                                       batch_size=512,
                                       validation_data=(x_test, y_test))
```
```python
import matplotlib.pyplot as plt

epochs = range(1, 21)
original_val_loss = original_hist.history['val_loss']
smaller_model_val_loss = smaller_model_hist.history['val_loss']

# 'b+' is for "blue cross"
plt.plot(epochs, original_val_loss, 'b+', label='Original model')
# 'bo' is for "blue dot"
plt.plot(epochs, smaller_model_val_loss, 'bo', label='Smaller model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
```
For contrast, here is a much bigger network, with far more capacity than the problem warrants:

```python
bigger_model = models.Sequential()
bigger_model.add(layers.Dense(512, activation='relu', input_shape=(10000,)))
bigger_model.add(layers.Dense(512, activation='relu'))
bigger_model.add(layers.Dense(1, activation='sigmoid'))

bigger_model.compile(optimizer='rmsprop',
                     loss='binary_crossentropy',
                     metrics=['acc'])
```
```python
bigger_model_hist = bigger_model.fit(x_train, y_train,
                                     epochs=20,
                                     batch_size=512,
                                     validation_data=(x_test, y_test))
```
```python
bigger_model_val_loss = bigger_model_hist.history['val_loss']

plt.plot(epochs, original_val_loss, 'b+', label='Original model')
plt.plot(epochs, bigger_model_val_loss, 'bo', label='Bigger model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
```
```python
original_train_loss = original_hist.history['loss']
bigger_model_train_loss = bigger_model_hist.history['loss']

plt.plot(epochs, original_train_loss, 'b+', label='Original model')
plt.plot(epochs, bigger_model_train_loss, 'bo', label='Bigger model')
plt.xlabel('Epochs')
plt.ylabel('Training loss')
plt.legend()
plt.show()
```
Adding weight regularization
In Keras, weight regularizers are passed to layers as keyword arguments. Let's add L2 weight regularization to the movie-review network:

```python
from keras import regularizers

l2_model = models.Sequential()
l2_model.add(layers.Dense(16, kernel_regularizer=regularizers.l2(0.001),
                          activation='relu', input_shape=(10000,)))
l2_model.add(layers.Dense(16, kernel_regularizer=regularizers.l2(0.001),
                          activation='relu'))
l2_model.add(layers.Dense(1, activation='sigmoid'))

l2_model.compile(optimizer='rmsprop',
                 loss='binary_crossentropy',
                 metrics=['acc'])
```
```python
l2_model_hist = l2_model.fit(x_train, y_train,
                             epochs=20,
                             batch_size=512,
                             validation_data=(x_test, y_test))
```
```python
l2_model_val_loss = l2_model_hist.history['val_loss']

plt.plot(epochs, original_val_loss, 'b+', label='Original model')
plt.plot(epochs, l2_model_val_loss, 'bo', label='L2-regularized model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
```
As alternatives to L2 regularization, Keras also provides L1 regularization and combined L1/L2 regularization:

```python
from keras import regularizers

# L1 regularization
regularizers.l1(0.001)

# L1 and L2 regularization at the same time
regularizers.l1_l2(l1=0.001, l2=0.001)
```
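These regularizer instances are passed to layers via the `kernel_regularizer` argument, exactly as in the L2 example above. As a minimal sketch (the `l1_model` name and layer sizes here are illustrative, not from the original notebook):

```python
from keras import models, layers, regularizers

# Hypothetical variant of the model above, using L1 instead of L2 regularization
l1_model = models.Sequential()
l1_model.add(layers.Dense(16, kernel_regularizer=regularizers.l1(0.001),
                          activation='relu', input_shape=(10000,)))
l1_model.add(layers.Dense(16, kernel_regularizer=regularizers.l1(0.001),
                          activation='relu'))
l1_model.add(layers.Dense(1, activation='sigmoid'))

l1_model.compile(optimizer='rmsprop',
                 loss='binary_crossentropy',
                 metrics=['acc'])
```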
Adding dropout

Dropout, applied to a layer, consists of randomly zeroing out a fraction of the layer's output features during training. In NumPy terms:
```python
# At training time: we drop out 50% of the units in the output
layer_output *= np.random.randint(0, high=2, size=layer_output.shape)
```
```python
# At test time: scale the output down by the dropout rate
layer_output *= 0.5
```
```python
# At training time:
layer_output *= np.random.randint(0, high=2, size=layer_output.shape)
# Note that we are scaling *up* rather than scaling *down* in this case
layer_output /= 0.5
```
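To see this concretely, here is a small self-contained NumPy sketch of the inverted-dropout trick; the array values and seed are made up for illustration:

```python
import numpy as np

np.random.seed(0)  # for reproducibility of this illustration

# A made-up batch of activations: 2 samples, 4 units
layer_output = np.array([[0.5, 1.2, 0.3, 0.8],
                         [0.9, 0.1, 1.5, 0.4]])

# Inverted dropout at training time: zero out units with probability 0.5,
# then scale up by 1/0.5 so the expected activation magnitude is unchanged
mask = np.random.randint(0, high=2, size=layer_output.shape)
dropped = layer_output * mask / 0.5

print(dropped)  # dropped entries are zero, surviving entries are doubled

# At test time nothing needs to be done: the layer output is used as-is
```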
In Keras, you introduce dropout via the Dropout layer, which is applied to the output of the layer right before it:

```python
model.add(layers.Dropout(0.5))
```
Let's add two Dropout layers to the IMDB network and see how well it does at reducing overfitting:

```python
dpt_model = models.Sequential()
dpt_model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
dpt_model.add(layers.Dropout(0.5))
dpt_model.add(layers.Dense(16, activation='relu'))
dpt_model.add(layers.Dropout(0.5))
dpt_model.add(layers.Dense(1, activation='sigmoid'))

dpt_model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc'])
```
```python
dpt_model_hist = dpt_model.fit(x_train, y_train,
                               epochs=20,
                               batch_size=512,
                               validation_data=(x_test, y_test))
```
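Following the same plotting pattern as the earlier comparisons, we can check how dropout affects validation loss (this sketch reuses `epochs`, `original_val_loss`, and `plt` from above):

```python
dpt_model_val_loss = dpt_model_hist.history['val_loss']

plt.plot(epochs, original_val_loss, 'b+', label='Original model')
plt.plot(epochs, dpt_model_val_loss, 'bo', label='Dropout-regularized model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()
```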