Implementing the Training and Validation of ZFNet
ZFNet
ZFNet makes only modest architectural changes to AlexNet; its real significance is that it visualizes the internal parameters of a convolutional neural network, making it possible to see what each layer learns. The training in this post uses the CIFAR-10 dataset.
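The most visible architectural change is in layer 1: AlexNet's 11x11, stride-4 filters become 7x7 with stride 2, which keeps more fine detail in the first feature maps. A minimal sketch of the output-size arithmetic for a VALID convolution (the helper name is mine, for illustration only):

def conv_output_size(input_size, filter_size, stride):
    # VALID padding: floor((n - k) / s) + 1
    return (input_size - filter_size) // stride + 1

print(conv_output_size(224, 11, 4))  # AlexNet layer 1: 54 (ignoring AlexNet's input-padding quirks)
print(conv_output_size(224, 7, 2))   # ZFNet layer 1: 109, a larger and finer feature map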
Network architecture (zfnet.py)
import tensorflow as tf


class ZFNet:
    def __init__(self, input_width=224, input_height=224, input_channels=3, num_classes=1000, learning_rate=0.01,
                 momentum=0.9, keep_prob=0.5):
        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.keep_prob = keep_prob

        # Weights are initialized from a zero-mean Gaussian with stddev 0.01.
        self.random_mean = 0
        self.random_stddev = 0.01

        with tf.name_scope('input'):
            self.X = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.input_height, self.input_width, self.input_channels], name='X')

        with tf.name_scope('labels'):
            self.Y = tf.placeholder(dtype=tf.float32, shape=[None, self.num_classes], name='Y')

        with tf.name_scope('dropout'):
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='dropout_keep_prob')
        # Layers 1-2: ZFNet's main departure from AlexNet is here: 7x7 stride-2 filters
        # in layer 1 (instead of 11x11 stride-4) and stride-2 convolutions in layer 2.
        with tf.name_scope('layer1'):
            layer1_activations = self.__conv(input=self.X, filter_width=7, filter_height=7, filters_count=96,
                                             stride_x=2, stride_y=2, padding='VALID')
            layer1_pool = self.__max_pool(input=layer1_activations, filter_width=3, filter_height=3, stride_x=2,
                                          stride_y=2, padding='VALID')

        with tf.name_scope('layer2'):
            layer2_activations = self.__conv(input=layer1_pool, filter_width=5, filter_height=5, filters_count=256,
                                             stride_x=2, stride_y=2, padding='VALID')
            layer2_pool = self.__max_pool(input=layer2_activations, filter_width=3, filter_height=3, stride_x=2,
                                          stride_y=2, padding='VALID')

        with tf.name_scope('layer3'):
            layer3_activations = self.__conv(input=layer2_pool, filter_width=3, filter_height=3, filters_count=384,
                                             stride_x=1, stride_y=1, padding='SAME')

        with tf.name_scope('layer4'):
            layer4_activations = self.__conv(input=layer3_activations, filter_width=3, filter_height=3,
                                             filters_count=384, stride_x=1, stride_y=1, padding='SAME')

        with tf.name_scope('layer5'):
            layer5_activations = self.__conv(input=layer4_activations, filter_width=3, filter_height=3,
                                             filters_count=256, stride_x=1, stride_y=1, padding='SAME')
            layer5_pool = self.__max_pool(input=layer5_activations, filter_width=3, filter_height=3, stride_x=2,
                                          stride_y=2, padding='VALID')

        # Layers 6-8: flatten, then three fully connected layers; the last one produces the logits.
        with tf.name_scope('layer6'):
            pool5_shape = layer5_pool.get_shape().as_list()
            flattened_input_size = pool5_shape[1] * pool5_shape[2] * pool5_shape[3]
            layer6_fc = self.__fully_connected(input=tf.reshape(layer5_pool, shape=[-1, flattened_input_size]),
                                               inputs_count=flattened_input_size, outputs_count=4096, relu=True)
            layer6_dropout = self.__dropout(input=layer6_fc)

        with tf.name_scope('layer7'):
            layer7_fc = self.__fully_connected(input=layer6_dropout, inputs_count=4096, outputs_count=4096, relu=True)
            layer7_dropout = self.__dropout(input=layer7_fc)

        with tf.name_scope('layer8'):
            layer8_logits = self.__fully_connected(input=layer7_dropout, inputs_count=4096,
                                                   outputs_count=self.num_classes, relu=False, name='logits')
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer8_logits, labels=self.Y,
                                                                       name='cross_entropy')
            self.__variable_summaries(cross_entropy)

        with tf.name_scope('training'):
            loss_operation = tf.reduce_mean(cross_entropy, name='loss_operation')
            tf.summary.scalar(name='loss', tensor=loss_operation)

            optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=self.momentum)
            grads_and_vars = optimizer.compute_gradients(loss_operation)
            self.training_operation = optimizer.apply_gradients(grads_and_vars, name='training_operation')

            # Log gradient statistics to TensorBoard as well.
            for grad, var in grads_and_vars:
                if grad is not None:
                    with tf.name_scope(var.op.name + '/gradients'):
                        self.__variable_summaries(grad)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(layer8_logits, 1), tf.argmax(self.Y, 1), name='correct_prediction')
            self.accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy_operation')
            tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
    def train_epoch(self, sess, X_data, Y_data, batch_size=128, file_writer=None, summary_operation=None,
                    epoch_number=None):
        num_examples = len(X_data)
        step = 0
        for offset in range(0, num_examples, batch_size):
            end = offset + batch_size
            batch_x, batch_y = X_data[offset:end], Y_data[offset:end]
            if file_writer is not None and summary_operation is not None:
                _, summary = sess.run([self.training_operation, summary_operation],
                                      feed_dict={self.X: batch_x, self.Y: batch_y,
                                                 self.dropout_keep_prob: self.keep_prob})
                file_writer.add_summary(summary, epoch_number * (num_examples // batch_size + 1) + step)
                step += 1
            else:
                sess.run(self.training_operation, feed_dict={self.X: batch_x, self.Y: batch_y,
                                                             self.dropout_keep_prob: self.keep_prob})
    def evaluate(self, sess, X_data, Y_data, batch_size=128):
        num_examples = len(X_data)
        total_accuracy = 0
        for offset in range(0, num_examples, batch_size):
            end = offset + batch_size
            batch_x, batch_y = X_data[offset:end], Y_data[offset:end]
            # Dropout is disabled (keep_prob = 1.0) during evaluation.
            batch_accuracy = sess.run(self.accuracy_operation, feed_dict={self.X: batch_x, self.Y: batch_y,
                                                                          self.dropout_keep_prob: 1.0})
            total_accuracy += (batch_accuracy * len(batch_x))
        return total_accuracy / num_examples

    def save(self, sess, file_name):
        saver = tf.train.Saver()
        saver.save(sess, file_name)

    def restore(self, sess, checkpoint_dir):
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
    def __variable_summaries(self, var):
        mean = tf.reduce_mean(var)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('mean', mean)
        tf.summary.scalar('stddev', stddev)
        tf.summary.histogram('histogram', var)

    def __initial_weight_values(self, shape):
        return tf.random_normal(shape=shape, mean=self.random_mean, stddev=self.random_stddev, dtype=tf.float32)

    def __conv(self, input, filter_width, filter_height, filters_count, stride_x, stride_y, padding='VALID',
               name='conv'):
        with tf.name_scope(name):
            input_channels = input.get_shape()[-1].value
            filters = tf.Variable(
                self.__initial_weight_values(shape=[filter_height, filter_width, input_channels, filters_count]),
                name='filters')
            convs = tf.nn.conv2d(input=input, filter=filters, strides=[1, stride_y, stride_x, 1], padding=padding,
                                 name='convs')
            biases = tf.Variable(tf.zeros(shape=[filters_count], dtype=tf.float32), name='biases')
            preactivations = tf.nn.bias_add(convs, biases, name='preactivations')
            activations = tf.nn.relu(preactivations, name='activations')

            with tf.name_scope('filter_summaries'):
                self.__variable_summaries(filters)
            with tf.name_scope('bias_summaries'):
                self.__variable_summaries(biases)
            with tf.name_scope('preactivations_histogram'):
                tf.summary.histogram('preactivations', preactivations)
            with tf.name_scope('activations_histogram'):
                tf.summary.histogram('activations', activations)

            return activations

    def __max_pool(self, input, filter_width, filter_height, stride_x, stride_y, padding='VALID', name='pool'):
        with tf.name_scope(name):
            pool = tf.nn.max_pool(input, ksize=[1, filter_height, filter_width, 1], strides=[1, stride_y, stride_x, 1],
                                  padding=padding, name='pool')
            return pool
    def __fully_connected(self, input, inputs_count, outputs_count, relu=True, name='fully_connected'):
        with tf.name_scope(name):
            weights = tf.Variable(self.__initial_weight_values(shape=[inputs_count, outputs_count]), name='weights')
            biases = tf.Variable(tf.zeros(shape=[outputs_count], dtype=tf.float32), name='biases')
            preactivations = tf.nn.bias_add(tf.matmul(input, weights), biases, name='preactivations')
            if relu:
                activations = tf.nn.relu(preactivations, name='activations')

            with tf.name_scope('weight_summaries'):
                self.__variable_summaries(weights)
            with tf.name_scope('bias_summaries'):
                self.__variable_summaries(biases)
            with tf.name_scope('preactivations_histogram'):
                tf.summary.histogram('preactivations', preactivations)
            if relu:
                with tf.name_scope('activations_histogram'):
                    tf.summary.histogram('activations', activations)

            if relu:
                return activations
            else:
                return preactivations

    def __dropout(self, input, name='dropout'):
        with tf.name_scope(name):
            return tf.nn.dropout(input, keep_prob=self.dropout_keep_prob, name='dropout')
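The class above is written against the TensorFlow 1.x graph API (tf.placeholder, tf.Session, tf.train.MomentumOptimizer). If only TensorFlow 2.x is available, one option, untested here, is the v1 compatibility shim:

# Assumed replacement for 'import tensorflow as tf' when running under TF 2.x:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()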
Data-loading code (dataset_helper.py)
import pickle

import numpy as np
import scipy.misc


def __unpickle(file):
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch


def read_cifar_10(image_width, image_height):
    batch_1 = __unpickle('./cifar-10/data_batch_1')
    batch_2 = __unpickle('./cifar-10/data_batch_2')
    batch_3 = __unpickle('./cifar-10/data_batch_3')
    batch_4 = __unpickle('./cifar-10/data_batch_4')
    batch_5 = __unpickle('./cifar-10/data_batch_5')
    test_batch = __unpickle('./cifar-10/test_batch')

    classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    total_train_samples = len(batch_1[b'labels']) + len(batch_2[b'labels']) + len(batch_3[b'labels'])\
                          + len(batch_4[b'labels']) + len(batch_5[b'labels'])

    X_train = np.zeros(shape=[total_train_samples, image_width, image_height, 3], dtype=np.uint8)
    Y_train = np.zeros(shape=[total_train_samples, len(classes)], dtype=np.float32)

    batches = [batch_1, batch_2, batch_3, batch_4, batch_5]
    index = 0
    for batch in batches:
        for i in range(len(batch[b'labels'])):
            # CIFAR-10 stores each image as a flat 3072-byte row in CHW order; convert to HWC.
            image = batch[b'data'][i].reshape(3, 32, 32).transpose([1, 2, 0])
            label = batch[b'labels'][i]

            # Upscale from 32x32 to the network's input size (requires SciPy < 1.3;
            # see the Pillow-based alternative after this listing).
            X = scipy.misc.imresize(image, size=(image_height, image_width), interp='bicubic')
            Y = np.zeros(shape=[len(classes)], dtype=np.float32)  # one-hot label
            Y[label] = 1

            X_train[index + i] = X
            Y_train[index + i] = Y
        index += len(batch[b'labels'])

    total_test_samples = len(test_batch[b'labels'])

    X_test = np.zeros(shape=[total_test_samples, image_width, image_height, 3], dtype=np.uint8)
    Y_test = np.zeros(shape=[total_test_samples, len(classes)], dtype=np.float32)

    for i in range(len(test_batch[b'labels'])):
        image = test_batch[b'data'][i].reshape(3, 32, 32).transpose([1, 2, 0])
        label = test_batch[b'labels'][i]

        X = scipy.misc.imresize(image, size=(image_height, image_width), interp='bicubic')
        Y = np.zeros(shape=[len(classes)], dtype=np.float32)
        Y[label] = 1

        X_test[i] = X
        Y_test[i] = Y

    return X_train, Y_train, X_test, Y_test
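A caveat: scipy.misc.imresize was deprecated and then removed in SciPy 1.3, so the loader above only runs on older SciPy versions. A drop-in sketch using Pillow instead (assuming Pillow is installed; note that PIL's resize takes (width, height)):

import numpy as np
from PIL import Image

def resize_bicubic(image, width, height):
    # Intended as a stand-in for scipy.misc.imresize(image, size=(height, width), interp='bicubic')
    return np.array(Image.fromarray(image).resize((width, height), Image.BICUBIC))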
Training code (train.py)
import tensorflow as tf

from zfnet import ZFNet
from dataset_helper import read_cifar_10

INPUT_WIDTH = 80
INPUT_HEIGHT = 80
INPUT_CHANNELS = 3

NUM_CLASSES = 10

LEARNING_RATE = 0.001   # Original value: 0.01
MOMENTUM = 0.9
KEEP_PROB = 0.5
EPOCHS = 1              # A single epoch is only a smoke test; increase for real training.
BATCH_SIZE = 128

print('Reading CIFAR-10...')
X_train, Y_train, X_test, Y_test = read_cifar_10(image_width=INPUT_WIDTH, image_height=INPUT_HEIGHT)

zfnet = ZFNet(input_width=INPUT_WIDTH, input_height=INPUT_HEIGHT, input_channels=INPUT_CHANNELS,
              num_classes=NUM_CLASSES, learning_rate=LEARNING_RATE, momentum=MOMENTUM, keep_prob=KEEP_PROB)

with tf.Session() as sess:
    print('Training dataset...')
    print()

    file_writer = tf.summary.FileWriter(logdir='./log', graph=sess.graph)
    summary_operation = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())

    for i in range(EPOCHS):
        print('Calculating accuracies...')
        train_accuracy = zfnet.evaluate(sess, X_train, Y_train, BATCH_SIZE)
        test_accuracy = zfnet.evaluate(sess, X_test, Y_test, BATCH_SIZE)

        print('Train Accuracy = {:.3f}'.format(train_accuracy))
        print('Test Accuracy = {:.3f}'.format(test_accuracy))
        print()

        print('Training epoch', i + 1, '...')
        zfnet.train_epoch(sess, X_train, Y_train, BATCH_SIZE, file_writer, summary_operation, i)
        print()

    final_train_accuracy = zfnet.evaluate(sess, X_train, Y_train, BATCH_SIZE)
    final_test_accuracy = zfnet.evaluate(sess, X_test, Y_test, BATCH_SIZE)
    print('Final Train Accuracy = {:.3f}'.format(final_train_accuracy))
    print('Final Test Accuracy = {:.3f}'.format(final_test_accuracy))
    print()

    zfnet.save(sess, './model/zfnet')
    print('Model saved.')
    print()

    print('Training done successfully.')
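One practical note: tf.train.Saver.save does not create missing parent directories (the ./log directory, by contrast, is created by FileWriter), so the ./model directory should exist before the final save. For example:

import os
os.makedirs('./model', exist_ok=True)  # avoid a save-time error if the directory is missing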
Evaluation code (evaluate.py)
import tensorflow as tf

from zfnet import ZFNet
from dataset_helper import read_cifar_10

INPUT_WIDTH = 80
INPUT_HEIGHT = 80
INPUT_CHANNELS = 3

NUM_CLASSES = 10

# The hyperparameters below are only needed to rebuild the same graph before restoring weights.
LEARNING_RATE = 0.001   # Original value: 0.01
MOMENTUM = 0.9
KEEP_PROB = 0.5
BATCH_SIZE = 128

print('Reading CIFAR-10...')
X_train, Y_train, X_test, Y_test = read_cifar_10(image_width=INPUT_WIDTH, image_height=INPUT_HEIGHT)

zfnet = ZFNet(input_width=INPUT_WIDTH, input_height=INPUT_HEIGHT, input_channels=INPUT_CHANNELS,
              num_classes=NUM_CLASSES, learning_rate=LEARNING_RATE, momentum=MOMENTUM, keep_prob=KEEP_PROB)

with tf.Session() as sess:
    print('Evaluating dataset...')
    print()

    sess.run(tf.global_variables_initializer())

    print('Loading model...')
    print()
    zfnet.restore(sess, './model')

    print('Evaluating...')
    train_accuracy = zfnet.evaluate(sess, X_train, Y_train, BATCH_SIZE)
    test_accuracy = zfnet.evaluate(sess, X_test, Y_test, BATCH_SIZE)

    print('Train Accuracy = {:.3f}'.format(train_accuracy))
    print('Test Accuracy = {:.3f}'.format(test_accuracy))
    print()
How to train
1. Download the CIFAR-10 dataset, place it in the same directory as the four .py files, and rename the folder to "cifar-10" (see the download sketch after this list).
2. Run "train.py" to start training.
3. After training finishes, run "evaluate.py" to test the model.
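For step 1, the Python version of the dataset is distributed from the official CIFAR-10 page; a minimal download-and-rename sketch (the rename matches the ./cifar-10 path hard-coded in dataset_helper.py):

import os
import tarfile
import urllib.request

URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'

if not os.path.isdir('./cifar-10'):
    urllib.request.urlretrieve(URL, 'cifar-10-python.tar.gz')
    with tarfile.open('cifar-10-python.tar.gz') as tar:
        tar.extractall('.')
    os.rename('cifar-10-batches-py', 'cifar-10')  # the directory name inside the archive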
Source: CSDN
Author: 邢家源
Link: https://blog.csdn.net/weixin_42195319/article/details/100050093