Issue with simple CAE


Question


It looks like a simple CAE does not work for the Carvana dataset. I'm trying a simple CAE on the Carvana dataset; you can download it here.

My code is as follows:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import downscale_local_mean
from skimage.color import rgb2grey
from os.path import join, isfile
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
from keras.optimizers import Adam
from random import randint
import hickle as hkl
import dill

class Data(object):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

input_folder = join('..', 'input')

print('Path:',input_folder)

data_file_name = 'datafile.pkl'

df_mask = pd.read_csv(join(input_folder, 'train_masks.csv'), usecols=['img'])        
load_img = lambda im, idx: imread(join(input_folder, 'train', '{}_{:02d}.jpg'.format(im, idx)))
load_mask = lambda im, idx: imread(join(input_folder, 'train_masks', '{}_{:02d}_mask.gif'.format(im, idx)))

ids_train = df_mask['img'].map(lambda s: s.split('_')[0]).unique()
imgs_idx = list(range(1, 17))

resize = lambda im: downscale_local_mean(im, (4,4) if im.ndim==2 else (4,4,1))
mask_image = lambda im, mask: (im * np.expand_dims(mask, 2))

num_train = 48  # len(ids_train) for the full training set

if isfile(data_file_name):
    #with open(data_file_name, 'rb') as f:
    data = hkl.load(data_file_name)
    X = data.X
    y = data.Y  # the Data class stores the masks under the attribute Y

else:
    X = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    y = np.empty((num_train, 320, 480, 1), dtype=np.float32)

    with tqdm_notebook(total=num_train) as bar:
        idx = 1 # Rotation index
        for i, img_id in enumerate(ids_train[:num_train]):
            imgs_id = [resize(load_img(img_id, j)) for j in imgs_idx]
            greyscale = rgb2grey(imgs_id[idx-1]) / 255
            greyscale = np.expand_dims(greyscale, 2)
            X[i] = greyscale
            y_processed = resize(np.expand_dims(load_mask(img_id, idx), 2)) / 255.
            y[i] = y_processed
            del imgs_id # Free memory
            bar.update()

    #data = Data(X, y)
    #with open(data_file_name, 'w+') as f:
    #hkl.dump(data, data_file_name)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)

y_train_mean = y_train.mean(axis=0)
y_train_std = y_train.std(axis=0)
y_train_min = y_train.min(axis=0)

y_features = np.concatenate([y_train_mean, y_train_std, y_train_min], axis=2)

inp = Input((320, 480, 1))
conv1 = Conv2D(64, 3, activation='relu', padding='same')(inp)
max1 = MaxPooling2D(2)(conv1)
conv2 = Conv2D(48, 5, activation='relu', padding='same')(max1)
max2 = MaxPooling2D(2)(conv2)
conv3 = Conv2D(32, 7, activation='relu', padding='same')(max2)

deconv3 = Conv2DTranspose(32, 7, strides=4, activation='relu', padding='same')(conv3)
deconv2 = Conv2DTranspose(48, 5, strides=2, activation='relu', padding='same')(conv2)

deconvs = concatenate([conv1, deconv2, deconv3])

out = Conv2D(1, 7, activation='sigmoid', padding='same')(deconvs)

model = Model(inp, out)
model.summary()

smooth = 1.

# From here: https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)


def bce_dice_loss(y_true, y_pred):
    return 0.5 * K.binary_crossentropy(y_true, y_pred) - dice_coef(y_true, y_pred)

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])

model.load_weights(cae_filepath)

filepath="weights-improvement2_lre-5-{epoch:02d}-{val_acc:.5f}-{val_dice_coef:.5f}.hdf5"
mcp = ModelCheckpoint(filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')

history = model.fit(X_train, y_train, epochs=1000, validation_data=(X_val, y_val), batch_size=22, verbose=2, callbacks=[mcp])

idxs = [0, X_val.shape[0] // 2, randint(1, X_val.shape[0] - 1)]  # floor division keeps the index an integer

for idx in idxs:
    print('Index:', idx)
    x = X_val[idx]

    fig, ax = plt.subplots(3,3, figsize=(16, 16))
    ax = ax.ravel()

    cmaps = ['Reds', 'Greens', 'Blues']
    for i in range(x.shape[-1]):
        ax[i].imshow(x[..., i], cmap='gray')  # or cmaps[i % 3]
        ax[i].set_title('channel {}'.format(i))

    ax[-8].imshow(y_val[idx,...,0], cmap='gray')
    ax[-8].set_title('y')

    y_pred = model.predict(x[None]).squeeze()
    ax[-7].imshow(y_pred, cmap='gray')
    ax[-7].set_title('y_pred')

    # Threshold the prediction after Gaussian smoothing with increasing sigma
    for sigma in range(1, 7):
        ax[-7 + sigma].imshow(gaussian_filter(y_pred, sigma) > 0.5, cmap='gray')
        ax[-7 + sigma].set_title(str(sigma))

It works fine without pre-training; you can check this by commenting out these lines:

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])

model.load_weights(cae_filepath)

However, when I pre-train the autoencoder to reconstruct the original images, I get no accuracy improvement, only an improvement in the dice coefficient. Moreover, when I then take the pre-trained autoencoder and train it to predict the masks, I get a different result: accuracy gets stuck at 0.8374 and the dice coefficient degrades from 0.11864 initially down to 7.5781e-04.

Pre-training the model with an autoencoder should increase accuracy. In my experience it improves accuracy to 99.62% on the full MNIST dataset with a simple CAE.

Also, I looked into the data to make sure it has the same nature in both cases (you can see this from the temporary variables used for debugging in the code).
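
Roughly the kind of check I mean (a sketch, not the exact debugging code, just the statistics I compared):

# Illustrative sanity check: inputs and masks should have the same range and
# nature whether or not the autoencoder pre-training step is run.
print('X_train: shape={}, min={:.4f}, max={:.4f}, mean={:.4f}'.format(
    X_train.shape, X_train.min(), X_train.max(), X_train.mean()))
print('y_train: shape={}, min={:.4f}, max={:.4f}, mean={:.4f}'.format(
    y_train.shape, y_train.min(), y_train.max(), y_train.mean()))
print('X_val:   shape={}, min={:.4f}, max={:.4f}, mean={:.4f}'.format(
    X_val.shape, X_val.min(), X_val.max(), X_val.mean()))
print('y_val:   shape={}, min={:.4f}, max={:.4f}, mean={:.4f}'.format(
    y_val.shape, y_val.min(), y_val.max(), y_val.mean()))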

In the second case, my idea is that the problem may be caused by the fact that we load not only the encoder's weights but also the decoder's, and that could cause an issue during training.
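
One way around that would be to transfer only the encoder weights from the pre-trained CAE into a freshly built model. A rough sketch of what I mean (rebuild_model() is a hypothetical helper that re-creates the architecture above, and the layer count is an assumption to verify against model.summary()):

# Sketch (assumption-laden): copy only the encoder weights from the pre-trained
# CAE into a fresh model, leaving the decoder randomly initialized.
cae = rebuild_model()                    # hypothetical helper re-creating the model above
cae.load_weights(cae_filepath)

seg_model = rebuild_model()              # fresh (random) weights everywhere
n_encoder_layers = 6                     # Input, conv1, max1, conv2, max2, conv3 (check model.summary())
for src, dst in zip(cae.layers[:n_encoder_layers], seg_model.layers[:n_encoder_layers]):
    dst.set_weights(src.get_weights())

seg_model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])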

After resetting the decoder's weights I saw almost the same picture for some time, but after the 49th iteration the process reached a turning point and training became effective:
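
The reset itself can be done roughly like this (a sketch, not my exact code; the layer slice and the choice of initializers are assumptions, so check model.summary() for where the decoder actually starts):

from keras.initializers import glorot_uniform

# Sketch: re-initialize only the decoder part of the already-loaded model,
# keeping the pre-trained encoder weights. The slice [6:] is an assumption.
initializer = glorot_uniform()
for layer in model.layers[6:]:
    new_weights = []
    for w in layer.get_weights():
        if w.ndim > 1:                   # kernels: re-sample with Glorot uniform
            new_weights.append(K.eval(initializer(w.shape)))
        else:                            # biases: reset to zeros
            new_weights.append(np.zeros_like(w))
    if new_weights:
        layer.set_weights(new_weights)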

However, I have no clue why accuracy does not increase during autoencoder training despite the dice coefficient improving; probably something is wrong with my code or with the frameworks I'm using.

Additional info:

My environment:

  • Ubuntu 16.04
  • Python 2.7
  • Theano 0.10
  • Keras 2.0.8

Structure: see the model.summary() output above.

Any suggestions will be appreciated

Source: https://stackoverflow.com/questions/46921246/issue-with-simple-cae
