Fitting array to datagen then passing as parameter to Keras Classifier

问题

I have a convolutional neural network that I am using to classify cats and dogs with KerasClassifier. I had to use custom cross-validation because of how my data is organized: I have n groups of different breeds of cats and dogs, and each breed has 200 images, 600 images per pet class. What I am trying to do now is create augmented data (on-the-fly/in-place data augmentation) and concatenate it to my original array of groups. However, I get the following error when I try to iterate through datagen:

TypeError: __init__() got an unexpected keyword argument 'y'

For this attempt:

# Attempt that produces the TypeError quoted above: the image array, the
# labels and the batch/save options are all handed to the ImageDataGenerator
# constructor.
# NOTE(review): in the Keras API the constructor takes only the augmentation
# settings (rotation_range, zoom_range, ...); the data, `y`, `batch_size` and
# the `save_*` options are arguments of `ImageDataGenerator.flow()` — confirm
# against the installed Keras version.
# NOTE(review): range(0, 2) visits only indices 0 and 1, and `datagen` is
# rebound on every pass, so nothing from an earlier iteration is kept.
for i in range(0, 2):
    datagen = ImageDataGenerator(
    groups[i],
    y=labels[i],
    batch_size=32,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    rotation_range=20,
    zoom_range=0.2
    )

I am using custom cross validation to check for the best weights and passing it as parameter to KerasClassifier function like this:

# Merge the three groups along the sample axis; the image array also gets a
# trailing singleton axis.
X = np.concatenate([group_1, group_2, group_3], axis=0)[..., np.newaxis]
y = np.concatenate([y_g_1, y_g_2, y_g_3], axis=0)


def n_fold_cv():
    """Yield one ``(train_idx, test_idx)`` pair per group.

    The groups occupy consecutive index ranges of the concatenated ``y``.
    Each fold tests on one group and trains on the remaining ones, so a
    model is never scored on the samples it was fitted on.
    """
    lengths = [0] + list(accumulate(map(len, [y_g_1, y_g_2, y_g_3])))
    all_idx = np.arange(lengths[-1], dtype=int)
    for start, stop in zip(lengths, lengths[1:]):
        test_idx = all_idx[start:stop]
        train_idx = np.concatenate((all_idx[:start], all_idx[stop:]))
        yield train_idx, test_idx


keras_clf = KerasClassifier(build_fn=build_model, epochs=100, batch_size=8, verbose=0)
accuracies = cross_val_score(estimator=keras_clf, scoring="accuracy", X=X, y=y, cv=n_fold_cv())
print(accuracies)

Where i have group 1, group 2 and group 3. I split them into k-folds with my custom k-fold function, before the concatenation.

I would like to augment each group once, before concatenating the groups and doing cross-validation. I haven't really grasped how to combine data augmentation with KerasClassifier.

Public kaggle notebook with dataset:

Just code, with dataset:

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
import numpy as np
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
from itertools import accumulate
import tensorflow as tf
from keras import backend as K
from keras.preprocessing import image
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint
import os
import numpy as np
import cv2
from PIL import Image as PImage
from os import listdir
from keras.preprocessing.image import ImageDataGenerator

img_width, img_height = 128, 160

def load_dataset(path):
    """Load every file found under *path* (recursively) as an image array.

    Args:
        path: Directory to scan; sub-directories are walked as well.

    Returns:
        A list with one ``numpy`` array per file, ordered by sorted
        directory name and then sorted file name.

    Raises:
        OSError: If *path* cannot be listed (for example, it does not exist).
    """
    # os.walk() silently yields nothing for a missing or unreadable path;
    # listing the directory first keeps the fail-fast behaviour.
    listdir(path)

    loaded_images = []
    for root, dirs, files in os.walk(path):
        # Sorting in place makes the traversal, and therefore the order of the
        # returned images, deterministic. The labels in this file are paired
        # with the images purely by position.
        dirs.sort()
        for name in sorted(files):
            image_path = os.path.join(root, name)
            # The context manager closes the file handle; np.array() copies
            # the pixel data out before that happens.
            with PImage.open(image_path) as img:
                loaded_images.append(np.array(img))
    return loaded_images

def n_fold_cv(label_groups=None):
    """Yield leave-one-group-out ``(train_idx, test_idx)`` index pairs.

    The groups are assumed to occupy consecutive index ranges of the
    concatenated data, in the order given. Each fold tests on one group and
    trains on all the others, so a model is never scored on the samples it
    was fitted on.

    Args:
        label_groups: Sequence of per-group label arrays; only their lengths
            are used. Defaults to the module-level ``y_g_1``, ``y_g_2`` and
            ``y_g_3``.

    Yields:
        Tuples ``(train_idx, test_idx)`` of integer ``numpy`` index arrays,
        one tuple per group.
    """
    if label_groups is None:
        label_groups = (y_g_1, y_g_2, y_g_3)

    # Cumulative group sizes give each group's [start, stop) range.
    bounds = [0] + list(accumulate(map(len, label_groups)))
    all_idx = np.arange(bounds[-1], dtype=int)
    for start, stop in zip(bounds, bounds[1:]):
        test_idx = all_idx[start:stop]
        train_idx = np.concatenate((all_idx[:start], all_idx[stop:]))
        yield train_idx, test_idx
              
def generateLabel(sober, drunk):
    """Build a label vector of ``sober`` zeros followed by ``drunk`` ones.

    Args:
        sober: Number of leading samples labelled 0.
        drunk: Number of trailing samples labelled 1.

    Returns:
        A 1-D ``numpy`` array of length ``sober + drunk``.
    """
    return np.array([0] * sober + [1] * drunk)

# One label vector per group: 200 zeros followed by 200 ones.
# NOTE(review): this assumes load_dataset() returns each group's class-0
# images before its class-1 images — confirm against the folder layout.
y_g_1 = generateLabel(200, 200)
y_g_2 = generateLabel(200, 200)
y_g_3 = generateLabel(200, 200)


group_1 = np.asarray(load_dataset('../input/cats-and-dogs-dataset/Pets 1'))
group_2 = np.asarray(load_dataset('../input/cats-and-dogs-dataset/Pets 2'))
group_3 = np.asarray(load_dataset('../input/cats-and-dogs-dataset/Pets 3'))

# Stack along a new leading axis so groups[i] / labels[i] select one group.
groups = np.stack((group_1, group_2, group_3), axis=0)
labels = np.stack((y_g_1, y_g_2, y_g_3), axis=0)



# Augmentation attempt that raises the TypeError reported in the question:
# the image array, the labels and the batch/save options are all handed to
# the ImageDataGenerator constructor.
# NOTE(review): in the Keras API the constructor takes only the augmentation
# settings (rotation_range, zoom_range, ...); the data, `y`, `batch_size` and
# the `save_*` options are arguments of `ImageDataGenerator.flow()` — confirm
# against the installed Keras version.
# NOTE(review): range(0, 2) visits only indices 0 and 1 of the three stacked
# groups, and `datagen` is rebound on every pass without being used.
for i in range(0, 2):
    datagen = ImageDataGenerator(
    groups[i],
    y=labels[i],
    batch_size=32,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    rotation_range=20,
    zoom_range=0.2
    )
    
    
    
# Merge the three groups along the sample axis; the image array additionally
# receives a trailing singleton axis.
y = np.concatenate([y_g_1, y_g_2, y_g_3], axis=0)
X = np.concatenate([group_1, group_2, group_3], axis=0)[..., np.newaxis]

# Place the channel axis where the active Keras backend expects it.
# NOTE(review): X gains a trailing axis of size 1 while input_shape declares
# 3 channels — confirm the two agree for the loaded images.
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)

def build_model():
    """Build and compile the binary image classifier used by KerasClassifier.

    Architecture: a 3x3 convolution with 32 filters and ReLU, 2x2 max
    pooling, flatten, dropout (0.25) and a single sigmoid output unit. The
    input shape is read from the module-level ``input_shape``.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss, RMSprop
        optimizer, MSE and accuracy metrics).
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Collapse the feature maps to one vector per sample so the Dense layer
    # emits a single probability per image instead of one per spatial position.
    model.add(Flatten())
    model.add(Dropout(0.25))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.summary()  # prints the layer table every time a model is built
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['mse', 'accuracy'])

    return model


# Wrap the model factory in the scikit-learn-compatible Keras wrapper.
keras_clf = KerasClassifier(
    build_fn=build_model,
    epochs=100,
    batch_size=8,
    verbose=0,
)

# Score the classifier on the splits produced by n_fold_cv().
accuracies = cross_val_score(
    estimator=keras_clf,
    X=X,
    y=y,
    scoring="accuracy",
    cv=n_fold_cv(),
)
print(accuracies)

回答1:

I think at this point you should just make a custom cross-validation loop, since you want extra flexibility. Then you'll be able to apply any transformation you want. For example, I used this transformation:

img  = tf.image.random_contrast(img, .2, .5)

But you can make it anything you want.

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
from glob2 import glob
from collections import deque

import os

# Collect the image paths of every group, laid out as
# <group>/<class>/<file>.jpg. os.path.join builds each pattern with the
# platform's separator; a hard-coded backslash only matches on Windows.
group1 = glob(os.path.join('group1', '*', '*.jpg'))
group2 = glob(os.path.join('group2', '*', '*.jpg'))
group3 = glob(os.path.join('group3', '*', '*.jpg'))

groups = [group1, group2, group3]

# Stop early if any pattern matched nothing (wrong working directory, etc.).
assert all(map(len, groups)), 'every group folder must contain at least one .jpg'

def load(file_path):
    """Read one JPEG and derive its binary label from the parent folder name.

    Args:
        file_path: Path of the form ``.../<class>/<file>.jpg``, as a string
            or scalar string tensor.

    Returns:
        ``(img, label)``: a float32 image resized to 100x100 with randomly
        adjusted contrast, and an int32 label that is 1 when the parent
        folder is named ``dogs`` and 0 otherwise.
    """
    import os  # local import: the snippet's import block does not bring in os

    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, size=(100, 100))
    # Augmentation step; any other tf.image transformation can go here.
    img = tf.image.random_contrast(img, .2, .5)
    # The class is the directory that directly contains the file, i.e. the
    # second-to-last path component. Indexing from the end also works when
    # the path has extra leading directories.
    label = tf.strings.split(file_path, os.sep)[-2]
    label = tf.cast(tf.equal(label, 'dogs'), tf.int32)
    return img, label

accuracies_on_test_set = {}

# Rotate the group list once per pass so that each pass trains on two groups
# and evaluates on the third. The dictionary keys say "epoch", but they count
# these passes.
for i in range(len(groups)):
    rotation = deque(groups)
    rotation.rotate(i)
    train1, train2, test1 = rotation

    # Input pipelines: shuffle the file names, decode them with load(), and
    # batch four images at a time.
    train_files = train1 + train2
    train_ds = tf.data.Dataset.from_tensor_slices(train_files)
    train_ds = train_ds.shuffle(len(train_files)).map(load).batch(4)

    test_ds = tf.data.Dataset.from_tensor_slices(test1)
    test_ds = test_ds.shuffle(len(test1)).map(load).batch(4)

    # A fresh model for every pass, so no weights carry over between passes.
    model = Sequential([
        Conv2D(32, (3, 3), input_shape=(100, 100, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dropout(0.25),
        Dense(1),
        Activation('sigmoid'),
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='rmsprop',
        metrics=['mse', 'accuracy'],
    )

    model.fit(train_ds, validation_data=test_ds, epochs=5, verbose=0)
    loss, mse, accuracy = model.evaluate(test_ds, verbose=0)
    accuracies_on_test_set[f'epoch_{i + 1}_accuracy'] = accuracy

print(accuracies_on_test_set)

{'epoch_1_accuracy': 0.915, 'epoch_2_accuracy': 0.95, 'epoch_3_accuracy': 0.9}

The folder structure is this:

group1/
    dogs/
        dog001.jpg
    cats/
        cat001.jpg
group2/
    dogs/
        dog001.jpg
    cats/
        cat001.jpg
group3/
    dogs/
        dog001.jpg
    cats/
        cat001.jpg

来源：https://stackoverflow.com/questions/63992902/fitting-array-to-datagen-then-passing-as-parameter-to-keras-classifier

标签

python

python-3.x

tensorflow

machine-learning

keras