问题
I'm trying to run my keras
model using multiprocessing
due to GPU OOM issue.
I loaded all libraries and set up the model within the function for multiprocessing as below:
When I execute this code, it gets stuck at history = q.get()
, which is multiprocessing.Queue.get()
.
And when I remove all the code related to multiprocessing.Queue()
, the code execution ends as soon as I execute it, which makes me suspect the code is not actually running. Even a simple print()
function didn't show an output.
The model.fit()
works perfectly when executed in the main process. And epoch and batch are set at low number for testing.
BTW, I'm running this on a Kaggle kernel, and I've tried everything to release the GPU memory held by TensorFlow, such as cuda.close()
but all of them didn't work. So, I'm hoping multiprocessing will solve OOM problem for me.
I'd appreciate your inputs!
# train model in multiprocessing
import multiprocessing
def train_model(x_train, y_train, x_val, y_val, q):
    """Build and train a VGG-16-style binary classifier in a child process.

    Runs entirely inside the subprocess so that TensorFlow/keras allocates
    (and, on process exit, releases) GPU memory in this process only.
    The training metrics are sent back to the parent through *q*.

    Args:
        x_train, y_train: training images/labels (images 256x256x1).
        x_val, y_val: validation images/labels.
        q: multiprocessing.Queue used to return the metrics dict.
    """
    # Import keras inside the child so TF initializes (and grabs the GPU)
    # in this process, not in the parent.
    import keras
    from keras.models import Sequential
    from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
    from keras.optimizers import Adam

    # Input images are 256x256, single channel.
    img_height = 256
    img_width = 256

    def build_vgg():
        """Assemble the VGG-16 convolutional stack with a 1-unit sigmoid head."""
        model = Sequential()
        model.add(Conv2D(input_shape=(img_height, img_width, 1), filters=64,
                         kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation="relu"))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Flatten())
        model.add(Dense(units=4096, activation="relu"))
        model.add(Dense(units=4096, activation="relu"))
        model.add(Dense(units=1, activation="sigmoid"))
        opt = Adam(lr=0.001)
        model.compile(optimizer=opt, loss=keras.losses.binary_crossentropy,
                      metrics=['accuracy'])
        return model

    model = build_vgg()
    batch_size = 16
    epochs = 1  # deliberately small for testing
    history = model.fit(x_train, y_train, validation_data=(x_val, y_val),
                        epochs=epochs, batch_size=batch_size)
    # BUG FIX: a keras History object holds a reference to the Model and is
    # NOT picklable. q.put(history) therefore raises inside the child, the
    # child dies, and the parent blocks forever on q.get() — the reported
    # hang. Send only history.history, a plain dict of metric lists, which
    # pickles cleanly through the queue.
    q.put(history.history)
# Entry-point guard is REQUIRED for multiprocessing: with the 'spawn' start
# method (the default on Windows/macOS, and common in notebook kernels) the
# child re-imports this module, and without the guard the top-level code
# would run again in the child — the likely cause of the "nothing happens,
# not even print()" symptom.
if __name__ == "__main__":
    q = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=train_model,
        args=(x_train, y_train, x_val, y_val, q),
    )
    p.start()
    # Drain the queue BEFORE join(): if the child is blocked writing a large
    # result into the queue's pipe, joining first would deadlock.
    history = q.get()
    p.join()
来源:https://stackoverflow.com/questions/62620104/keras-not-running-in-multiprocessing