Sometimes I run into a problem:
OOM when allocating tensor with shape
e.q.
OOM when allocating tensor wi
def FindBatchSize(model):
"""#model: model architecture, that is yet to be trained"""
import os, sys, psutil, gc, tensorflow, keras
import numpy as np
from keras import backend as K
BatchFound= 16
try:
total_params= int(model.count_params()); GCPU= "CPU"
#find whether gpu is available
try:
if K.tensorflow_backend._get_available_gpus()== []:
GCPU= "CPU"; #CPU and Cuda9GPU
else:
GCPU= "GPU"
except:
from tensorflow.python.client import device_lib; #Cuda8GPU
def get_available_gpus():
local_device_protos= device_lib.list_local_devices()
return [x.name for x in local_device_protos if x.device_type == 'GPU']
if "gpu" not in str(get_available_gpus()).lower():
GCPU= "CPU"
else:
GCPU= "GPU"
#decide batch size on the basis of GPU availability and model complexity
if (GCPU== "GPU") and (os.cpu_count() >15) and (total_params <1000000):
BatchFound= 64
if (os.cpu_count() <16) and (total_params <500000):
BatchFound= 64
if (GCPU== "GPU") and (os.cpu_count() >15) and (total_params <2000000) and (total_params >=1000000):
BatchFound= 32
if (GCPU== "GPU") and (os.cpu_count() >15) and (total_params >=2000000) and (total_params <10000000):
BatchFound= 16
if (GCPU== "GPU") and (os.cpu_count() >15) and (total_params >=10000000):
BatchFound= 8
if (os.cpu_count() <16) and (total_params >5000000):
BatchFound= 8
if total_params >100000000:
BatchFound= 1
except:
pass
try:
#find percentage of memory used
memoryused= psutil.virtual_memory()
memoryused= float(str(memoryused).replace(" ", "").split("percent=")[1].split(",")[0])
if memoryused >75.0:
BatchFound= 8
if memoryused >85.0:
BatchFound= 4
if memoryused >90.0:
BatchFound= 2
if total_params >100000000:
BatchFound= 1
print("Batch Size: "+ str(BatchFound)); gc.collect()
except:
pass
memoryused= []; total_params= []; GCPU= "";
del memoryused, total_params, GCPU; gc.collect()
return BatchFound
#####################################################################################################
#####################################################################################################