As of August 2017, Pandas DataFame.apply() is unfortunately still limited to working with a single core, meaning that a multi-core machine will waste the majority of its com
Here is an example of sklearn base transformer, in which pandas apply is parallelized
import multiprocessing as mp
from sklearn.base import TransformerMixin, BaseEstimator
class ParllelTransformer(BaseEstimator, TransformerMixin):
def __init__(self,
n_jobs=1):
"""
n_jobs - parallel jobs to run
"""
self.variety = variety
self.user_abbrevs = user_abbrevs
self.n_jobs = n_jobs
def fit(self, X, y=None):
return self
def transform(self, X, *_):
X_copy = X.copy()
cores = mp.cpu_count()
partitions = 1
if self.n_jobs <= -1:
partitions = cores
elif self.n_jobs <= 0:
partitions = 1
else:
partitions = min(self.n_jobs, cores)
if partitions == 1:
# transform sequentially
return X_copy.apply(self._transform_one)
# splitting data into batches
data_split = np.array_split(X_copy, partitions)
pool = mp.Pool(cores)
# Here reduce function - concationation of transformed batches
data = pd.concat(
pool.map(self._preprocess_part, data_split)
)
pool.close()
pool.join()
return data
def _transform_part(self, df_part):
return df_part.apply(self._transform_one)
def _transform_one(self, line):
# some kind of transformations here
return line
for more info see https://towardsdatascience.com/4-easy-steps-to-improve-your-machine-learning-code-performance-88a0b0eeffa8