After spending a lot of time trying to wrap my head around multiprocessing, I came up with this code, which is a benchmark test:
Example 1:
Multiprocessing could be useful for what you're doing, but not in the way you're thinking about using it. Since you're basically doing the same computation on every member of a list, you can use the multiprocessing.Pool.map method to run that computation on the list members in parallel.
Here is an example that shows your code's performance using a single process and using multiprocessing.Pool.map:
from multiprocessing import Pool
from random import choice
from string import printable
from time import time
def build_test_list(num_sublists=5, strings_per_sublist=10000,
                    string_length=20):
    """Build the benchmark input: a list of lists of random strings.

    With the default arguments this produces 5 sublists of 10000 strings
    each, every string being 20 characters drawn from ``string.printable``.

    Args:
        num_sublists: number of sublists to create.
        strings_per_sublist: number of random strings in each sublist.
        string_length: number of characters in each random string.

    Returns:
        A list containing ``num_sublists`` lists of random strings.
    """
    # ``range`` is used instead of the Python 2-only ``xrange`` so that the
    # function runs unchanged on both Python 2 and Python 3.
    def random_string():
        return ''.join(choice(printable) for _ in range(string_length))

    testlist = []
    for _ in range(num_sublists):
        testlist.append(
            [random_string() for _ in range(strings_per_sublist)]
        )
    return testlist
def process_list(l):
    """Label each distinct item of ``l`` with how often it occurs in ``l``.

    This is the time-consuming part of the benchmark: both the membership
    test and ``l.count`` scan a list for every item.

    Returns a list holding one ``'<item> (<count>)'`` string per distinct
    item, in order of first appearance in ``l``.
    """
    labelled = []
    seen = []
    for item in l:
        if item in seen:
            # Already labelled at its first occurrence.
            continue
        occurrences = l.count(item)
        labelled.append(item + ' (' + str(occurrences) + ')')
        seen.insert(0, item)
    return labelled
def single(l):
    """Process every sublist of ``l`` one after another in this process.

    Returns a list with the ``process_list`` result of each sublist, in the
    same order as the sublists appear in ``l``.
    """
    return [process_list(chunk) for chunk in l]
def multi(l):
    """Process the sublists of ``l`` in parallel with a worker pool.

    Each sublist is handed to ``process_list`` via ``Pool.map``.

    Returns a list with the ``process_list`` result of each sublist, in the
    same order as the sublists appear in ``l``.
    """
    pool = Pool()
    try:
        results = pool.map(process_list, l)
    finally:
        # Always shut the workers down, even if a task raised; otherwise the
        # worker processes linger until the interpreter exits.
        pool.close()
        pool.join()
    return results
# Benchmark driver: time the single-process and multi-process variants on the
# same input and report both durations.
#
# The ``__main__`` guard is required for multiprocessing: on platforms where
# worker processes are spawned rather than forked, each worker re-imports this
# module, and unguarded top-level code would re-run the benchmark in every
# worker.
if __name__ == '__main__':
    # ``print(...)`` with a single argument behaves identically on Python 2
    # and Python 3.
    print("Building the test list...")
    testlist = build_test_list()

    print("Processing the test list using a single process...")
    starttime = time()
    singleresults = single(testlist)
    singletime = time() - starttime

    print("Processing the test list using multiple processes...")
    starttime = time()
    multiresults = multi(testlist)
    multitime = time() - starttime

    # make sure they both return the same thing
    assert singleresults == multiresults

    print("Single process: {0:.2f}sec".format(singletime))
    print("Multiple processes: {0:.2f}sec".format(multitime))
Output:
Building the test list...
Processing the test list using a single process...
Processing the test list using multiple processes...
Single process: 34.73sec
Multiple processes: 24.97sec