Error when profiling an otherwise perfectly working multiprocessing python script with cProfile

时光毁灭记忆、已成空白 提交于 2019-12-21 12:18:57

问题


I've written a small python script that uses multiprocessing (See https://stackoverflow.com/a/41875711/1878788). It works when I test it:

$ ./forkiter.py
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30

But when I try to profile it with cProfile, I get the following:

$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
0
1
2
3
4
Traceback (most recent call last):
  File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
    main()
  File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
    runctx(code, globs, None, options.outfile, options.sort)
  File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
    filename, sort)
  File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
    prof.runctx(statement, globals, locals)
  File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
    exec(cmd, globals, locals)
  File "./forkiter.py", line 71, in <module>
    exit(main())
  File "./forkiter.py", line 67, in main
    sum_tuples, results_generator))
  File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
    raise value
  File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
    put(task)
  File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed

What happens?

Here is the script:

#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""

from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep

# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
#     def tuple_func(args_list):
#         return tuple(func(args) for func, args in zip(funcs, args_list))
#     return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))

class FuncApplier(object):
    """Callable bundle of functions applied position-wise to a tuple of inputs.

    Defined at module level as a stand-in for a closure, which could not be
    pickled when handed to a ``multiprocessing`` pool.
    """
    __slots__ = ("funcs",)

    def __init__(self, funcs):
        self.funcs = funcs

    def __len__(self):
        """Return how many functions are grouped."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return a tuple of ``funcs[i](args_list[i])`` for every pairing."""
        paired = zip(self.funcs, args_list)
        return tuple(fn(arg) for fn, arg in paired)

    def fork_args(self, args_list):
        """Return a tuple repeating *args_list* once per grouped function."""
        return (args_list,) * len(self)


def sum_tuples(*tuples):
    """Return the element-wise sum of any number of tuples.

    Items found at the same position are combined with ``+``. The result is
    as long as the shortest input, and ``()`` when no tuple is given.
    """
    # Fold each column with reduce(): add() alone takes exactly two operands,
    # so it only worked when precisely two tuples were passed.
    return tuple(reduce(add, column) for column in zip(*tuples))


# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return add(x, 1)

def double(x):
    """Return twice *x*."""
    doubled = 2 * x
    return doubled

def square(x):
    """Return *x* multiplied by itself."""
    squared = x * x
    return squared

def main():
    """Apply three functions to 0..4 in a process pool and print the sums.

    Each generated integer is repeated into a 3-tuple, sent to a pool of five
    worker processes where ``plus_one``, ``double`` and ``square`` are applied
    to it, and the resulting tuples are summed element-wise.

    Returns:
        int: 0, suitable for use as the process exit status.
    """
    def my_generator():
        # Echo each value as it is produced.
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list) for args_list in my_generator()))
        # reduce() folds the per-input 3-tuples into one 3-tuple of totals,
        # which fills the three %s placeholders.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status rather than calling exit() here, so the caller stays
    # in control of process termination.
    return 0

if __name__ == "__main__":
    # Use sys.exit() rather than the bare exit(): the latter is a helper the
    # site module installs for interactive sessions, not for programs.
    import sys
    sys.exit(main())

Some pickling tests

Some research suggested that objects sometimes need __setstate__ and __getstate__ methods to be picklable. This helps for some pickling protocols, but it doesn't seem to solve the problem in the cProfile case. See the tests below.

The updated script:

#!/usr/bin/env python3
"""This script tries to work around some limitations of multiprocessing."""

from itertools import repeat, starmap
from multiprocessing import Pool
from functools import reduce
from operator import add
from time import sleep
import pickle

# Doesn't work because local functions can't be pickled:
# def make_tuple_func(funcs):
#     def tuple_func(args_list):
#         return tuple(func(args) for func, args in zip(funcs, args_list))
#     return tuple_func
#
# test_tuple_func = make_tuple_func((plus_one, double, square))

class FuncApplier(object):
    """Callable bundle of functions applied position-wise to a tuple of inputs.

    Defined at module level as a stand-in for a closure, which could not be
    pickled when handed to a ``multiprocessing`` pool.
    """
    __slots__ = ("funcs",)

    def __init__(self, funcs):
        self.funcs = funcs

    def __len__(self):
        """Return how many functions are grouped."""
        return len(self.funcs)

    def __call__(self, args_list):
        """Return a tuple of ``funcs[i](args_list[i])`` for every pairing."""
        paired = zip(self.funcs, args_list)
        return tuple(fn(arg) for fn, arg in paired)

    # Explicit pickling hooks, added in an attempt to make instances
    # picklable when run under cProfile (this does not help).
    def __getstate__(self):
        """Return the grouped functions as the pickled state."""
        return self.funcs

    def __setstate__(self, state):
        """Restore the grouped functions from the pickled *state*."""
        self.funcs = state

    def fork_args(self, args_list):
        """Return a tuple repeating *args_list* once per grouped function."""
        return (args_list,) * len(self)


def sum_tuples(*tuples):
    """Return the element-wise sum of any number of tuples.

    Items found at the same position are combined with ``+``. The result is
    as long as the shortest input, and ``()`` when no tuple is given.
    """
    # Fold each column with reduce(): add() alone takes exactly two operands,
    # so it only worked when precisely two tuples were passed.
    return tuple(reduce(add, column) for column in zip(*tuples))


# Can't define these functions in main:
# They wouldn't be pickleable.
def plus_one(x):
    """Return *x* incremented by one."""
    return add(x, 1)

def double(x):
    """Return twice *x*."""
    doubled = 2 * x
    return doubled

def square(x):
    """Return *x* multiplied by itself."""
    squared = x * x
    return squared

def main():
    """Try pickling a FuncApplier at protocols 0-4, then run the pool demo.

    The pickling attempts print either the serialized bytes or the
    ``PicklingError`` message for each protocol. The demo then applies
    ``plus_one``, ``double`` and ``square`` to 0..4 in a pool of five worker
    processes and prints the element-wise sums.

    Returns:
        int: 0, suitable for use as the process exit status.
    """
    def my_generator():
        # Echo each value as it is produced.
        for i in range(5):
            print(i)
            yield i

    test_tuple_func = FuncApplier((plus_one, double, square))

    # Each attempt must pass its own protocol number: always passing 0 would
    # test only protocol 0 under five different labels.
    for protocol in range(5):
        print("protocol %d" % protocol)
        try:
            print(pickle.dumps(test_tuple_func, protocol))
        except pickle.PicklingError as err:
            print("failed with the following error:\n%s" % err)

    with Pool(processes=5) as pool:
        results_generator = pool.imap_unordered(
            test_tuple_func,
            (test_tuple_func.fork_args(args_list) for args_list in my_generator()))
        # reduce() folds the per-input 3-tuples into one 3-tuple of totals,
        # which fills the three %s placeholders.
        print("sum of x+1:\t%s\nsum of 2*x:\t%s\nsum of x*x:\t%s" % reduce(
            sum_tuples, results_generator))
    # Return the status rather than calling exit() here, so the caller stays
    # in control of process termination.
    return 0

if __name__ == "__main__":
    # Use sys.exit() rather than the bare exit(): the latter is a helper the
    # site module installs for interactive sessions, not for programs.
    import sys
    sys.exit(main())

Test without cProfile seems OK:

$ ./forkiter.py
protocol 0
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 1
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 2
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 3
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
protocol 4
b'ccopy_reg\n_reconstructor\np0\n(c__main__\nFuncApplier\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(c__main__\nplus_one\np5\nc__main__\ndouble\np6\nc__main__\nsquare\np7\ntp8\nb.'
0
1
2
3
4
sum of x+1: 15
sum of 2*x: 20
sum of x*x: 30

The test under cProfile fails at every pickling protocol (and consequently in multiprocessing also):

$ python3.6 -m cProfile -o forkiter.prof ./forkiter.py
protocol 0
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 1
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 2
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 3
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
protocol 4
failed with the following error:
Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed
0
1
2
3
4
Traceback (most recent call last):
  File "/home/bli/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/bli/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/bli/lib/python3.6/cProfile.py", line 160, in <module>
    main()
  File "/home/bli/lib/python3.6/cProfile.py", line 153, in main
    runctx(code, globs, None, options.outfile, options.sort)
  File "/home/bli/lib/python3.6/cProfile.py", line 20, in runctx
    filename, sort)
  File "/home/bli/lib/python3.6/profile.py", line 64, in runctx
    prof.runctx(statement, globals, locals)
  File "/home/bli/lib/python3.6/cProfile.py", line 100, in runctx
    exec(cmd, globals, locals)
  File "./forkiter.py", line 105, in <module>
    exit(main())
  File "./forkiter.py", line 101, in main
    sum_tuples, results_generator))
  File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 699, in next
    raise value
  File "/home/bli/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
    put(task)
  File "/home/bli/lib/python3.6/multiprocessing/connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "/home/bli/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '__main__.FuncApplier'>: attribute lookup FuncApplier on __main__ failed

回答1:


It seems cProfile simply doesn't work with multiprocessing.

If you are happy to modify the code to profile only the main process (or to add specific profiling for the subprocesses), cProfile.run() seems to work to a degree.

In your example, replace

exit(main())

with

exit(cProfile.run('main()'))

That at least works if the parallel function is a global-scope function; I am not sure whether that is also true for a class, as in your case.



来源:https://stackoverflow.com/questions/41892297/error-when-profiling-an-otherwise-perfectly-working-multiprocessing-python-scrip

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!