Identifying equivalent varargs function calls for memoization in Python

问题

I'm using a variant of the following decorator for memoization (found here):

# note that this decorator ignores **kwargs
def memoize(obj):
    cache = obj.cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        if args not in cache:
            cache[args] = obj(*args, **kwargs)
        return cache[args]
    return memoizer

I'm wondering, is there a reasonable way to memoize based on both args and kwargs, particularly in cases where two function calls specified with arguments assigned differently positionally and through keyword, but have the exact same arguments?

回答1:

If you are using parameters either always as positionals or always as keywords, Thorsten solution works fine. But, if you want to consider equal calls that give to the parameters the same values, indipendently of how the parameters are passed, then you have to do something more complex:

import inspect


def make_key_maker(func):
    args_spec = inspect.getargspec(func)

    def key_maker(*args, **kwargs):
        left_args = args_spec.args[len(args):]
        num_defaults = len(args_spec.defaults or ())
        defaults_names = args_spec.args[-num_defaults:]

        if not set(left_args).symmetric_difference(kwargs).issubset(defaults_names):
            # We got an error in the function call. Let's simply trigger it
            func(*args, **kwargs)

        start = 0
        key = []
        for arg, arg_name in zip(args, args_spec.args):
            key.append(arg)
            if arg_name in defaults_names:
                start += 1

        for left_arg in left_args:
            try:
                key.append(kwargs[left_arg])
            except KeyError:
                key.append(args_spec.defaults[start])

            # Increase index if we used a default, or if the argument was provided
            if left_arg in defaults_names:
                start += 1
        return tuple(key)

    return key_maker

The above functions tries to map keyword arguments(and defaults) to positional and uses the resultant tuple as key. I tested it a bit and it seems to work properly in most cases. It fails when the target function also uses a **kwargs argument.

>>> def my_function(a,b,c,d,e=True,f="something"): pass
... 
>>> key_maker = make_key_maker(my_function)
>>> 
>>> key_maker(1,2,3,4)
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,4, e=True)               # same as before
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,4, True)                 # same as before
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,4, True, f="something")  # same as before
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,4, True, "something")    # same as before
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,d=4)                     # same as before
(1, 2, 3, 4, True, 'something')
>>> key_maker(1,2,3,d=4, f="something")      # same as before
(1, 2, 3, 4, True, 'something')

回答2:

In general, it's not possible to infer that two calls have the same parameter meaning. Consider the calls

func(foo=1)
func(1)
func(bar=1)

Which of these (if any) are equivalent depends on whether the positional argument is called foo or bar: if the argument is called foo, then the first call matches the second, etc. However, the positional parameter may also have an entirely different name.

IOW, you need to reflect on the function to be called, which, in turn, may not be possible (e.g. if it is implemented in C, or is itself a wrapper that only processes *args, **kwargs).

If you want to go the reflection route, something like ndpu's response is a good start.

回答3:

import inspect
def memoize(obj):
    cache = obj.cache = {}
    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        kwargs.update(dict(zip(inspect.getargspec(obj).args, args)))
        key = tuple(kwargs.get(k, None) for k in inspect.getargspec(obj).args)
        if key not in cache:
            cache[key] = obj(**kwargs)
        return cache[key]
    return memoizer

回答4:

You just have to find a good way to build a key from both args and kwargs. Maybe try this:

import functools
from collections import OrderedDict

# note that this decorator ignores **kwargs
def memoize(obj):
    def make_key(args, kwargs):
        ordered_kwargs = OrderedDict(kwargs)
        parameters = tuple([args, 
                            tuple(ordered_kwargs.keys()), 
                            tuple(ordered_kwargs.values())])
        return parameters
    cache = obj.cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        key = make_key(args, kwargs)
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
            print "Not using cached result for key %s" % str(key)
        else:
            print "Using cached result for key %s" % str(key)
        return cache[key]
    return memoizer

@memoize
def calculate_sum(*args, **kwargs):
    return sum(args)

calculate_sum(4,7,9,2,flag=0)
calculate_sum(4,7,9,3)
calculate_sum(4,7,9,2,flag=1)
calculate_sum(4,7,9,2,flag=0)

I put some print-statements into memoizer, just to demonstrate that it works. Output is:

Not using cached result for key ((4, 7, 9, 2), ('flag',), (0,))
Not using cached result for key ((4, 7, 9, 3), (), ())
Not using cached result for key ((4, 7, 9, 2), ('flag',), (1,))
Using cached result for key ((4, 7, 9, 2), ('flag',), (0,))

I'm sure I didn't tackle all corner-cases, especially if the values passed-in as kwargs (or even args) are not hashable. But maybe it can serve as a good starting-point.

回答5:

This solution uses the inspect module to extract parameter names for both positional and keyword arguments. The memoization lookup is then performed on the ordered tuple of name:value pairs. It can tolerate parameters being passed as both positional and keyword arguments. If there are excess positional arguments, these are stored in the order they appear in a separate tuple.

This uses Michele Simionato's decorator package to ensure that function signatures are preserved. Because it inspects the argspec of the function being memoized, it will fail if composed with decorator implementations that do not preserve the argspec.

from decorator import decorator as robust_decorator

def argument_signature(function,*args,**kwargs):
    '''
    Convert the function arguments and values to a unique set. 
    Throws ValueError if the provided arguments cannot match argspec.
    '''
    named_store = {} # map from parameter names to values 
    named,vargname,kwargname,defaults = inspect.getargspec(function)
    available = zip(named,args)
    nargs     = len(available)
    ndefault  = len(defaults) if not defaults is None else 0
    nnamed    = len(named)
    # All positional arguments must be filled
    nmandatory = nnamed - ndefault
    if nargs<nmandatory: raise ValueError('Not enough positional arguments')
    # Assign available positional arguments to names    
    for k,v in available:
        if k in named_store: raise ValueError('Duplicate argument',k)
        named_store[k] = v
    # If not all arguments are provided, check **kwargs and defaults
    ndefaulted   = max(0,nnamed - nargs)
    default_map = dict(zip(named[-ndefault:],defaults)) if ndefault>0 else {}
    if ndefaulted>0:
        for k in named[-ndefaulted:]:
            if k in named_store: raise ValueError('Duplicate argument',k)
            named_store[k] = kwargs[k] if k in kwargs else default_map[k]
            if k in kwargs: del kwargs[k]
    # Store excess positional arguments in *vargs if possible
    vargs = None
    if len(args)>nnamed:
        if vargname is None:
            raise ValueError('Excess positional arguments, but function does not accept *vargs!')
        vargs = args[nnamed:]
    # Store excess keyword arguments if the function accepts **kwargs
    if len(kwargs):
        if kwargname is None:
            raise ValueError("Excess keyword arguments, but function does not accept **kwargs!")
        for k in kwargs:
            if k in named_store: raise ValueError('Duplicate argument',k)
            named_store[k] = kwargs[k]
    # Construct a tuple reflecting argument signature
    keys  = sorted(named_store.keys())
    vals  = tuple(named_store[k] for k in keys)
    named = tuple(zip(keys,vals))
    argument_signature = (named,vargs)
    return argument_signature

def print_signature(sig):
    '''Formats the argument signature for printing.'''
    named, vargs = sig
    result = ','.join(['%s=%s'%(k,v) for (k,v) in named])
    if not vargs is None: result += '; ' + ','.join(map(str,vargs))
    return result

def vararg_memoize(f):
    '''Memoization decorator'''
    cache = {}
    @robust_decorator
    def decorated(f,*args,**kwargs):
        sig = argument_signature(f,*args,**kwargs)
        if not sig in cache:  cache[sig] = f(*args,**kwargs)
        else: print('found cached',f.func_name,print_signature(sig))
        return cache[sig]
    return decorated(f)

if __name__=="__main__":
    print("Running example and testing code")

    def example_function(a,b,c=1,d=('ok',),*vargs,**kw):
        ''' This docstring should be preserved by the decorator '''
        e,f = vargs if (len(vargs)==2) else (None,None)
        g = kw['k'] if 'k' in kw else None
        print(a,b,c,d,e,f,g)

    f = example_function
    g = vararg_memoize(example_function)

    for fn in [f,g]:
        print('Testing',fn.__name__)
        fn('a','b','c','d')
        fn('a','b','c','d','e','f')
        fn('a','b',c='c',d='d')
        fn('a','b',**{'c':'c','d':'d'})
        fn('a','b',*['c','d'])
        fn('a','b',d='d',*['c'])
        fn('a','b',*['c'],**{'d':'d'})
        fn('a','b','c','d','e','f')

来源：https://stackoverflow.com/questions/14621838/identifying-equivalent-varargs-function-calls-for-memoization-in-python

标签

python

decorator

memoization