How to register implementation of abc.MutableMapping as a dict subclass?

ぐ巨炮叔叔 提交于 2019-12-12 15:52:13

问题


I would like my SpreadSheet class below to be considered a dict subclass by the isinstance() built-in, but when I try to register it as such, an AttributeError Exception is thrown (also shown below).

What is a (or the) way to do something like this?

Note: My question is similar to Is it possible to be a virtual subclass of a built in type?, but its accepted answer doesn't address the titular question asked (so please don't vote to close this as a duplicate).

The primary motivation for wanting to do this is to allow an instance of the class to be passed to json.dump() and be treated just like a Python dict. This is needed because — for reasons I don't understand — the JSONEncoder class uses isinstance(value, dict), rather than isinstance(value, Mapping).

from collections.abc import MutableMapping


class SpreadSheet(MutableMapping):
    """Dict-like container of cell formulas that are evaluated when read.

    Formulas are kept as given in ``_cells``.  Looking a cell up evaluates its
    formula with ``eval()``, using ``_tools`` as globals and the spreadsheet
    itself as locals, so a formula may refer to other cells by name.
    """

    def __init__(self, tools=None, **kwargs):
        """Start with no cells; ``tools`` optionally supplies names for formulas.

        Extra keyword arguments are accepted and ignored.
        """
        self._cells = {}
        self._tools = {'__builtins__': None}
        if tools is None:
            return
        self._tools.update(tools)  # Add caller supplied functions.

    # ---- writing -------------------------------------------------------

    def __setitem__(self, key, formula):
        """Store ``formula`` unevaluated under ``key``."""
        self._cells[key] = formula

    def __delitem__(self, k):
        """Remove cell ``k``; raises KeyError if it does not exist."""
        del self._cells[k]

    def clear(self):
        """Remove every cell."""
        self._cells.clear()

    # ---- reading -------------------------------------------------------

    def __getitem__(self, key ):
        """Return the evaluated value of the formula stored under ``key``."""
        source = self._cells[key]
        # NOTE: eval() runs whatever expression was stored in the cell.
        return eval(source, self._tools, self)

    def getformula(self, key):
        """Return the stored formula for ``key`` without evaluating it."""
        formula = self._cells[key]
        return formula

    def __contains__(self, k):
        """Tell whether a cell named ``k`` has been stored."""
        present = k in self._cells
        return present

    def __iter__(self):
        """Iterate over the stored cell names."""
        return iter(self._cells)

    def __len__(self):
        """Return the number of stored cells."""
        count = len(self._cells)
        return count

type(dict).register(SpreadSheet)  # Register class as dict subclass.
# NOTE: the call above raises AttributeError, because type(dict) is the
# built-in `type`, which has no register() method; the lines below it
# are therefore never executed.

ss = SpreadSheet()
print(f'isinstance(ss, dict): {isinstance(ss, dict)}')  # Result should be True.

Error:

Traceback (most recent call last):
  File "spreadsheet.py", line 35, in <module>
    type(dict).register(SpreadSheet)  # Register class as dict subclass.
AttributeError: type object 'type' has no attribute 'register'

Chosen Solution

As the accepted answer to "Is it possible to be a virtual subclass of a built in type?" says, it's impossible as primitive types are essentially immutable.

However it is possible to make json.dump() treat a Mapping just like a dict by patching the module as shown in the second approach presented in @jsbueno's answer. The implementation shown below patches the encoder in a slightly different, simpler way, that gives equivalent results. I especially liked a bonus feature it has which is that it also prevents the C optimized version from being used and silently failing.

from collections.abc import Mapping, MutableMapping
from functools import partial
import json


class SpreadSheet(MutableMapping):
    """Mapping of cell names to formulas that are evaluated on lookup.

    Formulas are stored as given in ``_cells``.  Reading a cell with
    ``ss[key]`` evaluates its formula with ``eval()``, using ``_tools`` as the
    globals and the spreadsheet itself as the locals, so a formula can refer
    to other cells by name.
    """

    def __init__(self, tools=None, **kwargs):
        """Create an empty spreadsheet.

        tools: optional mapping of names made available to formulas.
        **kwargs: accepted but not used.
        """
        self._cells = {}
        self._tools = {'__builtins__': None}  # Prevent eval() from supplying.
        if tools is not None:
            self._tools.update(tools)  # Add any caller-supplied functions.

    def clear(self):
        """Remove all cells."""
        return self._cells.clear()

    def __contains__(self, key):
        """Return True if a cell named ``key`` has been stored."""
        return key in self._cells

    def __setitem__(self, key, formula):
        """Store ``formula`` unevaluated under ``key``."""
        self._cells[key] = formula

    def __getitem__(self, key):
        """Return the evaluated value of the formula stored under ``key``.

        Raises KeyError if the cell does not exist.
        """
        # NOTE(security): eval() executes whatever expression is stored in the
        # cell, so formulas should only come from trusted sources.
        return eval(self._cells[key], self._tools, self)

    def __len__(self):
        """Return the number of stored cells."""
        return len(self._cells)

    def __iter__(self):
        """Iterate over the stored cell names."""
        return iter(self._cells)

    def __delitem__(self, key):
        """Remove the cell ``key``; raises KeyError if it does not exist."""
        del self._cells[key]

    def getformula(self, key):
        """ Return raw un-evaluated contents of cell. """
        return self._cells[key]

    def update(self, *args, **kwargs):
        """Store every key/formula pair described by dict()-style arguments."""
        # dict.iteritems() only exists on Python 2; items() is required on
        # Python 3, which this code targets (it imports collections.abc).
        for k, v in dict(*args, **kwargs).items():
            self[k] = v


# Monkey-patch json module

# Changes check for isinstance(obj, dict) to isinstance(obj, Mapping)
# https://github.com/python/cpython/blob/3.8/Lib/json/encoder.py#L321
# partial() pre-binds the function's dict= keyword to Mapping instead of
# the built-in dict. The isinstance() call inside the encoder uses whatever
# that keyword is set to.
_new__make_iterencode = partial(json.encoder._make_iterencode, dict=Mapping)

# Replace the module-level attributes with the patched versions.
json.encoder._make_iterencode = _new__make_iterencode
json.encoder.c_make_encoder = None  # Disables use of C version of make encoder


if __name__ == '__main__':

    import json
    from math import cos, sin, pi, tan

    # Names that cell formulas are allowed to use.
    tools = {'len': len, 'sin': sin, 'cos': cos, 'pi': pi, 'tan': tan}

    ss = SpreadSheet(tools)
    cells = [('a1', '5'), ('a2', 'a1*6'), ('a3', 'a2*7'), ('b1', 'sin(pi/4)')]
    for cell, formula in cells:
        ss[cell] = formula

    print()
    print(f'isinstance(SpreadSheet(tools), dict) -> {isinstance(ss, dict)}')
    print()
    print('Static Contents via getformula():')
    raw_formulas = {k: ss.getformula(k) for k in ss.keys()}
    print(json.dumps(raw_formulas, indent=4))
    print()
    print('Dynamic Contents via __getitem__():')
    for cell, _ in cells:
        print(f"  ss['{cell}'] -> {ss[cell]!r}")
    print()
    print("via json.dumps(ss, indent=4):")
    print(json.dumps(ss, indent=4))
    print()
    print("via json.dumps(ss):")  # Works, too.
    # No indent= argument is needed: the C encoder was disabled above.
    print(json.dumps(ss))
    print()
    # Unpacking with ** reads every cell through __getitem__.
    print(f'dict(**ss): {dict(**ss)}')


回答1:


So, first things first, the "obvious way to do it", is to have a Json Encoder with a default method that would create a dict out of a CustomDict class while serializing:

Given

from collections.abc import MutableMapping
import json


class IdentaDict(MutableMapping):
    """Minimal mapping in which every looked-up key maps to itself.

    Iteration yields the single key 'test_value' and the length is always 1;
    assignments and deletions are accepted and ignored.
    """

    def __getitem__(s, i):
        """Return the key itself."""
        return i

    def __setitem__(s, i, v):
        """Ignore the assignment."""
        return None

    def __delitem__(s, i):
        """Ignore the deletion."""
        return None

    def __len__(s):
        """Report exactly one entry."""
        return 1

    def __iter__(s):
        """Iterate over the single key 'test_value'."""
        return iter(['test_value'])

def default(obj):
    """Fallback converter for ``json.dumps(..., default=default)``.

    Returns ``dict(obj)`` when ``obj`` is a MutableMapping instance, so the
    encoder can serialize it as a JSON object.

    Raises TypeError, naming the offending type, for any other object.
    """
    if isinstance(obj, MutableMapping):
        return dict(obj)
    # An explicit message makes the failure diagnosable instead of a bare
    # TypeError with no text.
    raise TypeError(
        'Object of type {} is not JSON serializable'.format(type(obj).__name__))

# Serialize an instance (not the class itself); default() converts it to a dict.
print(json.dumps(IdentaDict(), default=default))

will just work.

second

If for some reason, this is not desirable (maybe creating a dict out of the CustomDict is not feasible, or would be too expensive), it is possible to monkeypatch the machinery of Python's json.encoder, so that it uses the appropriate call to isinstance:


from collections.abc import MutableMapping
from functools import partial
from unittest.mock import patch

import json

class IdentaDict(MutableMapping):
   ...  # Placeholder body -- presumably the same methods as the IdentaDict in the first example.

a = IdentaDict()

# Pre-bind the isinstance= keyword of json.encoder._make_iterencode with a
# wrapper: a check against dict is performed against MutableMapping instead,
# and a check against anything else is passed through unchanged.
new_iterencoder = partial(
    json.encoder._make_iterencode,
    isinstance=lambda obj, cls: isinstance(obj, MutableMapping if cls == dict else cls)
)

# Both json.encoder attributes are patched only for the duration of the
# with-block: the C encoder is set to None and the wrapper is installed.
with patch("json.encoder.c_make_encoder", None), patch("json.encoder._make_iterencode", new_iterencoder):
    print(json.dumps(a))

(Note that while at it, I also disabled the native C encoder, so that the "pass indent to force Python encoder" hack is not needed. One never knows when an eager Python volunteer will implement indent in the C Json serializer and break that)

Also, the "mock.patch" thing is only needed if one plays Mr. RightGuy and is worried about restoring the default behavior. Otherwise, just overriding both members of json.encoder in the application setup will make the changes process-wide, working for all json.dump[s] calls with no changes needed to the calls themselves - which might be more convenient.

third

Now, answering the actual question: what is possible is to have a mechanism that will create an actual subclass of "dict", but implementing all the methods needed by dict. Instead of re-doing the work done by collections.abc.MutableMapping, it should be ok to just copy over both user methods and generated methods to the dict class:

import json
from abc import ABCMeta
from collections.abc import MutableMapping

class RealBase(ABCMeta):
    """Metaclass that rebuilds the class being defined on top of ``realbase``.

    The class is first created the ordinary way, then created again with
    ``realbase`` (``dict`` by default) as its only base, carrying along the
    attributes whose ``__module__`` is "collections.abc".
    """
    def __new__(mcls, name, bases, namespace, *, realbase=dict, **kwargs):
        # Build the class normally first so that attributes inherited from the
        # declared bases can be looked up on it.
        abc_cls = super().__new__(mcls, name, bases, namespace, **kwargs)
        for attr_name in dir(abc_cls):
            attr = getattr(abc_cls, attr_name)
            # Copy attributes that report "collections.abc" as their module
            # into the namespace, unless the class body already defines them.
            if getattr(attr, "__module__", None) == "collections.abc" and attr_name not in namespace:
                namespace[attr_name] = attr
        # Create the final class with realbase as the sole base; the original
        # `bases` and `kwargs` are not passed on to this call.
        return type.__new__(mcls, name, (realbase,), namespace)


class IdentaDict(MutableMapping, metaclass=RealBase):
    """Identity mapping built through the RealBase metaclass.

    Every looked-up key maps to itself, iteration yields only 'test_value',
    the length is always 1, and writes and deletions are ignored.
    """

    def __getitem__(s, i):
        """Return the key itself."""
        return i

    def __setitem__(s, i, v):
        """Ignore the assignment."""
        return None

    def __delitem__(s, i):
        """Ignore the deletion."""
        return None

    def __len__(s):
        """Report exactly one entry."""
        return 1

    def __iter__(s):
        """Iterate over the single key 'test_value'."""
        return iter(['test_value'])

This will make the class work as expected, and return True to isinstance(IdentaDict(), dict). However the C Json Encoder will then try to use native dict APIs to get its values: so json.dumps(...) will not raise, but will produce an empty object unless the Python Json encoder is forced (compare Out[80] with the indent=4 call below). Maybe this is why the instance check in json.encoder is for a strict "dict":

a = IdentaDict()


In [76]: a = IdentaDict()                                                                                                          

In [77]: a                                                                                                                         
Out[77]: {'test_value': 'test_value'}

In [78]: isinstance(a, dict)                                                                                                       
Out[78]: True

In [79]: len(a)                                                                                                                    
Out[79]: 1

In [80]: json.dumps(a)                                                                                                             
Out[80]: '{}'

In [81]: print(json.dumps(a, indent=4))                                                                                            
{
    "test_value": "test_value"
}

(Another side-effect of this metaclass is that as the value returned by __new__ is not an instance of ABCMeta, the metaclass __init__ won't be called. But people coding with multiple metaclass composition would have to be aware of such issues. This would be easily work-aroundable by explicitly calling mcls.__init__ at the end of __new__)




回答2:


I think I found a way to do it, based on a modified version of the suggestion in this answer to the question How to “perfectly” override a dict?.

Disclaimer: As the answer's author states, it's a "monstrosity", so I probably would never actually use it in production code.

Here's the result:

from __future__ import print_function
try:
    from collections.abc import Mapping, MutableMapping  # Python 3
except ImportError:
    from collections import Mapping, MutableMapping  # Python 2


class SpreadSheet(MutableMapping):
    """Mapping of cell names to formulas whose instances claim to be dicts.

    Formulas are stored as given in ``_cells`` and evaluated with ``eval()``
    on lookup, using ``_tools`` as globals and the spreadsheet itself as
    locals.  Each instance stores ``dict`` as its own ``__class__`` attribute.
    """

    def __init__(self, tools=None, **kwargs):
        """Create an empty spreadsheet.

        tools: optional mapping of names made available to formulas.
        **kwargs: accepted but not used.
        """
        self.__class__ = dict  # see https://stackoverflow.com/a/47361653/355230

        self._cells = {}
        self._tools = {'__builtins__': None}
        if tools is not None:
            self._tools.update(tools)  # Add caller supplied functions.

    @classmethod
    def __class__(cls):  # see https://stackoverflow.com/a/47361653/355230
        return dict

    def clear(self):
        """Remove all cells."""
        return self._cells.clear()

    def __contains__(self, key):
        """Return True if a cell named ``key`` has been stored."""
        return key in self._cells

    def __setitem__(self, key, formula):
        """Store ``formula`` unevaluated under ``key``."""
        self._cells[key] = formula

    def __getitem__(self, key):
        """Return the evaluated value of the formula stored under ``key``.

        Raises KeyError if the cell does not exist.
        """
        # NOTE(security): eval() executes whatever expression is stored in the
        # cell, so formulas should only come from trusted sources.
        return eval(self._cells[key], self._tools, self)

    def __len__(self):
        """Return the number of stored cells."""
        return len(self._cells)

    def __iter__(self):
        """Iterate over the stored cell names."""
        return iter(self._cells)

    def __delitem__(self, key):
        """Remove the cell ``key``; raises KeyError if it does not exist."""
        del self._cells[key]

    def getformula(self, key):
        """ Return raw un-evaluated contents of cell. """
        return self._cells[key]

    def update(self, *args, **kwargs):
        """Store every key/formula pair described by dict()-style arguments."""
        # dict.iteritems() only exists on Python 2; items() is available on
        # both Python 2 and 3, matching the dual-version imports of this code.
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

#    # Doesn't work.
#    type(dict).register(SpreadSheet)  # Register class as dict subclass.


if __name__ == '__main__':

    import json
    from math import cos, sin, pi, tan

    # Names that cell formulas are allowed to use.
    tools = {'len': len, 'sin': sin, 'cos': cos, 'pi': pi, 'tan': tan}

    ss = SpreadSheet(tools)
    cells = [('a1', '5'), ('a2', 'a1*6'), ('a3', 'a2*7'), ('b1', 'sin(pi/4)')]
    for cell, formula in cells:
        ss[cell] = formula

    print()
    is_dict = isinstance(ss, dict)
    print('isinstance(SpreadSheet(tools), dict) -> {}'.format(is_dict))
    print()
    print('Static Contents via getformula():')
    raw_formulas = {k: ss.getformula(k) for k in ss.keys()}
    print(json.dumps(raw_formulas, indent=4))
    print()
    print('Dynamic Contents via __getitem__():')
    for cell, _ in cells:
        print("  ss['{}'] -> {!r}".format(cell, ss[cell]))
    print()
    print("via json.dumps(ss, indent=4):")
    print(json.dumps(ss, indent=4))

Output:

isinstance(SpreadSheet(tools), dict) -> True

Static Contents via getformula():
{
    "a1": "5",
    "a2": "a1*6",
    "a3": "a2*7",
    "b1": "sin(pi/4)"
}

Dynamic Contents via __getitem__():
  ss['a1'] -> 5
  ss['a2'] -> 30
  ss['a3'] -> 210
  ss['b1'] -> 0.7071067811865475

via json.dumps(ss, indent=4):
{
    "a1": 5,
    "a2": 30,
    "a3": 210,
    "b1": 0.7071067811865475
}

Note: I got the idea for this class from an old ActiveState recipe by Raymond Hettinger.




回答3:


You can do something like:

import json

def json_default(obj):
    """Fallback converter for ``json.dumps(..., default=json_default)``.

    Returns the spreadsheet's underlying ``_cells`` dict, i.e. the stored
    contents exactly as they were assigned, when ``obj`` is a SpreadSheet.

    Raises TypeError, naming the offending type, for any other object.
    """
    if isinstance(obj, SpreadSheet):
        return obj._cells
    # An explicit message makes the failure diagnosable instead of a bare
    # TypeError with no text.
    raise TypeError(
        'Object of type {} is not JSON serializable'.format(type(obj).__name__))

cheet = SpreadSheet()
# Store plain numbers in three cells.
for cell_name, cell_value in (('a', 5), ('b', 23), ('c', -4)):
    cheet[cell_name] = cell_value


# json_default hands the encoder the spreadsheet's underlying dict.
print(json.dumps(cheet, default=json_default))

Output:

{"a": 5, "b": 23, "c": -4}

The key is the function json_default that tells the json encoder how to serialize your class!



来源:https://stackoverflow.com/questions/57982946/how-to-register-implementation-of-abc-mutablemapping-as-a-dict-subclass

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!