Creating complex nested dictionaries from Pandas DataFrame

后端 未结 2 1187
梦毁少年i
梦毁少年i 2021-01-13 02:28

I'm trying to find a generic way of creating (possibly deeply) nested dictionaries from a flat Pandas DataFrame instance.

Suppose I have the following DataFrame:

2条回答
  •  一个人的身影
    2021-01-13 02:46

    Not really concise, but it's the best I can get now:

    >>> def rollup1(x):
    ...     return x.set_index('test')[['grade', 'pass']].to_dict(orient='index')
    >>> def rollup2(x):
    ...     return x.groupby('course').apply(rollup1).to_dict()
    >>> def rollup3(x):
    ...     return x.groupby('study').apply(rollup2).to_dict()
    
    >>> df = dat.groupby(['name','age','gender']).apply(rollup3)
    >>> df.name = 'study'
    >>> res = df.reset_index(level=[1,2]).to_dict(orient='index')
    >>> pprint.pprint(res)
    {'Henry': {'age': 31L,
               'gender': 'Male',
               'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
                                                                     'pass': True},
                                                           'Exam2': {'grade': 'C',
                                                                     'pass': True}}}}},
     'John': {'age': 24L,
              'gender': 'Male',
              'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
                                                                   'pass': True},
                                                         'Exam': {'grade': 'A',
                                                                  'pass': True}},
                                        'Calculus 102': {'Exam': {'grade': 'B',
                                                                  'pass': True}}},
                        'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
                                                                         'pass': True}}}}}}
    

    The idea is to roll up the data into dictionaries while grouping it, so that the nested result ends up in the 'study' column.

    Update: I've tried to create a more generic solution, so that it works for questions like this one as well:

    def rollup_to_dict_core(x, values, columns, d_columns=None):
        """Recursively roll a flat DataFrame up into nested dictionaries.

        Each entry of ``columns`` becomes one nesting level, outermost first.
        The last entry of ``columns`` is used as the index of the innermost
        dictionaries, whose payload is taken from ``values``.

        Args:
            x: DataFrame (or a group of it) to roll up.
            values: List of column names holding the leaf data. With a single
                name the leaves are scalars; with several names each leaf is
                a ``{value_column: value}`` dict.
            columns: List of column names to nest by, outermost first. Must
                contain at least one name.
            d_columns: Optional extra column names that are grouped together
                with ``columns[0]`` and emitted as plain keys next to the
                nested dictionary at the top level. They are only used by
                this call; the recursive calls do not receive them, and they
                are ignored when ``columns`` has a single entry.

        Returns:
            A (possibly nested) dict keyed by the values of ``columns[0]``.
            When ``d_columns`` is non-empty, each top-level entry is a dict
            holding the ``d_columns`` values plus the nested dictionary stored
            under the key ``columns[1]``.
        """
        # Copy into a list so the concatenation below also accepts tuples and
        # the caller's sequence is never shared.
        d_columns = [] if d_columns is None else list(d_columns)

        if len(columns) == 1:
            # Innermost level: index by the last nesting column and dump.
            if len(values) == 1:
                return x.set_index(columns)[values[0]].to_dict()
            return x.set_index(columns)[values].to_dict(orient='index')

        # One group per distinct outer key; each group is rolled up
        # recursively, producing a Series whose values are dicts.
        res = x.groupby([columns[0]] + d_columns).apply(
            lambda y: rollup_to_dict_core(y, values, columns[1:]))
        if not d_columns:
            return res.to_dict()

        # Name the Series after the next nesting column so that the nested
        # dicts land under that key once the d_columns levels become columns.
        res.name = columns[1]
        # reset_index only treats list/tuple ``level`` values as several
        # levels; a bare range object (Python 3) would be taken as one label
        # and raise KeyError, so materialize it.
        res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
        return res.to_dict(orient='index')
    
    def rollup_to_dict(x, values, d_columns=None):
        """Roll a flat DataFrame up into nested dictionaries.

        Every column of ``x`` that is neither listed in ``values`` nor in
        ``d_columns`` is used as a nesting level, in the DataFrame's column
        order; the actual work is delegated to ``rollup_to_dict_core``.

        Args:
            x: The flat DataFrame to convert.
            values: Column names holding the leaf data.
            d_columns: Optional column names passed through to
                ``rollup_to_dict_core`` as its ``d_columns`` argument.

        Returns:
            Whatever ``rollup_to_dict_core`` returns for the derived nesting
            columns.
        """
        extras = [] if d_columns is None else d_columns

        # Whatever is left after removing the payload and the extra columns
        # defines the nesting hierarchy.
        nesting = []
        for col in x.columns:
            if col in values or col in extras:
                continue
            nesting.append(col)

        return rollup_to_dict_core(x, values, nesting, extras)
    
    >>> pprint(rollup_to_dict(dat, ['pass', 'grade'], ['age','gender']))
    {'Henry': {'age': 31L,
               'gender': 'Male',
               'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
                                                                     'pass': True},
                                                           'Exam2': {'grade': 'C',
                                                                     'pass': True}}}}},
     'John': {'age': 24L,
              'gender': 'Male',
              'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
                                                                   'pass': True},
                                                         'Exam': {'grade': 'A',
                                                                  'pass': True}},
                                        'Calculus 102': {'Exam': {'grade': 'B',
                                                                  'pass': True}}},
                        'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
                                                                         'pass': True}}}}}}
    

提交回复
热议问题