Creating complex nested dictionaries from Pandas DataFrame

后端 未结 2 1181
梦毁少年i
梦毁少年i 2021-01-13 02:28

I'm trying to find a generic way of creating (possibly deeply) nested dictionaries from a flat Pandas DataFrame instance.

Suppose I have the following DataFrame:

相关标签:
2条回答
  • 2021-01-13 02:46

    Not really concise, but it's the best I can get now:

    >>> # Innermost level: map each test name to its {'grade', 'pass'} record.
    >>> def rollup1(x):
    ...     return x.set_index('test')[['grade', 'pass']].to_dict(orient='index')
    >>> # Middle level: one rollup1 dictionary per course.
    >>> def rollup2(x):
    ...     return x.groupby('course').apply(rollup1).to_dict()
    >>> # Outer level: one rollup2 dictionary per study.
    >>> def rollup3(x):
    ...     return x.groupby('study').apply(rollup2).to_dict()
    
    >>> # Group by the per-person columns and build each person's nested dictionary.
    >>> df = dat.groupby(['name','age','gender']).apply(rollup3)
    >>> # Name the result so the nested dictionaries land under the 'study' key.
    >>> df.name = 'study'
    >>> # Move index levels 1 and 2 out of the index; level 0 stays as the outer key.
    >>> res = df.reset_index(level=[1,2]).to_dict(orient='index')
    >>> pprint.pprint(res)
    {'Henry': {'age': 31L,
               'gender': 'Male',
               'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
                                                                     'pass': True},
                                                           'Exam2': {'grade': 'C',
                                                                     'pass': True}}}}},
     'John': {'age': 24L,
              'gender': 'Male',
              'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
                                                                   'pass': True},
                                                         'Exam': {'grade': 'A',
                                                                  'pass': True}},
                                        'Calculus 102': {'Exam': {'grade': 'B',
                                                                  'pass': True}}},
                        'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
                                                                         'pass': True}}}}}}
    

    The idea is to roll the data up into dictionaries while grouping, so that the nested result ends up in a 'study' column.

    Update: I've tried to create a more generic solution, so that it works for questions like this one as well:

    def rollup_to_dict_core(x, values, columns, d_columns=None):
        """Recursively roll a flat DataFrame up into nested dictionaries.

        Each entry of ``columns`` becomes one nesting level, outermost first.
        The last entry keys the innermost dictionaries, whose contents come
        from ``values``.

        Args:
            x: The flat ``pandas.DataFrame`` to roll up.
            values: Leaf column names. With exactly one name the leaf is the
                bare value of that column; otherwise it is a
                ``{column: value}`` dictionary.
            columns: Ordered column names to nest by.
            d_columns: Optional extra columns grouped together with
                ``columns[0]``. They are only applied at this call's level
                (the recursive calls do not receive them) and show up as
                additional keys next to the nested dictionary, which is
                stored under the key ``columns[1]``.

        Returns:
            A nested ``dict`` keyed by the values of ``columns[0]``.
        """
        # Copy into a list so tuples (or any other sequence) can be concatenated.
        d_columns = [] if d_columns is None else list(d_columns)

        if len(columns) == 1:
            # Base case: the last nesting column keys the leaf entries.
            indexed = x.set_index(columns)
            if len(values) == 1:
                return indexed[values[0]].to_dict()
            return indexed[values].to_dict(orient='index')

        res = x.groupby([columns[0]] + d_columns).apply(
            lambda y: rollup_to_dict_core(y, values, columns[1:]))
        if not d_columns:
            return res.to_dict()

        # The Series name becomes the key that holds the nested dictionaries.
        res.name = columns[1]
        # reset_index() treats only a list/tuple as "several levels"; a Python 3
        # range object would be looked up as one level label and fail, so it is
        # materialized into a list first.
        res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
        return res.to_dict(orient='index')
    
    def rollup_to_dict(x, values, d_columns=None):
        """Roll ``x`` up into nested dictionaries, inferring the nesting columns.

        Every column of ``x`` that is listed in neither ``values`` nor
        ``d_columns`` is used as a nesting level, in the frame's column order.
        The actual work is delegated to ``rollup_to_dict_core``.

        Args:
            x: The flat DataFrame to roll up.
            values: Column names that form the leaf entries.
            d_columns: Optional extra columns passed through to
                ``rollup_to_dict_core``; defaults to an empty list.

        Returns:
            Whatever ``rollup_to_dict_core`` returns for the inferred columns.
        """
        if d_columns is None:
            d_columns = []

        columns = []
        for col in x.columns:
            # Skip anything already claimed as a leaf value or an extra column.
            if col in values or col in d_columns:
                continue
            columns.append(col)

        return rollup_to_dict_core(x, values, columns, d_columns)
    
    >>> pprint(rollup_to_dict(dat, ['pass', 'grade'], ['age','gender']))
    {'Henry': {'age': 31L,
               'gender': 'Male',
               'study': {'Physics': {'Quantum mechanics': {'Exam1': {'grade': 'C',
                                                                     'pass': True},
                                                           'Exam2': {'grade': 'C',
                                                                     'pass': True}}}}},
     'John': {'age': 24L,
              'gender': 'Male',
              'study': {'Mathematics': {'Calculus 101': {'Essay': {'grade': 'A',
                                                                   'pass': True},
                                                         'Exam': {'grade': 'A',
                                                                  'pass': True}},
                                        'Calculus 102': {'Exam': {'grade': 'B',
                                                                  'pass': True}}},
                        'Philosophy': {'Aristotelean Ethics': {'Essay': {'grade': 'A',
                                                                         'pass': True}}}}}}
    
    0 讨论(0)
  • 2021-01-13 02:56

    This is a partial answer. I don't know how to convert the index to JSON.

    # Sample data: one row per (person, course, test) result.
    records = {
        'name': ['John', 'John', 'John', 'John', 'Henry', 'Henry'],
        'age': [24, 24, 24, 24, 31, 31],
        'gender': ['Male', 'Male', 'Male', 'Male', 'Male', 'Male'],
        'study': ['Mathematics', 'Mathematics', 'Mathematics', 'Philosophy', 'Physics', 'Physics'],
        'course': ['Calculus 101', 'Calculus 101', 'Calculus 102', 'Aristotelean Ethics', 'Quantum mechanics', 'Quantum mechanics'],
        'test': ['Exam', 'Essay', 'Exam', 'Essay', 'Exam1', 'Exam2'],
        'pass': [True, True, True, True, True, True],
        'grade': ['A', 'A', 'B', 'A', 'C', 'C'],
    }
    # Every column is moved into the index, outermost level first.
    index_levels = ['name', 'age', 'gender', 'study', 'course', 'test', 'grade', 'pass']
    df = pd.DataFrame(records)
    df = df.set_index(keys=index_levels)
    df
    

    Output:

    0 讨论(0)
提交回复
热议问题