Create a json tree from csv list in python

后端 未结 3 1298
一生所求
一生所求 2020-12-19 20:05

I'm trying to build a JSON hierarchy from a simple table in Python.

The data comes in looking like the following:

id         parent          name
1          


        
相关标签:
3条回答
  • 2020-12-19 20:56

    To attach every child node to its parent, you can make two passes over the list of nodes. The first pass adds each node to a UserDict. In the second pass the parent of each node is guaranteed to be in the UserDict, so the node can be added to its parent's children.

    To serialize to JSON a JSONEncoder can be used.

    #!/usr/bin/env python
    
    import sys
    import json
    import UserDict
    
    class Node(object):
        """One table row: its id, its parent's id, a name, and a list of child nodes."""

        def __init__(self, nid, parent, name):
            # Identity fields first, then a fresh per-instance child list and the label.
            self.nid, self.parent = nid, parent
            self.children, self.name = [], name
    
    class NodeDict(UserDict.UserDict):
        """Dictionary of nodes keyed by ``nid`` that can also link children to parents."""

        def addNodes(self, nodes):
            """Register every node, then attach each one to its parent's ``children``.

            Two passes are made so that a child may appear before its parent in
            ``nodes``: the first pass stores every node under its ``nid``, the
            second looks each parent up and appends the child to it.  Children
            therefore end up in input order.  A node whose ``parent`` is
            ``"none"``, or is not the ``nid`` of any stored node, stays unattached.

            :param nodes: iterable of objects exposing ``nid``, ``parent`` and
                ``children`` attributes.
            """
            # Materialise once so that one-shot iterables survive both passes.
            nodes = list(nodes)

            for node in nodes:
                self.data[node.nid] = node

            for node in nodes:
                if node.parent == "none":
                    continue
                parent = self.data.get(node.parent)
                # The membership test keeps a repeated call from adding the same child twice.
                if parent is not None and node not in parent.children:
                    parent.children.append(node)
    
    class NodeJSONEncoder(json.JSONEncoder):
        """JSON encoder that can serialise ``Node`` objects."""

        def default(self, node):
            """Return a JSON-serialisable dict for ``node``.

            The encoder calls this hook for any object it cannot serialise
            itself.  The ``children`` list is returned as-is, so the encoder
            comes back here for each child while it serialises that list.

            :param node: the object the encoder could not serialise.
            :returns: dict with the keys ``"nid"``, ``"name"`` and ``"children"``.
            :raises TypeError: if ``node`` is not a ``Node``.
            """
            # isinstance, unlike an exact type comparison, also accepts Node subclasses.
            if isinstance(node, Node):
                return {"nid": node.nid, "name": node.name, "children": node.children}
            raise TypeError("{} is not an instance of Node".format(node))
    
    if __name__ == "__main__":
        # Script entry point: sys.argv[1] names a whitespace-separated table with a
        # header line followed by "id parent name" rows.
        nodes = []

        with open(sys.argv[1]) as f:
            next(f, None)  # drop the header line; harmless when the file is empty
            for row in f:
                fields = row.split()
                if not fields:
                    # A blank line would otherwise break the three-way unpacking below.
                    continue
                nid, parent, name = fields
                nodes.append(Node(nid, parent, name))

        nodeDict = NodeDict()
        nodeDict.addNodes(nodes)

        # Every node whose parent is "none" is the root of its own tree;
        # one JSON document is printed per root.
        rootNodes = [node for node in nodeDict.values() if node.parent == "none"]
        for rootNode in rootNodes:
            # A parenthesised single-argument print works on both Python 2 and 3.
            print(NodeJSONEncoder().encode(rootNode))
    

    Result:

    {"name": "test-name-4", "nid": "4", "children":[
         {"name": "test-name-10", "nid": "10", "children":[
             {"name": "test-name-1", "nid": "1", "children":[
                {"name": "test-name-7", "nid": "7", "children": []},
                {"name": "test-name-8", "nid": "8", "children":[
                    {"name": "test-name-9", "nid": "9", "children": []}]}]},
             {"name": "test-name-2", "nid": "2", "children": []},
             {"name": "test-name-5", "nid": "5", "children":[
                 {"name": "test-name-3", "nid": "3", "children": []}]}]}]}
    {"name": "test-name-6", "nid": "6", "children": []}
    
    0 讨论(0)
  • 2020-12-19 20:56

    The answer given did not work for me in Python 3.6 because UserDict.UserDict is no longer available there (in Python 3 the class lives in collections). So I made some changes to make it work and generalized it a little by letting the user specify the columns for child_id, parent_id and child name via the command line. Please see below (I am just learning and am sure this could be improved, but it works for my purposes).

    """ Converts a CSV file with Parent/Child Hierarchy to a hierarchical JSON file for front-end processing (javascript/DS)
    
    USAGE: csv2json.py <somefile.csv> a b c  (column nrs of a=child_id, b=parent-id, c=name(of child))
    
    ROOT of hierarchy should contain child_id and parent_id = 'none' or blank.  name must exist """
    
    import sys
    import json
    import csv
    #import UserDict
    from collections import UserDict
    
    class Node:
        """A single hierarchy entry: its own id, its parent's id, a name and its child nodes."""

        def __init__(self, child_id, parent_id, name):
            self.child_id, self.parent_id = child_id, parent_id
            # Each instance starts with its own empty child list.
            self.children, self.name = [], name
    
    class NodeDict(UserDict):
        """Dictionary of nodes keyed by ``child_id`` that can also link children to parents."""

        def addNodes(self, nodes):
            """Register every node, then attach each one to its parent's ``children``.

            Two passes are made so that a child may appear before its parent in
            ``nodes``: the first pass stores every node under its ``child_id``,
            the second looks each parent up and appends the child to it.
            Children therefore end up in input order.

            A ``parent_id`` of ``"none"`` or ``""`` marks a root.  Such a node is
            never attached to anything, even if some row happens to use
            ``"none"`` or ``""`` as its own ``child_id``.  Nodes whose
            ``parent_id`` matches no stored ``child_id`` also stay unattached.

            :param nodes: iterable of objects exposing ``child_id``,
                ``parent_id`` and ``children`` attributes.
            """
            # Materialise once so that one-shot iterables survive both passes.
            nodes = list(nodes)

            for node in nodes:
                self.data[node.child_id] = node

            for node in nodes:
                # Both root markers must be excluded; testing them with "or"
                # would be true for every value and exclude nothing.
                if node.parent_id in ("none", ""):
                    continue
                parent = self.data.get(node.parent_id)
                # The membership test keeps a repeated call from adding the same child twice.
                if parent is not None and node not in parent.children:
                    parent.children.append(node)
    
    class NodeJSONEncoder(json.JSONEncoder):
        """JSON encoder that can serialise ``Node`` objects."""

        def default(self, node):
            """Return a JSON-serialisable dict for ``node``.

            The encoder calls this hook for any object it cannot serialise
            itself.  The ``children`` list is returned as-is, so the encoder
            comes back here for each child while it serialises that list.

            :param node: the object the encoder could not serialise.
            :returns: dict with the keys ``"name"`` and ``"children"``.
            :raises TypeError: if ``node`` is not a ``Node``.
            """
            # isinstance, unlike an exact type comparison, also accepts Node subclasses.
            if isinstance(node, Node):
                return {"name": node.name, "children": node.children}
            # Must sit outside the "if": falling off the end would return None
            # and unsupported objects would silently be written as null.
            raise TypeError("{} is not an instance of Node".format(node))
    
    if __name__ == "__main__":
        # Script entry point.  sys.argv[1] is the CSV path; sys.argv[2..4] are the
        # 0-based column indexes of child_id, parent_id and name.
        # The indexes are parsed once here rather than once per row.
        id_col, parent_col, name_col = (int(arg) for arg in sys.argv[2:5])

        nodes = []

        # newline='' is how the csv module asks for files to be opened, so that
        # newlines embedded in quoted fields are parsed correctly.
        with open(sys.argv[1], 'r', newline='') as f:
            for row in csv.reader(f):
                try:
                    child_id, parent_id, name = row[id_col], row[parent_col], row[name_col]
                except IndexError:
                    # Blank lines arrive as [] and short rows lack a column: skip them.
                    continue
                if not name:  # skip if no name/label exists
                    continue
                nodes.append(Node(child_id, parent_id, name))

        nodeDict = NodeDict()
        nodeDict.addNodes(nodes)

        # A parent_id of "none" or "" marks the root of a tree; one JSON document
        # is printed per root.
        rootNodes = [node for node in nodeDict.values()
                     if node.parent_id in ("none", "")]
        for rootNode in rootNodes:
            print(NodeJSONEncoder().encode(rootNode))
    
    0 讨论(0)
  • 2020-12-19 20:57

    I have a solution based on 2 loops too (1 to cache, 1 to build), without JSON encoder, and that gives exactly the output you required:

    >>> import re
    >>> from collections import defaultdict
    >>> parents = defaultdict(list)
    >>> for i, line in enumerate(file_.split('\n')):
        if i != 0 and line.strip():
            id_, parent, name = re.findall(r'[\d\w-]+', line)
            parents[parent].append((id_, name))
    
    
    >>> parents
    defaultdict(<type 'list'>, {'10': [('1', 'test-name-1'), ('2', 'test-name-2'), ('5', 'test-name-5')], 'none': [('4', 'test-name-4'), ('6', 'test-name-6')], '1': [('7', 'test-name-7'), ('8', 'test-name-8')], '5': [('3', 'test-name-3')], '4': [('10', 'test-name-10')], '8': [('9', 'test-name-9')]})
    

    OK, now we have our cache, the recursive function easily builds the output we'd like:

    >>> def build_tree(d, val):
        return {'$' + id_: {'name': name, 'children': build_tree(d, id_)} for id_, name in d[val]}
    

    We just have to call it on the dict built previously, with value 'none' which is the tree root:

    >>> from pprint import pprint
    >>> pprint(build_tree(parents, 'none'))
    {'$4': {'children': {'$10': {'children': {'$1': {'children': {'$7': {'children': {},
                                                                         'name': 'test-name-7'},
                                                                  '$8': {'children': {'$9': {'children': {},
                                                                                             'name': 'test-name-9'}},
                                                                         'name': 'test-name-8'}},
                                                     'name': 'test-name-1'},
                                              '$2': {'children': {},
                                                     'name': 'test-name-2'},
                                              '$5': {'children': {'$3': {'children': {},
                                                                         'name': 'test-name-3'}},
                                                     'name': 'test-name-5'}},
                                 'name': 'test-name-10'}},
            'name': 'test-name-4'},
     '$6': {'children': {}, 'name': 'test-name-6'}}
    >>> 
    
    0 讨论(0)
提交回复
热议问题