Merge xml files with nested elements without external libraries

僤鯓⒐⒋嵵緔 提交于 2019-11-27 19:31:27

The code you posted combines all the elements, regardless of whether an element with the same tag already exists. So you need to iterate over the elements and manually check and combine them the way you see fit, because there is no standard way of merging XML files. I can't explain it better than code, so here it is, more or less commented:

from xml.etree import ElementTree as et

class XMLCombiner(object):
    """Merge several XML files into the tree of the first one.

    Children are matched by tag name only.  When several children of the
    same parent share a tag, the last of them is the one that receives
    the merged content.
    """

    def __init__(self, filenames):
        """Parse every file in `filenames` and remember the roots in order."""
        assert len(filenames) > 0, 'No filenames!'
        parsed_roots = []
        for name in filenames:
            parsed_roots.append(et.parse(name).getroot())
        self.roots = parsed_roots

    def combine(self):
        """Fold every later root into the first one and return it serialized."""
        base = self.roots[0]
        for extra in self.roots[1:]:
            self.combine_element(base, extra)
        return et.tostring(base)

    def combine_element(self, one, other):
        """
        Merge the children of `other` into `one`, in place.

        For each child of `other`:
        - no child of `one` has the same tag: the child is appended to `one`;
        - a match exists and the child has no children: the match takes
          over the child's text;
        - a match exists and the child is nested: the two are merged
          recursively.
        """
        # Index the children of `one` by tag; that is the matching criterion.
        by_tag = dict((child.tag, child) for child in one)
        for incoming in other:
            existing = by_tag.get(incoming.tag)
            if existing is None:
                # Unknown tag: adopt the element and make it matchable
                # for later siblings carrying the same tag.
                by_tag[incoming.tag] = incoming
                one.append(incoming)
            elif len(incoming) == 0:
                # Leaf element: only the text is carried over.
                existing.text = incoming.text
            else:
                # Nested element: descend and merge child by child.
                self.combine_element(existing, incoming)

if __name__ == '__main__':
    # Merge the two sample files and show the combined document.
    r = XMLCombiner(('sample1.xml', 'sample2.xml')).combine()
    # The call form of print with a single argument works on Python 2.7
    # and Python 3 alike; the statement form is a syntax error on Python 3.
    print('-' * 20)
    # et.tostring() returns ASCII-encoded bytes by default; decoding keeps
    # the output readable on Python 3 instead of showing a b'...' repr.
    print(r.decode('ascii'))

Thank you, but my problem was to merge while also taking the attributes into account. Here is the code after my patch:

    import sys
    from xml.etree import ElementTree as et


    class hashabledict(dict):
        """A dict that can be hashed, so it can be used as a dictionary key.

        The hash is computed from the items sorted into a tuple, so two
        instances with equal contents hash equally whatever their
        insertion order.  The hash follows the current contents: do not
        mutate an instance while it is in use as a key.
        """

        def __hash__(self):
            ordered_items = sorted(self.items())
            return hash(tuple(ordered_items))


    class XMLCombiner(object):
        """Merge several XML files into the tree of the first one.

        Two elements are treated as the same element when both their tag
        and their complete attribute set are equal.  When several children
        of the same parent share tag and attributes, the last of them is
        the one that receives the merged content.
        """

        def __init__(self, filenames):
            """Parse every file in `filenames` and keep the roots in order.

            Raises AssertionError when `filenames` is empty.
            """
            assert len(filenames) > 0, 'No filenames!'
            # save all the roots, in order, to be processed later
            self.roots = [et.parse(f).getroot() for f in filenames]

        @staticmethod
        def _key(el):
            """Return the hashable identity (tag, attributes) of `el`."""
            # frozenset gives an order-independent, hashable snapshot of the
            # attributes, so no dict subclass is needed for the lookup key.
            return (el.tag, frozenset(el.attrib.items()))

        def combine(self):
            """Merge every later root into the first one.

            Returns an `ElementTree` wrapping the (mutated) first root.
            """
            for r in self.roots[1:]:
                # combine each element with the first one, and update that
                self.combine_element(self.roots[0], r)
            return et.ElementTree(self.roots[0])

        def combine_element(self, one, other):
            """
            Merge the children of `other` into `one`, in place.

            For each child of `other`:
            - no child of `one` has the same tag and attributes: the child
              is appended to `one`;
            - a match exists and the child has no children: the match
              takes over the child's text;
            - a match exists and the child is nested: the two are merged
              recursively.
            """
            # Index the children of `one` by (tag, attributes), as that is
            # what we are filtering with.
            mapping = {self._key(el): el for el in one}
            for el in other:
                key = self._key(el)
                match = mapping.get(key)
                if match is None:
                    # Not in the mapping: register it so later siblings
                    # with the same identity merge into it, then add it.
                    mapping[key] = el
                    one.append(el)
                elif len(el) == 0:
                    # Not nested: update the text
                    match.text = el.text
                else:
                    # Nested: recursively process the element
                    self.combine_element(match, el)

if __name__ == '__main__':
    # Usage: script.py INPUT.xml [INPUT.xml ...] OUTPUT.xml
    # Every argument but the last is an input file; the last one is the
    # path the merged document is written to.
    r = XMLCombiner(sys.argv[1:-1]).combine()
    # The call form of print with a single argument works on Python 2.7
    # and Python 3 alike; the statement form is a syntax error on Python 3.
    print('-' * 20)
    # et.tostring() returns ASCII-encoded bytes by default; decoding keeps
    # the output readable on Python 3 instead of showing a b'...' repr.
    print(et.tostring(r.getroot()).decode('ascii'))
    r.write(sys.argv[-1], encoding="iso-8859-1", xml_declaration=True)
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!