I'm currently using the toprettyxml() function of the xml.dom module in a Python script and I'm having some trouble with the newlines.
If I don't …
This gives me nicely formatted XML on Python 3.6 (I haven't tried it on Windows):
# Parse the XML text, serialize it with an empty line separator, then
# collapse any doubled newlines that remain in the result.
dom = xml.dom.minidom.parseString(xml_string)
serialized = dom.toprettyxml(newl='')
pretty_xml_as_string = serialized.replace("\n\n", "\n")
The following function worked for my problem. I had to use Python 2.7 and I was not allowed to install any additional third-party packages.
The crux of the implementation is as follows:
~
import os
import re
import xml.dom.minidom
import sys
class XmlTag:
    """Constants describing XML tag kinds and the text markers that identify them."""

    # Numeric codes for the three kinds of tag.
    opening, closing, self_closing = range(3)

    # Marker substrings that identify each kind of tag in serialized XML.
    opening_tag = "<"
    closing_tag = "</"
    self_closing_tag = "/>"
def to_pretty_xml(xml_file_path, indent=" "):
    """Return the XML file at *xml_file_path* as an indented string.

    Whitespace-only text nodes (the indentation and blank lines of the input
    file) are discarded before serializing, so re-formatting an already
    formatted file does not accumulate empty lines. Remaining text has its
    leading and trailing whitespace trimmed; whitespace inside text and
    inside tags (for example between attributes) is preserved.

    Args:
        xml_file_path: Path (or open file object) accepted by
            ``xml.dom.minidom.parse``.
        indent: String used for one level of indentation, e.g. ``"  "`` or
            ``"\\t"``. Defaults to a single space.

    Returns:
        The pretty-printed document without the ``<?xml ...?>`` declaration
        and without a trailing newline. Lines are separated by ``"\\n"``.

    Raises:
        IOError/OSError: If the file cannot be read.
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
    """
    dom = xml.dom.minidom.parse(xml_file_path)
    _trim_text_nodes(dom)

    # Serialize the document's children instead of the document itself:
    # Document.toprettyxml() always emits the XML declaration, which this
    # function deliberately leaves out.
    pretty_xml = "".join(
        child.toprettyxml(indent, "\n") for child in dom.childNodes
    )

    # Every serialized node ends with the line separator; drop the final one
    # so the result ends with the last tag.
    return pretty_xml.rstrip("\n")


def _trim_text_nodes(node):
    """Trim text nodes below *node* in place, removing whitespace-only ones."""
    # Iterate over a copy because removeChild() mutates node.childNodes.
    for child in list(node.childNodes):
        if child.nodeType == xml.dom.Node.TEXT_NODE:
            trimmed = child.data.strip()
            if trimmed:
                child.data = trimmed
            else:
                node.removeChild(child)
        else:
            # CDATA sections, comments, etc. have no children, so this only
            # descends into elements and leaves other node types untouched.
            _trim_text_nodes(child)
# Pretty-print simple.xml and store the result in pretty.xml.
pretty_xml = to_pretty_xml("simple.xml")
with open("pretty.xml", mode='w') as output_file:
    output_file.write(pretty_xml)