Does anyone have a suggestion for the best way to open the xml data on the site below to put it in a dataframe (I prefer working with pandas) in python? The file is on the
This code works for loading this type of Excel XML file into a DataFrame:
import pandas as pd
from xml.sax import ContentHandler, parse
# Reference https://goo.gl/KaOBG3
class ExcelHandler(ContentHandler):
    """SAX handler that collects the cell text of an Excel XML file.

    After parsing, ``tables`` holds one entry per ``Table`` element. Each
    entry is a list of rows, and each row is a list of cell strings, in
    document order.
    """

    def __init__(self):
        # Call the base initialiser explicitly instead of via super() so it
        # also works where ContentHandler is an old-style class (Python 2).
        ContentHandler.__init__(self)
        self.chars = []   # text fragments of the cell currently being read
        self.cells = []   # cell strings of the row currently being read
        self.rows = []    # rows of the table currently being read
        self.tables = []  # every completed table

    def characters(self, content):
        """Buffer a text fragment; a cell's text may arrive in pieces."""
        self.chars.append(content)

    def startElement(self, name, atts):
        """Reset the buffer that belongs to the element being opened."""
        # NOTE(review): cell attributes such as ss:Index are ignored, so a
        # row that skips empty cells may come out shorter -- TODO confirm
        # against the input file.
        if name == "Cell":
            self.chars = []
        elif name == "Row":
            self.cells = []
        elif name == "Table":
            self.rows = []

    def endElement(self, name):
        """Store the finished cell, row or table in its parent container."""
        if name == "Cell":
            # Everything buffered since the opening <Cell> is this cell's text.
            self.cells.append(''.join(self.chars))
        elif name == "Row":
            self.rows.append(self.cells)
        elif name == "Table":
            self.tables.append(self.rows)
# Parse the Excel XML file; the handler accumulates every table it contains.
excelHandler = ExcelHandler()
parse('feds200628.xls', excelHandler)

# In the first table, row 9 supplies the column names and the data rows
# start at row 10 (both zero-based).
first_table = excelHandler.tables[0]
df1 = pd.DataFrame(first_table[10:], columns=first_table[9])

# Call form of print works on both Python 2 and Python 3.
print(df1.head())
I can't comment (low reputation), but the answer to the question "How to open Excel XML file programmatically" (with Python and pandas) should work.