I use a tool at work that lets me run queries and returns the results as HTML tables. I do not have any kind of back-end access to it.
A lot of this inf
Basic Python implementation using BeautifulSoup, also considering both rowspan and colspan:
from BeautifulSoup import BeautifulSoup
def table2csv(html_txt):
    """Convert every ``<table>`` found in an HTML string to CSV text.

    Each ``<tr>`` becomes one output line. Every ``<th>``/``<td>`` is
    written as a double-quoted field followed by ``colspan`` commas, so a
    cell spanning several columns leaves empty fields after it. A row that
    lies under cells opened with ``rowspan > 1`` in earlier rows is
    prefixed with one comma per span that is still active.

    Limitation: the rowspan padding is always inserted at the start of the
    line, so it is only accurate when the spanning cells sit in the
    leftmost columns.

    Args:
        html_txt: HTML markup, handed to ``BeautifulSoup`` for parsing.

    Returns:
        The CSV text of all tables, joined with two newline characters;
        an empty string when the markup contains no table.

    Raises:
        ValueError: if a ``colspan``/``rowspan`` attribute is not an
            integer literal (raised by ``int``).
    """
    soup = BeautifulSoup(html_txt)
    csvs = []

    for table in soup.findAll('table'):
        pieces = []
        # Remaining row counts of cells that were opened with rowspan > 1.
        row_spans = []

        for tr in table.findAll('tr'):
            cells = tr.findAll(['th', 'td'])

            if cells:
                # Skip the columns still occupied by cells spanning down
                # from earlier rows; measured before this row adds its own
                # spans. Rows without cells emit no padding.
                pieces.append(',' * len(row_spans))

            for cell in cells:
                colspan = int(cell.get('colspan', 1))
                rowspan = int(cell.get('rowspan', 1))

                if rowspan > 1:
                    row_spans.append(rowspan)

                # Double embedded quotes so the field remains valid CSV.
                text = cell.text.replace('"', '""')
                # Plain concatenation (rather than str.format) keeps
                # non-ASCII unicode cell text intact on Python 2.
                pieces.append('"' + text + '"' + ',' * colspan)

            # This row used up one line of every active span; keep only the
            # spans that still cover following rows. Filtering by value
            # removes exactly the exhausted entries, wherever they sit.
            row_spans = [left - 1 for left in row_spans if left > 1]

            pieces.append('\n')

        csvs.append(''.join(pieces))

    return '\n\n'.join(csvs)