Convert HTML into CSV

后端 未结 5 1870

I want to convert an HTML table, as obtained from the script below, into a CSV file, but I got a TypeError as follows:

TypeError: sequence item 0: expected

5条回答
  •  -上瘾入骨i
    2020-11-29 08:19

    import csv
    from bs4 import BeautifulSoup
    import pandas as pd
    
    # Read the whole page up front. The context manager guarantees the file
    # handle is closed even if reading fails.
    with open('test.html') as html_file:
        html = html_file.read()
    soup = BeautifulSoup(html, features='lxml')
    # Select the table to export with a CSS selector; change the selector to
    # match the table you want (here: a <table> with class "queryResults").
    # NOTE: select_one() returns None when nothing matches the selector.
    table = soup.select_one('table.queryResults')
    
    def get_all_tables(soup):
        """Return the result of searching *soup* for every ``table`` tag.

        Args:
            soup: A parsed document exposing ``find_all``.

        Returns:
            Whatever ``soup.find_all("table")`` returns.
        """
        tables = soup.find_all("table")
        return tables
    
    
    # Dump every table found in the page, preceded by its 1-based position,
    # so the table of interest can be identified by eye.
    tbls = get_all_tables(soup)
    for i, tablen in enumerate(tbls, start=1):
        print(i, tablen, sep="\n")
    
    def get_table_headers(table):
        """Return the stripped text of each ``th`` cell in the table's first row.

        Args:
            table: A parsed table element exposing ``find`` and ``find_all``.

        Returns:
            A list of header strings. The list is empty when the table has
            no ``tr`` at all or when its first ``tr`` holds no ``th`` cells.
        """
        first_row = table.find("tr")
        if first_row is None:
            # find() yields None for a table without rows; there is nothing
            # to call find_all() on, so report "no headers" instead of failing.
            return []
        return [th.text.strip() for th in first_row.find_all("th")]
    
    # Column names for the CSV: the stripped <th> texts of the selected
    # table's first row.
    head = get_table_headers(table)
    # Debug aid: print(head)
    
    def get_table_rows(table):
        """Collect the cell texts of every row after the first one.

        The first ``tr`` is skipped. For each remaining row the ``td`` cells
        are used; a row without any ``td`` falls back to its ``th`` cells
        (seen, for example, in Wikipedia tables below the table body).

        Args:
            table: A parsed table element exposing ``find_all``.

        Returns:
            A list with one entry per row, each entry being the list of
            whitespace-stripped cell texts of that row.
        """
        collected = []
        for row in table.find_all("tr")[1:]:
            # Prefer regular data cells; only look for header cells when the
            # row has no <td> at all.
            row_cells = row.find_all("td") or row.find_all("th")
            collected.append([cell.text.strip() for cell in row_cells])
        return collected
    
    # Data rows of the selected table: one list of stripped cell texts per
    # <tr>, excluding the first row.
    table_rows = get_table_rows(table)
    # Debug aid: print(table_rows)
    
    def save_as_csv(table_name, headers, rows):
        """Write *rows* under the column names *headers* to ``<table_name>.csv``.

        Args:
            table_name: Output path without the ``.csv`` suffix.
            headers: Column names passed to the DataFrame.
            rows: Row data, one sequence of cell values per row.
        """
        frame = pd.DataFrame(rows, columns=headers)
        destination = f"{table_name}.csv"
        frame.to_csv(destination)
    
    # Export the extracted headers and rows to "Test_table.csv".
    save_as_csv("Test_table", head, table_rows)
    

    提交回复
    热议问题