Exporting DataFrame to Excel using pandas without overwriting

耗尽温柔 提交于 2021-02-11 14:18:03

问题


How can I export a DataFrame to Excel without overwriting the previously saved data? For example: I'm doing web scraping and there is a table with pagination, so I take page 1, save it in a DataFrame, export it to Excel, and do it again for page 2. But every record is erased when I save it, leaving only the last page. Sorry for my English, here is my code:

import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver


# Question script: reads three pages of a paginated stats table with Selenium
# and writes the result to an Excel file. As written, only the last page that
# was read ends up in the file (see the note at the export step).

# NOTE: `i` is not used anywhere in the visible script.
i=1
url = "https://stats.nba.com/players/traditional/?PerMode=Totals&Season=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1"

driver = webdriver.Firefox(executable_path=r'C:/Users/Fabio\Desktop/robo/geckodriver.exe')

driver.get(url)
# Fixed 5-second pause after loading -- presumably to let the page render.
time.sleep(5)


# Click the 9th header cell of the table (presumably to sort by that column
# -- confirm against the live page).
driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]/div[1]/table/thead/tr/th[9]").click()



contador = 1

# Pagination loop: runs for contador = 1, 2, 3.
while(contador < 4):

    # Find the table container and grab its HTML.
    elemento = driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]")
    html_content = elemento.get_attribute('outerHTML')

    # 2. Parse HTML - BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find(name='table')

    # 3. Data Frame - Pandas
    # `df` is re-bound on every iteration, so the previous page's rows are
    # no longer referenced after this point.
    df_full = pd.read_html(str(table))[0]
    df = df_full[['PLAYER','TEAM', 'PTS']]
    df.columns = ['jogador','time', 'pontuacao']

    # NOTE: `dados1` is assigned but never read in the visible script.
    dados1 = pd.DataFrame(df)
    
    
    # Click the second link in the pagination area (presumably "next page"
    # -- confirm against the live page).
    driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[1]/div/div/a[2]").click()

    contador = contador + 1

# 4. Export to Excel
# At this point `df` holds only the frame built in the final loop iteration,
# so that single page is all that gets written to the file.

dados = pd.DataFrame(df)
dados.to_excel("fabinho.xlsx")

driver.quit()

回答1:


You are re-assigning df to whatever data you retrieved every time you go through the loop, so only the last page is left when you export. A solution would be to append each page's DataFrame to a list and then pd.concat the list at the end.

import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver


# Answer script: read three pages of a paginated stats table with Selenium,
# collect one DataFrame per page, and export all rows to a single Excel file.

url = "https://stats.nba.com/players/traditional/?PerMode=Totals&Season=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1"

driver = webdriver.Firefox(executable_path=r'C:/Users/Fabio\Desktop/robo/geckodriver.exe')

# One DataFrame per scraped page; concatenated after the browser is closed.
df_list = []

# try/finally guarantees the browser is closed even if a lookup or click
# raises part-way through the scrape.
try:
    driver.get(url)
    # Fixed 5-second pause after loading -- presumably to let the page render.
    time.sleep(5)

    # Click the 9th header cell of the table (presumably to sort by that
    # column -- confirm against the live page).
    driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]/div[1]/table/thead/tr/th[9]").click()

    # Pagination loop: three pages (contador = 1, 2, 3).
    for contador in range(1, 4):

        # 1. Find the table container and grab its HTML.
        elemento = driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]")
        html_content = elemento.get_attribute('outerHTML')

        # 2. Parse HTML - BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')
        table = soup.find(name='table')

        # 3. Data Frame - Pandas
        # Keep only the three columns of interest and rename them; rename()
        # returns a new frame, so the parsed table itself is left untouched.
        df_full = pd.read_html(str(table))[0]
        df = df_full[['PLAYER','TEAM', 'PTS']].rename(
            columns={'PLAYER': 'jogador', 'TEAM': 'time', 'PTS': 'pontuacao'}
        )
        df_list.append(df)

        # Click the second link in the pagination area (presumably "next
        # page" -- confirm against the live page).
        # NOTE(review): there is no wait after this click, so the next
        # iteration may read the table before it has refreshed -- confirm and
        # add a wait if needed.
        driver.find_element_by_xpath("/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[1]/div/div/a[2]").click()
finally:
    driver.quit()

# 4. Export to Excel
# ignore_index=True renumbers the rows 0..n-1 so the exported index does not
# repeat each page's own row labels.
dados = pd.concat(df_list, ignore_index=True)
dados.to_excel("fabinho.xlsx")


来源:https://stackoverflow.com/questions/62623508/exporting-dataframe-to-excel-using-pandas-without-subscribe

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!