if or try loop for an element in a page selenium

﹥>﹥吖頭↗ 提交于 2021-01-29 19:27:54

问题


I am trying to scrape agent data here. I am able to get the links from the first page. I am currently using a numbered loop because I know the total number of pages, but I would like the code to keep running for as long as the "next" page option is there. I tried both "try" and "if not" but wasn't able to figure it out. Any help is welcome. Here is the code.

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Run Chrome in headless mode (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('headless')
# First argument is the path to the local chromedriver executable.
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)


# Module-level accumulator: first_links() appends every scraped href to it.
links_total = []
# Open the listing page before any scraping starts.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links():
    """Scrape the links on the current page, then click "next" and scrape again.

    Every ``href`` found on an ``<a>`` inside a ``<td>`` is appended to the
    module-level ``links_total`` list. Nothing is returned.
    """
    # Pass 1: anchors inside every table cell of the page currently loaded.
    initial_data = driver.find_elements_by_tag_name('td')
    for initial in initial_data:
        page_links = initial.find_elements_by_tag_name('a')
        for page in page_links:
            page_link = page.get_attribute("href")
            links_total.append(page_link)
    driver.refresh()
    # NOTE(review): find_element_* raises NoSuchElementException when nothing
    # matches instead of returning a falsy value, so this `if` cannot act as an
    # "is there a next page?" test -- on the last page it raises instead.
    if driver.find_element_by_partial_link_text('next'):
        next_page = driver.find_element_by_partial_link_text('next')
        next_page.click()
        # Fixed pause after the click -- presumably to let the next page load.
        time.sleep(2)
        # Pass 2: same scrape as above, on the page reached via "next".
        # NOTE(review): this page appears to be scraped again by pass 1 of the
        # following call, so its links look duplicated in links_total -- confirm.
        new_data = driver.find_elements_by_tag_name('td')
        for new in new_data:
            links = new.find_elements_by_tag_name('a')
            for link in links:
                new_link = link.get_attribute("href")
                links_total.append(new_link)



# The page count is hard-coded: first_links() is called 22 times.
for i in range(1, 23):
    first_links()


# Print every collected link, one per line.
for link in links_total:
    print(link)

回答1:


A try/except block would be a better option:

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Run Chrome in headless mode (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('headless')
# First argument is the path to the local chromedriver executable.
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)

# Element lookups keep polling for up to 10 seconds before giving up.
driver.implicitly_wait(10)
# links_total = []  (not needed here: first_links() takes and returns the list)
# Open the listing page before any scraping starts.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")

def first_links(links_total=None):
    """Collect the table-cell links from the current page and all following pages.

    Starting from whatever page the module-level ``driver`` currently shows,
    every ``href`` of an ``<a>`` inside a ``<td>`` is collected, then the link
    whose text contains "next" is clicked and the process repeats until that
    link can no longer be found or clicked.

    Args:
        links_total: Optional list to append to (it is extended in place).
            When omitted, a new list is created for this call.

    Returns:
        The list holding every collected ``href`` value.
    """
    # A fresh list per call: a mutable default (``[]``) would be shared by
    # every call and keep growing across invocations.
    if links_total is None:
        links_total = []

    # Iterate instead of recursing so the call depth does not grow with the
    # number of pages.
    while True:
        # find_element(s)(By..., ...) replaces the find_element(s)_by_*
        # helpers, which newer Selenium releases no longer provide.
        for cell in driver.find_elements(By.TAG_NAME, 'td'):
            for anchor in cell.find_elements(By.TAG_NAME, 'a'):
                links_total.append(anchor.get_attribute("href"))

        # Keep the try body minimal: only the lookup and the click are
        # expected to fail once the last page has been reached.
        try:
            next_page = driver.find_element(By.PARTIAL_LINK_TEXT, 'next')
            next_page.click()
        except (TimeoutError, ElementNotVisibleException, NoSuchElementException):
            print("NEXT btn not found : ")
            break

        # Fixed pause after the click, kept from the original version.
        time.sleep(2)

    return links_total

# Collect links starting from the page the driver currently has loaded.
all_links = first_links()

# Print every collected link, one per line.
for link in all_links:
    print(link)

You don't actually need Selenium. You could do it with BeautifulSoup, like so:

import requests
from bs4 import BeautifulSoup

# NOTE(review): get_links() has its own page_num parameter, so this
# module-level value is not read by the code shown here.
page_num=0
# Listing URL template; the trailing {} is filled with the page number via str.format().
url_cbp = r"https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=&page={}"

def get_links(links_total=None, page_num=0):
  """Collect the link of every ``views-field`` table cell, page after page.

  Requests ``url_cbp`` formatted with ``page_num``, gathers the ``href`` of
  the first ``<a>`` in each ``<td class="views-field">`` inside the element
  with id ``region-content``, and moves on to the next page number for as
  long as the page contains an ``<li class="pager-next">`` element.

  Args:
    links_total: Optional list to append to (it is extended in place).
      When omitted, a new list is created for this call.
    page_num: Page number to start from.

  Returns:
    The list holding every collected ``href`` value.
  """
  # A fresh list per call: a mutable default (``[]``) would be shared by
  # every call and keep growing across invocations.
  if links_total is None:
    links_total = []

  # Iterate instead of recursing so the call depth does not grow with the
  # number of pages.
  while True:
    # Without a timeout a stalled connection would block forever.
    page = requests.get(url_cbp.format(page_num), timeout=30)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find(id='region-content')
    if results is None:
      # The expected content container is missing: nothing to read here.
      break

    for cell in results.find_all('td', class_='views-field'):
      cell_link = cell.find('a')
      # Skip cells without a usable link instead of failing on them.
      if cell_link is None or not cell_link.get("href"):
        continue
      links_total.append(cell_link["href"])

    # No "next" pager entry means this was the last page.
    if not results.find('li', class_='pager-next'):
      break
    page_num += 1

  return links_total

# Collect links starting from the default page number (0).
all_links = get_links()

# Print every collected link, one per line.
for link in all_links:
  print(link)   


来源:https://stackoverflow.com/questions/62057651/if-or-try-loop-for-an-element-in-a-page-selenium

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!