if or try loop for an element in a page selenium

问题

I am trying to scrape agents' data here. I am able to get the links from the first page. Currently I am using a fixed-count loop because I know the total number of pages, but I would like the scraper to keep running for as long as the "next" page link is there. I tried both "try" and "if not" but wasn't able to figure it out. Any help is welcome. Here is the code.

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure Chrome to run headless (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('headless')
# The first argument is the path to a local chromedriver executable (machine-specific).
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)


# Module-level accumulator that first_links() appends every collected href to.
links_total = []
# Load the first page of the listing before any scraping starts.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links():
    """Append the links of the current page, then of the page after it, to ``links_total``.

    Operates on the module-level ``driver`` and ``links_total``; takes no
    arguments and returns nothing.
    """
    # Collect the href of every <a> nested inside a <td> on the current page.
    initial_data = driver.find_elements_by_tag_name('td')
    for initial in initial_data:
        page_links = initial.find_elements_by_tag_name('a')
        for page in page_links:
            page_link = page.get_attribute("href")
            links_total.append(page_link)
    # Reload the current page before looking for the "next" link.
    driver.refresh()
    # NOTE(review): a find_element_* lookup that matches nothing raises
    # NoSuchElementException instead of returning a falsy value, so this `if`
    # presumably cannot serve as an "is there a next page?" test -- confirm.
    if driver.find_element_by_partial_link_text('next'):
        next_page = driver.find_element_by_partial_link_text('next')
        next_page.click()
        # Fixed two-second pause before reading the page reached via "next".
        time.sleep(2)
        # Same collection as above, now for the newly opened page. The next call
        # to first_links() starts on this same page, so it looks like these
        # links get appended a second time -- verify.
        new_data = driver.find_elements_by_tag_name('td')
        for new in new_data:
            links = new.find_elements_by_tag_name('a')
            for link in links:
                new_link = link.get_attribute("href")
                links_total.append(new_link)



# Call first_links() a fixed 22 times (range(1, 23)); the count is hard-coded
# rather than derived from the page itself.
for i in range(1, 23):
    first_links()


# Print every collected link, one per line.
for link in links_total:
    print(link)

回答1:

A try/except block would be the better option:

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure Chrome to run headless (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('headless')
# The first argument is the path to a local chromedriver executable (machine-specific).
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)

# Let element lookups wait up to 10 seconds for a match before failing.
driver.implicitly_wait(10)
# links_total = []
# Load the first page of the listing; first_links() starts scraping from here.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")

def first_links(links_total=None):
    """Collect the href of every table-cell link, following "next" until it is gone.

    Starts from whatever page the module-level ``driver`` currently shows,
    gathers the ``href`` of each ``<a>`` nested inside a ``<td>``, clicks the
    link whose text contains ``next`` and repeats. Pagination ends when that
    link cannot be found or clicked.

    Args:
        links_total: Optional list to append to. A new list is created when
            omitted, so separate calls never share results.

    Returns:
        The list holding all collected hrefs (the same object as
        ``links_total`` when one was passed in).
    """
    # A list literal as the default would be created once and shared by every
    # call; use None as the sentinel and build a fresh list per call instead.
    if links_total is None:
        links_total = []

    # Loop instead of recursing so a long listing does not add one stack frame
    # per page.
    while True:
        # Elements are looked up again on every page: references from the
        # previous page go stale once the browser navigates.
        for cell in driver.find_elements(By.TAG_NAME, 'td'):
            for anchor in cell.find_elements(By.TAG_NAME, 'a'):
                links_total.append(anchor.get_attribute("href"))

        try:
            driver.find_element(By.PARTIAL_LINK_TEXT, 'next').click()
        except (TimeoutError, ElementNotVisibleException, NoSuchElementException):
            # No usable "next" link: this was the last page.
            print("NEXT btn not found : ")
            break

        # Give the next page a moment to render before scraping it.
        time.sleep(2)

    return links_total

# Scrape every page starting from the one the driver has loaded.
all_links = first_links()

# Print every collected link, one per line.
for link in all_links:
    print(link)

You don't actually need to use Selenium. You could do it with BeautifulSoup like so:

import requests
from bs4 import BeautifulSoup

# NOTE(review): get_links() has its own page_num parameter, so this
# module-level value does not appear to be read anywhere in the visible code.
page_num=0
# URL template for the listing; the trailing "{}" is filled with the page index.
url_cbp = r"https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=&page={}"

def get_links(links_total=None, page_num=0):
    """Collect the first link of every ``views-field`` table cell across all pages.

    Downloads ``url_cbp`` formatted with ``page_num``, reads the ``href`` of
    the first ``<a>`` in each ``<td class="views-field">`` inside the element
    with id ``region-content``, and moves on to the next page index for as
    long as the page contains an ``<li class="pager-next">``.

    Args:
        links_total: Optional list to append to. A new list is created when
            omitted, so separate calls never share results.
        page_num: Page index to start from.

    Returns:
        The list holding all collected hrefs, exactly as written in the
        page's markup.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    # A list literal as the default would be created once and shared by every
    # call; use None as the sentinel and build a fresh list per call instead.
    if links_total is None:
        links_total = []

    # Loop instead of recursing so the page count is not bounded by the
    # recursion limit.
    while True:
        page = requests.get(url_cbp.format(page_num))
        # Fail loudly on an error response instead of parsing an error page.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')

        results = soup.find(id='region-content')
        if results is None:
            # No content region on this page: nothing to collect or follow.
            break

        for cell in results.find_all('td', class_='views-field'):
            cell_link = cell.find('a')
            # Cells without an anchor (or an anchor without href) are skipped
            # rather than raising on the subscript.
            if cell_link is not None and cell_link.has_attr('href'):
                links_total.append(cell_link['href'])

        if not results.find('li', class_='pager-next'):
            break
        page_num += 1

    return links_total

# Fetch every page, starting at the default page index.
all_links = get_links()

# Print every collected link, one per line.
for link in all_links:
  print(link)

来源：https://stackoverflow.com/questions/62057651/if-or-try-loop-for-an-element-in-a-page-selenium

标签

python

python-3.x

selenium

web-scraping