I have been doing some research, and so far I have found that the Python package I plan to use is Scrapy. Now I am trying to find out what a good way to build a scraper is.
This is short, simple code that works for me:
# Keep scrolling to the bottom of the page until its height stops growing,
# then print the text of every element with the "post-text" class.
#
# NOTE(review): assumes `driver` is an already-created Selenium WebDriver with
# the target page loaded, and that `time` is imported earlier in the script.

# Seconds to sleep after each scroll before re-measuring the page height.
SCROLL_PAUSE_TIME = 20

# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Wait to load page
    time.sleep(SCROLL_PAUSE_TIME)

    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # The height did not change after the pause, so stop scrolling.
        break
    last_height = new_height

# Use the generic find_elements() locator call: the find_elements_by_* helpers
# were removed in Selenium 4.3. "class name" is the strategy string that
# By.CLASS_NAME stands for, so no extra import is required.
posts = driver.find_elements("class name", "post-text")
for block in posts:
    print(block.text)