问题
I am very new to data scrapping and scrappy,I want scrapy to use the page source output which i got using selenium webdriver to scrap data using xpath! Can anybody help me with that. i am getting an error AttributeError: 'unicode' object has no attribute 'text'
I think i am getting the output in string format and scrapy is not able to covert it. below is the code snippet generating the error.
def parse(self, response):
# process each category link
urls = response.xpath('//div[contains(@class,"colDataInnerBlk")]/p/a/@href').extract()
for url in urls:
if len(str(url).strip()) != 0 and url !="javascript:void(0);":
# absolute_url = response.urljoin(url)
#request = scrapy.Request(absolute_url, callback=self.parse_items)
self.browser.get(url)
self.scroll(self.browser)
while True:
try:
self.find_element(self.browser,By.XPATH,'//div[@id="see-more-products" and @style="visibility: visible;"]')
self.click(self.browser,By.XPATH,'//div[@id="see-more-products and @style="visibility: visible;"]')
self.scroll(self.browser)
except:
break
response=self.browser.page_source
self.browser.quit()
time.sleep(2)
items = DealItem()
hxs = HtmlXPathSelector(response)
items['product_name'] = hxs.select('//p[contains(@class,"product-title") or contains(@class,"pdp-e-i-head")]/text()')
items['product_sale_price'] = hxs.select('//span[contains(@class,"lfloat product-price") or contains(@class,"payBlkBig") or contains(@class,"pdp-final-price")]/text()')
items['Mrp_price'] = hxs.select('//span[contains(@class,"lfloat product-desc-price strike")]/text()')
items['Product_discount'] = hxs.select('//div[@class="product-discount"]/span/text()')
items['product_category'] = hxs.select('//h1[@class="category-name"]/text()')
yield items
来源:https://stackoverflow.com/questions/47138179/selenium-to-scroll-through-ifinite-scrollable-pages-and-scrapping-data-through-s