Scrapy - can't scrape multiple tables at once

我怕爱的太早我们不能终老 提交于 2021-01-29 18:25:52

问题


I am trying to scrape many tables from a website. When I use the two for loops below, every month and year is scraped as it should be, but the data from different months and years gets mixed together instead of the tables arriving in the order defined by the loops. Any idea how to solve this problem?

import scrapy

from ..items import RenItem
from scrapy.utils.response import open_in_browser
from scrapy.http import FormRequest

class ScrapeTableSpider(scrapy.Spider):
    """Scrape the monthly statistics tables from the REN information centre.

    The crawl runs in three steps:

    1. ``parse`` submits the page form once per year (2007 and 2008).
    2. ``parse2`` re-submits each resulting form once per month (1 to 3)
       together with the "Executar" button value.
    3. ``start_scraping`` yields one ``RenItem`` per ``grid_row`` table row.

    NOTE: Scrapy downloads requests concurrently, so responses (and therefore
    the items of different year/month tables) are not guaranteed to arrive in
    the order in which the requests were yielded.
    """

    name = 'scrape-table'
    # ``allowed_domains`` must hold bare domain names, not full URLs.
    allowed_domains = ['www.centrodeinformacao.ren.pt']
    start_urls = ['https://www.centrodeinformacao.ren.pt/PT/InformacaoExploracao/Paginas/EstatisticaMensal.aspx']

    # Item field names, in the order of the table columns (td[1] .. td[7]).
    _COLUMNS = (
        'renmensal',
        'mes1',
        'acum1',
        'mes2',
        'acum2',
        'mes_variacao',
        'acum_variacao',
    )

    def parse(self, response):
        """Submit the form once for every year in ``range(2007, 2009)``.

        Each submission is handled by ``parse2``.
        """
        for year in range(2007, 2009):
            yield FormRequest.from_response(
                response,
                formdata={
                    'ctl00$m$g_9b99ffea_e036_46c7_9be7_88c49a7820ac$ddlAnos': str(year),
                },
                callback=self.parse2,
                # Every submission targets the same form, so the duplicate
                # filter is explicitly bypassed.
                dont_filter=True,
            )

    def parse2(self, response):
        """Submit the form once for every month in ``range(1, 4)``.

        ``response`` is the page returned after a year was selected; each
        submission is handled by ``start_scraping``.
        """
        for month in range(1, 4):
            yield FormRequest.from_response(
                response,
                formdata={
                    'ctl00$m$g_9b99ffea_e036_46c7_9be7_88c49a7820ac$ddlMeses': str(month),
                    'ctl00$m$g_9b99ffea_e036_46c7_9be7_88c49a7820ac$cmdCxecutar': 'Executar',
                },
                callback=self.start_scraping,
                dont_filter=True,
            )

    def start_scraping(self, response):
        """Yield one ``RenItem`` for every ``grid_row`` row of ``response``.

        Each field holds the list of text nodes found in the matching table
        cell, or the string ``' '`` when the cell contains no text.
        """
        # Debugging aid: opens every downloaded page in the local web browser.
        open_in_browser(response)

        for row in response.xpath('//tr[@class="grid_row"]'):
            # Build a fresh item per row: re-using a single instance would
            # make every yielded item alias the same (last-written) data.
            item = RenItem()
            for position, field in enumerate(self._COLUMNS, start=1):
                cells = row.xpath('td[%d]//text()' % position).extract()
                # An empty cell is stored as a single-space placeholder.
                item[field] = cells or ' '
            yield item

来源:https://stackoverflow.com/questions/64282670/scrapy-can%c2%b4t-scrape-multiple-tables-at-once

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!