Basically, the code below scrapes the first 5 rows of a table. One of the fields is another href, and following that href leads to a page with more information, which I want to collect and add to the original item.
Change the code to the following:
def parse(self, response):
    """Scrape table rows 1-5 and issue one follow-up request per row.

    For each row named ``row1`` .. ``row5`` a ``ScrapyItem`` is created and
    filled with ``thing1`` (link text) and ``thing2`` (link href).  The item
    is not emitted here; it travels with the follow-up ``Request`` in
    ``meta['item']`` and is handed to ``self.parse_next_page``.

    Args:
        response: the response for the page containing the table.

    Yields:
        Request: one request per row, carrying that row's partially
        filled item under ``meta['item']``.
    """
    hxs = HtmlXPathSelector(response)
    for x in range(1, 6):
        item = ScrapyItem()
        str_selector = '//tr[@name="row{0}"]'.format(x)
        # NOTE(review): the selector suffix was garbled in the original
        # snippet; '/a/...' relative to the row is assumed -- confirm
        # against the real page markup.
        item['thing1'] = hxs.select(str_selector + '/a/text()').extract()
        item['thing2'] = hxs.select(str_selector + '/a/@href').extract()
        print('hello')
        # Scrapy rejects URLs without a scheme, hence the explicit
        # "http://".  NOTE(review): this is still a placeholder URL --
        # presumably the href stored in thing2 should be followed instead.
        request = Request(
            "http://www.nextpage.com",
            callback=self.parse_next_page,
            meta={'item': item},
        )
        print('hello2')
        # Do not return or yield the item here: only the request is
        # yielded, and the callback returns the item once it has added
        # its own field.
        yield request
def parse_next_page(self, response):
    """Add ``thing3`` to the item carried in ``response.meta`` and return it.

    Args:
        response: the follow-up page response; ``response.meta['item']``
            must hold the item attached when the request was created.

    Returns:
        The same item with ``thing3`` set to the extracted text nodes.
    """
    print('stuff')
    page = HtmlXPathSelector(response)
    carried_item = response.meta['item']
    thing3_xpath = '//div/ul/li[1]/span[2]/text()'
    carried_item['thing3'] = page.select(thing3_xpath).extract()
    return carried_item
I think it's pretty clear now.