I want to print a proper table out of data scraped using Scrapy

匆匆过客 提交于 2021-02-11 17:20:38


So I have written all the code to scrape the table from [http://www.rarityguide.com/cbgames_view.php?FirstRecord=21][1], but I am getting output like

# the output that i get

{'EXG': (['17.00',

 'G': ['8.00',
 'company': (['Milton Bradley',
              'Milton Bradley',
              'Milton Bradley',
              'Standard Toykraft',
              'Game Gems',
              'Milton Bradley',
              'Parker Brothers',
              'Parker Brothers',
              'King Features',
              'Parker Brothers',
              'Parker Brothers'],),
 'mnm': (['26.00',
 'rarity': ([],),
 'title': (['Beat the Clock',
            'Beat the Clock',
            'Beatles - Flip Your Wig',
            'Ben Casey M.D.',
            'Bermuda Triangle',
            'Betsy Ross and the Flag',
            'Beverly Hillbillies',
            'Beware the Spider',
            'Bewitched - Stymie Card Game',
            'Bionic Woman',
            'Blade Runner',
            'Blondie - Playing Card Game',
            'Blondie - Sunday Funnies',
            'Blondie - The Hurry Scurry Game',
            "Blondie and Dagwood's Race for the Office",
            'Blondie Goes to Leisureland',
            'Boom or Bust',
            'Boom or Bust'],),
 'year': (['1969',

Can anyone help me achieve output like

# the output that i want!
{"EXG": ["17.00"],
  "MNM": ["26.00"],
  "year": ["1969"],
  "company": ["Milton Bradley"],
  "Title": ["Beat the Clock"] }

{"EXG": ["10.00"],
  "MNM": ["19.00"],
  "year": ["1954"],
  "company": ["Lowell"],
  "Title": ["Beat the Clock"] }
and then so on for all values.

Basically, I want to have one dictionary containing all the key-value pairs, instead of having one entire dictionary for each key. Also, here's my spider's code:

import scrapy
from ..items import RarityItem

class RarityScrapper(scrapy.Spider):
    name = "rarity"
    start_urls = [

    def parse(self, response):
        table = response.css(

        items = RarityItem()

        for contents in table:
            title = contents.css("td:nth-child(2)::text").extract()
            company = contents.css("td:nth-child(3)::text").extract()
            year = contents.css("td:nth-child(4)::text").extract()
            rarity = contents.css("td:nth-child(5)::text").extract()
            mnm = contents.css("td:nth-child(6)::text").extract()
            EXG = contents.css("td:nth-child(7)::text").extract()
            G = contents.css("td:nth-child(8)::text").extract()

            items["title"] = title,
            items["company"] = company,
            items["year"] = year,
            items["rarity"] = rarity,
            items["mnm"] = mnm,
            items["EXG"] = EXG,
            items["G"] = G

            yield items


If all the lists are the same length, then after this line

G = contents.css("td:nth-child(8)::text").extract():

Add this code snippet:

# Build one flat dict per table row from the parallel column lists.
# Assumes title, company, year, rarity, mnm, EXG and G all have the same
# length (the precondition stated above this snippet).
arr = []
for i in range(len(title)):
    # NOTE(review): the key 'EXP' holds the title value — confirm the intended key name.
    arr.append({
        'EXP': title[i], 'company': company[i], 'year': year[i], 'rarity': rarity[i],
        'MNM': mnm[i], 'EXG': EXG[i], 'G': G[i]})

Then type this:

# Print each row dict on its own line.
for row in arr:
    print(row)

to see the output array.


You need to iterate through each row in the table and process each row's data separately. As all rows have the same length, you can use list unpacking to write the data into a dict item:

def parse(self, response):
    """Yield one flat dict per data row of the table matched by ``form table``.

    Each yielded dict has the keys ``title``, ``company``, ``year``, ``mnm``,
    ``EXG`` and ``G`` (taken from the row's ``td`` text nodes) followed by
    ``rarity`` (the ``alt`` attribute of the first image inside a ``td`` of
    the row, or ``""`` when the row has no such image).

    Args:
        response: The downloaded page; only its ``css()`` method is used.

    Yields:
        dict: One record per table row that has exactly eight text cells.
    """
    table = response.css("form table")

    for row in table.css("tr"):
        cells = row.css("td::text").extract()
        # The unpacking below needs exactly eight text cells. Rows with a
        # different count (presumably header or spacer rows) would raise
        # ValueError, so they are skipped instead.
        if len(cells) != 8:
            continue

        # The first and fifth text cells are not part of the record.
        _, title, company, year, _, mnm, exg, g = cells
        yield {
            "title": title,
            "company": company,
            "year": year,
            "mnm": mnm,
            "EXG": exg,
            "G": g,
            # Attribute values are selected with ::attr(name); "::alt" is not
            # a pseudo-element that Scrapy selectors support.
            "rarity": row.css("td img::attr(alt)").extract_first(""),
        }

