How to fetch data from a JavaScript-loaded site using BeautifulSoup

拥有回忆 提交于 2021-01-28 08:02:41

问题


I am trying to fetch some data from this website:

https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream

I have been using this method to fetch data from JavaScript-loaded sites:

def getLocalStoreProducts():
    """Download the local-store search page and save its embedded JSON.

    Builds the search URL for the hard-coded store and search term, fetches
    the page, looks up the ``<script>`` tag whose id ``find_tag`` reports,
    parses that tag's text as JSON and writes the result to ``se.json``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the page has no ``<script>`` tag with the reported id.
    """
    localStoreUrl = 'https://www.walmart.com/store/2141-philadelphia-pa/search?query='
    searchWord = "ice cream"
    # checkForSpace is defined elsewhere; presumably it URL-encodes the
    # spaces in the search term -- verify against its definition.
    searchWord1 = checkForSpace(searchWord)
    wordUrl = localStoreUrl + searchWord1
    print(wordUrl)

    # The context manager closes the session's pooled connections even when
    # the request or the parsing below raises.
    with requests.Session() as session:
        # Without a timeout a stalled connection would block forever.
        response = session.get(wordUrl, timeout=30)
        # Fail here on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        categorySoup = BeautifulSoup(response.text, 'html.parser')

    categorytagId = find_tag(categorySoup)
    print("this is the tag id ", categorytagId)
    categoryscript = categorySoup.find("script", {"id": categorytagId})
    if categoryscript is None:
        # Pages whose content is rendered by JavaScript may not contain the
        # data inline; say so instead of failing on None.get_text().
        raise ValueError(
            "no <script> tag with id %r found in %s" % (categorytagId, wordUrl)
        )
    categorydata = json.loads(categoryscript.get_text(strip=True))

    filename20 = "se.json"
    with open(filename20, "w", encoding="utf-8") as f20:
        json.dump(categorydata, f20)

    print("saved to file")



# Run the scrape as soon as this snippet is executed; the function takes no arguments.
getLocalStoreProducts()

This is my find_tag function:

def find_tag(soup):
    """Return the id of the first JSON ``<script>`` tag that is not a ``tb-djs`` one.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The ``id`` attribute of the matching
        ``<script type="application/json">`` tag.

    Raises:
        ValueError: If the document contains no matching tag, for example
            because the data is loaded by JavaScript after the initial HTML
            is served.
    """
    # Negative lookahead at every position: only ids that do not contain
    # "tb-djs" anywhere can match.
    id_pattern = re.compile(r'^((?!tb-djs).)*$')
    script = soup.find('script', {'type': 'application/json', 'id': id_pattern})
    if script is None:
        # find() returns None when nothing matches; subscripting that would
        # raise "TypeError: 'NoneType' object is not subscriptable".
        raise ValueError(
            "no <script type='application/json'> tag with a usable id was "
            "found; the page may build its content with JavaScript"
        )
    return script['id']

But I keep getting this error:

TypeError: 'NoneType' object is not subscriptable

How can I fetch data from this URL?

https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream


回答1:


To load the JSON data from the Ajax URL, you can use this script:

import re
import json
import requests


# Page URL of the store search; only the store number is extracted from it.
url = 'https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream'
# Ajax endpoint that is queried for the search results as JSON.
api_url = 'https://www.walmart.com/store/electrode/api/search'

# Pull the numeric store id out of the ".../store/<digits>-..." path segment.
store_match = re.search(r'store/(\d+)', url)
if store_match is None:
    # Fail with a clear message rather than an AttributeError on None.group().
    raise ValueError('no store number found in URL: ' + url)

params = {
    'query': 'ice cream',
    'cat_id': 0,
    'ps': 24,
    'offset': 0,
    'prg': 'desktop',
    'stores': store_match.group(1)
}

# A timeout keeps a stalled connection from blocking the script forever.
response = requests.get(api_url, params=params, timeout=30)
# Surface HTTP errors directly instead of as a JSON decoding failure.
response.raise_for_status()
data = response.json()
# print data to screen:
print(json.dumps(data, indent=4))

Prints:

{
    "items": [
        {
            "productId": "6W2PTANOXU63",
            "usItemId": "336104115",
            "productType": "REGULAR",
            "title": "<mark>Ice</mark> <mark>Cream</mark> Slime - Rainbow Sherbet, 6 fl oz",
            "description": "@generated",
            "imageUrl": "http://i5.walmartimages.com/asr/e107a36f-9cde-4119-978c-99509d8e47d7_1.5e79836a5963dbc2ab8c3e67385ed661.png?odnHeight=180&odnWidth=180&odnBg=ffffff",
            "productPageUrl": "/ip/Ice-Cream-Slime-Rainbow-Sherbet-6-fl-oz/336104115",
            "department": "Food",
            "customerRating": 3,
            "numReviews": 2,
            "sellerId": "F55CDC31AB754BB68FE0B39041159D63",
            "sellerName": "Walmart.com",
            "enableAddToCart": false,
            "canAddToCart": false,
            "showPriceAsAvailable": true,
            "highlightedTitleTerms": [
                "Cream",
                "Ice"
            ],
            "seeAllName": "All Ice Cream",
            "seeAllLink": "query=ice%20cream&cat_id=976759_976791_1001420_1001423_4833164&stores=2141&ps=24",
            "itemClassId": "1",
            "primaryOffer": {
                "offerId": "896BDD106566491C885E6D872F57DA8A",
                "offerPrice": 0.5,
                "currencyCode": "USD"
            },
            "fulfillment": {
                "isSOI": true,
                "isPUT": false
            },
            "inventory": {
                "status": "In Stock",
                "displayFlags": [
                    "IN_STORE_ONLY"
                ],
                "available": true
            },
            "quantity": 17,
            "brand": [
                "Slime Factory"
            ],
            "wmtgPricePerUnitQuantity": "1.0000",
            "standardUpc": [
                "00885777909776"
            ],
            "isHeartable": true,
            "marketPlaceItem": false,
            "virtualPack": false,
            "preOrderAvailable": false,
            "premiumBrand": false,
            "wfsEnabled": false,
            "blitzItem": false,
            "shippingPassEligible": false,
            "pickupDiscountEligible": false,
            "is_limited_qty": false
        },
        {
            "productId": "3Q1N47LYQKVD",
            "usItemId": "24008061",
            "productType": "REGULAR",
            "title": "Great Value <mark>Ice</mark> <mark>Cream</mark> Variety Pack, 32 Count",
            "description": "<li>12 <mark>Ice</mark> <mark>Cream</mark> Sandwich


...and so on.


来源:https://stackoverflow.com/questions/63139532/how-to-fetch-data-from-javascript-loaded-site-using-beautifulsoup

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!