问题
I am trying to fetch some data from this website:
https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream
I have been using this method to fetch data from JavaScript-loaded sites:
def getLocalStoreProducts():
    """Download a store search page and dump its embedded JSON to ``se.json``.

    Builds the search URL for the hard-coded query "ice cream", fetches the
    page with a ``requests`` session, parses it with BeautifulSoup, looks up
    the ``<script>`` tag whose id is returned by ``find_tag``, decodes that
    tag's text as JSON and writes the result to ``se.json`` in the current
    working directory.

    Relies on ``checkForSpace`` (not shown in this snippet; presumably it
    makes the search phrase URL-safe) and on ``find_tag``.
    """
    session = requests.Session()
    localStoreUrl = 'https://www.walmart.com/store/2141-philadelphia-pa/search?query='
    searchWord = "ice cream"
    searchWord1 = checkForSpace(searchWord)
    wordUrl = localStoreUrl+searchWord1
    print(wordUrl)

    categorySoup = BeautifulSoup(session.get(wordUrl).text, 'html.parser')
    # find_tag raises TypeError when the downloaded HTML contains no
    # matching <script> tag, so execution stops here in that case.
    categorytagId = find_tag(categorySoup)
    print("this is the tag id ", categorytagId)

    categoryscript = categorySoup.find("script", {"id":categorytagId})
    categorydata = json.loads(categoryscript.get_text(strip=True))

    filename20 = "se.json"
    with open(filename20, "w") as f20:
        json.dump(categorydata, f20)
    print("saved to file")


getLocalStoreProducts()
This is my find_tag function:
def find_tag(soup):
    """Return the ``id`` of the first JSON ``<script>`` tag not marked "tb-djs".

    Searches *soup* for a ``<script type="application/json">`` element whose
    ``id`` attribute does not contain the substring ``tb-djs`` (the regex uses
    a negative lookahead at every character position to exclude it).

    Raises:
        TypeError: if no such tag exists. ``soup.find`` then returns ``None``
            and subscripting it fails with
            "'NoneType' object is not subscriptable".
    """
    script = soup.find(
        'script',
        {'type': 'application/json', 'id': re.compile(r'^((?!tb-djs).)*$')},
    )
    # No None-check here: a missing tag surfaces as the TypeError described
    # in the docstring.
    return script['id']
But I keep getting this error:
TypeError: 'NoneType' object is not subscriptable
How can I fetch the data from this URL?
https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream
回答1:
To load the JSON data from the Ajax URL, you can use this script:
import re
import json
import requests
# Store search page; used here only to extract the numeric store id.
url = 'https://www.walmart.com/store/2141-philadelphia-pa/search?query=ice%20cream'
# JSON endpoint that is queried directly instead of parsing the HTML page.
api_url = 'https://www.walmart.com/store/electrode/api/search'

params = {
    'query': 'ice cream',
    'cat_id': 0,
    'ps': 24,
    'offset': 0,
    'prg': 'desktop',
    # First run of digits after "store/" in the page URL ("2141" here).
    'stores': re.search(r'store/(\d+)', url).group(1),
}

# Without a timeout, requests can wait indefinitely on an unresponsive server.
response = requests.get(api_url, params=params, timeout=30)
# Fail on 4xx/5xx responses instead of trying to decode an error page as JSON.
response.raise_for_status()
data = response.json()

# print data to screen:
print(json.dumps(data, indent=4))
Prints:
{
"items": [
{
"productId": "6W2PTANOXU63",
"usItemId": "336104115",
"productType": "REGULAR",
"title": "<mark>Ice</mark> <mark>Cream</mark> Slime - Rainbow Sherbet, 6 fl oz",
"description": "@generated",
"imageUrl": "http://i5.walmartimages.com/asr/e107a36f-9cde-4119-978c-99509d8e47d7_1.5e79836a5963dbc2ab8c3e67385ed661.png?odnHeight=180&odnWidth=180&odnBg=ffffff",
"productPageUrl": "/ip/Ice-Cream-Slime-Rainbow-Sherbet-6-fl-oz/336104115",
"department": "Food",
"customerRating": 3,
"numReviews": 2,
"sellerId": "F55CDC31AB754BB68FE0B39041159D63",
"sellerName": "Walmart.com",
"enableAddToCart": false,
"canAddToCart": false,
"showPriceAsAvailable": true,
"highlightedTitleTerms": [
"Cream",
"Ice"
],
"seeAllName": "All Ice Cream",
"seeAllLink": "query=ice%20cream&cat_id=976759_976791_1001420_1001423_4833164&stores=2141&ps=24",
"itemClassId": "1",
"primaryOffer": {
"offerId": "896BDD106566491C885E6D872F57DA8A",
"offerPrice": 0.5,
"currencyCode": "USD"
},
"fulfillment": {
"isSOI": true,
"isPUT": false
},
"inventory": {
"status": "In Stock",
"displayFlags": [
"IN_STORE_ONLY"
],
"available": true
},
"quantity": 17,
"brand": [
"Slime Factory"
],
"wmtgPricePerUnitQuantity": "1.0000",
"standardUpc": [
"00885777909776"
],
"isHeartable": true,
"marketPlaceItem": false,
"virtualPack": false,
"preOrderAvailable": false,
"premiumBrand": false,
"wfsEnabled": false,
"blitzItem": false,
"shippingPassEligible": false,
"pickupDiscountEligible": false,
"is_limited_qty": false
},
{
"productId": "3Q1N47LYQKVD",
"usItemId": "24008061",
"productType": "REGULAR",
"title": "Great Value <mark>Ice</mark> <mark>Cream</mark> Variety Pack, 32 Count",
"description": "<li>12 <mark>Ice</mark> <mark>Cream</mark> Sandwich
...and so on.
来源:https://stackoverflow.com/questions/63139532/how-to-fetch-data-from-javascript-loaded-site-using-beautifulsoup