这篇文章为源码分享，爬虫的分析过程请阅读对应的分析文章。
源码
import requests
import json
from bs4 import BeautifulSoup
# Browser-like User-Agent string; GetSoup passes `headers` to requests.get.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5958.400 SLBrowser/10.0.3533.400'
)
headers = {'user-agent': _USER_AGENT}
def GetSoup(url, encoding='gbk', timeout=10):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        encoding: Encoding used to decode the response body. Defaults to
            'gbk', the value this function previously hard-coded.
        timeout: Seconds to wait on the server before requests gives up.
            Without a timeout a stalled connection blocks the whole run.

    Returns:
        The response text parsed by BeautifulSoup with the 'lxml' parser.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Override whatever encoding requests guessed so resp.text decodes with
    # the requested one.
    resp.encoding = encoding
    return BeautifulSoup(resp.text, 'lxml')
# Fetch the JSON data and parse it into a list holding each hero's id and
# name: [[id, name], [id1, name1], ...]
def jsonToHeroInfoList(jsonURL, timeout=10):
    """Return ``[ename, cname]`` pairs for every entry of the JSON at *jsonURL*.

    Args:
        jsonURL: URL whose response body is a JSON array of objects, each
            carrying 'ename' and 'cname' keys.
        timeout: Seconds to wait on the server before requests gives up,
            so a stalled connection cannot hang the caller forever.

    Returns:
        A list of two-element lists ``[data['ename'], data['cname']]`` in the
        same order as the entries of the JSON array.

    Raises:
        KeyError: if an entry lacks 'ename' or 'cname'.
    """
    resp = requests.get(jsonURL, timeout=timeout)
    jsonData = json.loads(resp.text)
    return [[data['ename'], data['cname']] for data in jsonData]
# Hero list endpoint; each entry yields the id used to build the detail-page
# URL and the name written as the section heading.
Json_url = "https://pvp.qq.com/web201605/js/herolist.json"
heroInfoList = jsonToHeroInfoList(Json_url)

# The file is opened in append mode, so repeated runs add to existing content.
# The `with` block guarantees the handle is closed even if a request or a
# tag lookup raises part-way through the scrape.
with open('C:/Users/TTODS/Desktop/王者荣耀/王者荣耀英雄技能介绍.txt', 'a') as f:
    for heroInfo in heroInfoList:
        # Get the hero id and display name.
        _id = heroInfo[0]
        name = heroInfo[1]
        f.write("\n\n"+name+'\n')

        # Find the skills on the hero's detail page.
        url = 'https://pvp.qq.com/web201605/herodetail/%s.shtml' % _id
        soup = GetSoup(url)
        skillList = soup.find_all('div', class_="show-list")
        for skill in skillList:
            skill_name = skill.find('b').text
            # The first <span> is stored as the cooldown, the second as the cost.
            spans = skill.find_all('span')
            skill_cd = spans[0].text
            skill_cost = spans[1].text
            skill_desc = skill.find('p', class_="skill-desc").text

            # Skill entries with a blank name are skipped entirely.
            if skill_name != "":
                f.write(skill_name+'\n')
                f.write(skill_cost+'\n')
                f.write(skill_cd+'\n')
                f.write(skill_desc+'\n')
                print(skill_name+'\n')
                print(skill_cost)
                print(skill_cd)
                print(skill_desc)
爬取结果
来源:CSDN
作者:TTODS.
链接:https://blog.csdn.net/qq_44525150/article/details/104224819