学习任务
获取去哪儿网的出发地列表
获取旅游景点列表
获取景点产品列表
存储数据
1 获取出发地站点
(1)访问touch.qunar.com
(2)按F12,单击自由行,在自由行页面点击搜索框



(3)单击任意一个城市,切换到Headers选项卡,查看Request URL,如下所示。URL中的中文参数经过了URL编码,需要用工具解码之后咱们才能看懂(dep参数表示出发地,query表示目的地)。推荐使用网站 http://www.jsons.cn/urlencode/ 进行解码,解码效果见下面图2。


3 实现
(1)首先获得出发地站点,因为最终需要获得整个自由行的产品列表。
在自由行首页中点击左侧的出发地站点,然后获取目标URL,如图二所示。



import requests

# Endpoint queried for the list of departure cities (returned as JSON).
url = "https://touch.dujia.qunar.com/depCities.qunar"

# Always pass a timeout: without one, requests waits indefinitely on a
# stalled connection.
strhtml = requests.get(url, timeout=10)
print(strhtml)  # prints the response object itself, not the body
dep_dict = strhtml.json()
print(dep_dict)
# dep_dict['data'] is iterated for its keys, and each key is then used to look
# up a collection of departure cities (presumably grouped by initial letter --
# confirm against the printed JSON).
for dep_item in dep_dict['data']:
    for dep in dep_dict['data'][dep_item]:
        print(dep)
(2)获得目的地。根据上面的分析,用URL解码工具解码以后,通过字符串拼接即可得到目的地的URL。

from urllib.parse import quote

# Build the destination-recommendation URL for one departure city `dep`.
# The city name is percent-encoded with the documented urllib.parse.quote
# before being placed in the query string.
url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(quote(dep))
(3)总源码

1 import requests
2 import urllib
3 import time
4 #import pymongo
5
6 # client=pymongo.MongoClient('localhost',27017)
7 # book_qunar=client['qunar']
8 # sheet_qunar_zyx=book_qunar['qunar_zyx']
9
10 #获取产品列表
def get_list(dep, item):
    """Fetch and print every page of the product list from ``dep`` to ``item``.

    The first page is requested to read ``data.limit.routeCount``; the list is
    then walked in steps of 20 and one record per page is printed.

    Args:
        dep: Departure city name (unencoded text).
        item: Destination name (unencoded text); sent as both ``query`` and
            ``originalquery``.

    Returns:
        None. Returns early, printing nothing, when the first response does
        not contain a usable ``routeCount``.
    """
    # Local import keeps this function self-contained: the file only does
    # `import urllib`, which does not guarantee that submodules are loaded.
    from urllib.parse import quote

    page_size = 20  # step of the paging loop and second value of `limit=`
    url_template = (
        'https://touch.dujia.qunar.com/list?modules=list,bookingInfo'
        '&dep={dep}&query={query}&mtype=all&ddt=false'
        '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
        '&cfrom=zyx&it=FreetripTouchin&et=FreetripTouch&date=&configDepNew='
        '&needNoResult=true&originalquery={query}'
        '&limit={offset},{size}&includeAD=true&qsact=search'
    )
    # Percent-encode once instead of on every page.
    dep_quoted = quote(dep)
    item_quoted = quote(item)

    def page_url(offset):
        # URL of the page that starts at `offset`.
        return url_template.format(
            dep=dep_quoted, query=item_quoted, offset=offset, size=page_size)

    first_page = get_json(page_url(0))
    try:
        route_count = int(first_page['data']['limit']['routeCount'])
    except (KeyError, TypeError, ValueError):
        # The count is missing or malformed, so there is nothing to page through.
        return

    for offset in range(0, route_count, page_size):
        # Offset 0 has exactly the URL already fetched above; reuse that
        # response instead of downloading the same page twice.
        page = first_page if offset == 0 else get_json(page_url(offset))
        result = {
            'date': time.strftime('%Y-%m-%d'),
            'dep': dep,
            'arrive': item,
            'limit': offset,
            'result': page,
        }
        # To store the record in MongoDB instead of printing it:
        # sheet_qunar_zyx.insert_one(result)
        print(result)
33
34 # def connect_mongo():
35 # client=pymongo.MongoClient('localhost',27017)
36 # book_qunar=client['qunar']
37 # return book_qunar['qunar_zyx']
38
39
def get_json(url, timeout=10):
    """GET ``url`` and return the response body decoded as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawler forever.

    Returns:
        The decoded JSON document.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    # Pause for one second after each request (presumably to avoid hammering
    # the server).
    time.sleep(1)
    return response.json()
44
if __name__ == "__main__":
    # Documented home of quote(); `import urllib` alone does not guarantee
    # that the urllib.request submodule is loaded.
    from urllib.parse import quote

    url = 'https://touch.dujia.qunar.com/depCities.qunar'
    dep_dict = get_json(url)
    # The response is JSON with one extra level of nesting: each key of
    # dep_dict['data'] leads to a collection of departure cities.
    for dep_item in dep_dict['data']:
        for dep in dep_dict['data'][dep_item]:
            # De-duplicated destinations for this departure city. A dict keeps
            # first-seen order and gives O(1) membership checks.
            destinations = {}
            # Per the decoded request URL, `dep` is the departure city, while
            # `query` / `originalquery` carry the destination.
            url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(quote(dep))
            arrive_dict = get_json(url)
            for arr_item in arrive_dict['data']:
                for arr_item_1 in arr_item['subModules']:
                    for query in arr_item_1['items']:
                        destinations.setdefault(query['query'], None)
            for item in destinations:
                get_list(dep, item)
来源:https://www.cnblogs.com/lanjianhappy/p/11783932.html
