实例二:淘宝商品比价定向爬虫

蓝咒 提交于 2019-12-05 07:17:25
import json
import re
import sys

# Patterns for the JSON fragments embedded in the search-result HTML.
# The title pattern accepts backslash escapes (e.g. \" or \uXXXX) so that an
# escaped quote inside a title does not end the match early.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\\n]|\\.)*)"')


def _decode_title(raw):
    """Decode the escaped body of a JSON string; return it as-is if invalid."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        # Malformed escape sequence: keep the raw text rather than lose the item.
        return raw


def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    Returns the string " " when the request fails (connection error, timeout,
    HTTP error status, invalid URL), so callers always receive a string.
    """
    # Imported here so the parsing/printing helpers in this module remain
    # importable when the third-party ``requests`` package is not installed.
    import requests

    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Use the encoding detected from the content, not the header default.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return " "


def parsePage(ilt, html):
    """Extract ``[price, title]`` pairs from ``html`` and append them to ``ilt``.

    Prices and titles are matched independently and paired by position; when
    the two counts differ, the surplus matches are ignored. Prices are kept as
    strings exactly as they appear in the page.
    """
    prices = _PRICE_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    # SECURITY: the page content is untrusted, so values are taken from regex
    # groups and decoded with json.loads instead of being passed to eval().
    for price, raw_title in zip(prices, titles):
        ilt.append([price, _decode_title(raw_title)])


def printGoodsList(ilt):
    """Print a tab-separated table of the ``[price, title]`` rows in ``ilt``."""
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称"))
    for count, g in enumerate(ilt, 1):
        # The three values must be separate arguments; passing one tuple
        # leaves the second and third fields without a value.
        print(tplt.format(count, g[0], g[1]))


def main(goods='书包', depth=2):
    """Search for ``goods``, scan ``depth`` result pages and print the items.

    A page that fails is reported on stderr and skipped; the remaining pages
    are still processed.
    """
    # NOTE(review): the search endpoint may need to be
    # 'https://s.taobao.com/search?q=' - confirm against the live site.
    start_url = 'http://www.taobao.com/search?q=' + goods
    infoList = []
    for i in range(depth):
        # presumably 44 results per page, so 's' is the result offset - verify
        url = start_url + '&s=' + str(44 * i)
        try:
            html = getHTMLText(url)
            parsePage(infoList, html)
        except Exception as exc:
            print("page {} skipped: {}".format(i + 1, exc), file=sys.stderr)
            continue
    printGoodsList(infoList)


if __name__ == "__main__":
    main()
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!