# -*- coding: utf-8 -*-
# @Time :2019/10/14 20:45
# Author :李成广(63)
# @Email :chengguang.li@dili.com
# @File :Spider.py
# @Brief :爬虫主程序
import requests
from bs4 import BeautifulSoup
# URL fetched by this script; the query string selects page=1.
spider_url = 'https://www.doutula.com/photo/list/?page=1'
# Request headers: send a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
}

# Bound the request so a stalled connection cannot hang the script forever,
# and fail loudly on an HTTP error status instead of parsing an error page.
page1 = requests.get(spider_url, headers=headers, timeout=10)
page1.raise_for_status()

soup = BeautifulSoup(page1.text, "html.parser")
print(soup)

# Container <div> looked up by its class attribute.
div = soup.find(name='div', attrs={'class': 'page-content text-center'})
print(div)

# find() returns None when nothing matches, so guard each step instead of
# crashing with AttributeError when an expected element is missing.
div2 = div.find(name='div') if div is not None else None
a_list = div2.find_all(name='a') if div2 is not None else []

for a in a_list:
    img = a.find(name='img')
    if img is None:
        # Anchor without an <img> child: nothing to extract.
        continue
    data_original = img.attrs.get('data-original')
    title = img.attrs.get('alt')
    if not data_original:
        # Skip images that carry no 'data-original' attribute value.
        continue
    data = {"title": title, "url": data_original}
    print(data)