import pymysql
import requests
from lxml import etree
def get_books():
    """Scrape book entries from the site's home page and insert them into MySQL.

    Downloads ``http://www.yangxin.wang/``, collects the ``<li>`` nodes of six
    listing sections via XPath, and inserts one row per entry into the
    ``book`` table (columns ``book_image``, ``book_name``, ``book_auth``) of
    the ``film`` database on the local MySQL server.

    Entries without an author ``<span>`` are stored with the author '佚名'
    (i.e. anonymous). Entries that lack an image or a name are skipped.

    Raises:
        requests.RequestException: if the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
        pymysql.MySQLError: if connecting to or writing to the database fails.
    """
    url = "http://www.yangxin.wang/"
    # Fetch the page. A timeout keeps an unresponsive server from hanging the
    # script forever, and raise_for_status avoids parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    html = etree.HTML(response.text)

    # Each of the six listing sections contributes one list of <li> elements.
    section_xpath = "/html/body/div[1]/div[1]/div[1]/div[{}]/div[2]/ul/li"
    book_all = [html.xpath(section_xpath.format(i)) for i in range(1, 7)]
    print(book_all)

    # Store the results in the database.
    db_params = {
        'host': '127.0.0.1',
        'user': 'root',
        'password': '123',
        'db': 'film',
        'port': 3306,
        'charset': 'utf8',
    }
    # Placeholders let the driver escape the values, so scraped text that
    # contains quotes can neither break nor inject into the statement.
    sql = "insert into book(book_image, book_name, book_auth) values(%s, %s, %s)"

    conn = pymysql.Connect(**db_params)
    try:
        with conn.cursor() as cursor:
            for books in book_all:
                for book in books:
                    images = book.xpath("./a[1]/img/@src")
                    names = book.xpath("./a[2]/text()")
                    if not images or not names:
                        # Not a complete book entry; skip it instead of
                        # aborting the whole run with an IndexError.
                        continue
                    authors = book.xpath("./span/text()")
                    book_auth = authors[0] if authors else '佚名'
                    print(book_auth)
                    # execute() only queues the row in the open transaction;
                    # nothing is persisted until commit().
                    cursor.execute(sql, (images[0], names[0], book_auth))
        conn.commit()
    finally:
        # Always release the connection, even if an insert failed.
        conn.close()
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script, not when
    # it is imported as a module.
    get_books()
# Source: CSDN
# Author: 扣剑书生
# Link: https://blog.csdn.net/weixin_44038167/article/details/103692719