1. Basic usage
# Get a response and inspect some of its attributes
import requests

response = requests.get('http://www.baidu.com')
print(response.status_code)  # print the status code
print(response.url)          # print the request URL
print(response.headers)      # print the response headers
print(response.cookies)      # print the cookies
print(response.text)         # print the response body as text
print(response.content)      # print the response body as bytes
# Request methods, one per HTTP verb
requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
# Proxies and request headers
# (note: the original was missing a comma after the second entry,
# which silently concatenated two strings into one bad address)
pro = ['113.194.29.102:9999',
       '58.253.154.72:9999',
       '182.34.32.128:9999',
       '221.207.227.84:6666',
       '112.117.119.132:6666',
       '117.8.131.202:6666',
       '112.98.198.44:6675',
       '61.138.47.124:6675',
       '60.15.109.190:6666']
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
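The pool above is only defined, never used. A minimal sketch of wiring it in, picking a random proxy and attaching the headers; these public proxy IPs are almost certainly dead by now, so treat the call pattern, not the addresses, as the point:

import random

proxy = random.choice(pro)               # pick one proxy at random
proxies = {'http': 'http://' + proxy,
           'https': 'http://' + proxy}
response = requests.get('http://httpbin.org/get',
                        headers=head, proxies=proxies, timeout=5)
print(response.text)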
# Requests with parameters
data = {
    'name': 'tom',
    'age': 20
}
response = requests.get('http://httpbin.org/get', params=data)    # params go into the query string
response = requests.post('http://httpbin.org/post', data=data)    # data goes into the request body
# Sessions and cookies
session = requests.Session()
session.get('http://httpbin.org/cookies/set/number/12345')  # the server sets a cookie
response = session.get('http://httpbin.org/cookies')        # the Session sends it back automatically
# Certificate warnings
import urllib3

urllib3.disable_warnings()  # suppress the InsecureRequestWarning from urllib3
response = requests.get('https://www.12306.cn', verify=False)  # disable certificate verification
# Exceptions
from requests.exceptions import ReadTimeout, HTTPError, RequestException
# See the official documentation for the full exception hierarchy.
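A minimal sketch of catching these exceptions around a request, assuming an arbitrary 0.5-second timeout against httpbin:

try:
    response = requests.get('http://httpbin.org/get', timeout=0.5)
    response.raise_for_status()          # raises HTTPError on 4xx/5xx status codes
except ReadTimeout:
    print('request timed out')
except HTTPError as e:
    print('bad status:', e)
except RequestException as e:            # base class, catches any other requests error
    print('request failed:', e)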
2. Points to watch when writing a crawler with requests
1. Use proxies.
2. Add a delay between requests.
3. Keep a record of every URL you have already requested, written out to a file; when crawling a large amount of data, a failure then does not force you to start over and waste time (a sketch combining points 2-4 follows this list).
4. Take exception capture seriously, otherwise the crawler fails easily.
5. Always debug module by module.
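A minimal sketch of points 2-4 together, under a few stated assumptions: the record file name seen_urls.txt and the httpbin URL list are placeholders, and the 1-3 second delay range is arbitrary:

import os
import time
import random
import requests
from requests.exceptions import RequestException

RECORD = 'seen_urls.txt'  # hypothetical record file for crawled URLs
urls = ['http://httpbin.org/get?page=%d' % i for i in range(5)]  # placeholder URLs

# Load the URLs already crawled in a previous run
seen = set()
if os.path.exists(RECORD):
    with open(RECORD) as f:
        seen = set(line.strip() for line in f)

for url in urls:
    if url in seen:
        continue                      # point 3: skip work finished before a failure
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        # ... parse response.text here ...
        with open(RECORD, 'a') as f:  # record the success immediately
            f.write(url + '\n')
    except RequestException as e:
        print('failed:', url, e)      # point 4: catch, log, keep going
    time.sleep(random.uniform(1, 3))  # point 2: randomized request delay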
Source: CSDN
Author: 一瓶子不满的拖油瓶
Link: https://blog.csdn.net/qq_37813963/article/details/103442315