get没有参数的请求
# -*- coding: utf-8 -*-
# GET request without parameters, using the requests.get() shortcut.
import requests

url = "https://www.baidu.com/"

# Fetch the page, then print the decoded response body.
response = requests.get(url)
page_text = response.text
print(page_text)
# -*- coding: utf-8 -*-
# GET request without parameters, using the generic requests.request()
# entry point with the HTTP method given explicitly.
import requests

url = "https://www.baidu.com/"

response = requests.request("GET", url)
page_text = response.text
print(page_text)
Post没有参数的请求
# -*- coding: utf-8 -*-
# POST request without parameters, using the generic requests.request()
# entry point with the HTTP method given explicitly.
import requests

url = "https://www.baidu.com/"

response = requests.request("POST", url)
page_text = response.text
print(page_text)
# -*- coding: utf-8 -*-
# POST request without parameters, using the requests.post() shortcut.
import requests

url = "https://www.baidu.com/"

response = requests.post(url)
page_text = response.text
print(page_text)
get、post带请求参数时,有以下几种统一的传参形式:
params:get请求的参数,会作为查询字符串拼接在url后面
data、json是post请求中两种不同类型的请求体数据,解释如下:
# -*- coding: utf-8 -*-
# Demonstrates the three ways of attaching data to a request:
# ``params`` (query string), ``data`` (form body) and ``json`` (JSON body).
# Each one is sent in its own request: ``json`` is ignored whenever ``data``
# is also passed, and request bodies belong on POST rather than GET.
import requests

url = "https://www.baidu.com/"

# params: URL-encoded and appended to the URL as the query string,
# in the form https://www.baidu.com/s?wd=...
params = {"wd": "江可爱"}
response = requests.get(url=url, params=params)
print(response.text)

# data: a dict is sent with Content-Type application/x-www-form-urlencoded,
# i.e. the body becomes "usename=1234&password=1233".  On the server side
# the raw bytes arrive in request.body and are parsed into a dict, so
# request.POST.get() can read the individual fields.
data = {"usename": "1234", "password": "1233"}
response = requests.post(url=url, data=data)
print(response.text)

# json: pass the Python object itself; requests serializes it and sets
# Content-Type application/json.  Passing an already-dumped string here
# would encode it a second time.  On the server side the payload arrives in
# request.body but is not parsed into form fields, so request.POST.get()
# returns nothing for it.
payload = {"usename": "1234", "password": "1233"}
response = requests.post(url=url, json=payload)
print(response.text)
请求头headers:cookie是登录后返回的凭证;user-agent用于模拟浏览器;referer用于记录你访问目标网站之前所在页面的地址:
# Cookies: pass them as a dict through the ``cookies`` argument.
session_cookies = {
    ".CNBlogsCookie": "005E5D62C333893955F050B6B5A3ACCCEAB4FC6A1D6",
}
response = requests.get(url=url, cookies=session_cookies)
print(response.text)

# Headers: a browser-like header set (user agent, referer, host, cookie).
# NOTE: this snippet only defines the dict; to send it, pass it to a
# request with ``headers=headers``.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
    "referer": "https://www.zhihu.com/",
    "Host": "www.zhihu.com",
    "cookie": "",
}
将文本流保存到文件,iter_content可以迭代获取内容
# Download a page and save the raw response bytes to a file next to this
# script, writing the body chunk by chunk via iter_content().
import os

import requests

url = "https://www.cnblogs.com/venvive/p/11530684.html"
cookies = {".CNBlogsCookie": "005E5D62C333893955F050B6B5A2D544319D412F1C111E3ACCCEAB4FC6A1D6"}

# Build the output path with os.path.join instead of string concatenation.
base_path = os.path.dirname(os.path.abspath(__file__))
output_path = os.path.join(base_path, "reportss.txt")

# stream=True defers downloading the body until it is iterated, so the
# content is written as it arrives instead of being held in memory first.
# Using the response as a context manager releases the connection afterwards.
with requests.get(url=url, cookies=cookies, stream=True) as r:
    with open(output_path, "wb") as fd:
        # A multi-kilobyte chunk avoids the thousands of tiny writes that a
        # 9-byte chunk size would cause; the bytes written are the same.
        for chunk in r.iter_content(chunk_size=8192):
            fd.write(chunk)
重定向与请求历史
例如Github将所有的HTTP请求重定向到HTTPS:
# Redirects are followed by default, so the response below is the final one.
r = requests.get("http://github.com")

# After the redirect: r.url = "https://github.com/"
status = r.status_code
redirect_chain = r.history
print(status)
print(redirect_chain)

# Output:
# 200
# [<Response [301]>]
如果使用的是GET、OPTIONS、POST、PUT、PATCH或者DELETE,可以通过allow_redirects参数禁用重定向处理:False是禁止,True是允许。HEAD请求默认不跟随重定向,需要时可以用allow_redirects=True启用
# With allow_redirects=False the 301 response itself is returned and no
# redirect is followed, so r.url stays at the requested "http://github.com/"
# and the history list is empty.
r = requests.get("http://github.com", allow_redirects=False)

status = r.status_code
redirect_chain = r.history
print(status)
print(redirect_chain)

# Output:
# 301
# []
超时
requests在经过以timeout参数设定的秒数时间之后停止等待响应,基本上所有的生产代码都应该使用这一参数,如果不使用,程序可能会永远失去响应
# A deliberately tiny timeout (0.001 seconds) to show what happens when the
# server does not answer in time.
r = requests.get("https://www.zhihu.com/", timeout=0.001)

# Output (the call raises):
# requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='www.zhihu.com', port=443): Max retries exceeded