import hashlib
import json
import os
import time

import requests


class requestsTools:
    """HTTP fetch helper with retries, a shared cookie dict and a file cache.

    Pages fetched through :meth:`parseUrl` can be stored as UTF-8 text files
    under ``basePath``. A later call that maps to the same file name is
    answered from disk instead of the network.
    """

    def __init__(self, basePath):
        """Create a helper whose cache files live under ``basePath``."""
        self._basePath = basePath
        self._headers = {}
        self._cookies = {}
        self._file_suffix = '.html'

    @property
    def basePath(self):
        """Directory prefix used when building cache file paths."""
        return self._basePath

    @basePath.setter
    def basePath(self, basePath):
        self._basePath = basePath

    @property
    def headers(self):
        """Default request headers sent with every request."""
        return self._headers

    @headers.setter
    def headers(self, headers):
        self._headers = headers

    @property
    def cookies(self):
        """Cookies sent with every request; refreshed from each response."""
        return self._cookies

    @cookies.setter
    def cookies(self, cookies):
        self._cookies = cookies

    def updateCookies(self, setCookies):
        """Merge the mapping ``setCookies`` into the stored cookies."""
        self._cookies.update(setCookies)

    @property
    def file_suffix(self):
        """Extension appended to every cache file name (default ``.html``)."""
        return self._file_suffix

    @file_suffix.setter
    def file_suffix(self, suffix):
        self._file_suffix = suffix

    def _get_md5(self, str):
        """Return the MD5 hex digest of the bytes ``str`` (a fingerprint).

        The parameter name shadows the builtin; it is kept so existing
        keyword callers continue to work.
        """
        digest = hashlib.md5()
        digest.update(str)
        return digest.hexdigest()

    def _get_timeStr(self, len=10):
        """Return the first ``len`` characters of ``str(time.time())``."""
        return str(time.time())[:len]

    def _get_filePath(self, method, url, isUnique, toFile, fileName, **kwargs):
        """Build the cache file path for a request.

        Returns ``None`` when ``toFile`` is false. Otherwise the file stem is
        ``fileName`` if given, else the MD5 of the URL, plus the JSON-encoded
        ``postData`` keyword for POST requests, plus a timestamp when
        ``isUnique`` is true.
        """
        if not toFile:
            return None
        if fileName:
            stem = fileName
        else:
            fingerprint = url
            if method == 'POST':
                fingerprint += json.dumps(kwargs.get("postData", {}))
            if isUnique:
                # The timestamp makes the name differ between calls, so a
                # previously cached copy is not reused.
                fingerprint += self._get_timeStr()
            stem = self._get_md5(fingerprint.encode('utf8'))
        return self.basePath + os.sep + stem + self.file_suffix

    def _parseUrl(self, url, method="GET", postData=None, headers=None,
                  errorTimes=3, timeout=30, allow_redirects=False,
                  *args, **kwargs):
        """Send the request, retrying on any exception.

        At most ``errorTimes`` attempts are made. Returns the
        ``requests.Response`` of the first attempt that does not raise, or
        ``None`` when every attempt raised (or ``errorTimes`` is not
        positive). Falls back to ``self.headers`` when ``headers`` is empty.
        """
        if not headers:
            headers = self.headers
        for _ in range(errorTimes):
            try:
                # NOTE: verify=False turns off TLS certificate verification.
                if method == "POST":
                    return requests.post(url=url, data=postData, verify=False,
                                         headers=headers, cookies=self.cookies,
                                         timeout=timeout,
                                         allow_redirects=allow_redirects)
                return requests.get(url=url, verify=False, headers=headers,
                                    cookies=self.cookies, timeout=timeout,
                                    allow_redirects=allow_redirects)
            except Exception:
                # Deliberately best-effort: any failure counts as one used
                # attempt and the caller receives None once all are used up.
                continue
        return None

    def _updateCookies(self, response):
        """Merge the cookies set by ``response`` into the stored cookies."""
        self.updateCookies(response.cookies.get_dict())

    def parseUrl(self, url, method="GET", postData=None, errorTimes=3,
                 toFile=True, fileName=None, isUnique=False, timeout=30,
                 allow_redirects=True, *args, **kwargs):
        """Fetch ``url``, preferring a cached copy on disk.

        Returns a dict with the keys ``content``, ``filePath``, ``status``,
        ``url`` and ``responseRaw``. A cache hit reports status 200 and
        ``responseRaw`` of ``None``. An empty dict is returned when no
        attempt succeeded or the server answered with an error status.

        Raises ``UnicodeDecodeError`` when the response body is not valid
        UTF-8, and ``OSError`` when the cache file cannot be read or written.
        """
        # Silence urllib3's warnings about unverified HTTPS requests.
        requests.packages.urllib3.disable_warnings()
        _filePath = self._get_filePath(method, url, isUnique,
                                       postData=postData, toFile=toFile,
                                       fileName=fileName)

        # Serve from the local cache when the file already exists.
        if _filePath and os.path.exists(_filePath):
            print("{} from local".format(method))
            # Cache files are written as UTF-8 below; read them the same way
            # instead of relying on the platform default encoding.
            with open(_filePath, 'r', encoding="utf8") as f:
                _content = f.read()
            return {
                "content": _content,
                "filePath": _filePath,
                "status": 200,
                "url": url,
                "responseRaw": None
            }

        print("{} from web".format(method))
        # NOTE(review): `method` is the second positional parameter of
        # _parseUrl, so extra positional arguments given to parseUrl would
        # collide with method= here.
        _response = self._parseUrl(url, method=method, postData=postData,
                                   headers=self.headers,
                                   errorTimes=errorTimes, timeout=timeout,
                                   allow_redirects=allow_redirects,
                                   *args, **kwargs)

        # A Response is falsy when its status is an error (4xx/5xx); spell
        # that out rather than relying on truthiness.
        if _response is None or not _response.ok:
            return {}

        # Remember the cookies set by the server for later requests.
        self._updateCookies(_response)

        # Decode once, and before touching the cache file, so a decode
        # failure cannot leave an empty file that later counts as a hit.
        _content = _response.content.decode("utf8")
        if _filePath:
            cache_dir = os.path.dirname(_filePath)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            # Save to the local cache.
            with open(_filePath, 'w', encoding="utf8") as f:
                f.write(_content)

        return {
            "content": _content,
            "filePath": _filePath,
            "status": _response.status_code,
            "url": _response.url,
            "responseRaw": _response
        }
# Source: https://www.cnblogs.com/xujunkai/p/12308080.html