When I use the following function with the Python 3.2.3 package in Cygwin, it hangs on any request to any HTTPS host. After 60 seconds it fails with this error: [Errno 104] Connection reset by peer.
UPDATE: I thought this was limited to Cygwin, but it also happens on 64-bit Windows 7 with Python 3.3. I'll try 3.2 right now. The error when using the Windows command shell is: urlopen error [WinError 10054] An existing connection was forcibly closed by the remote host.
UPDATE 2 (Electric Boogaloo): This is limited to a couple of the sites that I'm trying to use. I tested against Google and other major sites with no issues. It appears to be related to this bug:
http://bugs.python.org/issue16361
Specifically, the server hangs after the client hello. This is due to the version of OpenSSL that shipped with the compiled versions of Python 3.2 and 3.3: it misidentifies the SSL version of the server. Now I need code to automatically downgrade my SSL version to SSLv3 when opening a connection to the affected sites, like in this post:
How to use urllib2 to get a webpage using SSLv3 encryption
but I can't get it to work.
def _is_timeout(err):
    """Return True when *err* (an exception or a reason string) denotes a timeout."""
    import errno  # local import: keeps this block self-contained

    if isinstance(err, socket.timeout):
        return True
    if isinstance(err, str):
        # URLError.reason may be a plain message string instead of an exception.
        return 'timed out' in err.lower()
    # 60 is the value the original code tested for (ETIMEDOUT on BSD/macOS);
    # errno.ETIMEDOUT covers the platform the code is actually running on.
    return getattr(err, 'errno', None) in (60, errno.ETIMEDOUT)


def _is_utf8_text(content_type):
    """Return True when *content_type* is one this module decodes as UTF-8."""
    if content_type is None:
        return False
    return ("charset=utf-8" in content_type.lower()
            or content_type in ('text/html', 'text/plain'))


def worker(url, body=None, bt=None):
    """Perform one HTTP(S) request and return the decoded response text.

    Args:
        url: Address to request.
        body: Optional request payload (already UTF-8 encoded bytes). When
            given, it is sent as the request data.
        bt: Optional value for the ``Content-Type`` request header.

    Returns:
        The response body decoded as UTF-8 on success. On failure one of the
        marker strings ``"Resource Not Found (404)"``, ``"Operation timed
        out"``, ``"Request Timed Out"`` or ``"Server Error"``. ``None`` is
        returned for a socket-level error that is not a timeout (kept for
        backward compatibility with existing callers).

    Raises:
        SystemExit: If the response has a missing or unsupported
            ``Content-Type`` (the original behaviour, preserved).
    """
    hdrs = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            # NOTE(review): "deflate" is advertised but only gzip is decoded below.
            'Accept-Encoding': 'gzip,deflate',
            'User-Agent': "My kewl Python tewl!"}
    if 'myweirdurl' in url:
        # This host gets a different User-Agent; all other headers are shared.
        hdrs['User-Agent'] = "Netscape 6.0"
    if bt:
        hdrs['Content-Type'] = bt

    # Never write URLs that carry a password into the log.
    has_secret = 'password' in url
    loggable_url = '<url withheld>' if has_secret else url

    start_req = time.time()
    logger.debug('request start: {}'.format(datetime.now().ctime()))
    if not has_secret:
        logger.debug('request url: {}'.format(url))
    req = urllib.request.Request(url, data=body, headers=hdrs)
    if body:
        logger.debug("body: {}".format(body))

    try:
        # The payload already travels inside ``req``; no need to pass it twice.
        handle = urllib.request.urlopen(req, timeout=298)
    except URLError as ue:
        # Must precede the socket.error clause: on Python >= 3.3 socket.error
        # is OSError and URLError subclasses it, so the reverse order would
        # make this handler unreachable.
        elapsed = time.time() - start_req
        logger.error(ue)
        code = getattr(ue, 'code', None)
        reason = getattr(ue, 'reason', None)
        if code is not None:
            # HTTPError: the server answered with an error status.
            logger.warning('The server couldn\'t fulfill the request.')
            logger.error('Error code: {}'.format(code))
            if code == 404:
                return "Resource Not Found (404)"
        else:
            logger.warning('We failed to reach a server with {}'.format(loggable_url))
            logger.error('Reason: {}'.format(reason))
            logger.error(type(reason))
            if _is_timeout(reason) or _is_timeout(ue):
                logger.error("Arrggghh, timed out!")
                return "Operation timed out"
        logger.error("req time: {}".format(elapsed))
        logger.error("returning: Server Error")
        return "Server Error"
    except socket.error as se:
        logger.error(se)
        logger.error(getattr(se, 'errno', None))
        logger.error(type(se))
        if _is_timeout(se):
            logger.error("returning: Request Timed Out")
            return 'Request Timed Out'
        # Non-timeout socket failure: the original fell through and returned
        # None implicitly; kept, but made explicit.
        return None

    try:
        info = handle.info()
        logger.debug('Here are the headers of the page : {}'.format(dict(info)))
        logger.debug("The true URL in case of redirects {}".format(handle.geturl()))

        # Look headers up on the message object, not on a dict copy: the
        # message lookup is case-insensitive, a plain dict is not.
        ce = info.get('Content-Encoding')
        if ce is not None:
            logger.debug('Content-Encoding: {}'.format(ce))
        ct = info.get('Content-Type')
        if ct is not None:
            logger.debug('Content-Type: {}'.format(ct))

        if not _is_utf8_text(ct):
            logger.debug("Unknown content type: {}".format(ct))
            sys.exit()

        raw = handle.read()
    finally:
        # Always release the connection, including on the sys.exit() path.
        handle.close()

    if ce is not None and ce.strip().lower() == "gzip":
        logger.debug("Unzipping payload")
        raw = GzipFile(fileobj=BytesIO(raw), mode="rb").read()
    return raw.decode("utf-8")