The urllib2 documentation says that timeout parameter was added in Python 2.6. Unfortunately my code base has been running on Python 2.5 and 2.4 platforms.
With considerable irritation, you can override the httplib.HTTPConnection class that the urllib2.HTTPHandler uses.
def urlopen_with_timeout(url, data=None, timeout=None):
# Create these two helper classes fresh each time, since
# timeout needs to be in the closure.
class TimeoutHTTPConnection(httplib.HTTPConnection):
def connect(self):
"""Connect to the host and port specified in __init__."""
msg = "getaddrinfo returns an empty list"
for res in socket.getaddrinfo(self.host, self.port, 0,
socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
self.sock = socket.socket(af, socktype, proto)
if timeout is not None:
self.sock.settimeout(timeout)
if self.debuglevel > 0:
print "connect: (%s, %s)" % (self.host, self.port)
self.sock.connect(sa)
except socket.error, msg:
if self.debuglevel > 0:
print 'connect fail:', (self.host, self.port)
if self.sock:
self.sock.close()
self.sock = None
continue
break
if not self.sock:
raise socket.error, msg
class TimeoutHTTPHandler(urllib2.HTTPHandler):
http_request = urllib2.AbstractHTTPHandler.do_request_
def http_open(self, req):
return self.do_open(TimeoutHTTPConnection, req)
opener = urllib2.build_opener(TimeoutHTTPHandler)
opener.open(url, data)