Python: simple async download of url content?

Backend · Unresolved · 10 answers · 1530 views

天命终不由人 · 2020-12-15 11:23

I have a web.py server that responds to various user requests. One of these requests involves downloading and analyzing a series of web pages.

Is there a simple way to download the content of these URLs asynchronously?

10 Answers
  •  北海茫月 · 2020-12-15 12:00

    You might be able to use urllib.request to download the files and the queue module to manage a number of worker threads, e.g.:

    import urllib.request
    from threading import Thread
    from queue import Queue
    
    NUM_WORKERS = 20
    
    class Dnld:
        def __init__(self):
            # Shared work queue served by a pool of daemon worker threads
            self.Q = Queue()
            for i in range(NUM_WORKERS):
                t = Thread(target=self.worker)
                t.daemon = True
                t.start()
    
        def worker(self):
            while True:
                url, Q = self.Q.get()
                try:
                    f = urllib.request.urlopen(url)
                    Q.put(('ok', url, f.read()))
                    f.close()
                except Exception as e:
                    Q.put(('error', url, e))
                    try:
                        f.close()  # clean up if the handle was opened
                    except Exception:
                        pass
    
        def download_urls(self, L):
            Q = Queue()  # Create a second queue so the worker
                         # threads can send the data back again
            for url in L:
                # Add the URLs in `L` to be downloaded asynchronously
                self.Q.put((url, Q))
    
            rtn = []
            for i in range(len(L)):
                # Collect the results as they arrive, re-raising
                # any exception a worker reported
                status, url, data = Q.get()
                if status == 'ok':
                    rtn.append((url, data))
                else:
                    raise data
            return rtn
    
    inst = Dnld()
    for url, data in inst.download_urls(['http://www.google.com'] * 2):
        print(url, data)
    
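    In current Python, the same fan-out/fan-in idea is available ready-made in the standard library's concurrent.futures module, so you don't have to manage the threads and queues yourself. A minimal sketch of the equivalent (the fetch helper is illustrative, not part of the answer above):
    
    from concurrent.futures import ThreadPoolExecutor, as_completed
    import urllib.request
    
    def fetch(url):
        # Download one URL and return it along with its raw body
        with urllib.request.urlopen(url) as f:
            return url, f.read()
    
    urls = ['http://www.google.com'] * 2
    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = [pool.submit(fetch, u) for u in urls]
        for fut in as_completed(futures):
            url, data = fut.result()  # re-raises any download error
            print(url, len(data))
    
    One thing the hand-rolled version above adds over this sketch is the per-call result queue, which lets several concurrent web.py requests share a single worker pool without mixing up their results.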
