How to use qtwebkit in python threads?

独自空忆成欢 提交于 2019-12-02 00:31:42

Because Qt is asynchronous by design, the QtWebKit loading methods are non-blocking as well, so there is no point in running them in threads. You can start several page loads in parallel from a single thread like this:

from functools import partial

from PySide.QtCore import QUrl
from PySide.QtGui import QApplication
from PySide.QtWebKit import QWebView, QWebSettings


# Pages fetched by the example; passed to Crawler.start() in the script
# entry point, which opens one QWebView per URL.
TARGET_URLS = (
    'http://stackoverflow.com',
    'http://github.com',
    'http://bitbucket.org',
    'http://news.ycombinator.com',
    'http://slashdot.org',
    'http://www.reddit.com',
    'http://www.dzone.com',
    'http://www.ideone.com',
    'http://jsfiddle.net',
)


class Crawler(object):
    """Load several URLs at once in separate QWebViews and collect their HTML.

    Every URL gets its own ``QWebView``.  All loads are started from the
    calling thread and complete asynchronously through the ``loadFinished``
    signal; once every view has reported back, ``app.quit()`` is called.

    Attributes:
        app: Object whose ``quit()`` is invoked after the last load finishes.
        results: Maps each main frame's ``url()`` to that frame's ``toHtml()``.
        browsers: Maps ``browser_id`` to a ``(web_view, finished)`` tuple.
    """

    def __init__(self, app):
        """Store *app* and create the empty result / bookkeeping dicts."""
        self.app = app
        self.results = dict()
        self.browsers = dict()

    def _load_finished(self, browser_id, ok):
        """Handle ``loadFinished`` for the view registered as *browser_id*.

        Records the main frame's HTML, marks the view as finished, detaches
        and stops it, and quits the application once no view is pending.

        Args:
            browser_id: Key of the view in ``self.browsers``.
            ok: Success flag delivered by the ``loadFinished`` signal.  It is
                only printed; the frame's HTML is stored even when it is
                false.
        """
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (ok, browser_id))
        web_view, _flag = self.browsers[browser_id]
        self.browsers[browser_id] = (web_view, True)

        frame = web_view.page().mainFrame()
        # Keyed by the frame's own URL, not by the URL string given to start().
        self.results[frame.url()] = frame.toHtml()

        # Detach the handler before stopping the view so this view cannot
        # report a second time.
        web_view.loadFinished.disconnect()
        web_view.stop()

        if all(finished for _view, finished in self.browsers.values()):
            print('all finished')
            self.app.quit()

    def start(self, urls):
        """Create one image-less ``QWebView`` per URL and start loading it.

        Returns immediately; completion is reported to ``_load_finished``.
        If *urls* is empty no view is created, so ``_load_finished`` never
        runs and this class never calls ``app.quit()``.

        Args:
            urls: Iterable of URL strings; each one's position in the
                iteration becomes its ``browser_id``.
        """
        for browser_id, url in enumerate(urls):
            web_view = QWebView()
            web_view.settings().setAttribute(QWebSettings.AutoLoadImages,
                                             False)
            # Register the view before the load starts so the handler can
            # always find its entry, whenever the signal is delivered.
            self.browsers[browser_id] = (web_view, False)
            # Bind the id now; the signal itself only supplies the ok flag.
            loaded = partial(self._load_finished, browser_id)
            web_view.loadFinished.connect(loaded)
            web_view.load(QUrl(url))


if __name__ == '__main__':
    # Script entry point: start every load, run the Qt event loop until the
    # crawler quits the application, then report which URLs were collected.
    app = QApplication([])
    crawler = Crawler(app)
    crawler.start(TARGET_URLS)
    # Blocks here; the loads only make progress while the event loop runs.
    app.exec_()
    # One pre-formatted argument keeps the output the same on Python 2 and
    # also valid on Python 3; list() shows the keys as a plain list on both.
    print('got: %s' % list(crawler.results))
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!