I have a very basic spider that looks like the followall spider from scrapy testspiders.
import re
import scrapy.sign
You have to install a reactor compatible with the Qt event loop, for example qt5reactor or qt-reactor:
python -m pip install qt5reactor
python -m pip install qt-reactor
import sys
from PyQt5 import QtWidgets, QtCore, QtGui
import qt5reactor
# import qreactor
from scrapy import signals
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import twisted
from Layout import Ui_MainWindow
from ZenSpider import ZenSpider
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window that launches a Scrapy crawl and lists scraped URLs.

    Pressing ``pushButton`` starts a ``ZenSpider`` crawl through a
    ``CrawlerRunner``; every scraped item adds one row to ``tableWidget``.
    Closing the window stops the Twisted reactor.
    """

    def __init__(self, parent=None):
        """Build the generated UI and wire up the widgets.

        Args:
            parent: Optional parent widget, forwarded to ``QMainWindow``.
        """
        # Forward ``parent``; the original accepted it but silently dropped it.
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        self.pushButton.pressed.connect(self.on_url_entered)
        self.tableWidget.horizontalHeader().setSectionResizeMode(
            QtWidgets.QHeaderView.ResizeToContents
        )

    def crawler_results(self, item):
        """Append the URL of a scraped item as a new row in the table.

        Connected to Scrapy's ``item_scraped`` signal.

        Args:
            item: Scraped item; its ``"url"`` value is shown in column 0.
        """
        row = self.tableWidget.rowCount()
        url = item["url"]
        it = QtWidgets.QTableWidgetItem(url)
        self.tableWidget.insertRow(row)
        self.tableWidget.setItem(row, 0, it)

    def on_url_entered(self):
        """Start a new ``ZenSpider`` crawl and route its items to the table."""
        configure_logging()
        runner = CrawlerRunner()
        # Create the crawler explicitly so the signal handler is attached
        # *before* the crawl is started, instead of hooking in afterwards by
        # iterating ``runner.crawlers``.
        crawler = runner.create_crawler(ZenSpider)
        crawler.signals.connect(self.crawler_results, signal=signals.item_scraped)
        runner.crawl(crawler, domain="google.com.au")

    def closeEvent(self, event):
        """Run the base-class close handling, then stop the Twisted reactor.

        Args:
            event: The close event passed in by Qt.
        """
        super(MainWindow, self).closeEvent(event)
        twisted.internet.reactor.stop()
if __name__ == "__main__":
    # Same ordering as before: create the Qt application object first, then
    # install the Qt-backed reactor, and only afterwards touch the reactor.
    qt_app = QtWidgets.QApplication([])
    qt5reactor.install()
    # Alternative that was left commented out: qreactor.install()

    window = MainWindow()
    window.show()

    # Run the installed reactor; MainWindow.closeEvent stops it again.
    twisted.internet.reactor.run()