discord.py-rewrite - Dynamic Web Scraping using PyQt5 not working properly

末鹿安然 提交于 2021-02-11 18:10:20

问题


In short, I'm making a Discord bot that downloads the "World of the Day" picture from the website https://growtopiagame.com, saves it as D:\Kelbot/render.png, and then sends the picture to the channel in which the command was called. However, the website is not static and the image URL is not present in the page source, so I found a solution that uses PyQt5:

import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
# Discord command that scrapes the "World of the Day" image from
# https://growtopiagame.com with a Qt web page, saves it to disk and sends it
# to the channel the command was invoked from. Usable once per user every
# 60 seconds (see the cooldown decorator).
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
    # Web page that loads `url` in its constructor and keeps the resulting
    # HTML in `self.html`.
    class Page(QWebEnginePage):
        def __init__(self, url):
            # NOTE(review): a QApplication is constructed every time a Page
            # is created, i.e. once per call of main() below.
            self.app = QApplication(sys.argv)
            QWebEnginePage.__init__(self)
            self.html = ''
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(url))
            # Runs the Qt event loop; the loop is left when Callable() calls
            # self.app.quit().
            # NOTE(review): this is a synchronous call made from inside the
            # `async def wotd` coroutine.
            self.app.exec_()

        def _on_load_finished(self):
            # Requests the page HTML and registers Callable as the receiver.
            # Whatever toHtml() returns is stored here and then replaced by
            # Callable().
            self.html = self.toHtml(self.Callable)
            print('Load finished')

        def Callable(self, html_str):
            # Receives the HTML requested in _on_load_finished, stores it and
            # stops the Qt event loop started in __init__.
            self.html = html_str
            self.app.quit()

    # Loads the page, extracts the first URL found inside the
    # 'world-of-day-image' anchor and downloads it.
    def main():
        page = Page('https://growtopiagame.com')
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        js_test = soup.find('a', class_='world-of-day-image')
        # NOTE(review): js_test is iterated without checking that the anchor
        # was found - confirm what happens when the element is missing.
        link = []
        for x in js_test:
            link.append(str(x))
        # Only the first collected child (link[0]) is searched for URLs.
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
        # NOTE(review): the image is written to an absolute path, while the
        # file sent at the end of wotd() is the relative 'render.png'; the two
        # only refer to the same file when the working directory is D:\Kelbot.
        urllib.request.urlretrieve(urls[0], "D:\Kelbot/render.png")
    # main() is only called when this module is the entry-point script.
    if __name__ == '__main__': main()
    await ctx.send(file=discord.File('render.png'))

When I ran the bot from my task scheduler, it didn't work. So, I tried running it from the Python Shell and from Visual Studio Code, and it worked in both. However, when the command is called a second time, both the Python Shell and Visual Studio Code restart and the bot gets killed for some reason. Is it because classes are incompatible with discord.py? How could I possibly fix this? Is there a better solution than using PyQt5?

(Also sometimes instead of getting the picture, I get https://growtopiagame.com/resources/assets/images/load.gif which is the image they put before showing the actual World of the Day picture, but it fixes itself when I restart my pc)


回答1:


PyQt5 is not compatible with asyncio. There are libraries that try to make the two compatible, such as quamash, asyncqt and qasync, but in your case they are not necessary: the only task you want Qt to do is scrape the web page to obtain the URL of an image and download it. A workaround is therefore to create an external application whose only function is that, and then run it from the wotd function:

├── downloader.py
├── .env
└── main.py

main.py

import asyncio
import os
import sys
import uuid

import discord
from discord.ext import commands

from dotenv import load_dotenv

# Bot instance that the commands in this file are registered on; commands are
# invoked with the "!" prefix.
bot = commands.Bot(command_prefix="!")


# Command handler: fetches the "World of the Day" image by running
# downloader.py (located next to this file) in a separate process, so that
# Qt's event loop never runs inside the bot's asyncio loop.
#
# ctx: invocation context; the image, or a short failure message, is sent
#      through ctx.send().
#
# The docstring is deliberately a single user-facing sentence because
# discord.py uses a command's docstring as its help text.
@commands.cooldown(1, 60, commands.BucketType.user)
@bot.command()
async def wotd(ctx):
    """Send the Growtopia "World of the Day" image to this channel."""
    # Upper bound for the helper process; without it a helper that never
    # finishes would leave this command pending forever.
    timeout_seconds = 60

    current_dir = os.path.dirname(os.path.realpath(__file__))
    images_dir = os.path.join(current_dir, "images")
    # exist_ok avoids the check-then-create race between two invocations that
    # run at the same time.
    os.makedirs(images_dir, exist_ok=True)

    # A unique name per invocation keeps concurrent downloads apart.
    output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))

    args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    print("Started: %s, pid=%s" % (args, process.pid), flush=True)
    try:
        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=timeout_seconds
            )
        except asyncio.TimeoutError:
            # Stop the helper and reap it so no orphan process is left behind.
            process.kill()
            await process.wait()
            print("Timed out: %s, pid=%s" % (args, process.pid), flush=True)
            await ctx.send("Fetching the World of the Day image timed out.")
            return

        if process.returncode == 0:
            print(
                "Done: %s, pid=%s, result: %s"
                % (args, process.pid, stdout.decode().strip()),
                flush=True,
            )
            await ctx.send(file=discord.File(output_filename))
            print("end", output_filename)
        else:
            print(
                "Failed: %s, pid=%s, result: %s"
                % (args, process.pid, stderr.decode().strip()),
                flush=True,
            )
            print("error")
            # Tell the user as well instead of failing silently.
            await ctx.send("Could not fetch the World of the Day image.")
    finally:
        # The image is only needed for the upload; remove it so the images
        # directory does not grow with every invocation. The file may not
        # exist when the helper failed, hence the tolerated OSError.
        try:
            os.remove(output_filename)
        except OSError:
            pass


@wotd.error
async def wotd_error(ctx, error):
    """Handle errors raised by the ``wotd`` command.

    A cooldown error is answered in the channel with the remaining waiting
    time; every error, cooldown or not, is printed together with the context.
    """
    rate_limited = isinstance(error, commands.CommandOnCooldown)
    if rate_limited:
        template = "This command is ratelimited, please try again in {:.2f}s"
        await ctx.send(template.format(error.retry_after))
    print(ctx, error)


def main():
    """Load the environment from ``.env`` and run the bot.

    The token passed to ``bot.run`` is read from the ``DISCORD_TOKEN``
    environment variable.
    """
    load_dotenv()
    bot.run(os.getenv("DISCORD_TOKEN"))


# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

downloader.py

import sys

from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets


class DownLoader(QtCore.QObject):
    """Download the "World of the Day" image of growtopiagame.com to a file.

    The page is loaded in a QWebEnginePage; once loading has finished the
    document is polled with JavaScript until the image URL is available, the
    image is fetched with a QNetworkAccessManager and written to ``path``.

    The running Qt application is stopped with exit code 0 on success and
    with a non-zero exit code on any failure, so that the calling process can
    rely on the return code.

    Args:
        path: Destination file for the downloaded image.
        parent: Optional parent QObject.
    """

    # Delay between two lookups of the image URL, in milliseconds.
    POLL_INTERVAL_MS = 100
    # Give up after this many unsuccessful lookups (about 30 seconds) instead
    # of polling forever when the image never shows up.
    MAX_POLLS = 300
    # File name of the loading animation the site shows before the real
    # image is available; it must not be mistaken for the result.
    PLACEHOLDER_NAME = "load.gif"

    def __init__(self, path, parent=None):
        super().__init__(parent)
        self.path = path
        self._polls_left = self.MAX_POLLS

        url = "https://growtopiagame.com"
        self.manager = QtNetwork.QNetworkAccessManager(self)

        # Each instance gets its own profile, named after a fresh UUID.
        profile = QtWebEngineWidgets.QWebEngineProfile(
            QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
        )
        self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)
        # Load progress is printed to stdout.
        self.page.loadProgress.connect(print)

        self.manager.finished.connect(self.on_finished)
        self.page.loadFinished.connect(self.on_load_finished)

        self.page.load(QtCore.QUrl(url))

    def _fail(self, *message):
        """Print ``message`` to stderr and stop the application with -1."""
        print(*message, file=sys.stderr)
        QtCore.QCoreApplication.exit(-1)

    @QtCore.pyqtSlot(bool)
    def on_load_finished(self, ok):
        """Start looking for the image URL once the page has been loaded."""
        if ok:
            self.request_url()
        else:
            self._fail("error", ok)

    def request_url(self):
        """Ask the page for the image URL; the answer goes to ``download``."""
        js = """
        function get_url(){
            var elements = document.getElementsByClassName("world-of-day-image")
            if(elements.length){
                var element = elements[0];
                if(element.children.length){
                    var e = element.children[0]
                    if(e.tagName == "IMG")
                        return e.src
                }
            }
            return "";
        }
        get_url();
        """
        self.page.runJavaScript(js, self.download)

    def download(self, url):
        """Fetch ``url``, or poll the page again while it is not usable yet.

        Args:
            url: Result of the JavaScript lookup; empty while the image
                element does not exist yet.
        """
        ready = bool(url) and QtCore.QUrl(url).fileName() != self.PLACEHOLDER_NAME
        if ready:
            print(url)
            request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
            self.manager.get(request)
            return

        self._polls_left -= 1
        if self._polls_left <= 0:
            self._fail("image url not found")
            return
        QtCore.QTimer.singleShot(self.POLL_INTERVAL_MS, self.request_url)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def on_finished(self, reply):
        """Write the downloaded image to ``self.path`` and stop the app."""
        # The reply is not deleted automatically; schedule its deletion.
        reply.deleteLater()

        if reply.error() != QtNetwork.QNetworkReply.NoError:
            self._fail(reply.error(), reply.errorString())
            return

        file = QtCore.QFile(self.path)
        if not file.open(QtCore.QIODevice.WriteOnly):
            # Exiting with 0 here would make the caller look for a file that
            # was never written.
            self._fail("cannot open", self.path, file.errorString())
            return

        data = reply.readAll()
        print(len(data))
        written = file.write(data)
        file.close()
        if written != len(data):
            self._fail("cannot write", self.path, file.errorString())
            return
        QtCore.QCoreApplication.quit()


if __name__ == "__main__":
    # Script entry point: the single positional argument is the path the
    # image is written to.
    application = QtWidgets.QApplication(sys.argv)

    cli = QtCore.QCommandLineParser()
    cli.addPositionalArgument("path", "Path of image")
    cli.process(application)

    positional = cli.positionalArguments()
    if len(positional) == 0:
        print("not path", file=sys.stderr)
        sys.exit(-1)

    # The downloader is bound to a name for as long as the event loop runs.
    worker = DownLoader(positional[0])
    sys.exit(application.exec_())

.env

DISCORD_TOKEN=YOUR_TOKEN_HERE


来源:https://stackoverflow.com/questions/59241478/discord-py-rewrite-dynamic-web-scraping-using-pyqt5-not-working-properly

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!