Scraping a chat in real time using Node.js

可紊 提交于 2019-12-01 05:40:01

问题


I want to build a scraping application in Node.js that monitors a chat in real time and stores certain messages in a database.

Specifically, I want to capture data from the chats of streaming platforms and extract useful information from it to help the people who run the streams.

However, I do not know how to get started with this in Node.js.

So far I have been able to capture the data of the messages that are already on the page, but I cannot monitor new messages in real time. Can anyone help with this?

What I have done so far:

server.js

var express     = require('express');
var fs          = require('fs');
var request     = require('request');
var puppeteer = require('puppeteer');
var app         = express();

/**
 * GET / — scrapes the nicknames currently shown in the chat and logs them.
 *
 * The HTTP response is sent immediately; the scrape runs in the background
 * and its result (or failure) is written to the server console.
 *
 * @param {object} req - Express request (unused).
 * @param {object} res - Express response; receives a fixed text reply.
 */
app.get('/', function(req, res){

    // Keep the URL local to the handler rather than leaking an implicit global.
    const url = 'https://www.nimo.tv/live/6035521326';

    (async() => {

        const browser = await puppeteer.launch();

        try {
            const page = await browser.newPage();
            await page.goto(url);
            await page.waitForSelector('.msg-nickname');

            // Runs inside the page: collect the text of every nickname element.
            const messages = await page.evaluate(() => {
                return Array.from(document.querySelectorAll('.msg-nickname'))
                        .map(item => item.innerText);
            });

            console.log(messages);
        } finally {
            // Always close the browser so each request does not leave a
            // Chromium process running.
            await browser.close();
        }
    })().catch((err) => {
        // Without this handler a Puppeteer failure would surface as an
        // unhandled promise rejection.
        console.error('Failed to scrape chat nicknames:', err);
    });

    res.send('Check your console!');

});

// Start the Express app listening on port 8081 and announce it on the console.
app.listen('8081');
console.log('Magic happens on port 8081');

// Expose the Express app through both `module.exports` and the local
// `exports` alias.
module.exports = app;
exports = module.exports;

With this, I get the nicknames of the users who posted messages and put them in an array. I want my application to keep running and to receive new nicknames automatically whenever a new message is posted in the chat. Can anyone help with this challenge?

Maybe I will need to use a WebSocket.


回答1:


If possible, you should use the API that the chat itself uses. Open the Network tab in the Chrome developer tools and try to figure out which network requests are being made.


If that is not possible, you can use a MutationObserver to monitor DOM changes. Expose a Node.js function to the page via page.exposeFunction, then call it from the observer whenever a relevant change occurs. You can then insert the obtained data into a database.

Here is some example code to get you started:

const puppeteer = require('puppeteer');
const { Client } = require('pg');

/**
 * Opens the chat page in a visible browser, watches one element for newly
 * added child elements, and stores the text of each added element in the
 * database.
 *
 * The browser and the database connection are intentionally left open on
 * success so that monitoring keeps running; they are released only if setup
 * fails.
 */
(async () => {
    const client = new Client(/* ... */);
    await client.connect(); // connect to database

    let browser;
    try {
        browser = await puppeteer.launch({ headless: false });
        const [page] = await browser.pages();

        // Handler invoked from the page context for every added element.
        const mutationListener = async (addedText) => {
            console.log(`Added text: ${addedText}`);

            try {
                // insert data into database
                await client.query('INSERT INTO users(text) VALUES($1)', [addedText]);
            } catch (err) {
                // A single failed insert must not stop the monitoring.
                console.error('Failed to store added text:', err);
            }
        };
        // exposeFunction returns a promise; wait for it so the binding is
        // registered before the page is loaded.
        await page.exposeFunction('mutationListener', mutationListener);

        await page.goto('http://...');
        await page.waitForSelector('.msg-nickname');

        await page.evaluate(() => {
            // wait for any mutations inside a specific element (e.g. the chatbox)
            const observerTarget = document.querySelector('ELEMENT-TO-MONITOR');
            if (observerTarget === null) {
                throw new Error('Element to monitor was not found on the page');
            }

            const mutationObserver = new MutationObserver((mutationsList) => {
                for (const { addedNodes } of mutationsList) {
                    // A mutation may add several nodes or none at all (pure
                    // removals), and text nodes have no innerText: report
                    // every added element, skip everything else.
                    for (const node of addedNodes) {
                        if (node.nodeType === Node.ELEMENT_NODE) {
                            mutationListener(node.innerText);
                        }
                    }
                }
            });
            mutationObserver.observe( // start observer
                observerTarget,
                { childList: true }, // wait for new child nodes to be added/removed
            );
        });
    } catch (err) {
        // Setup failed: release the browser (if it was launched) and the
        // database connection before reporting the error.
        if (browser) {
            await browser.close();
        }
        await client.end();
        throw err;
    }
})().catch((err) => {
    console.error('Chat monitoring failed:', err);
    process.exitCode = 1;
});


来源:https://stackoverflow.com/questions/55461275/realtime-scrap-a-chat-using-nodejs

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!