Memory leak when calling too many promises in Nodejs/Request/MongoDB

Submitted by 可紊 on 2021-02-11 17:01:54

Question


When I tried to make up to 200,000 POST requests in NodeJS, it failed with errors that look like a heap memory leak (out-of-memory crashes).

In each POST request, I want to insert the resolved data into a local MongoDB instance.

Making 2,000 requests at a time works fine, but 200,000 at once is really difficult to deal with. I'm stuck on this problem and don't know exactly how to resolve it.

I really need your help or any suggestions.

Thank you in advance for your help.

    const mongoose = require('mongoose');
    const request = require('request');

    // DB connection
    mongoose
        .connect("mongodb://localhost:27017/test?retryWrites=true&w=majority", { useNewUrlParser: true, useUnifiedTopology: true })
        .then(() => console.log('Connected!'))
        .catch(err => console.error('Could not connect...', err));

    // Initialize the Mongoose model
    const Sample = mongoose.model(
        'Sample',
        new mongoose.Schema({}, { strict: false, versionKey: false }),
        'sample_collection'
    );

    // Insert data into Sample (fire-and-forget; note that
    // collection.insert is deprecated in newer drivers in favor of insertMany)
    const insertDataIntoSample = function (means) {
        Sample.collection.insert(means, { ordered: false });
    };

    // HTTP POST request to get data
    const getDataFromInternet = function (param) {
        return new Promise((resolve, reject) => {
            request.post(
                'https://url-to-post-data.com/',
                { json: { 'query': param } },
                function (error, response, body) {
                    if (!error && response.statusCode == 200 && body) {
                        insertDataIntoSample(body.data);
                        resolve(param);
                    } else {
                        // settle the promise on failure too, so callers never hang
                        reject(error || new Error('Request failed: ' + (response && response.statusCode)));
                    }
                }
            );
        });
    };

    // Kick off all 200,000 requests at once
    var myParams = [...] // 200,000 elements
    for (var i = 0; i < myParams.length; i++) {
        getDataFromInternet(myParams[i]).then(function (data) {
            console.log(data)
        })
    }

Answer 1:


So, it's just downright counterproductive to submit 200,000 requests at a time to your database. There's no way your database can actually work on more than a few requests at a time anyway, so all you accomplish by putting that many requests in flight at once is an enormous amount of peak memory usage.

With a little testing, you can figure out approximately how many simultaneous requests are still efficient; it depends entirely upon your database and its configuration. A big-iron database server might have access to lots of CPUs/threads and maybe even some efficient disk partitioning, and so be able to make progress on a number of requests at a time. A smaller configuration might not gain anything beyond just a couple of requests in flight at a time.

There are several dozen options here on Stack Overflow and elsewhere for calling an asynchronous function over an array while keeping only N requests in flight at the same time. That's probably the general concept you want here. Libraries such as Bluebird and Async-Promises have functions built in to manage concurrent access; a Bluebird sketch follows.
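
For example, Bluebird's Promise.map() takes a concurrency option (a minimal sketch, assuming Bluebird is installed; the value 10 is just an illustration to tune, not a recommendation):

    const Promise = require('bluebird');

    // At most 10 requests in flight at a time; resolves with the
    // results in the same order as myParams.
    Promise.map(myParams, (param) => getDataFromInternet(param), { concurrency: 10 })
        .then((results) => console.log('done:', results.length))
        .catch((err) => console.error('a request failed:', err));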

One of my simple favorites (just a function you can copy) is called mapConcurrent(). You pass it the array, the max number of requests you want in progress at a time, and a promise-returning function that it will call for every item in the array.

You run experiments with your configuration to see what the optimal value for maxConcurrent is (hint: it's probably a fairly small number, like under 10).

    // takes an array of items and a function that returns a promise
    function mapConcurrent(items, maxConcurrent, fn) {
        let index = 0;
        let inFlightCntr = 0;
        let doneCntr = 0;
        let results = new Array(items.length);
        let stop = false;

        return new Promise(function(resolve, reject) {

            function runNext() {
                let i = index;
                ++inFlightCntr;
                fn(items[index], index++).then(function(val) {
                    ++doneCntr;
                    --inFlightCntr;
                    results[i] = val;
                    run();
                }, function(err) {
                    // set flag so we don't launch any more requests
                    stop = true;
                    reject(err);
                });
            }

            function run() {
                // launch as many as we're allowed to
                while (!stop && inFlightCntr < maxConcurrent && index < items.length) {
                    runNext();
                }
                // if all are done, then resolve parent promise with results
                if (doneCntr === items.length) {
                    resolve(results);
                }
            }

            run();
        });
    }
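
Plugged into the question's code, usage would look something like this (a maxConcurrent of 5 is just an assumed starting point to tune from):

    var myParams = [...]; // 200,000 elements
    mapConcurrent(myParams, 5, getDataFromInternet)
        .then(function (results) {
            console.log('all requests finished:', results.length);
        })
        .catch(function (err) {
            console.error('stopped after a failed request:', err);
        });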

There are some other options covered in this answer: Batching asynchronous operations. A plain-promise batching sketch is below.
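
For reference, the simplest of those options is plain batching with Promise.all(): split the array into chunks and await each chunk before starting the next. This is a minimal sketch (the helper name and the chunk size of 10 are assumptions); note that, unlike mapConcurrent(), one slow request stalls its entire batch:

    // Plain-promise batching: process the array in sequential chunks.
    // A chunk size of 10 is an assumption; tune it like maxConcurrent.
    async function processInBatches(items, batchSize, fn) {
        const results = [];
        for (let i = 0; i < items.length; i += batchSize) {
            const batch = items.slice(i, i + batchSize).map(fn);
            results.push(...(await Promise.all(batch)));
        }
        return results;
    }

    // e.g. processInBatches(myParams, 10, getDataFromInternet)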



Source: https://stackoverflow.com/questions/60481903/memory-leak-when-calling-too-many-promises-in-nodejs-request-mongodb
