How to perform mass inserts into mongodb using NodeJS

半腔热情 提交于 2019-12-07 23:15:58

Question


I have to insert about 1,000,000 documents into MongoDB using Node.js.

I'm generating these documents using a for loop storing them into an array before finally inserting them into mongodb.

// NOTE(review): this is the problematic code as posted in the question.
// Two defects worth flagging for readers:
//   1. The insert() call sits INSIDE the for loop, so the ever-growing
//      codeArray is re-submitted on every single iteration.
//   2. The closing brace of the for loop is missing in the snippet as
//      posted — the `});` at the end only closes the insert callback.
var codeArray = new Array();
for (var i = 0; i<1000000; i++){
    var token = strNpm.generate();
    var now = moment().format('YYYYMMDD hhmmss');
    // One promo-code document; most date fields start out as the
    // literal string "pending" and are presumably filled in later.
    var doc1 = {id:token,
        Discount_strId:"pending",
        Promotion_strCode:token,
        Promotion_strStatus:"I",
        Promotion_dtmGeneratedDate:now,
        User_strLogin:"test",
        Promotion_strMode:"S",
        Promotion_dtmValidFrom:"pending",
        Promotion_dtmValidTill:"pending",
        LastModified_dtmStamp:now
    };
    codeArray.push(doc1);
    // BUG (see answers below): this should run once, after the loop.
    db.collection('ClPromoCodeMaster').insert(codeArray, function (err, result) {
    if (err){
        console.log(err);
    }else{
        console.log('Inserted Records - ', result.ops.length);
    }
});

The problem I'm facing is that MongoDB has a 16 MB limit per insert request, so I can't insert the entire array at once. Please suggest the most optimal solution.


Answer 1:


The main problem is in the request size and not the document size, but it amounts to the same limitation. Bulk operations and the async library with async.whilst will handle this:

// Stream 1,000,000 generated documents into MongoDB via the ordered
// Bulk API, draining the queue every 1000 operations so no single
// request grows past the server's message-size limit.
var bulk = db.collection('ClPromoCodeMaster').initializeOrderedBulkOp(),
    i = 0;

async.whilst(
  // Keep looping while there are documents left to generate.
  function() { return i < 1000000; },
  function(callback) {
    var token = strNpm.generate();
    var now = moment().format('YYYYMMDD hhmmss');
    var doc = {
      id:token,
      Discount_strId:"pending",
      Promotion_strCode:token,
      Promotion_strStatus:"I",
      Promotion_dtmGeneratedDate:now,
      User_strLogin:"test",
      Promotion_strMode:"S",
      Promotion_dtmValidFrom:"pending",
      Promotion_dtmValidTill:"pending",
      LastModified_dtmStamp:now
    };

    bulk.insert(doc);
    i++;

    // Drain every 1000: execute the batch, then start a fresh bulk op.
    if ( i % 1000 === 0 ) {
      bulk.execute(function(err,response){
        bulk = db.collection('ClPromoCodeMaster').initializeOrderedBulkOp();
        callback(err);
      });
    } else {
        callback();
    }

  },
  function(err) {
    if (err) throw err;
    // FIX: flush any trailing partial batch. The original only executed
    // on multiples of 1000, so if the total count were not an exact
    // multiple the last few documents would silently never be written.
    // (1,000,000 happens to divide evenly, but this makes the pattern
    // safe for any count.)
    if ( i % 1000 !== 0 ) {
      bulk.execute(function(err){
        if (err) throw err;
        console.log("done");
      });
    } else {
      console.log("done");
    }
  }
);

I should note that regardless there is an internal limit on bulk operations to 1000 operations per batch. You can submit in larger sizes, but the driver is just going to break these up and still submit in batches of 1000.

The 1000 is a good number to stay at though, since it is already in line with how the request will be handled, as well as being a reasonable number of things to hold in memory before draining the request queue and sending to the server.




Answer 2:


For inserting millions of records at a time, create a Node.js child process fork that uses the MongoDB bulk API.

Child Process Creation:(index.js)

const {fork} = require("child_process");
let counter = 1;

/**
 * Forks one dbOperation worker, listens for its progress messages, and
 * hands it a single batch descriptor to process.
 * @param {{startCount: number, endCount: number}} batch - record range.
 */
function createProcess(batch) {
    const worker = fork("./dbOperation");
    // Log every message the worker reports, tagged with a running count.
    const onWorkerMessage = (msg) => {
        console.log("Worker Message :", counter, msg);
        counter += 1;
    };
    worker.on("message", onWorkerMessage);
    worker.send(batch);
}

/**
 * Splits `records` into fixed-size batches and spawns one worker
 * process per batch.
 *
 * FIX: the original declared a single `data` object outside the loop
 * and mutated it on every iteration before passing it to
 * createProcess() — a shared-mutation hazard. Build a fresh object per
 * batch instead. (The `(index == 1) ? index : ...` special case was
 * redundant: the general formula already yields 1 for index 1.)
 *
 * @param {number} records - total number of records to insert.
 */
function bulkSaveUser(records) {
    const singleBatchCount = 10000; // Save 10,000 records per hit
    const noOfProcess = Math.ceil(records / singleBatchCount);
    console.log("No of Process :", noOfProcess);
    for (let index = 1; index <= noOfProcess; index++) {
        const data = {
            startCount: (index - 1) * singleBatchCount + 1,
            endCount: index * singleBatchCount,
        };
        createProcess(data);
    }
}


// Kick off the import: 1,500,000 records → 150 batches of 10,000 each.
bulkSaveUser(1500000);

DB Operation (dbOperation.js)

const MongoClient = require('mongodb').MongoClient;
// Collection Name
const collectionName = ""; 
// DB Connection String
const connString = "";

// Each forked worker receives exactly one batch descriptor from the
// parent and inserts the records in its [startCount, endCount] range.
process.on("message", (msg) => {
    console.log("Initialize Child Process", msg);
    inputStudents(msg.startCount, msg.endCount);
});

/**
 * Opens a MongoDB connection wrapped in a Promise.
 *
 * FIX: the original called `e(err)` and then fell through to `r(db)`,
 * so on a connection failure the promise was rejected AND resolved
 * with `undefined` (the resolve is ignored, but the fall-through is a
 * latent bug). Return early after rejecting.
 *
 * @returns {Promise} resolves with the connected client, rejects with
 *   the connection error.
 */
function initConnection() {
    return new Promise(function(resolve, reject) {
        MongoClient.connect(connString, function(err, db) {
            if (err) {
                reject(err);
                return;
            }
            resolve(db);
        });
    });
}

/**
 * Generates fake student records with ids in [startCount, endCount]
 * and writes them to MongoDB in a single bulkWrite batch, then reports
 * back to the parent process.
 *
 * FIX: the original passed the undeclared identifier `colName` to
 * db.collection(), which throws a ReferenceError at runtime — the
 * module-level constant is `collectionName`. Also declares the
 * previously-implicit globals (`j`, `class_id`, `record`) and hoists
 * the loop-invariant `types` array out of the loop.
 *
 * @param {number} startCount - first student_id (inclusive).
 * @param {number} endCount - last student_id (inclusive).
 */
function inputStudents(startCount, endCount) {
    const types = ['exam', 'quiz', 'homework', 'homework'];
    const bulkData = [];
    for (let index = startCount; index <= endCount; index++) {
        const scores = [];
        // and each class has 4 grades
        for (let j = 0; j < 4; j++) {
            scores.push({'type':types[j],'score':Math.random()*100});
        }
        // there are 500 different classes that they can take
        const class_id = Math.floor(Math.random()*501); // get a class id between 0 and 500
        const record = {'student_id':index, 'scores':scores, 'class_id':class_id};
        bulkData.push({ insertOne : { "document" : record } });
    }
    initConnection()
        .then((db) => {
            const studentDb = db.db("student");
            // was `colName` (undeclared) in the original — ReferenceError
            const collection = studentDb.collection(collectionName);
            console.log("Bulk Data :", bulkData.length);
            collection.bulkWrite(bulkData, function(err, res) {
                if (err) throw err;
                process.send("Saved Successfully");
                db.close();
            });
        })
        .catch((err) => { console.log("Err :", err); });
}

Sample project to insert millions of record in mongodb using child process fork



来源:https://stackoverflow.com/questions/32049227/how-to-perform-mass-inserts-into-mongodb-using-nodejs

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!