Question
I've read through a number of SO questions about nodejs running out of memory, but I haven't seen anything that sounds similar to my situation.
I'm trying to process about 20 GB of data across 250 csv files (so ~80 MB per file). I launch the node script with --max-old-space-size=8192 on a server with 90 GB of free memory, using node v5.9.1. After 9 minutes of processing, the script quits with an out-of-memory error.
I'm new to Node programming, but I thought I wrote the script to process data one line at a time and not keep anything in memory. Yet it seems something is holding on to object references, so the script is leaking memory. Here's the full script:
var fs = require('fs');
var readline = require('readline');
var mongoose = require('mongoose');

mongoose.connect('mongodb://buzzard/xtra');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error:'));

var DeviceSchema = new mongoose.Schema({
    _id: String,
    serial: String
});

var Device = mongoose.model('Device', DeviceSchema, 'devices');

// Parse one csv line and save the 9th column as a device serial.
function processLine(line) {
    var serial = line.split(',')[8];
    new Device({
        _id: serial,
        serial: serial
    }).save(function (err) {
        if (err) return console.error(err);
    });
}

// Stream a single file line by line.
function processFile(baseDir, fileName) {
    if (!fileName.startsWith('qcx3'))
        return;

    var fullPath = `${baseDir}/${fileName}`;
    var lineReader = readline.createInterface({
        input: fs.createReadStream(fullPath)
    });
    lineReader.on('line', processLine);
}

// Walk the directory tree, recursing into subdirectories.
function findFiles(rootDir) {
    fs.readdir(rootDir, function (error, files) {
        if (error) {
            console.log(`Error: ${error}`);
            return;
        }

        files.forEach(function (file) {
            if (file.startsWith('.'))
                return;

            var fullPath = `${rootDir}/${file}`;
            fs.stat(fullPath, function (error, stat) {
                if (error) {
                    console.log(`Error: ${error}`);
                    return;
                }
                if (stat.isDirectory())
                    findFiles(fullPath);
                else
                    processFile(rootDir, file);
            });
        });
    });
}

findFiles('c://temp/logs/compress');
I also noticed that when I run the script on a much smaller test set, one it can finish processing completely, the script doesn't exit at the end. It just hangs there until I ctrl+c it. Could this be related somehow?
What am I doing wrong?
Answer 1:
The script is not exiting because you have an open mongoose connection; after all the files have been processed, you should close the connection and the script will finish.
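A minimal sketch of the idea, assuming you have some way of knowing all work is finished (the allDone hook below is hypothetical; you would call it after the last lineReader has closed and the last save callback has fired):

// Hypothetical hook: call this once every lineReader has emitted 'close'
// and every pending save() callback has run.
function allDone() {
    mongoose.disconnect(function (err) {
        if (err) console.error(err);
        // With no open handles left, node exits on its own.
    });
}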
You have the right idea of using streams, but I think you missed something along the way. I suggest the following article to brush up on the stream interface and its events: https://coderwall.com/p/ohjerg/read-large-text-files-in-nodejs
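One common fix along those lines (a sketch of mine, not taken from the article) is to add backpressure: pause the readline interface while each save is in flight and resume it from the save callback, so unfinished inserts cannot pile up in memory:

// Sketch: pause the reader while a save is pending so 'line' events
// cannot outrun mongodb. Note that pause() may still deliver a few
// already-buffered lines, but it caps the number of queued saves.
lineReader.on('line', function (line) {
    lineReader.pause();
    var serial = line.split(',')[8];
    new Device({ _id: serial, serial: serial }).save(function (err) {
        if (err) console.error(err);
        lineReader.resume();
    });
});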
Another possible source of the problem could be mongodb: it seems you make a lot of inserts, and the memory exhaustion could be related to mongodb's maximum i/o, with inserts queuing up faster than they can be written.
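If that is the case, batching the writes would reduce the number of in-flight operations. A rough sketch, assuming a mongoose version that supports Model.insertMany (4.4+); you would still need to flush the final partial batch when a file ends:

// Sketch: buffer parsed lines and flush them 1000 at a time, so there
// is one insertMany round trip instead of one save() per line.
var batch = [];
function processLine(line) {
    var serial = line.split(',')[8];
    batch.push({ _id: serial, serial: serial });
    if (batch.length >= 1000) {
        var docs = batch;
        batch = [];
        Device.insertMany(docs, function (err) {
            if (err) console.error(err);
        });
    }
}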
Source: https://stackoverflow.com/questions/36492268/nodejs-running-out-of-memory-processing-csv-files