问题
I'm using node-cron module for scheduling tasks in Node.js application. I also want run the application in several processes using core cluster module.
Running application in several processes ends up in scheduled tasks execution in each process (e.g. if task was to send an email the email would be sent multiple times).
What are the best practices/possible ways of running cron job along with cluster module? Should I create some separate process which will handle only cron job and do not accept any requests. If yes, how can I do that in a right way?
回答1:
If are using PM2,
You can use an environment variable provided by PM2 itself called NODE_APP_INSTANCE
which requires PM2 2.5 or greater.
NODE_APP_INSTANCE
environment variable can be used to determine difference between process, for example you may want to run a cronjob only on one process, you can just do this
if(process.env.NODE_APP_INSTANCE == 0) {
//schedule your cron job here since this part will be executed for only one cluster
}
,
Since two processes can never have the same number.
More Info on PM2 official doc here.
回答2:
After some research I ended up with "Distributed locks using Redis" solution. There is node module for that: node-redis-warlock.
Hope this answer will be useful for someone else.
UPDATE. Minimal sample code:
var Warlock = require('node-redis-warlock'),
redis = require('redis');
// Establish a redis client
redis = redis.createClient();
// and pass it to warlock
var warlock = new Warlock(redis);
function executeOnce (key, callback) {
warlock.lock(key, 20000, function(err, unlock){
if (err) {
// Something went wrong and we weren't able to set a lock
return;
}
if (typeof unlock === 'function') {
setTimeout(function() {
callback(unlock);
}, 1000);
}
});
}
// Executes call back only once
executeOnce('every-three-hours-lock', function(unlock) {
// Do here any stuff that should be done only once...
unlock();
});
UPDATE 2. More detailed example:
const CronJob = require('cron').CronJob;
const Warlock = require('node-redis-warlock');
const redis = require('redis').createClient();
const warlock = new Warlock(redis);
const async = require('async');
function executeOnce (key, callback) {
warlock.lock(key, 20000, function(err, unlock) {
if (err) {
// Something went wrong and we weren't able to set a lock
return;
}
if (typeof unlock === 'function') {
setTimeout(function() {
callback(unlock);
}, 1000);
}
});
}
function everyMinuteJobTasks (unlock) {
async.parallel([
sendEmailNotifications,
updateSomething,
// etc...
],
(err) => {
if (err) {
logger.error(err);
}
unlock();
});
}
let everyMinuteJob = new CronJob({
cronTime: '*/1 * * * *',
onTick: function () {
executeOnce('every-minute-lock', everyMinuteJobTasks);
},
start: true,
runOnInit: true
});
/* Actual tasks */
let sendEmailNotifications = function(done) {
// Do stuff here
// Call done() when finished or call done(err) if error occurred
}
let updateSomething = function(done) {
// Do stuff here
// Call done() when finished or call done(err) if error occurred
}
// etc...
回答3:
I actually do not like the redis approach that is also used in the cron-cluster npm plugin, because I do not want to have that redis server running on my maschine and maintain it, too.
I would like to discuss this approach with you:
Pro: we do not need to use redis Con: cron jobs are always running on the same worker
I use the message passing only for this, if you use it for other things, you want to pass the information that
if (cluster.isMaster) {
// Count the machine's CPUs
var cpuCount = require('os').cpus().length;;
// Create a worker for each CPU
for (var i = 0; i < cpuCount; i += 1) {
cluster.fork();
}
cluster.on('fork', (worker) => {
console.log("cluster forking new worker", worker.id);
});
// have a mainWorker that does the cron jobs.
var mainWorkerId = null;
cluster.on('listening', (worker, address) => {
console.log("cluster listening new worker", worker.id);
if(null === mainWorkerId) {
console.log("Making worker " + worker.id + " to main worker");
mainWorkerId = worker.id;
worker.send({order: "startCron"});
}
});
// Listen for dying workers if the mainWorker dies, make a new mainWorker
cluster.on('exit', function (worker, code, signal) {
console.log('Worker %d died :(', worker.id);
if(worker.id === mainWorkerId) {
console.log("Main Worker is dead...");
mainWorkerId = null;
}
console.trace("I am here");
console.log(worker);
console.log(code);
console.log(signal);
cluster.fork();
});
// Code to run if we're in a worker process
} else {
// other code like setup app and stuff
var doCron = function() {
// setup cron jobs...
}
// Receive messages from the master process.
process.on('message', function(msg) {
console.log('Worker ' + process.pid + ' received message from master.', message);
if(message.order == "startCron") {
doCron();
}
});
}
来源:https://stackoverflow.com/questions/30843605/how-to-run-cron-job-in-node-js-application-that-uses-cluster-module