Reading a file in real-time using Node.js

自古美人都是妖i 提交于 2019-11-28 06:33:49
hasanyasin

If you want to keep the file as a persistent store of your data, so that the stream is not lost if the system crashes or one of the processes in your pipeline dies, you can keep writing to the file and reading from it.

If you do not need the file as persistent storage for the results produced by your Java process, a Unix socket is a much better choice, both for simplicity and for performance.

fs.watchFile() is not what you need: it works on the file stats reported by the filesystem, whereas you want to read the file's contents while it is still being written.

SHORT UPDATE: I am sorry to realize that, although I criticized fs.watchFile() in the previous paragraph for relying on file stats, I did the very same thing myself in the example code below! I had already warned readers to "take care!" because I wrote it in just a few minutes without testing it well; still, it can be done better by using fs.watch() instead of watchFile or fstatSync, if the underlying system supports it.

For reading from and writing to a file, I wrote the code below for fun during my break:

test-fs-writer.js: [You will not need this since you write file in your Java process]

// Demo writer: once the append-mode stream is open, adds one numbered
// "<n> oi!" line to test-read-write.txt every two seconds.
var fs = require('fs'),
    lineno = 0;

var stream = fs.createWriteStream('test-read-write.txt', { flags: 'a' });

// Bump the line counter and append the next line to the stream.
function appendNextLine() {
    lineno += 1;
    stream.write(lineno + ' oi!\n');
}

stream.on('open', function onStreamOpen() {
    console.log('Stream opened, will start writing in 2 secs');
    setInterval(appendNextLine, 2000);
});

test-fs-reader.js: [Take care, this is just demonstration, check err objects!]

// Shared state for the demo reader:
//   bite_size - number of bytes requested per fs.read call
//   readbytes - file offset of the next unread byte
//   file      - descriptor of the opened file, set once fs.open succeeds
var fs = require('fs'),
    bite_size = 256,
    readbytes = 0,
    file;

// Open the file read-only and start the polling loop.
fs.open('test-read-write.txt', 'r', function(err, fd) {
    if (err) {
        // Fail with the real cause (e.g. ENOENT) instead of letting
        // readsome() call fstatSync on an undefined descriptor.
        throw err;
    }
    file = fd;
    readsome();
});

/**
 * Poll the open descriptor `file` and read the next chunk, if any.
 *
 * When the file holds no bytes beyond `readbytes`, logs a message and retries
 * in 3 seconds. Otherwise requests up to `bite_size` bytes starting at offset
 * `readbytes` and hands the result to `processsome`.
 */
function readsome() {
    var stats = fs.fstatSync(file); // yes sometimes async does not make sense!

    if (stats.size <= readbytes) {
        console.log('Hehe I am much faster than your writer..! I will sleep for a while, I deserve it!');
        setTimeout(readsome, 3000);
        return;
    }

    // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor and
    // always returns zero-filled memory.
    fs.read(file, Buffer.alloc(bite_size), 0, bite_size, readbytes, processsome);
}

/**
 * fs.read callback: print the chunk that was just read, advance the read
 * offset, and schedule the next poll.
 *
 * @param {Error|null} err - read error, if any
 * @param {number} bytecount - number of valid bytes at the start of `buff`
 * @param {Buffer} buff - buffer that was passed to fs.read
 * @throws {Error} rethrows `err` when the read failed
 */
function processsome(err, bytecount, buff) {
    if (err) {
        // Surface the real I/O error; otherwise the code below fails with an
        // unrelated TypeError on the undefined buffer.
        throw err;
    }

    console.log('Read', bytecount, 'and will process it now.');

    // Here we will process our incoming data:
    // Do whatever you need. Just be careful about not using beyond the bytecount in buff.
    console.log(buff.toString('utf-8', 0, bytecount));

    // So we continue reading from where we left:
    readbytes += bytecount;
    process.nextTick(readsome);
}

You can safely avoid using nextTick and call readsome() directly instead. Since we are still working sync here, it is not necessary in any sense. I just like it. :p

EDIT by Oliver Lloyd

Taking the example above but extending it to read CSV data gives:

var lastLineFeed,      // byte offset of the last '\n' in the most recent chunk, or -1
    lineArray,         // complete lines decoded from the most recent chunk
    valueArray = [];   // every parsed row so far; each row is an array of field strings

/**
 * fs.read callback for CSV data: parse every complete line in the chunk into
 * an array of comma-separated fields, append the rows to `valueArray`, and
 * move `readbytes` just past the last complete line so that a trailing
 * partial line is read again on the next pass.
 *
 * @param {Error|null} err - read error, if any
 * @param {number} bytecount - number of valid bytes at the start of `buff`
 * @param {Buffer} buff - buffer that was passed to fs.read
 * @throws {Error} rethrows `err` when the read failed
 */
function processsome(err, bytecount, buff) {
    if (err) {
        throw err;
    }

    // Search the raw bytes rather than the decoded string: `readbytes` is a
    // byte offset, and a character index differs from it as soon as the chunk
    // contains a multi-byte UTF-8 character.
    lastLineFeed = buff.subarray(0, bytecount).lastIndexOf(0x0a);

    if (lastLineFeed > -1) {
        // Decode only the complete lines (everything before the last '\n').
        lineArray = buff.toString('utf-8', 0, lastLineFeed).split('\n');

        // Split each line by comma and keep the row for use elsewhere.
        lineArray.forEach(function (line) {
            valueArray.push(line.split(','));
        });

        // Set a new position to read from: the byte after the last line feed.
        readbytes += lastLineFeed + 1;
    } else {
        // No complete lines were read.
        // NOTE(review): this skips the bytes of the partial line, so its start
        // is never parsed. Kept as in the original because not advancing would
        // stall on a line longer than one chunk.
        readbytes += bytecount;
    }

    // Continue with the polling function of the reader example this extends.
    process.nextTick(readsome);
}

Why do you think tail -f is a hack?

While figuring this out I found a good example; I would do something similar. Real-time online activity monitor example with node.js and WebSocket:
http://blog.new-bamboo.co.uk/2009/12/7/real-time-online-activity-monitor-example-with-node-js-and-websocket

Just to make this answer complete, I wrote some example code that runs under Node 0.8.0 (the HTTP server is maybe a bit of a hack).

A child process running tail is spawned, and since a child process is an EventEmitter with three streams (we use stdout in our case), you can just add a listener with on().

filename: tailServer.js

usage: node tailServer /var/log/filename.log

// tailServer.js: forwards the output of `tail -f <filename>` to every HTTP
// client connected on port 8088.
var http = require("http");
var spawn = require('child_process').spawn;

var filename = process.argv[2];

if (!filename) {
    // Exit explicitly with a failure status; a bare top-level `return` is only
    // legal inside the CommonJS module wrapper.
    console.error("Usage: node tailServer filename");
    process.exit(1);
}

// A single tail process is shared by all clients.
var tail = spawn('tail', ['-f', filename]);

// Without this handler a failed spawn (e.g. `tail` not installed) is thrown
// as an unhandled 'error' event.
tail.on('error', function (err) {
    console.error('Could not start tail:', err.message);
    process.exit(1);
});

http.createServer(function (request, response) {
    console.log('request starting...');

    response.writeHead(200, {'Content-Type': 'text/plain' });

    // Write the raw chunk: converting each chunk to a string on its own can
    // corrupt a multi-byte character that straddles two chunks.
    function forwardChunk(data) {
        response.write(data);
    }

    tail.stdout.on('data', forwardChunk);

    // Detach when the connection goes away. Otherwise every request leaves a
    // listener behind that keeps writing to a closed response.
    response.on('close', function () {
        tail.stdout.removeListener('data', forwardChunk);
    });
}).listen(8088);

console.log('Server running at http://127.0.0.1:8088/');

This module is an implementation of the principle @hasanyasin suggests:

https://github.com/felixge/node-growing-file

I took the answer from @hasanyasin and wrapped it up into a modular promise. The basic idea is that you pass a file and a handler function that does something with the stringified buffer that is read from the file. If the handler function returns true, the file will stop being read. You can also set a timeout that stops the reading if the handler doesn't return true fast enough.

The promise returned by the promiser resolves to true if resolve() was called because of the timeout; otherwise it resolves to false.

See the bottom for usage example.

// https://stackoverflow.com/a/11233045

var fs = require('fs');
var StringDecoder = require('string_decoder').StringDecoder;
var Promise = require('promise');

/**
 * Reads a file that may still be growing, chunk by chunk, and feeds every
 * decoded chunk to a handler until the handler reports it is done or an
 * optional timeout elapses.
 */
class liveReaderPromiseMe {
    /**
     * @param {number} file - open file descriptor (used with fs.fstatSync / fs.read)
     * @param {function(string): *} buffStringHandler - receives each decoded
     *     chunk; a truthy return value (or a promise for one) stops the reading
     * @param {Object} [opts]
     * @param {number} [opts.starting_position=0] - byte offset to start reading at
     * @param {number} [opts.byte_size=256] - maximum number of bytes per read
     * @param {number} [opts.check_for_bytes_every_ms=3000] - polling interval
     *     used while no unread bytes are available
     * @param {?number} [opts.no_handler_resolution_timeout_ms=null] - give up
     *     after this many milliseconds; null or 0 means never
     * @throws {Error} when `file` or `buffStringHandler` is null or undefined
     */
    constructor(file, buffStringHandler, opts) {
        if (file == null) {
            throw new Error("file arg must be present");
        }
        if (buffStringHandler == null) {
            throw new Error("buffStringHandler arg must be present");
        }

        const options = (opts == null) ? {} : opts;
        const pick = (value, fallback) => (value == null ? fallback : value);

        this.file = file;
        this.buffStringHandler = buffStringHandler;
        this.current_position = pick(options.starting_position, 0);
        this.byte_size = pick(options.byte_size, 256);
        this.check_for_bytes_every_ms = pick(options.check_for_bytes_every_ms, 3000);
        this.no_handler_resolution_timeout_ms = pick(options.no_handler_resolution_timeout_ms, null);

        // Stateful decoder: holds back the leading bytes of a multi-byte UTF-8
        // character that is split across two reads instead of emitting U+FFFD.
        this._decoder = new StringDecoder('utf8');
    }


    /** Arm the give-up timer once, if a timeout was configured. */
    startHandlerTimeout() {
        if (this.no_handler_resolution_timeout_ms && (this._handlerTimer == null)) {
            this._handlerTimer = setTimeout(
                () => {
                    this._is_handler_timed_out = true;
                },
                this.no_handler_resolution_timeout_ms
            );
        }
    }

    /** Cancel the give-up timer (if armed) and reset the timed-out flag. */
    clearHandlerTimeout() {
        if (this._handlerTimer != null) {
            clearTimeout(this._handlerTimer);
            this._handlerTimer = null;
        }
        this._is_handler_timed_out = false;
    }

    /** @returns {boolean} true once the give-up timer has fired */
    isHandlerTimedOut() {
        return !!this._is_handler_timed_out;
    }


    /**
     * fs.read callback: advance the position, decode the chunk, pass it to the
     * handler, then either settle the promise or schedule the next read.
     */
    fsReadCallback(err, bytecount, buff) {
        if (err) {
            this.reject(err);
            return;
        }

        let buff_str;
        try {
            this.current_position += bytecount;
            buff_str = this._decoder.write(buff.subarray(0, bytecount));
        } catch (decode_err) {
            this.reject(decode_err);
            return;
        }

        // Going through a promise chain lets the handler be sync or async, and
        // turns anything it throws into a rejection.
        Promise.resolve()
            .then(() => this.buffStringHandler(buff_str))
            .then((is_handler_resolved) => {
                if (is_handler_resolved) {
                    this.resolve(false);
                } else {
                    process.nextTick(() => this.doReading());
                }
            })
            .catch((handler_err) => {
                this.reject(handler_err);
            });
    }

    /** Read `bytecount` bytes at the current position into a fresh buffer. */
    fsRead(bytecount) {
        fs.read(
            this.file,
            Buffer.alloc(bytecount), // replaces the deprecated `new Buffer(size)`
            0,
            bytecount,
            this.current_position,
            (err, bytes_read, buff) => this.fsReadCallback(err, bytes_read, buff)
        );
    }

    /**
     * One polling step: resolve(true) if timed out, read if there are unread
     * bytes, otherwise check again after `check_for_bytes_every_ms`.
     */
    doReading() {
        // This also runs from timer / nextTick callbacks, where a thrown error
        // (e.g. fstatSync on a closed descriptor) would otherwise be uncaught
        // and leave the promise pending forever.
        try {
            if (this.isHandlerTimedOut()) {
                this.resolve(true);
                return;
            }

            const max_next_bytes = fs.fstatSync(this.file).size - this.current_position;

            // `> 0` rather than truthiness: the difference is negative when the
            // start position lies beyond EOF or the file was truncated, and a
            // negative length must not be passed on to fs.read.
            if (max_next_bytes > 0) {
                this.fsRead(Math.min(this.byte_size, max_next_bytes));
            } else {
                setTimeout(() => this.doReading(), this.check_for_bytes_every_ms);
            }
        } catch (err) {
            this.reject(err);
        }
    }


    /**
     * Start reading.
     *
     * @returns {Promise<boolean>} resolves to true when reading stopped
     *     because of the timeout and to false when the handler asked to stop;
     *     rejects on read errors or errors thrown by the handler
     */
    promiser() {
        return new Promise((resolve, reject) => {
            this.resolve = resolve;
            this.reject = reject;
            this.doReading();
            this.startHandlerTimeout();
        }).then(
            (was_resolved_by_timeout) => {
                this.clearHandlerTimeout();
                return was_resolved_by_timeout;
            },
            (err) => {
                // Clear the timer on failure too, so it cannot keep the event
                // loop alive after the promise has settled.
                this.clearHandlerTimeout();
                throw err;
            }
        );
    }
}


module.exports = function(file, buffStringHandler, opts) {
    try {
        var live_reader = new liveReaderPromiseMe(file, buffStringHandler, opts);
        return live_reader.promiser();
    } catch(err) {
        return Promise.reject(err);
    }
};

Then use the above code like this:

var fs = require('fs');
var path = require('path');
var Promise = require('promise');
var liveReadAppendingFilePromiser = require('./path/to/liveReadAppendingFilePromiser');

// Usage demo: open the same file once for appending and once for reading,
// start the live reader, then append two lines while it is running.
var ending_str = '_THIS_IS_THE_END_';
var test_path = path.join('E:/tmp/test.txt');

// Chunks received so far, in order.
var s_list = [];

// Collects every chunk and asks the reader to stop once the end marker has
// shown up anywhere in the text read so far.
var buffStringHandler = function(s) {
    s_list.push(s);
    var tmp = s_list.join('');
    if (-1 !== tmp.indexOf(ending_str)) {
        // if this return never occurs, then the file will be read until no_handler_resolution_timeout_ms
        // by default, no_handler_resolution_timeout_ms is null, so read will continue forever until this function returns something that evaluates to true
        return true;
        // you can also return a promise:
        //  return Promise.resolve().then(function() { return true; } );
    }
};

var appender = fs.openSync(test_path, 'a');
try {
    var reader = fs.openSync(test_path, 'r');
    try {
        var options = {
            starting_position: 0,
            byte_size: 256,
            check_for_bytes_every_ms: 3000,
            no_handler_resolution_timeout_ms: 10000,
        };

        liveReadAppendingFilePromiser(reader, buffStringHandler, options)
        .then(function(did_reader_time_out) {
            console.log('reader timed out: ', did_reader_time_out);
            console.log(s_list.join(''));
        }).catch(function(err) {
            console.error('bad stuff: ', err);
        }).then(function() {
            // Runs after success and after failure: release both descriptors.
            fs.closeSync(appender);
            fs.closeSync(reader);
        });

        // writeSync instead of callback-less fs.write: current Node versions
        // throw when the callback is missing, and issuing several fs.write
        // calls on one descriptor without waiting for each callback is unsafe.
        fs.writeSync(appender, '\ncheck it out, I am a string');
        fs.writeSync(appender, '\nwho killed kenny');
        // Uncomment to let the handler finish before the timeout:
        //fs.writeSync(appender, ending_str);
    } catch(err) {
        fs.closeSync(reader);
        console.log('err1');
        throw err;
    }
} catch(err) {
    fs.closeSync(appender);
    console.log('err2');
    throw err;
}
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!