Node.js: how to synchronously read a large file line by line?

后端 未结 5 1130
没有蜡笔的小新
没有蜡笔的小新 2021-01-02 07:54

I have a large UTF-8 file. I know fs.createReadStream can create a stream to read a large file, but it is not synchronous. So I tried to use fs.readSync, b…

相关标签:
5条回答
  • 2021-01-02 08:11

    use https://github.com/nacholibre/node-readlines

    var LineReader = require('n-readlines');
    var reader = new LineReader('./textFile.txt');

    // Pull lines one at a time; the loop stops as soon as next() hands back
    // a falsy value.
    var index = 0;
    var current = reader.next();
    while (current) {
        console.log('Line ' + index + ': ' + current.toString('ascii'));
        index += 1;
        current = reader.next();
    }

    console.log('end of line reached');
    
    0 讨论(0)
  • 2021-01-02 08:19

    I built a simpler version of JB Kohn's answer that uses split() on the buffer. It works on the larger files I tried.

    /*
     * Synchronously call fn(text, lineNum) on each line read from file descriptor fd.
     *
     * fd - file descriptor opened for reading; it is read sequentially from its
     *      current position until fs.readSync reports 0 bytes.
     * fn - callback invoked as fn(text, lineNum): text is the line decoded as
     *      UTF-8 without its trailing '\n', lineNum counts up from 0.
     *
     * Lines are split on the '\n' byte BEFORE decoding, so a multi-byte UTF-8
     * character that straddles two reads is never decoded in halves. A final
     * line without a trailing '\n' is still reported; an empty trailing
     * fragment is not. Returns nothing; errors thrown by fs.readSync or fn
     * propagate to the caller.
     */
    function forEachLine (fd, fn) {
        var bufSize = 64 * 1024;
        var buf = Buffer.alloc(bufSize);    // new Buffer(size) is deprecated
        var leftOver = Buffer.alloc(0);     // raw bytes of the still-unfinished line
        var lineNum = 0;
        var n, chunk, start, idx, piece;

        while ((n = fs.readSync(fd, buf, 0, bufSize, null)) !== 0) {
            chunk = buf.subarray(0, n);
            start = 0;
            // 0x0a ('\n') never occurs inside a multi-byte UTF-8 sequence,
            // so cutting the raw bytes there is always safe.
            while ((idx = chunk.indexOf(0x0a, start)) !== -1) {
                piece = chunk.subarray(start, idx);
                if (leftOver.length > 0) {  // prepend fragment from previous read(s)
                    piece = Buffer.concat([leftOver, piece]);
                    leftOver = Buffer.alloc(0);
                }
                fn(piece.toString('utf8'), lineNum);
                lineNum++;
                start = idx + 1;
            }
            if (start < n) {
                // Buffer.concat copies, so the fragment survives buf being
                // overwritten by the next read.
                leftOver = Buffer.concat([leftOver, chunk.subarray(start)]);
            }
        }
        if (leftOver.length > 0) {          // process any remaining line
            fn(leftOver.toString('utf8'), lineNum);
        }
    }
    
    0 讨论(0)
  • 2021-01-02 08:26

    Two potential problems:

    1. You did not skip the 3-byte BOM at the beginning of the file.
    2. The first 4 bytes may not decode to whole UTF-8 characters (UTF-8 is not a fixed-length encoding).
    0 讨论(0)
  • 2021-01-02 08:30

    Use readFileSync:

    fs.readFileSync(filename, [encoding]) Synchronous version of fs.readFile. Returns the contents of the filename.

    If encoding is specified then this function returns a string. Otherwise it returns a buffer.

    On a side note, since you are using node, I'd recommend using asynchronous functions.

    0 讨论(0)
  • 2021-01-02 08:34

    For large files, readFileSync can be inconvenient, as it loads the whole file in memory. A different synchronous approach is to iteratively call readSync, reading small bits of data at a time, and processing the lines as they come. The following bit of code implements this approach and synchronously processes one line at a time from the file 'test.txt':

    // Synchronously reads 'test.txt' in fixed-size chunks and logs each line.
    var fs = require('fs');
    var StringDecoder = require('string_decoder').StringDecoder;
    var filename = 'test.txt';

    var fd = fs.openSync(filename, 'r');
    var bufferSize = 1024;
    var buffer = Buffer.alloc(bufferSize);    // new Buffer(size) is deprecated
    // The decoder holds back an incomplete multi-byte UTF-8 sequence at the end
    // of a chunk and completes it with the next chunk, instead of emitting
    // replacement characters.
    var decoder = new StringDecoder('utf8');

    var leftOver = '';
    var read, line, idxStart, idx;
    try {
      while ((read = fs.readSync(fd, buffer, 0, bufferSize, null)) !== 0) {
        leftOver += decoder.write(buffer.subarray(0, read));
        idxStart = 0;
        while ((idx = leftOver.indexOf("\n", idxStart)) !== -1) {
          line = leftOver.substring(idxStart, idx);
          console.log("one line read: " + line);
          idxStart = idx + 1;
        }
        // Keep the unfinished tail for the next iteration.
        leftOver = leftOver.substring(idxStart);
      }
      // Flush any bytes the decoder is still holding, then report a final line
      // that has no trailing newline (previously it was silently dropped).
      leftOver += decoder.end();
      if (leftOver.length > 0) {
        console.log("one line read: " + leftOver);
      }
    } finally {
      // Release the descriptor even if reading or logging throws.
      fs.closeSync(fd);
    }
    
    0 讨论(0)
提交回复
热议问题