Node reading file in specified chunk size

后端 未结 3 1797
野性不改
野性不改 2020-12-30 10:22

The goal: Upload large files to AWS Glacier without holding the whole file in memory.

I'm currently uploading to Glacier using fs.readFileSync() and things are

3条回答
  •  青春惊慌失措
    2020-12-30 10:56

    Based on mscdex's answer, here's a module that uses the synchronous alternative together with a StringDecoder to decode UTF-8 correctly.

    The problem with a readable stream is that, in order to use it, you have to convert the entire project to use async emitters and callbacks. If you're coding something simple, like a small CLI in Node.js, that doesn't make sense.

    //usage
    // Print ./myfile.txt piece by piece, requesting at most 1024 bytes per read.
    // The reader flips isOpen to false by itself once the end of the file is hit.
    const reader = new UTF8FileReader()
    reader.open('./myfile.txt', 1024)
    while ( reader.isOpen ) {
        console.log(reader.readChunk())
    }
    
    
    //--------------------
    // UTF8FileReader.ts
    //--------------------
    import * as fs from 'fs';
    import { StringDecoder, NodeStringDecoder } from "string_decoder";
    
    /**
     * Synchronous, chunked reader that decodes a file as UTF-8.
     *
     * Bytes are pulled from the file with fs.readSync in pieces of at most
     * `chunkSize` bytes and pushed through a StringDecoder, so a multi-byte
     * character that straddles two chunks is never split: its leading bytes are
     * held back by the decoder and emitted with the following chunk.
     *
     * Typical use: call open(), then call readChunk() while `isOpen` is true.
     * The reader closes itself when the end of the file is reached; call close()
     * explicitly (e.g. in a `finally`) when reading is abandoned early or
     * readChunk() throws.
     */
    export class UTF8FileReader {

        /** Path passed to the most recent successful open() ('' before the first one). */
        filename: string = '';
        /** True from a successful open() until close() is called or the end of the file is reached. */
        isOpen: boolean = false;
        /** Maximum number of bytes requested from the file per readChunk() call. */
        private chunkSize: number = 0;
        /** File descriptor returned by fs.openSync (-1 until a file has been opened). */
        private fd: number = -1;
        /** Byte offset in the file at which the next read starts. */
        private readFilePos: number = 0;
        /** Scratch buffer of `chunkSize` bytes that every read fills from offset 0. */
        private readBuffer: Buffer = Buffer.alloc(0);

        /** Stateful decoder that buffers incomplete multi-byte sequences between chunks. */
        private utf8decoder: StringDecoder = new StringDecoder('utf8');

        /**
         * Open `filename` for reading and reset the read position to the start.
         *
         * If this reader already has a file open, that file is closed once the
         * new one has been opened successfully; when opening fails the previous
         * state is left untouched.
         *
         * @param filename path of the file to read
         * @param chunkSize maximum number of bytes to read per readChunk() call (default 16 KiB)
         * @throws RangeError if `chunkSize` is not a positive integer
         * @throws Error if the file cannot be opened
         */
        open(filename: string, chunkSize: number = 16 * 1024): void {

            // A zero-length read is indistinguishable from end-of-file, so a
            // chunk size of 0 would make every file look empty.
            if (!Number.isInteger(chunkSize) || chunkSize < 1) {
                throw new RangeError("chunkSize must be a positive integer, got " + chunkSize);
            }

            // Allocate before opening: if the allocation throws there is no
            // descriptor to leak yet.
            const readBuffer = Buffer.alloc(chunkSize);

            let fd: number;
            try {
                fd = fs.openSync(filename, 'r');
            }
            catch (e) {
                throw new Error("opening " + filename + ", error:" + String(e));
            }

            // Release the descriptor of a previously opened file (no-op when closed).
            this.close();

            this.fd = fd;
            this.filename = filename;
            this.chunkSize = chunkSize;
            this.readBuffer = readBuffer;
            this.readFilePos = 0;
            // Fresh decoder so no partial character from an earlier file carries over.
            this.utf8decoder = new StringDecoder('utf8');
            this.isOpen = true;
        }

        /**
         * Read the next chunk of the file and return it decoded as UTF-8.
         *
         * The returned string can be empty while the file is still open, e.g.
         * when the chunk held only the first bytes of a multi-byte character.
         * When the end of the file is reached the reader closes itself and
         * returns whatever the decoder still had buffered (normally '', or
         * replacement characters for a truncated trailing sequence).
         *
         * @returns the decoded text, or '' if the reader is not open
         * @throws Error if reading from the file fails (the reader stays open)
         */
        readChunk(): string {

            if (!this.isOpen) {
                return '';
            }

            let readByteCount: number;
            try {
                readByteCount = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
            }
            catch (e) {
                throw new Error("reading " + this.filename + ", error:" + String(e));
            }

            if (readByteCount > 0) {
                this.readFilePos += readByteCount;
                // Decode only the bytes just read; the rest of the buffer is stale.
                // StringDecoder.write() decodes and returns the complete characters.
                return this.utf8decoder.write(this.readBuffer.subarray(0, readByteCount));
            }

            // 0 bytes read => end of file. Flush the decoder BEFORE closing so
            // bytes it was still holding back are returned instead of dropped.
            const tail = this.utf8decoder.end();
            this.close();
            return tail;
        }

        /**
         * Close the file. Safe to call when the reader is not open.
         * Any partially decoded character still buffered is discarded.
         */
        close(): void {
            if (!this.isOpen) {
                return;
            }
            fs.closeSync(this.fd);
            this.isOpen = false;
            this.utf8decoder.end();
        }

    }
    

    And here is the transpiled .js code, in case you don't have TypeScript yet:

    // UTF8FileReader.js
    "use strict";
    // Define an `__esModule` property with value true on the CommonJS exports object.
    Object.defineProperty(exports, "__esModule", { value: true });
    // Initialise the export slot to undefined up front; the class is assigned to it at the bottom of this file.
    exports.UTF8FileReader = void 0;
    //--------------------
    // UTF8FileReader
    //--------------------
    const fs = require("fs");
    const string_decoder_1 = require("string_decoder");
    /**
     * Synchronous, chunked reader that decodes a file as UTF-8.
     *
     * Bytes are pulled from the file with fs.readSync in pieces of at most
     * `chunkSize` bytes and pushed through a StringDecoder, so a multi-byte
     * character that straddles two chunks is never split: its leading bytes are
     * held back by the decoder and emitted with the following chunk.
     *
     * Typical use: call open(), then call readChunk() while `isOpen` is true.
     * The reader closes itself when the end of the file is reached; call close()
     * explicitly (e.g. in a `finally`) when reading is abandoned early or
     * readChunk() throws.
     */
    class UTF8FileReader {
        constructor() {
            // True from a successful open() until close() or the end of the file.
            this.isOpen = false;
        }
        /**
         * Open `filename` for reading and reset the read position to the start.
         *
         * If this reader already has a file open, that file is closed once the
         * new one has been opened successfully; when opening fails the previous
         * state is left untouched.
         *
         * @param filename path of the file to read
         * @param chunkSize maximum number of bytes to read per readChunk() call (default 16 KiB)
         * @throws RangeError if `chunkSize` is not a positive integer
         * @throws Error if the file cannot be opened
         */
        open(filename, chunkSize = 16 * 1024) {
            // A zero-length read is indistinguishable from end-of-file, so a
            // chunk size of 0 would make every file look empty.
            if (!Number.isInteger(chunkSize) || chunkSize < 1) {
                throw new RangeError("chunkSize must be a positive integer, got " + chunkSize);
            }
            // Allocate before opening: if the allocation throws there is no
            // descriptor to leak yet.
            const readBuffer = Buffer.alloc(chunkSize);
            let fd;
            try {
                fd = fs.openSync(filename, 'r');
            }
            catch (e) {
                throw new Error("opening " + filename + ", error:" + String(e));
            }
            // Release the descriptor of a previously opened file (no-op when closed).
            this.close();
            this.fd = fd;
            this.filename = filename;
            this.chunkSize = chunkSize;
            this.readBuffer = readBuffer;
            this.readFilePos = 0;
            // Fresh decoder so no partial character from an earlier file carries over.
            this.utf8decoder = new string_decoder_1.StringDecoder('utf8');
            this.isOpen = true;
        }
        /**
         * Read the next chunk of the file and return it decoded as UTF-8.
         *
         * The returned string can be empty while the file is still open, e.g.
         * when the chunk held only the first bytes of a multi-byte character.
         * When the end of the file is reached the reader closes itself and
         * returns whatever the decoder still had buffered (normally '', or
         * replacement characters for a truncated trailing sequence).
         *
         * @returns the decoded text, or '' if the reader is not open
         * @throws Error if reading from the file fails (the reader stays open)
         */
        readChunk() {
            if (!this.isOpen) {
                return '';
            }
            let readByteCount;
            try {
                readByteCount = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
            }
            catch (e) {
                throw new Error("reading " + this.filename + ", error:" + String(e));
            }
            if (readByteCount > 0) {
                this.readFilePos += readByteCount;
                // Decode only the bytes just read; the rest of the buffer is stale.
                // StringDecoder.write() decodes and returns the complete characters.
                return this.utf8decoder.write(this.readBuffer.subarray(0, readByteCount));
            }
            // 0 bytes read => end of file. Flush the decoder BEFORE closing so
            // bytes it was still holding back are returned instead of dropped.
            const tail = this.utf8decoder.end();
            this.close();
            return tail;
        }
        /**
         * Close the file. Safe to call when the reader is not open.
         * Any partially decoded character still buffered is discarded.
         */
        close() {
            if (!this.isOpen) {
                return;
            }
            fs.closeSync(this.fd);
            this.isOpen = false;
            this.utf8decoder.end();
        }
    }
    // Publish the class under the name `UTF8FileReader` on the CommonJS exports object.
    exports.UTF8FileReader = UTF8FileReader;
    

提交回复
热议问题