Getting binary content in node.js with http.request

后端 未结 7 1478
广开言路
广开言路 2020-12-02 16:50

I would like to retrieve binary data from an https request.

I found a similar question that uses the request method, Getting binary content in Node

7条回答
  •  醉话见心
    2020-12-02 17:13

    Like others here, I needed to process binary data chunks from a Node.js HTTP response (aka http.IncomingMessage).

    None of the existing answers really worked for my Electron 6 project (bundled with Node.js 12.4.0, at the time of posting), besides Pärt Johanson's answer and its variants.

    Still, even with that solution, the chunks were always arriving at the response.on('data', ondata) handler as string objects (rather than expected and desired Buffer objects). That incurred extra conversion with Buffer.from(chunk, 'binary'). I was getting strings regardless of whether I explicitly specified binary encoding with response.setEncoding('binary') or response.setEncoding(null).

    The only way I managed to get the original Buffer chunks was to pipe the response to an instance of stream.Writable where I provide a custom write method:

    const https = require('https');
    const { Writable } = require('stream');
    
    /**
     * Downloads `url` over HTTPS with a GET request and returns the raw
     * response body.
     *
     * The response is piped into a custom `stream.Writable` so that every chunk
     * is collected as the original `Buffer`, never as a decoded string.
     *
     * @param {string|URL} url - Address to request; passed to `https.request`.
     * @returns {Promise<ArrayBuffer>} An ArrayBuffer holding exactly the bytes
     *   of the response body (its `byteLength` equals the body length).
     * @throws Rejects with the request/response/stream error, or with an
     *   `Error` if the response is aborted before the body is complete.
     */
    async function getBinaryDataAsync(url) {
      // Start the HTTPS request and wait until the response headers arrive.
      const response = await new Promise((resolve, reject) => {
        const request = https.request(url, {
          method: 'GET',
          headers: {
            'Accept': 'application/pdf',
            // Ask for an uncompressed body so the bytes can be used as-is.
            'Accept-Encoding': 'identity',
          },
        });

        request.on('response', resolve);
        request.on('error', reject);
        request.end();
      });

      // Read the binary body by piping it into a Writable that stores chunks.
      const chunks = await new Promise((resolve, reject) => {
        const received = [];

        // Reject with a real Error so callers never see `undefined` as a reason.
        response.on('aborted', () =>
          reject(new Error('Response aborted before the body was fully received')));
        response.on('error', reject);

        const sink = new Writable({
          write(chunk, encoding, notifyComplete) {
            received.push(chunk);
            notifyComplete();
          },
        });

        sink.on('error', reject);
        sink.on('finish', () => resolve(received));
        response.pipe(sink);
      });

      const body = Buffer.concat(chunks);
      // `body.buffer` is the backing store, which for small Buffers is Node's
      // shared allocation pool and is larger than the body itself. Copy out
      // only this Buffer's own byte range.
      return body.buffer.slice(body.byteOffset, body.byteOffset + body.byteLength);
    }
    
    /**
     * Demo entry point: downloads a sample PDF and prints the size in bytes of
     * the ArrayBuffer returned by getBinaryDataAsync.
     */
    async function main() {
      const sampleUrl = 'https://download.microsoft.com/download/8/A/4/8A48E46A-C355-4E5C-8417-E6ACD8A207D4/VisualStudioCode-TipsAndTricks-Vol.1.pdf';
      const { byteLength } = await getBinaryDataAsync(sampleUrl);
      console.log(byteLength);
    }

    // Report any failure from the demo instead of leaving the rejection unhandled.
    main().catch((failure) => {
      console.error(failure);
    });
    

    Update: as it turns out, this behavior only manifests for our Web API server. So, response.on('data') actually works well for the sample URL I use in the above code snippet, and the stream is not needed for it. It's weird, though, that this is server-specific; I'm investigating it further.

提交回复
热议问题