I would like to retrieve binary data from an https request.
I found a similar question that uses the request method, Getting binary content in Node
Like others here, I needed to process binary data chunks from a Node.js HTTP response (aka http.IncomingMessage).
None of the existing answers really worked for my Electron 6 project (bundled with Node.js 12.4.0, at the time of posting), besides Pärt Johanson's answer and its variants.
Still, even with that solution, the chunks were always arriving at the response.on('data', ondata) handler as string objects (rather than the expected and desired Buffer objects). That required an extra conversion with Buffer.from(chunk, 'binary'). I was getting strings regardless of whether I explicitly specified binary encoding with response.setEncoding('binary') or response.setEncoding(null).
The only way I managed to get the original Buffer chunks was to pipe the response to an instance of stream.Writable where I provide a custom write method:
const https = require('https');
const { Writable } = require('stream');
/**
 * Issues an HTTPS GET for `url` and returns the response body as raw bytes.
 *
 * The response is piped into a `stream.Writable` whose `write` callback
 * collects every chunk it is handed; the chunks are concatenated once the
 * writable finishes.
 *
 * @param {string|URL} url - Address to download from.
 * @returns {Promise<ArrayBuffer>} An ArrayBuffer holding exactly the bytes of
 *   the response body (byteLength equals the body length).
 * @throws Rejects with the error emitted by the request, the response, or the
 *   collecting stream, or with an Error if the response is aborted.
 */
async function getBinaryDataAsync(url) {
  // start HTTP request, wait for the response object
  const response = await new Promise((resolve, reject) => {
    const request = https.request(url, {
      method: 'GET',
      headers: {
        'Accept': 'application/pdf',
        'Accept-Encoding': 'identity',
      },
    });
    request.on('response', resolve);
    request.on('error', reject);
    request.end();
  });

  // read the binary response by piping it to stream.Writable
  const chunks = await new Promise((resolve, reject) => {
    // 'aborted' is emitted without an argument, so build a real Error rather
    // than rejecting with undefined.
    response.on('aborted', () =>
      reject(new Error('Response was aborted before it completed')));
    response.on('error', reject);

    const collected = [];
    const sink = new Writable({
      write: (chunk, encoding, notifyComplete) => {
        collected.push(chunk);
        notifyComplete();
      },
    });
    sink.on('error', reject);
    sink.on('finish', () => resolve(collected));
    response.pipe(sink);
  });

  const body = Buffer.concat(chunks);
  // A Buffer may be a view into a larger (pooled) ArrayBuffer, so returning
  // body.buffer directly could expose unrelated bytes and a wrong byteLength.
  // Copy out only the region this Buffer actually covers.
  return body.buffer.slice(body.byteOffset, body.byteOffset + body.byteLength);
}
/**
 * Demo entry point: downloads the sample PDF through getBinaryDataAsync and
 * prints the byteLength of the value it returns.
 */
async function main() {
  const sampleUrl = 'https://download.microsoft.com/download/8/A/4/8A48E46A-C355-4E5C-8417-E6ACD8A207D4/VisualStudioCode-TipsAndTricks-Vol.1.pdf';
  const pdfBytes = await getBinaryDataAsync(sampleUrl);
  console.log(pdfBytes.byteLength);
}

// Log any failure instead of leaving the rejection unhandled.
main().catch(failure => console.error(failure));
Update: as it turns out, this behavior only manifests with our Web API server. So response.on('data') actually works well for the sample URL I use in the above code snippet, and the stream is not needed for it. It's weird, though, that this is server-specific; I'm investigating it further.