I want to make a HTTP-request using node.js to load some text from a webserver. Since the response can contain much text (some Megabytes) I want to process each text chunk s
var fs = require("fs");
function readFileLineByLine(filename, processline) {
var stream = fs.createReadStream(filename);
var s = "";
stream.on("data", function(data) {
s += data.toString('utf8');
var lines = s.split("\n");
for (var i = 0; i < lines.length - 1; i++)
processline(lines[i]);
s = lines[lines.length - 1];
});
stream.on("end",function() {
var lines = s.split("\n");
for (var i = 0; i < lines.length; i++)
processline(lines[i]);
});
}
var linenumber = 0;
readFileLineByLine(filename, function(line) {
console.log(++linenumber + " -- " + line);
});
If you have a single Buffer
you can use its toString method that will convert all or part of the binary contents to a string using a specific encoding. It defaults to utf8
if you don't provide a parameter, but I've explicitly set the encoding in this example.
var req = http.request(reqOptions, function(res) {
...
res.on('data', function(chunk) {
var textChunk = chunk.toString('utf8');
// process utf8 text chunk
});
});
If you have streamed buffers like in the question above where the first byte of a multi-byte UTF8
-character may be contained in the first Buffer
(chunk) and the second byte in the second Buffer
then you should use a StringDecoder. :
var StringDecoder = require('string_decoder').StringDecoder;
var req = http.request(reqOptions, function(res) {
...
var decoder = new StringDecoder('utf8');
res.on('data', function(chunk) {
var textChunk = decoder.write(chunk);
// process utf8 text chunk
});
});
This way bytes of incomplete characters are buffered by the StringDecoder
until all required bytes were written to the decoder.