Using highland.js to work with large files
highland.js is a really useful library for working with data as streams in JavaScript. It lets you express processing as clear, easily understandable chains of transforms applied to the data.
Streams in Node.js are really useful for working with large files. If you try to read a very large file in one go instead of streaming it, you will run into errors, because the whole file has to fit in memory at once.
Here are some example functions that demonstrate Highland working with big files.
For test data I downloaded a dump of stackoverflow.com comments. Unzipped, this file is just over 14GB. I copied it a couple of times to get multiple files.
Setup
const fs = require("fs");
const hl = require('highland');
// Input files consumed by the examples below (all three entries name the same path).
const sourceFiles = [
  './Comments1.xml',
  './Comments1.xml',
  './Comments1.xml',
];
// Output file written by the merge example.
const destFile = './Out.xml';
Merge
/**
 * Concatenates the contents of every file in `sourceFiles` into `destFile`.
 */
function merge() {
  const output = fs.createWriteStream(destFile);

  // Turn each path into a read stream, then flatten all of them into one Highland stream.
  const combined = hl(sourceFiles)
    .map(fs.createReadStream)
    .flatMap((readable) => hl(readable));

  combined.pipe(output);
}
First lines
/**
 * Logs the first line of each file listed in `sourceFiles`.
 */
function firstLines() {
  // Wraps a Node read stream and keeps only its first line.
  const headOf = (fileStream) => hl(fileStream).split().take(1);

  const heads = hl(sourceFiles)
    .map(fs.createReadStream)
    .flatMap(headOf);

  heads.each((line) => {
    console.log(line);
  });
}
First line of single file
/**
 * Logs the first line of a single file to the console.
 *
 * @param {string} [path='./Comments1.xml'] - File to read; the default is the
 *   path this example originally hard-coded.
 */
function firstLine(path = './Comments1.xml') {
  // Highland is bound to `hl` in the setup section; `_` is not defined there.
  hl(fs.createReadStream(path))
    .split()
    .take(1)
    .pull((err, line) => {
      if (err) {
        // Report read failures instead of silently logging `undefined`.
        console.error(err);
        return;
      }
      console.log(line);
    });
}
Count lines
/**
 * Counts the lines in every file listed in `sourceFiles` and logs the
 * combined total. If the stream reports an error, the error is logged instead.
 */
function countLines() {
  hl(sourceFiles)
    .map(fs.createReadStream)
    .flatMap((nodeStream) =>
      // Per-file line count: add one for every line emitted by split().
      hl(nodeStream)
        .split()
        .reduce(0, (total) => total + 1))
    // Sum the per-file counts into a single total.
    .reduce(0, (total, count) => total + count)
    .toCallback((err, result) => {
      if (err) {
        // Report failures explicitly rather than logging an undefined result.
        console.error(err);
        return;
      }
      console.log(result);
    });
}