I would like to read a very very large file into a JavaScript array in node.js.
So if the file is like this:
first line
two
three
...
...
I would have the array:
['first line','two','three', ... , ... ]
The function would look like this:
var array = load(filename);
Therefore the idea of loading it all as a string and then split it is not acceptable.
If you can fit the final data into an array then wouldn't you also be able to fit it in a string and split it, as has been suggested? In any case if you would like to process the file one line at a time you can also try something like this:
var fs = require('fs');
function readLines(input, func) {
var remaining = '';
input.on('data', function(data) {
remaining += data;
var index = remaining.indexOf('\n');
while (index > -1) {
var line = remaining.substring(0, index);
remaining = remaining.substring(index + 1);
func(line);
index = remaining.indexOf('\n');
}
});
input.on('end', function() {
if (remaining.length > 0) {
func(remaining);
}
});
}
function func(data) {
console.log('Line: ' + data);
}
var input = fs.createReadStream('lines.txt');
readLines(input, func);
EDIT: (in response to comment by phopkins) I think (at least in newer versions) substring does not copy data but creates a special SlicedString object (from a quick glance at the v8 source code). In any case here is a modification that avoids the mentioned substring (tested on a file several megabytes worth of "All work and no play makes Jack a dull boy"):
function readLines(input, func) {
var remaining = '';
input.on('data', function(data) {
remaining += data;
var index = remaining.indexOf('\n');
var last = 0;
while (index > -1) {
var line = remaining.substring(last, index);
last = index + 1;
func(line);
index = remaining.indexOf('\n', last);
}
remaining = remaining.substring(last);
});
input.on('end', function() {
if (remaining.length > 0) {
func(remaining);
}
});
}