I currently have a program that reads a very large file in single-threaded mode and builds a search index, but indexing takes too long on a single thread.
<
If you can use Java 8, you may be able to do this quickly and easily with the Streams API. Map the file into a MappedByteBuffer, which can open a file of up to 2GB very quickly, then read the lines out of the buffer (you need to make sure your JVM has enough extra memory to hold the file):
package com.objective.stream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.stream.Stream;
/**
 * Indexes a text file line by line using a parallel stream.
 *
 * <p>The file is memory-mapped read-only, decoded as UTF-8, split into lines and
 * each line is handed to {@code doIndex}. Because the lines are processed by a
 * parallel stream, {@code doIndex} may be invoked from several threads at once
 * and in no particular order.
 *
 * <p>{@link FileChannel#map} rejects regions larger than
 * {@link Integer#MAX_VALUE} bytes, so files above roughly 2GB cannot be
 * processed by this class.
 */
public class StreamsFileProcessor {

    /** Read-only mapping of the file most recently passed to {@link #process(Path)}. */
    private MappedByteBuffer buffer;

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the file to index; when it is
     *             missing a usage message is printed and nothing is processed
     */
    public static void main(String[] args) {
        // Check the length first: indexing args[0] on an empty array would throw.
        if (args == null || args.length == 0 || args[0] == null) {
            System.err.println("Usage: StreamsFileProcessor <file>");
            return;
        }
        Path myFile = Paths.get(args[0]);
        StreamsFileProcessor proc = new StreamsFileProcessor();
        try {
            proc.process(myFile);
        } catch (IOException e) {
            System.err.println("Failed to index " + myFile);
            e.printStackTrace();
        }
    }

    /**
     * Maps {@code file} into memory and indexes every line of it in parallel.
     *
     * @param file the file to index
     * @throws IOException if the file cannot be opened, mapped or read
     */
    public void process(Path file) throws IOException {
        readFileIntoBuffer(file);
        // The reader must stay open until the terminal operation has finished:
        // lines() is lazy and reads from the reader while the pipeline runs.
        try (BufferedReader reader = openBufferReader()) {
            reader.lines()
                    .parallel()
                    .forEach(this::doIndex);
        }
    }

    /**
     * Creates a UTF-8 reader over the mapped file contents.
     *
     * <p>A duplicate of the buffer is read so that the position of the
     * {@code buffer} field itself is never advanced.
     */
    private BufferedReader openBufferReader() {
        // A mapped buffer is a direct buffer without an accessible backing
        // array, so it is adapted to an InputStream instead of calling array().
        InputStream in = new ByteBufferInputStream(buffer.duplicate());
        return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    }

    /**
     * Maps the whole of {@code file} read-only into the {@code buffer} field.
     *
     * @throws IOException if the file cannot be opened or mapped
     */
    private void readFileIntoBuffer(Path file) throws IOException {
        // The mapping remains valid after the channel is closed, so the channel
        // only needs to live for the duration of the map() call.
        try (FileChannel channel = FileChannel.open(file, StandardOpenOption.READ)) {
            // READ_ONLY is the only mode permitted on a channel opened just for reading.
            buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        }
    }

    /**
     * Indexes a single line. Called concurrently from the parallel stream, so
     * any implementation placed here must be safe for concurrent use.
     */
    private void doIndex(String s) {
        // Do whatever I need to do to index the line here
    }

    /** Minimal {@link InputStream} view over the remaining bytes of a {@link ByteBuffer}. */
    private static final class ByteBufferInputStream extends InputStream {

        private final ByteBuffer source;

        ByteBufferInputStream(ByteBuffer source) {
            this.source = source;
        }

        @Override
        public int read() {
            // Mask to return the byte as an unsigned value in 0..255.
            return source.hasRemaining() ? (source.get() & 0xFF) : -1;
        }

        @Override
        public int read(byte[] dst, int off, int len) {
            if (len == 0) {
                return 0;
            }
            if (!source.hasRemaining()) {
                return -1;
            }
            int count = Math.min(len, source.remaining());
            source.get(dst, off, count);
            return count;
        }
    }
}