I am developing an Android app that uses Tesseract OCR to scan text from an image.
I heard that binarizing the image before performing OCR on it gives better results,
so I would like to do that.
It wouldn't be too hard to port this from Java to Android:
/**
* Image binarization - Otsu algorithm
*
* Author: Bostjan Cigan (http://zerocool.is-a-geek.net)
*
*/
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
/**
 * Converts a JPEG image to black and white using Otsu's method.
 *
 * <p>The image is first reduced to grayscale, then a global threshold is chosen that
 * maximizes the between-class variance of the gray-level histogram, and finally every
 * pixel is set to white (above the threshold) or black (at or below it).
 *
 * <p>Usage: {@code java OtsuBinarize <path-without-extension>} reads
 * {@code <path>.jpg} and writes {@code <path>_bin.jpg}.
 */
public class OtsuBinarize {

    /** Number of intensity levels in an 8-bit channel. */
    private static final int LEVELS = 256;

    // Intermediate images of the last run; writeImage() reads 'binarized'.
    private static BufferedImage original, grayscale, binarized;

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the input path without the {@code .jpg} extension
     * @throws IOException if the input cannot be read or decoded, or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: java OtsuBinarize <image path without the .jpg extension>");
            return;
        }
        File original_f = new File(args[0] + ".jpg");
        String output_f = args[0] + "_bin";
        original = ImageIO.read(original_f);
        if (original == null) {
            // ImageIO.read returns null (rather than throwing) when no reader understands the file.
            throw new IOException("Could not decode image: " + original_f);
        }
        grayscale = toGray(original);
        binarized = binarize(grayscale);
        writeImage(output_f);
    }

    /**
     * Writes the binarized image of the last run to {@code output + ".jpg"}.
     *
     * @param output output path without the {@code .jpg} extension
     * @throws IOException if writing fails or no JPEG writer accepts the image
     */
    private static void writeImage(String output) throws IOException {
        File file = new File(output + ".jpg");
        // ImageIO.write signals "no suitable writer" by returning false, not by throwing.
        if (!ImageIO.write(binarized, "jpg", file)) {
            throw new IOException("No JPEG writer could encode the image: " + file);
        }
    }

    /**
     * Returns the histogram of a grayscale image.
     *
     * <p>Only the red channel is inspected, so the input is expected to already be
     * grayscale (red == green == blue).
     *
     * @param input the image to analyze
     * @return an array of 256 counts, indexed by intensity
     */
    public static int[] imageHistogram(BufferedImage input) {
        int[] histogram = new int[LEVELS];
        for (int i = 0; i < input.getWidth(); i++) {
            for (int j = 0; j < input.getHeight(); j++) {
                int red = (input.getRGB(i, j) >> 16) & 0xFF;
                histogram[red]++;
            }
        }
        return histogram;
    }

    /**
     * Converts an image to grayscale using the luminance method (Rec. 709 weights).
     * The alpha channel of each pixel is carried over unchanged.
     *
     * @param original the color image
     * @return a new image of the same size whose red, green and blue channels are equal
     */
    private static BufferedImage toGray(BufferedImage original) {
        BufferedImage lum = blankLike(original);
        for (int i = 0; i < original.getWidth(); i++) {
            for (int j = 0; j < original.getHeight(); j++) {
                int argb = original.getRGB(i, j);
                int alpha = argb >>> 24;
                int red = (argb >> 16) & 0xFF;
                int green = (argb >> 8) & 0xFF;
                int blue = argb & 0xFF;

                int gray = (int) Math.round(0.2126 * red + 0.7152 * green + 0.0722 * blue);
                gray = Math.min(LEVELS - 1, gray);

                lum.setRGB(i, j, colorToRGB(alpha, gray, gray, gray));
            }
        }
        return lum;
    }

    /**
     * Computes the binarization threshold with Otsu's method.
     *
     * <p>Every candidate threshold splits the histogram into a background class
     * (levels up to and including the candidate) and a foreground class (the rest).
     * The first candidate with the largest between-class variance wins.
     *
     * @param original a grayscale image
     * @return the threshold in the range 0..255; 0 if the image has a single gray level
     */
    private static int otsuTreshold(BufferedImage original) {
        int[] histogram = imageHistogram(original);
        long total = (long) original.getWidth() * original.getHeight();

        // Accumulate in double: i * histogram[i] overflows int for multi-megapixel images.
        double sum = 0;
        for (int i = 0; i < LEVELS; i++) {
            sum += (double) i * histogram[i];
        }

        double sumB = 0;   // intensity-weighted sum of the background class
        long wB = 0;       // number of background pixels
        double varMax = 0;
        int threshold = 0;

        for (int i = 0; i < LEVELS; i++) {
            wB += histogram[i];
            if (wB == 0) {
                continue;  // no background pixels yet
            }
            long wF = total - wB;
            if (wF == 0) {
                break;     // no foreground pixels left; later candidates are identical
            }

            sumB += (double) i * histogram[i];
            double mB = sumB / wB;
            double mF = (sum - sumB) / wF;

            double varBetween = (double) wB * wF * (mB - mF) * (mB - mF);
            if (varBetween > varMax) {
                varMax = varBetween;
                threshold = i;
            }
        }
        return threshold;
    }

    /**
     * Turns a grayscale image into a black-and-white one.
     *
     * <p>Pixels whose red channel is strictly greater than the Otsu threshold become
     * white, all others black. The alpha channel of each pixel is carried over unchanged.
     *
     * @param original a grayscale image
     * @return a new image of the same size containing only black and white pixels
     */
    private static BufferedImage binarize(BufferedImage original) {
        int red;
        int newPixel;
        int threshold = otsuTreshold(original);
        BufferedImage binarized = blankLike(original);
        for (int i = 0; i < original.getWidth(); i++) {
            for (int j = 0; j < original.getHeight(); j++) {
                int argb = original.getRGB(i, j);
                int alpha = argb >>> 24;
                red = (argb >> 16) & 0xFF;
                if (red > threshold) {
                    newPixel = 255;
                }
                else {
                    newPixel = 0;
                }
                newPixel = colorToRGB(alpha, newPixel, newPixel, newPixel);
                binarized.setRGB(i, j, newPixel);
            }
        }
        return binarized;
    }

    /**
     * Creates an empty image with the same dimensions as {@code source}.
     *
     * <p>The source's image type is reused when possible. {@code TYPE_CUSTOM} is not
     * accepted by the {@link BufferedImage} constructor, so in that case a standard
     * RGB or ARGB type is chosen depending on whether the source has an alpha channel.
     */
    private static BufferedImage blankLike(BufferedImage source) {
        int type = source.getType();
        if (type == BufferedImage.TYPE_CUSTOM) {
            type = source.getColorModel().hasAlpha()
                    ? BufferedImage.TYPE_INT_ARGB
                    : BufferedImage.TYPE_INT_RGB;
        }
        return new BufferedImage(source.getWidth(), source.getHeight(), type);
    }

    /**
     * Packs four 8-bit channels into a single ARGB pixel value.
     * Each argument is masked to its low 8 bits so it cannot spill into a neighbouring channel.
     *
     * @return the pixel as {@code 0xAARRGGBB}
     */
    private static int colorToRGB(int alpha, int red, int green, int blue) {
        return ((alpha & 0xFF) << 24)
                | ((red & 0xFF) << 16)
                | ((green & 0xFF) << 8)
                | (blue & 0xFF);
    }
}