Preprocessing image for Tesseract OCR with OpenCV

前端 未结 5 666
既然无缘
既然无缘 2020-12-04 06:07

I'm trying to develop an app that uses Tesseract to recognize text from documents photographed with a phone's camera. I'm using OpenCV to preprocess the image for better recognition.

5条回答
  •  感动是毒
    2020-12-04 06:52

    1. Scanning at 300 dpi (dots per inch) is not officially a standard for OCR (optical character recognition), but it is considered the gold standard.

    2. Converting the image to greyscale generally improves text-recognition accuracy.

    I have written a module, Image Text Reader, that reads text from an image; it first preprocesses the image to get the best possible OCR result.

    import tempfile
    
    import cv2
    import numpy as np
    from PIL import Image
    
    # Target width in pixels; set_image_dpi divides it by the input width to
    # pick an integer upscale factor.
    IMAGE_SIZE = 1800
    # Fixed cut-off handed to cv2.threshold in image_smoothening. The name
    # keeps its original spelling because other code refers to it.
    BINARY_THREHOLD = 180
    
    def process_image_for_ocr(file_path):
        """Prepare the image at ``file_path`` for OCR and return the result.

        The image is first rescaled and written to a temporary JPEG by
        ``set_image_dpi``; that file is then passed to
        ``remove_noise_and_smooth``, whose return value is handed straight
        back to the caller.

        The temporary JPEG is deleted before returning (whether or not the
        clean-up step succeeded), so repeated calls do not accumulate files
        in the temp directory.
        """
        import os

        # TODO : Implement using opencv
        temp_filename = set_image_dpi(file_path)
        try:
            return remove_noise_and_smooth(temp_filename)
        finally:
            # Best-effort removal: a failure to delete the scratch file must
            # not mask the real result or the real exception.
            try:
                os.remove(temp_filename)
            except OSError:
                pass
    
    def set_image_dpi(file_path):
        """Upscale an image and save it as a temporary JPEG tagged 300 dpi.

        The image is enlarged by an integer factor chosen so that its width
        approaches ``IMAGE_SIZE`` without exceeding it; images already wider
        than half of ``IMAGE_SIZE`` keep their size (the factor never drops
        below 1). The aspect ratio is preserved.

        Args:
            file_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            The path of the newly written ``.jpg`` file. It is created with
            ``delete=False``, so it stays on disk until something removes it.
        """
        with Image.open(file_path) as source:
            # JPEG cannot store alpha or palette data, so normalise such
            # inputs (e.g. RGBA/P PNGs) to RGB instead of failing on save.
            if source.mode not in ("1", "L", "RGB", "CMYK", "YCbCr"):
                im = source.convert("RGB")
            else:
                im = source
            width, height = im.size
            factor = max(1, int(IMAGE_SIZE / width))
            size = factor * width, factor * height
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            im_resized = im.resize(size, Image.LANCZOS)

        # Only the unique name is needed; close the handle right away so the
        # descriptor is not leaked and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
            temp_filename = temp_file.name
        im_resized.save(temp_filename, dpi=(300, 300))
        return temp_filename
    
    def image_smoothening(img):
        """Binarise ``img`` through a fixed threshold followed by Otsu passes.

        Steps, in order:
          1. binary threshold at ``BINARY_THREHOLD``;
          2. Otsu binary threshold on that result;
          3. Gaussian blur with a (1, 1) kernel;
          4. Otsu binary threshold once more.

        Returns the image produced by the last threshold call.
        """
        otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU

        # cv2.threshold returns (value_used, image); only the image is kept.
        fixed_cut = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)[1]
        first_otsu = cv2.threshold(fixed_cut, 0, 255, otsu_binary)[1]
        blurred = cv2.GaussianBlur(first_otsu, (1, 1), 0)
        return cv2.threshold(blurred, 0, 255, otsu_binary)[1]
    
    def remove_noise_and_smooth(file_name):
        """Load an image as greyscale and return a cleaned, binarised version.

        Two binarisations of the same greyscale image are combined with a
        bitwise OR:
          * an adaptive mean threshold (block size 41, constant 3) followed
            by a morphological open and close, and
          * the output of ``image_smoothening``.

        Args:
            file_name: Path of the image file to read.

        Returns:
            The OR-combined image as returned by ``cv2.bitwise_or``.

        Raises:
            OSError: If OpenCV cannot read an image from ``file_name``.
        """
        img = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with a message that names the file.
            raise OSError("Could not read image file: {!r}".format(file_name))

        filtered = cv2.adaptiveThreshold(
            img.astype(np.uint8),
            255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY,
            41,
            3,
        )
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        smoothed = image_smoothening(img)
        return cv2.bitwise_or(smoothed, closing)
    

提交回复
热议问题