Using C API of tesseract 3.02 with ctypes and cv2 in python

*爱你&永不变心* 提交于 2019-12-03 16:07:06

The default restype is c_int, and the default argument conversion from an integer is also c_int. You'll find examples on the web that assume a 32-bit platform that has sizeof(int) == sizeof(void *). This was never a good assumption to make. To protect a 64-bit pointer from truncation when converted to and from a Python integer, set the function pointer's argtypes and restype. It's a good idea to do this anyway, since it allows ctypes to raise an ArgumentError when the wrong types or number of arguments are used.

If you'd rather not define the prototypes for every function, then at least set TessBaseAPICreate.restype to an opaque pointer type.

The following ctypes definitions are based on the header api/capi.h. For convenience I've packaged the API into a Tesseract class.

import sys
import cv2
import ctypes
import ctypes.util

# Base name of the shared library that is looked up with
# ctypes.util.find_library(); win32 uses a different name from
# every other platform.
LIBNAME = 'libtesseract302' if sys.platform == 'win32' else 'tesseract'

class TesseractError(Exception):
    """Error raised by the Tesseract wrapper.

    Used when the shared library cannot be found, when the engine fails
    to initialize, or when a method is called before the library or API
    handle has been set up.
    """

class Tesseract(object):
    """Minimal ctypes wrapper around the Tesseract 3.02 C API (api/capi.h).

    The shared library is loaded once and cached on the class. Each
    instance owns one ``TessBaseAPI`` handle, which is released by
    ``close()`` (also invoked from ``__del__``).
    """

    _lib = None        # ctypes.CDLL shared by all instances; set by setup_lib()
    _api = None        # this instance's TessBaseAPI handle; None once closed
    _image_ref = None  # last object passed to set_image(), kept alive on purpose
    closed = False     # becomes True once close() has run

    class TessBaseAPI(ctypes._Pointer):
        # Opaque pointer type: declaring it as restype/argtypes keeps the
        # handle at full pointer width instead of the default c_int.
        _type_ = type('_TessBaseAPI', (ctypes.Structure,), {})

    @staticmethod
    def _to_bytes(value, encoding='utf-8'):
        """Return *value* as a byte string suitable for a c_char_p argument.

        None (passed to C as NULL) and byte strings are returned unchanged;
        text strings are encoded with *encoding*.
        """
        if value is None or isinstance(value, bytes):
            return value
        return value.encode(encoding)

    @classmethod
    def setup_lib(cls, lib_path=None):
        """Load the Tesseract shared library and declare its prototypes.

        Does nothing when a library has already been loaded.

        lib_path -- explicit path of the shared library; when None the
                    library is searched with
                    ctypes.util.find_library(LIBNAME).

        Raises TesseractError when no library can be found.
        """
        if cls._lib is not None:
            return
        if lib_path is None:
            lib_path = ctypes.util.find_library(LIBNAME)
            if lib_path is None:
                raise TesseractError('tesseract library not found')
        lib = ctypes.CDLL(lib_path)

        # source:
        # https://github.com/tesseract-ocr/tesseract/
        #         blob/3.02.02/api/capi.h

        lib.TessBaseAPICreate.restype = cls.TessBaseAPI
        lib.TessBaseAPICreate.argtypes = ()

        lib.TessBaseAPIDelete.restype = None  # void
        lib.TessBaseAPIDelete.argtypes = (
            cls.TessBaseAPI,)  # handle

        lib.TessBaseAPIInit3.restype = ctypes.c_int
        lib.TessBaseAPIInit3.argtypes = (
            cls.TessBaseAPI,  # handle
            ctypes.c_char_p,  # datapath
            ctypes.c_char_p)  # language

        lib.TessBaseAPISetImage.restype = None
        lib.TessBaseAPISetImage.argtypes = (
            cls.TessBaseAPI,  # handle
            ctypes.c_void_p,  # imagedata
            ctypes.c_int,     # width
            ctypes.c_int,     # height
            ctypes.c_int,     # bytes_per_pixel
            ctypes.c_int)     # bytes_per_line

        # A c_char_p restype would make ctypes copy the text and throw the
        # pointer away, so the C buffer could never be freed. Keep the raw
        # address instead and release it with TessDeleteText after copying.
        lib.TessBaseAPIGetUTF8Text.restype = ctypes.c_void_p
        lib.TessBaseAPIGetUTF8Text.argtypes = (
            cls.TessBaseAPI,)  # handle

        lib.TessDeleteText.restype = None  # void
        lib.TessDeleteText.argtypes = (
            ctypes.c_void_p,)  # text

        # Cache the library only once every prototype is declared, so a
        # failed symbol lookup does not leave a half-configured library.
        cls._lib = lib

    def __init__(self, language='eng', datapath=None, lib_path=None):
        """Create a TessBaseAPI handle and initialize it.

        language -- language code passed to TessBaseAPIInit3.
        datapath -- data path passed to TessBaseAPIInit3 (None -> NULL).
        lib_path -- forwarded to setup_lib() if no library is loaded yet.

        Text arguments are encoded as UTF-8 before being passed to C.

        Raises TesseractError when the handle cannot be created or
        TessBaseAPIInit3 returns a non-zero status.
        """
        if self._lib is None:
            self.setup_lib(lib_path)
        self._api = self._lib.TessBaseAPICreate()
        if not self._api:
            raise TesseractError('api not created')
        status = self._lib.TessBaseAPIInit3(self._api,
                                            self._to_bytes(datapath),
                                            self._to_bytes(language))
        if status:
            self.close()
            raise TesseractError('initialization failed')

    def close(self):
        """Release the TessBaseAPI handle; safe to call more than once."""
        api, self._api = self._api, None
        self._image_ref = None
        if self._lib is not None and api and not self.closed:
            self._lib.TessBaseAPIDelete(api)
        self.closed = True

    def __del__(self):
        self.close()

    def _check_setup(self):
        """Raise TesseractError unless the library and handle are usable."""
        if not self._lib:
            raise TesseractError('lib not configured')
        if not self._api:
            raise TesseractError('api not created')

    def set_image(self, imagedata, width, height,
                  bytes_per_pixel, bytes_per_line=None):
        """Hand a raw image buffer to TessBaseAPISetImage.

        imagedata       -- object convertible to c_void_p (for example the
                           ``ctypes`` attribute of a NumPy array).
        width, height   -- image size in pixels.
        bytes_per_pixel -- bytes used by one pixel.
        bytes_per_line  -- bytes used by one row; defaults to
                           width * bytes_per_pixel.
        """
        self._check_setup()
        if bytes_per_line is None:
            bytes_per_line = width * bytes_per_pixel
        # NOTE(review): the library may keep using the caller's buffer
        # rather than copying it -- hold a reference so the buffer cannot
        # be garbage-collected while it is the current image.
        self._image_ref = imagedata
        self._lib.TessBaseAPISetImage(self._api,
                                      imagedata, width, height,
                                      bytes_per_pixel, bytes_per_line)

    def get_utf8_text(self):
        """Return the recognized text as UTF-8 bytes, or None for NULL."""
        self._check_setup()
        address = self._lib.TessBaseAPIGetUTF8Text(self._api)
        if not address:
            return None
        try:
            # Copy the NUL-terminated C string into a Python byte string.
            return ctypes.string_at(address)
        finally:
            self._lib.TessDeleteText(address)

    def get_text(self):
        """Return the recognized text decoded from UTF-8.

        Returns None when the result is NULL or empty.
        """
        result = self.get_utf8_text()
        if result:
            return result.decode('utf-8')
        return None

Example usage:

if __name__ == '__main__':
    # Demo: run OCR on a sample image loaded with OpenCV.
    image_path = 'ocrtest.png'
    imcv = cv2.imread(image_path)
    if imcv is None:
        # cv2.imread signals a missing or unreadable file by returning
        # None rather than raising.
        raise SystemExit('could not read image: %s' % image_path)
    # The array shape lists the height before the width.
    height, width, depth = imcv.shape

    tess = Tesseract()
    tess.set_image(imcv.ctypes, width, height, depth)
    text = tess.get_text()

    # get_text() returns None for an empty result; the parenthesized
    # single-argument print works on both Python 2 and Python 3.
    print((text or '').strip())

I tested this on Linux with libtesseract.so.3. Note that cv2.imread returns a NumPy array. This has a ctypes attribute that includes the _as_parameter_ hook, set as a c_void_p pointer to the array. Note also that the code shown in the question has the width and height transposed. It should have been h, w, d = imcv.shape.

ocrtest.png:

Output:

I am trying to use Tesseract 3.02 with ctypes and cv2 in python. Tesseract
provides a DLL exposed set of C style APIs, one of them is as following:
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!