I am trying to find the bounding boxes of text in an image and am currently using this approach:
// calculate the local variances of the grayscale image
Mat
Python Implementation for @dhanushka's solution:
def process_rgb(rgb):
hasText = False
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
morphKernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, morphKernel)
# binarize
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# connect horizontally oriented regions
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, morphKernel)
# find contours
mask = np.zeros(bw.shape[:2], dtype="uint8")
_,contours, hierarchy = cv2.findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# filter contours
idx = 0
while idx >= 0:
x,y,w,h = cv2.boundingRect(contours[idx])
# fill the contour
cv2.drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED)
# ratio of non-zero pixels in the filled region
r = cv2.contourArea(contours[idx])/(w*h)
if(r > 0.45 and h > 5 and w > 5 and w > h):
cv2.rectangle(rgb, (x,y), (x+w,y+h), (0, 255, 0), 2)
hasText = True
idx = hierarchy[0][idx][0]
return hasText, rgb