How to get the bounding box of text that are overlapped with background lines?

问题

For example, in the following app screenshot, I want to get the bounding box tightly rounded over CA-85S (the text on the horizontal blue line), and Almaden Expy (text that overlapped with the blue line). I am extracting those bounding boxes for OCR.

I've tried several approaches in openCV that none of those approaches work for me.

回答1:

Using the observation that the desired text to extract is in black and has a contrast different from the blue river background lines, a potential approach is to use color thresholding with cv2.inRange. Here's the main idea and implementation using Python:

Obtain color thresholded mask. Load the image, convert to HSV format, define lower and upper color ranges, then color threshold to obtain a mask.
Merge text into a single contour. We create a rectangular structuring element using cv2.getStructuringElement then use morphological operations to merge individual text letters into a single contour.
Filter for text contours. We find contours with cv2.findContours, iterate through contours, then filter using cv2.contourArea and aspect ratio. If a contour passes this filter, we find the rotated bounding box.
Isolate text. We can perform this optional step to extract only the text using cv2.bitwise_and.

Here's a visualization of the process:

Color thresholded mask

Morph close to connect text into a single contour

Result

Extracted individual text

Code

import cv2
import numpy as np

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
original = image.copy()
blank = np.zeros(image.shape[:2], dtype=np.uint8)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([179, 255, 165])
mask = cv2.inRange(hsv, lower, upper)

# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find contours
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Filter using contour area and aspect ratio
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    ar = w / float(h)
    if (ar > 1.4 and ar < 4) or ar < .85 and area > 100:
        # Find rotated bounding box
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        cv2.drawContours(image,[box],0,(36,255,12),2)
        cv2.drawContours(blank,[box],0,(255,255,255),-1)

# Bitwise operations to isolate text
extract = cv2.bitwise_and(mask, blank)
extract = cv2.bitwise_and(original, original, mask=extract)
extract[extract==0] = 255

cv2.imshow('mask', mask)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()

Note: The HSV lower and upper color threshold ranges were determined using this script

import cv2
import numpy as np

def nothing(x):
    pass

# Load image
image = cv2.imread('1.png')

# Create a window
cv2.namedWindow('image')

# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)

# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)

# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0

while(1):
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Convert to HSV format and color threshold
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Print if there is a change in HSV value
    if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax) ):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin , sMin , vMin, hMax, sMax , vMax))
        phMin = hMin
        psMin = sMin
        pvMin = vMin
        phMax = hMax
        psMax = sMax
        pvMax = vMax

    # Display result image
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()

来源：https://stackoverflow.com/questions/60347865/how-to-get-the-bounding-box-of-text-that-are-overlapped-with-background-lines

标签

python

OpenCV

image-processing

computer-vision

ocr