问题
I have implemented a cropping algorithm on my solution that works pretty good. The problem is when the image is tilted, the crop will work but it will have background space showing as the images will show.
Cropping flow:
First step:
Second step:
Final result:
I have searched/tried multiple solutions but could not get a decent result or I'm not thinking the right way.
The expected result is this:
EDIT [FINAL RESULT]:
import cv2
import numpy as np
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
def get_image_width_height(image):
image_width = image.shape[1] # current image's width
image_height = image.shape[0] # current image's height
return image_width, image_height
def calculate_scaled_dimension(scale, image):
image_width, image_height = get_image_width_height(image)
ratio_of_new_with_to_old = scale / image_width
dimension = (scale, int(image_height * ratio_of_new_with_to_old))
return dimension
def scale_image(image, size):
image_resized_scaled = cv2.resize(
image,
calculate_scaled_dimension(
size,
image
),
interpolation=cv2.INTER_AREA
)
return image_resized_scaled
def rotate_image(image, angle):
# Grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# Adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
image = cv2.imread('images/damina_cc_back.jpg')
original_image = image.copy()
image = scale_image(image, 500)
# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screen_cnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
screen_cnt = approx
transformed = perspective_transform(image, screen_cnt)
break
# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)
(h, w) = transformed.shape[:2]
if (h > w):
rotated = rotate_image(transformed, 90)
else:
rotated = transformed
cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
回答1:
To align your image after cropping, we can we can use a perspective transformation. To begin, we separate the four corners of the rectangle into individual points given to us by cv2.approxPolyDP()
. We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
This function gives us the the bounding box coordinates of the ROI
Now with the isolated corner points, we can obtain the transformation matrix using cv2.getPerspectiveTransform()
and actually obtain the transformed image using cv2.warpPerspective()
.
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
Here's the result
We can rotate the image with this function
def rotate_image(image, angle):
# Grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# Adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
The final result after rotating:
Full code
import cv2
import numpy as np
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
def rotate_image(image, angle):
# Grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# Adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
image = cv2.imread('1.PNG')
original_image = image.copy()
# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screen_cnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
screen_cnt = approx
transformed = perspective_transform(original_image, screen_cnt)
break
# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 3)
# Rotate image
rotated = rotate_image(transformed, -90)
cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
回答2:
I assume you're looking for the minimum and maximum u and v positions where an edge was found (or maybe certain quantiles) to find the cropped rectangle. That is go over all image pixels that were marked an edge and update the u/v/ min/max values.
If if the computation time is not an issue for you, you can simply keep the algorithm as is and additionally loop over a number of rotations and update special values for each. Pseudocode:
for v
for u
if (u,v) is edge
for rotation_matrix
(ur, vr) = rotation_matrix * (u,v)
update boundary for given rotation matrix
In the end you can select the bounding box for the rotation matrix that is the smallest.
If the above algorithm is too slow for your use case, you can also try extracting the major axes using the opencv HoughLinesP function. This will of course not work for all types of images, but may be good enough for the case of id cards.
Finally, to apply the rotation correction refer to this tutorial.
来源:https://stackoverflow.com/questions/56459002/how-to-detect-and-align-tilted-images-after-cropping