本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者吴疆--------------
------点击此处链接至博客园原文------
""" Outputs object detection proposals by applying estimated bounding-box transformations to a set of regular boxes (called "anchors"). 根据RPN目标回归值修正anchors并做后处理输出proposals和全0batch_ind组成的blob """
1.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [8, 16, 32]) 算法逻辑
调用generate_anchors(...)(generate_anchors.py中)产生9个base anchors--->
im_info = im_info[0] # 取出第一张图像更新im_info,存储该图像的宽、高和缩放因子--->
仅取出rpn_cls_prob_reshape层输出anchors属于fg的score--->
计算shifts偏移量,即conv5_3 feature map各个位置相对于(0,0)位置(在scaled图像上)的偏移,如右侧相邻位置的偏移为[16,0,16,0];之所以用4列而不是2列表示,是因为anchor以(x1,y1,x2,y2)四个坐标表示,左上角和右下角都要加上相同的(x,y)偏移,4列的shifts可以直接与anchors相加--->
在conv5_3 feature map各个位置利用shifts和9个base anchors产生所有anchors,计算anchors需对base anchors和shifts进行reshape,此处要用到Python的broadcast机制--->
调用bbox_transform_inv(...)(bbox_transform.py中)对所有anchors+预测得到的回归值得到proposals--->
调用clip_boxes(...)函数(bbox_transform.py中)将越界proposals限制在图像边界(原文说训练阶段,剔除越界的box;测试阶段,限制在图像边界,实际上代码表明均是限制在图像边界)--->
调用_filter_boxes(...)函数剔除尺寸小于min_size的proposals--->
按score从大到小对proposal进行排序,取前pre_nms_topN个proposals(训练12000,测试6000)--->
调用nms(...)(nms_wrapper.py中)进行nms处理,并取post_nms_topN个proposals(训练2000,测试300)--->
将proposal组成blob并返回,形状为(N,5),N为nms后保留的proposal个数(不超过post_nms_topN,如测试阶段至多300*5),5=1列batch_ind(全0)+(x1,y1,x2,y2)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [8, 16, 32]):
    """Convert RPN outputs into a blob of object proposals (RoIs).

    Algorithm:
      1. generate A base anchors and replicate them at every (H, W) cell
      2. apply the predicted bbox deltas to the anchors
      3. clip the resulting boxes to the image
      4. drop boxes whose width or height is below the minimum size
      5. sort by foreground score, keep the top pre_nms_topN
      6. run NMS, keep the top post_nms_topN
      7. prepend an all-zero batch-index column

    Parameters
    ----------
    rpn_cls_prob_reshape : (1, H, W, A*2) RPN class probabilities; for each
        anchor the last axis holds the (bg, fg) pair.
        NOTICE: the old version is ordered by (1, H, W, 2, A) !!!
    rpn_bbox_pred : (1, H, W, A*4) RPN box regression output.
    im_info : indexable whose first element is
        [image_height, image_width, scale_ratio]; only that first element
        is used (single-image batches only).
    cfg_key : 'TRAIN' or 'TEST'; selects the section of ``cfg`` the
        RPN_* settings are read from.
    _feat_stride : downsampling ratio from the input image to the feature
        map. The default list is only read, never mutated.
    anchor_scales : scales applied to the basic anchor when generating the
        base anchors. The default list is only read, never mutated.

    Returns
    -------
    blob : float32 array of shape (N, 5), one row per kept proposal laid
        out as [0, x1, y1, x2, y2]. N is the number of proposals left
        after filtering and NMS (at most post_nms_topN when that setting
        is positive).

    Raises
    ------
    AssertionError
        If the batch dimension of ``rpn_cls_prob_reshape`` is not 1.
    """
    # Base anchors for the first feature-map cell; A = number of base anchors.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # Only the first image's [height, width, scale] is used.
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # e.g. 12000 (train) / 6000 (test)
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # e.g. 2000 (train) / 300 (test)
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # e.g. 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE             # minimum box side in the original image

    # Spatial size of the feature map.
    height, width = rpn_cls_prob_reshape.shape[1:3]

    # (1, H, W, A*2) -> (1, H, W, A, 2); index 1 of the last axis is the
    # foreground probability, giving scores of shape (1, H, W, A).
    scores = np.reshape(rpn_cls_prob_reshape,
                        [1, height, width, _num_anchors, 2])[:, :, :, :, 1]
    # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt,
    # use this line instead:
    # scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        # Single-argument call form prints identically on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    # Enumerate the offset of every feature-map cell relative to cell (0, 0),
    # measured in input-image pixels.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # One row per cell: (dx, dy, dx, dy). Four columns are used because an
    # anchor is (x1, y1, x2, y2) and both corners move by the same offset.
    # shifts.shape == (width * height, 4)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #   add A anchors (1, A, 4) to
    #   cell K shifts (K, 1, 4) to get
    #   shift anchors (K, A, 4) via broadcasting, then
    #   reshape to (K*A, 4) shifted anchors
    A = _num_anchors      # anchors per cell
    K = shifts.shape[0]   # number of cells (width * height)
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # The deltas are (1, H, W, A*4); flatten to (H*W*A, 4) so rows are
    # ordered by (h, w, a), the same order as the anchors.
    bbox_deltas = bbox_deltas.reshape((-1, 4))

    # Same for the scores: (1, H, W, A) -> (H*W*A, 1).
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via the bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to the image (done for both TRAIN and TEST).
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove predicted boxes with either height or width < threshold.
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Optional, disabled in the original: remove irregular boxes
    # (too fat / too tall).
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. Sort all (proposal, score) pairs by score from highest to lowest.
    # 5. Take the top pre_nms_topN (e.g. 6000).
    # argsort() is ascending, so reverse it to get descending order.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. Apply NMS (e.g. threshold = 0.7) on rows of [x1, y1, x2, y2, score].
    # 7. Take the top post_nms_topN (e.g. 300).
    # 8. Return the top proposals (-> RoIs top).
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
# top[0].reshape(*(blob.shape))
# top[0].data[...] = blob
# [Optional] output scores blob
# if len(top) > 1:
# top[1].reshape(*(scores.shape))
# top[1].data[...] = scores
# -*- coding:utf-8 -*-
# Author: WUJiang
# Test snippet: np.meshgrid() and np.ravel()
import numpy as np

shift_x = np.arange(0, 4)
shift_y = np.arange(1, 5)

# np.meshgrid(arg1, arg2)
# The first result uses arg1 as every row, repeated len(arg2) times.
# The second result uses arg2 as every column, repeated len(arg1) times.
shift_x, shift_y = np.meshgrid(shift_x, shift_y)

# Prints:
# [[0 1 2 3]
#  [0 1 2 3]
#  [0 1 2 3]
#  [0 1 2 3]]
print(shift_x)

# ravel() flattens a multi-dimensional array to 1-D.
# Prints: [0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3]
print(shift_x.ravel())

# Prints:
# [[1 1 1 1]
#  [2 2 2 2]
#  [3 3 3 3]
#  [4 4 4 4]]
print(shift_y)

# Prints: [1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4]
print(shift_y.ravel())
# -*- coding:utf-8 -*-
# Author: WUJiang
# Test snippet: NumPy broadcasting
import numpy as np

a = np.array([[1, 2, 3, 4], [2, 5, 7, 6]])
b = np.array([[5, 2, 3, 4], [2, 7, 7, 6], [9, 1, 2, 5]])

# Adding the raw arrays fails:
# error:operands could not be broadcast together with shapes (2,4) (3,4)
# print(a+b)

# Give each array its own axis: a becomes (1, 2, 4) and b becomes (3, 1, 4).
a_row = a.reshape(1, 2, 4)
b_col = b.reshape(1, 3, 4).transpose((1, 0, 2))

# Prints: (3, 1, 4)
print(b_col.shape)

# Broadcasting pairs every row of b with every row of a, giving shape
# (3, 2, 4). Prints:
# [[[ 6  4  6  8]
#   [ 7  7 10 10]]
#  [[ 3  9 10 10]
#   [ 4 12 14 12]]
#  [[10  3  5  9]
#   [11  6  9 11]]]
print(a_row + b_col)
2._filter_boxes(boxes,min_size)
过滤尺寸小于min_size的proposal,并返回相应索引,被proposal_layer(...)函数调用
# 过滤尺寸小于min_size的proposal
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1 # proposal的宽
hs = boxes[:, 3] - boxes[:, 1] + 1 # proposal的高
# 将尺寸大于最低要求的proposal对应索引存入keep返回
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
3._filter_irregular_boxes(boxes, min_ratio = 0.2, max_ratio = 5)
过滤纵横比不在规定区间的proposal,并返回相应索引,被proposal_layer(...)函数注释调用
# 过滤纵横比<0.2或>5的proposal
def _filter_irregular_boxes(boxes, min_ratio = 0.2, max_ratio = 5):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
rs = ws / hs
keep = np.where((rs <= max_ratio) & (rs >= min_ratio))[0]
return keep