本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者吴疆--------------
------点击此处链接至博客园原文------
定义函数与roi_data_layer/roidb.py类似
1.prepare_roidb(imdb)
给imdb.roidb增加'info_boxes'字段信息,shape为N*18,18表示(cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target),未见调用
get_boxes_grid(image_height, image_width)(由C编译)未知函数意义
为何imdb.roidb中既有gt roi又有一般的roi???ex_rois和gt_rois???
默认无cfg.TRAIN.SCALE_MAPPING值,将报错,该函数并未被执行
# 为imdb.roidb增加'info_boxes'字段信息
# 包含(cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)共18列
def prepare_roidb(imdb):
    """Attach an 'info_boxes' array (and the image path) to every roidb entry.

    If ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl`` exists, the
    pickled roidb is loaded into ``imdb._roidb`` and the function returns
    immediately without recomputing anything.

    Otherwise, for every image index ``i`` the entry ``imdb.roidb[i]`` is
    modified in place:

    * ``'image'`` is set to ``imdb.image_path_at(i)``.
    * ``'info_boxes'`` is set to a float32 array with 18 columns and one row
      per positive grid box (over all scales in ``cfg.TRAIN.SCALES``):

          col 0-1    cx, cy            (as returned by get_boxes_grid)
          col 2      scale_ind
          col 3-6    grid box
          col 7      scale_ind_map     (cfg.TRAIN.SCALE_MAPPING[scale_ind])
          col 8-11   grid box * scale_map / scale
          col 12     gt_label
          col 13     gt_sublabel       (never written here, stays 0)
          col 14-17  regression targets from _compute_targets

      Images whose ``'boxes'`` array has no rows get an empty (0, 18) array.

    A grid box counts as positive at a scale when the rescaled box it
    overlaps most has a class label k >= 1 and that overlap is at least
    ``cfg.TRAIN.FG_THRESH``.

    Returns:
        None.

    Raises:
        Whatever ``cfg.TRAIN.SCALE_MAPPING`` lookup raises when that option is
        not configured; it is only touched once a positive box is found.
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE: unpickling can execute arbitrary code, so the cache directory
        # must be trusted.
        # NOTE(review): nothing in this block writes cache_file -- confirm the
        # cache is produced elsewhere, otherwise this branch never triggers.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        # Single-argument call form prints the same text on Python 2 and 3.
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return

    roidb = imdb.roidb
    # Visit every image of the dataset.
    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        image_path = imdb.image_path_at(i)
        entry['image'] = image_path

        boxes = entry['boxes']
        labels = entry['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            entry['info_boxes'] = info_boxes
            continue

        # PIL reports size as (width, height).
        image_width, image_height = PIL.Image.open(image_path).size[:2]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # np.float64 is the dtype the removed alias np.float referred to.
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            # Best overlap per grid box, which box produced it, and its class.
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # Positive grid boxes, grouped class by class (class 0 skipped).
            is_fg = max_overlaps >= cfg.TRAIN.FG_THRESH
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) & is_fg)[0])

            if len(fg_inds) == 0:
                continue

            gt_inds = argmax_overlaps[fg_inds]
            fg_boxes = boxes_grid[fg_inds, :]
            # Bounding-box regression targets for the positive grid boxes.
            gt_targets = _compute_targets(fg_boxes, boxes_rescaled[gt_inds, :])

            # Scale mapping; fails here if TRAIN.SCALE_MAPPING is not set.
            scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
            scale_map = cfg.TRAIN.SCALES[scale_ind_map]

            # One row per positive box; see the docstring for the columns.
            info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
            info_box[:, 0] = cx[fg_inds]
            info_box[:, 1] = cy[fg_inds]
            info_box[:, 2] = scale_ind
            info_box[:, 3:7] = fg_boxes
            info_box[:, 7] = scale_ind_map
            info_box[:, 8:12] = fg_boxes * scale_map / scale
            info_box[:, 12] = labels[gt_inds]
            info_box[:, 14:] = gt_targets
            info_boxes = np.vstack((info_boxes, info_box))

        entry['info_boxes'] = info_boxes
2.add_bbox_regression_targets(roidb)
规范化目标回归值,并返回目标回归值的均值和标准差供测试阶段使用,未见调用
# 规范化目标回归值,并返回目标回归值的均值和标准差供测试阶段使用
def add_bbox_regression_targets(roidb):
    """Normalize the regression targets stored in every entry's 'info_boxes'.

    For each class k >= 1 the mean and standard deviation of the four target
    columns (columns 14-17 of 'info_boxes', selected by the class label in
    column 12) are computed over the whole roidb. The targets are then
    centred and scaled in place.

    Args:
        roidb: non-empty sequence of dict entries. Each entry must hold an
            'info_boxes' array; the first entry must also hold 'gt_overlaps',
            whose column count is taken as the number of classes.

    Returns:
        (means, stds): two flat arrays of length ``num_classes * 4``. The
        caller needs them to un-normalize predictions. Rows of class 0 and of
        classes without samples are 0.

    Raises:
        AssertionError: if roidb is empty or 'info_boxes' is missing from the
            first entry.
    """
    assert len(roidb) > 0
    assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'

    # Infer number of classes from the number of columns in gt_overlaps
    num_classes = roidb[0]['gt_overlaps'].shape[1]

    # Accumulators for var(x) = E(x^2) - E(x)^2; cfg.EPS is added so that
    # classes without samples do not divide by an exact zero count.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))

    for entry in roidb:
        info = entry['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(info[:, 12] == cls)[0]
            if cls_inds.size > 0:
                cls_targets = info[cls_inds, 14:]
                class_counts[cls] += cls_inds.size
                sums[cls, :] += cls_targets.sum(axis=0)
                squared_sums[cls, :] += (cls_targets ** 2).sum(axis=0)

    means = sums / class_counts
    # Rounding can push E(x^2) - E(x)^2 slightly below zero; clamp it so the
    # square root cannot become NaN.
    variances = squared_sums / class_counts - means ** 2
    stds = np.sqrt(np.maximum(variances, 0.0))
    # Divide each column only by a non-zero std; a zero std in any single
    # column must not turn the targets into inf/NaN.
    safe_stds = np.where(stds > 0, stds, 1.0)

    # Normalize targets in place.
    for entry in roidb:
        info = entry['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(info[:, 12] == cls)[0]
            if cls_inds.size == 0:
                continue
            info[cls_inds, 14:] -= means[cls, :]
            info[cls_inds, 14:] /= safe_stds[cls, :]

    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    return means.ravel(), stds.ravel()
3._compute_targets(ex_rois, gt_rois)
根据ex_rois和gt_rois计算回归目标值,与bbox_transform.py中bbox_transform(ex_rois, gt_rois)函数类似,被prepare_roidb(...)函数调用
# 计算回归目标值,与bbox_transform.py中函数类似
def _compute_targets(ex_rois, gt_rois):
    """Return bounding-box regression targets that map ex_rois onto gt_rois.

    Both inputs are indexed as 2-D arrays whose columns 0..3 are used as
    (x1, y1, x2, y2). The result is a float32 array with one row per ex_roi
    and the columns (dx, dy, dw, dh): centre offsets divided by the ex_roi
    size, and the log of the size ratios.
    """
    def _size_and_centre(rois):
        # cfg.EPS is added to each extent (presumably a small positive
        # constant so that zero-size boxes do not divide by zero -- confirm).
        width = rois[:, 2] - rois[:, 0] + cfg.EPS
        height = rois[:, 3] - rois[:, 1] + cfg.EPS
        centre_x = rois[:, 0] + 0.5 * width
        centre_y = rois[:, 1] + 0.5 * height
        return width, height, centre_x, centre_y

    ex_w, ex_h, ex_cx, ex_cy = _size_and_centre(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_centre(gt_rois)

    columns = (
        (gt_cx - ex_cx) / ex_w,
        (gt_cy - ex_cy) / ex_h,
        np.log(gt_w / ex_w),
        np.log(gt_h / ex_h),
    )

    deltas = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
    for col, values in enumerate(columns):
        deltas[:, col] = values
    return deltas