本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者吴疆--------------
------点击此处链接至博客园原文------
与roi_data_layer/minibatch.py类似,该函数可能并未执行
"""Compute minibatch blobs for training a Fast R-CNN network."""
1.get_minibatch(roidb, num_classes)
更新roidb[i]'info_boxes'字段(未知内容,18是什么意思)、增加'data'(图像数据blob)和'parameters'字段(相关参数,含num_scale 图像缩放尺度数量、num_aspect 使用纵横比数量、cfg.TRAIN.SCALES、cfg.TRAIN.SCALE_MAPPING、cfg.TRAIN.ASPECT_HEIGHTS、cfg.TRAIN.ASPECT_WIDTHS,后面3个值均无应该会报错,也有可能该函数并未执行),未见调用
# Builds the 'data', 'info_boxes' and 'parameters' blobs for one minibatch.
def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Args:
        roidb: sequence of per-image dicts. Each entry must provide an
            'info_boxes' 2-D array (stacked onto an 18-column blob) in
            addition to whatever _get_image_blob reads from it.
        num_classes: not used by this function; kept so the signature
            stays compatible with existing callers.

    Returns:
        dict with three entries:
            'data': image blob returned by _get_image_blob(roidb)
            'info_boxes': all per-image 'info_boxes' rows stacked vertically
            'parameters': 1-D float32 array laid out as
                [num_scale, num_aspect, SCALES..., SCALE_MAPPING...,
                 ASPECT_HEIGHTS..., ASPECT_WIDTHS...]
    """
    num_images = len(roidb)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)

    # Get the input image blob
    im_blob = _get_image_blob(roidb)

    # Build the box information blob.
    # NOTE(review): the column count 18 is hard-coded and its meaning is not
    # visible from this function -- confirm against the code that produces
    # 'info_boxes'.
    num_scale = len(cfg.TRAIN.SCALES)
    stacked = [np.zeros((0, 18), dtype=np.float32)]
    for i, entry in enumerate(roidb):
        # Work on a copy: the offsets below are added in place, and mutating
        # the roidb entry itself would make them accumulate every time the
        # same image is placed at a non-zero position in a later minibatch.
        info_boxes = np.array(entry['info_boxes'], copy=True)
        # Shift columns 2 and 7 by the number of scale slots taken by the
        # preceding images.
        # NOTE(review): presumably these two columns are batch indices
        # (the original comment says "change the batch index") -- confirm.
        info_boxes[:, 2] += i * num_scale
        info_boxes[:, 7] += i * num_scale
        stacked.append(info_boxes)
    # Stack once instead of re-copying the growing blob on every iteration.
    info_boxes_blob = np.vstack(stacked)

    # Build the parameter blob.
    # NOTE(review): the original author's notes say SCALE_MAPPING,
    # ASPECT_HEIGHTS and ASPECT_WIDTHS are not defined in the default config;
    # if so, the lookups below fail with an AttributeError -- confirm that
    # cfg.TRAIN provides them before relying on this function.
    num_aspect = len(cfg.TRAIN.ASPECTS)
    aspect_start = 2 + 2 * num_scale
    num = aspect_start + 2 * num_aspect
    parameters_blob = np.zeros((num), dtype=np.float32)
    parameters_blob[0] = num_scale
    parameters_blob[1] = num_aspect
    parameters_blob[2:2 + num_scale] = cfg.TRAIN.SCALES
    parameters_blob[2 + num_scale:aspect_start] = cfg.TRAIN.SCALE_MAPPING
    parameters_blob[aspect_start:aspect_start + num_aspect] = \
        cfg.TRAIN.ASPECT_HEIGHTS
    parameters_blob[aspect_start + num_aspect:num] = cfg.TRAIN.ASPECT_WIDTHS

    blobs = {'data': im_blob,
             'info_boxes': info_boxes_blob,
             'parameters': parameters_blob}
    return blobs
2._get_image_blob(roidb)
对传入的roidb中图像减均值、缩放处理,得到处理后的图像存储到processed_ims列表中,将其作为参数传入im_list_to_blob(...)函数中返回图像数据blob,被get_minibatch(...)函数调用,构成blobs中的‘data’字段
与roi_data_layer/minibatch.py(仅使用了target_size单一尺度进行缩放)此函数的区别在于缩放使用了多尺度TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0),为何要使用多尺度?未见调用
def _get_image_blob(roidb):
    """Build an input blob from the roidb images at every base scale.

    For each roidb entry the image at entry['image'] is loaded, mirrored
    horizontally when entry['flipped'] is truthy, converted to float32 and
    has cfg.PIXEL_MEANS subtracted. One resized copy is then produced per
    factor in cfg.TRAIN.SCALES_BASE.

    Args:
        roidb: sequence of dicts with 'image' (path) and 'flipped' keys.

    Returns:
        Whatever im_list_to_blob returns for the list of resized images
        (ordered image by image, and by scale within each image).

    Raises:
        IOError: if an image file cannot be read.
    """
    # Resized images, collected in order and handed to im_list_to_blob.
    processed_ims = []
    for entry in roidb:
        im = cv2.imread(entry['image'])
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of an obscure error later.
        if im is None:
            raise IOError('Failed to read image: {}'.format(entry['image']))
        if entry['flipped']:
            im = im[:, ::-1, :]

        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS

        # Build the image pyramid: one rescaled copy per base scale factor.
        for im_scale in cfg.TRAIN.SCALES_BASE:
            processed_ims.append(
                cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                           interpolation=cv2.INTER_LINEAR))

    # Create a blob to hold the input images
    return im_list_to_blob(processed_ims)
3._project_im_rois(im_rois, im_scale_factor)
对rois进行缩放,未见调用
def _project_im_rois(im_rois, im_scale_factor):
    """Project image RoIs into the rescaled training image.

    Args:
        im_rois: array-like of RoI coordinates in the original image.
        im_scale_factor: scalar (or broadcastable array) the coordinates
            are multiplied by.

    Returns:
        ndarray holding the scaled coordinates; the input is not modified.
    """
    # Coerce to an ndarray so that plain Python lists are scaled
    # element-wise rather than being repeated by the `*` operator.
    return np.asarray(im_rois) * im_scale_factor
4._get_bbox_regression_labels(bbox_target_data, num_classes)
扩充N*5 bbox_targets to N*(4*num_classes)仅某类有非0的回归目标值(网络接受的shape)、构造N*(4*num_classes)的bbox_loss_weights,返回 bbox_targets和bbox_loss_weights,未见调用
# Expands the N x 5 compact targets into N x (4 * num_classes) targets in
# which only the labelled class has non-zero values, and builds the matching
# N x (4 * num_classes) loss-weight array.
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact bounding-box regression targets.

    Bounding-box regression targets are stored in a compact form in the
    roidb. This function expands those targets into the 4-of-4*K
    representation used by the network (i.e. only one class has non-zero
    targets). The loss weights are similarly expanded.

    Args:
        bbox_target_data (ndarray): N x 5 array; column 0 is the class
            label and columns 1-4 are the regression targets.
        num_classes (int): number of classes K.

    Returns:
        bbox_targets (ndarray): N x 4K float32 blob of regression targets
        bbox_loss_weights (ndarray): N x 4K float32 blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    # Rows labelled 0 (background) keep all-zero targets and weights.
    for ind in np.where(clss > 0)[0]:
        # The label is read from a (typically float) array; slice bounds
        # must be integers, so cast before computing the column range.
        start = 4 * int(clss[ind])
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        # Only the four columns of the labelled class get weight 1.
        bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.]
    return bbox_targets, bbox_loss_weights
5._vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob)
绘制roi矩形框,打印相关信息,未见调用
# Draws each RoI rectangle on its source image and prints its labels.
def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob):
    """Visualize a mini-batch for debugging.

    For every row of rois_blob, the source image is taken from im_blob,
    cfg.PIXEL_MEANS is added back, the channel order is reversed, and the
    RoI is drawn as a red rectangle. The class and subclass labels of the
    row are printed, and a matplotlib window is shown per RoI.

    Args:
        im_blob: 4-D image blob indexed as [image, channel, row, column]
            (it is transposed with (1, 2, 0) per image below).
        rois_blob: 2-D array with one RoI per row; column 0 is the index of
            the source image in im_blob.
        labels_blob: per-RoI class labels, indexed like rois_blob rows.
        sublabels_blob: per-RoI subclass labels, indexed like rois_blob rows.
    """
    import matplotlib.pyplot as plt
    for i in range(rois_blob.shape[0]):
        rois = rois_blob[i, :]
        # The index is stored in a (typically float) array; array indices
        # must be integers, so cast before indexing im_blob.
        im_ind = int(rois[0])
        # NOTE(review): coordinates are read from column 2 onward, so
        # column 1 is skipped, although the original comment describes the
        # row as 1 index + 4 coordinates -- confirm the row layout.
        roi = rois[2:]
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        im += cfg.PIXEL_MEANS
        # Reverse the channel order for display.
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        subcls = sublabels_blob[i]
        plt.imshow(im)
        # Single-argument print: emits the same text on Python 2 and 3.
        print(' '.join(['class: ', str(cls), ' subclass: ', str(subcls)]))
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
        )
        plt.show()