赞
踩
算法流程
class SSDAugmentation(object):
    """Joint data augmentation for SSD training.

    Image, boxes and labels must be transformed in lockstep, so the whole
    chain is wrapped in one callable (transform classes are the ones from
    https://github.com/amdegroot/ssd.pytorch).
    """

    def __init__(self, size=300, mean=(104, 117, 123)):
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),         # pixel values: int -> float
            ToAbsoluteCoords(),        # box coords: fractional -> absolute pixels
            PhotometricDistort(),      # photometric (color/lighting) distortions
            Expand(self.mean),         # randomly enlarge canvas; image sits at bottom-right
            RandomSampleCrop(),        # random crop of the image
            RandomMirror(),            # random horizontal flip
            ToPercentCoords(),         # box coords: absolute pixels -> fractional
            Resize(self.size),         # resize to a fixed size (default 300x300)
            SubtractMeans(self.mean),  # per-channel mean subtraction
        ])

    def __call__(self, img, boxes, labels):
        """Apply the augmentation chain to (img, boxes, labels)."""
        return self.augment(img, boxes, labels)
def vgg(cfg, i, batch_norm=False):
    """Build the truncated VGG backbone as a flat list of layers.

    Args:
        cfg: list mixing ints (conv output channels) and markers:
            'M' = max-pool with floor rounding, 'C' = max-pool with ceil
            rounding (keeps 75 -> 38 in SSD300).
        i: number of input channels.
        batch_norm: insert BatchNorm2d after every conv when True.

    Returns:
        List of nn.Module layers, ending with pool5/conv6/conv7.
    """
    layers = []
    channels = i
    for v in cfg:
        if v == 'M':
            # spatial size: (floor(W/2), floor(H/2))
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        elif v == 'C':
            # spatial size: (ceil(W/2), ceil(H/2))
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
        else:
            # 3x3 conv with padding 1 keeps (W, H) unchanged
            conv = nn.Conv2d(channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers.extend([conv, nn.BatchNorm2d(v), nn.ReLU(inplace=True)])
            else:
                layers.extend([conv, nn.ReLU(inplace=True)])
            channels = v
    # pool5 enlarges the receptive field while keeping the feature map size
    pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
    # conv6 uses dilation 6 for a large receptive field; conv7 is a 1x1 conv
    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    layers.extend([pool5, conv6, nn.ReLU(inplace=True),
                   conv7, nn.ReLU(inplace=True)])
    return layers
# input size: (300, 300, 3)
base = [64, 64, # Conv1 -> output size: (300, 300, 64)
'M', 128, 128, # Conv2 -> output size: (150, 150, 128)
'M', 256, 256, 256, # Conv3 -> output size: (75, 75, 256)
'C', 512, 512, 512, # Conv4 -> output size: (38, 38, 512)  ('C' = ceil-mode pool: 75 -> 38)
'M', 512, 512, 512] # Conv5 -> output size: (19, 19, 512)
# Conv6 -> output size: (19, 19, 1024)
# Conv7 -> output size: (19, 19, 1024)
vgg_base = vgg(base, 3) # build the VGG backbone (3 input channels: RGB)
def add_extras(cfg, i, batch_norm=False):
    """Extra feature-scaling conv layers appended after the VGG backbone.

    In cfg, an 'S' marker means "the next conv downsamples": the conv
    built at that position uses stride 2 and takes its output channel
    count from the entry *after* the 'S'. Kernel sizes alternate
    1x1 / 3x3 via a boolean toggle.

    Args:
        cfg: list of ints (output channels) and 'S' markers.
        i: number of input channels of the first extra conv.
        batch_norm: accepted for API symmetry; not used here.

    Returns:
        List of nn.Conv2d layers (no activations included).
    """
    layers = []
    in_channels = i
    use_3x3 = False  # flips after every conv: 1x1, 3x3, 1x1, ...
    for k, v in enumerate(cfg):
        if in_channels != 'S':
            if v == 'S':
                # stride-2 downsampling conv; channels come from the next entry
                layers.append(nn.Conv2d(in_channels, cfg[k + 1],
                                        kernel_size=(1, 3)[use_3x3],
                                        stride=2, padding=1))
            else:
                layers.append(nn.Conv2d(in_channels, v,
                                        kernel_size=(1, 3)[use_3x3]))
            use_3x3 = not use_3x3
        in_channels = v
    return layers
# input size: (19, 19, 1024)
extras = [256, 'S', 512, # Conv8 -> output size: (10, 10, 512)
128, 'S', 256, # Conv9 -> output size: (5, 5, 256)
128, 256, # Conv10 -> output size: (3, 3, 256)
128, 256] # Conv11 -> output size: (1, 1, 256)
conv_extras = add_extras(extras, 1024) # build the extra layers; 1024 channels come from conv7
分类与位置卷积层
一共生成了 8732 个预选框
def multibox(vgg, extra_layers, cfg, num_classes):
    """Attach localization and classification heads to the source feature maps.

    Args:
        vgg: backbone layer list; indices 21 and -2 select the Conv4 and
            Conv7 outputs used as the first two source feature maps.
        extra_layers: extra conv list; every second layer (odd indices)
            produces a source feature map.
        cfg: number of prior boxes per location for each source map.
        num_classes: number of object classes (including background).

    Returns:
        Tuple (vgg, extra_layers, (loc_layers, conf_layers)).
    """
    loc_layers, conf_layers = [], []
    vgg_source = [21, -2]  # indices of Conv4 and Conv7 inside vgg
    for k, idx in enumerate(vgg_source):
        src_channels = vgg[idx].out_channels
        # localization head: 4 offsets per prior box
        loc_layers.append(nn.Conv2d(src_channels, cfg[k] * 4,
                                    kernel_size=3, padding=1))
        # classification head: one score per class per prior box
        conf_layers.append(nn.Conv2d(src_channels, cfg[k] * num_classes,
                                     kernel_size=3, padding=1))
    # every second extra layer feeds a head; cfg index continues from 2
    for k, layer in enumerate(extra_layers[1::2], 2):
        loc_layers.append(nn.Conv2d(layer.out_channels, cfg[k] * 4,
                                    kernel_size=3, padding=1))
        conf_layers.append(nn.Conv2d(layer.out_channels, cfg[k] * num_classes,
                                     kernel_size=3, padding=1))
    return vgg, extra_layers, (loc_layers, conf_layers)
mbox = [4, 6, 6, 6, 4, 4] # number of boxes per feature map location
# vgg_base and conv_extras are the layer lists of the VGG backbone and the extra conv layers
# NOTE(review): num_classes is not defined anywhere in this snippet — it must
# come from surrounding code (e.g. 21 for Pascal VOC); confirm before running.
base_, extras_, head_ = multibox(vgg_base, conv_extras, mbox, num_classes)
确定每一个特征图 PriorBox 的具体大小
深浅层特征融合是一种常用的解决浅层语义信息不足的方法,通常有 3 种计算方法:
DSSD 的深浅层特征融合
DSSD 的预测网络
Motivation
彩虹式特征融合
共享分类网络分支的卷积权重
RefineDet 的网络结构
ARM (Anchor Refinement Module)
RefineDet 通过 ARM 模块对 Anchor 位置进行了粗略修正,这也是其名字中 “Refine” 的由来
TCB (Transfer Connection Block)
ODM (Object Detection Module)
在具体的代码实现时,为了计算方便,在 ARM 处并没有直接抑制掉得分很低的 Anchor,而是在随后的 ODM 中将两部分的得分综合考虑,完成 Anchor 的分类。这种操作也保证了全网络只有一次预选框筛选,从这个角度来看,RefineDet 是一阶的
Motivation
RFB 模块
结论
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。