赞
踩
YOLO-v3网络由步幅(stride)为2的卷积层、跳跃连接层和上采样层组成,不含池化层。网络以一幅416×416的图像作为输入,并在三个不同尺度上各给出一个YOLO输出。
目录
新建一个名为myutils.py的文件,并将以下代码写入其中;它是一个辅助模块,用于解析配置文件并辅助构建模型。
- import torch
- from torch import nn
-
-
# Default device for the helpers in this module: the first CUDA GPU when one
# is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
-
def parse_model_config(path2file):
    """Parse a Darknet-style ``.cfg`` file into a list of block dictionaries.

    Each ``[section]`` header starts a new dictionary whose ``"type"`` entry
    is the section name; every following ``key=value`` line is stored in that
    dictionary.  Keys and values are kept as stripped strings; converting
    them to numbers is left to the caller.

    Args:
        path2file: Path of the configuration file to read.

    Returns:
        A list of dictionaries, one per section, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-header line contains no ``=``.
        IndexError: If a ``key=value`` line appears before any section header.
    """
    # The context manager closes the file even when reading fails.
    with open(path2file, "r") as cfg_file:
        raw_lines = cfg_file.read().split("\n")

    # Strip first, then filter: this way whitespace-only lines and indented
    # comments are dropped instead of reaching the key/value parser.
    stripped = (line.strip() for line in raw_lines)
    lines = [line for line in stripped if line and not line.startswith("#")]

    blocks_list = []
    for line in lines:
        if line.startswith("["):
            blocks_list.append({"type": line[1:-1].strip()})
        else:
            # Split on the first "=" only so values may contain "=" themselves.
            key, value = line.split("=", 1)
            blocks_list[-1][key.strip()] = value.strip()

    return blocks_list
-
-
def create_layers(blocks_list):
    """Translate parsed config blocks into a list of PyTorch modules.

    The first block holds the network hyper-parameters; every following
    block becomes one ``nn.Sequential`` in the returned ``nn.ModuleList``.
    Route and shortcut blocks only receive an ``EmptyLayer`` placeholder.

    Args:
        blocks_list: Block dictionaries whose values are strings, as produced
            by ``parse_model_config``.

    Returns:
        A tuple ``(hyperparams, module_list)`` where ``hyperparams`` is the
        first block and ``module_list`` holds one ``nn.Sequential`` per
        remaining block.
    """
    hyperparams = blocks_list[0]
    # Entry 0 is the input image depth; entry i + 1 is the number of output
    # channels of layer i.
    channels_list = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for layer_ind, layer_dict in enumerate(blocks_list[1:]):
        modules = nn.Sequential()
        layer_type = layer_dict["type"]
        # Blocks that do not set their own width (e.g. "upsample", "yolo")
        # keep the channel count of the previous layer.
        filters = channels_list[-1]

        if layer_type == "convolutional":
            filters = int(layer_dict["filters"])
            kernel_size = int(layer_dict["size"])
            pad = (kernel_size - 1) // 2
            # Config values are strings and "0" is truthy, so convert to a
            # real boolean before deciding on batch norm and the conv bias.
            bn = bool(int(layer_dict.get("batch_normalize", 0)))

            conv2d = nn.Conv2d(
                in_channels=channels_list[-1],
                out_channels=filters,
                kernel_size=kernel_size,
                stride=int(layer_dict["stride"]),
                padding=pad,
                bias=not bn,
            )
            modules.add_module("conv_{0}".format(layer_ind), conv2d)

            if bn:
                # NOTE(review): in PyTorch, momentum is the weight given to
                # the newest batch statistic; confirm 0.9 is intended.
                bn_layer = nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)
                modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)

            if layer_dict["activation"] == "leaky":
                activn = nn.LeakyReLU(0.1)
                modules.add_module("leaky_{0}".format(layer_ind), activn)

        elif layer_type == "upsample":
            stride = int(layer_dict["stride"])
            upsample = nn.Upsample(scale_factor=stride)
            modules.add_module("upsample_{}".format(layer_ind), upsample)

        elif layer_type == "shortcut":
            backwards = int(layer_dict["from"])
            # Index the per-layer widths without the input-depth entry.
            filters = channels_list[1:][backwards]
            modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "route":
            layers = [int(x) for x in layer_dict["layers"].split(",")]
            layer_channels = channels_list[1:]
            # The routed feature maps are concatenated along the channel
            # axis, so their widths add up.
            filters = sum(layer_channels[ind] for ind in layers)
            modules.add_module("route_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "yolo":
            flat = [int(a) for a in layer_dict["anchors"].split(",")]
            anchors = [(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)]

            # "mask" selects which of the listed anchors this head uses.
            mask = [int(m) for m in layer_dict["mask"].split(",")]
            anchors = [anchors[i] for i in mask]

            num_classes = int(layer_dict["classes"])
            img_size = int(hyperparams["height"])

            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module("yolo_{}".format(layer_ind), yolo_layer)

        module_list.append(modules)
        channels_list.append(filters)

    return hyperparams, module_list
-
-
-
class EmptyLayer(nn.Module):
    """Parameter-free placeholder module that defines no forward computation."""

    def __init__(self):
        super().__init__()
-
-
class YOLOLayer(nn.Module):
    """Detection head that decodes one raw YOLO feature map into boxes.

    Args:
        anchors: Sequence of ``(width, height)`` anchor pairs in image pixels.
        num_classes: Number of object classes.
        img_dim: Side length of the network input image in pixels.
    """

    def __init__(self, anchors, num_classes, img_dim=416):
        super().__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.img_dim = img_dim
        # 0 never matches a real grid, so the first forward pass builds it.
        self.grid_size = 0
        self._grid_device = None

    def forward(self, x_in):
        """Decode a raw feature map.

        Args:
            x_in: Tensor viewed as ``(batch, num_anchors * (num_classes + 5),
                grid, grid)``.

        Returns:
            Tensor of shape ``(batch, num_anchors * grid * grid,
            num_classes + 5)`` holding box (x, y, w, h), objectness score and
            class scores for every anchor in every cell.
        """
        batch_size = x_in.size(0)
        grid_size = x_in.size(2)

        prediction = x_in.view(
            batch_size, self.num_anchors, self.num_classes + 5, grid_size, grid_size
        )
        # Move the per-box attributes to the last axis.
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        obj_score = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Rebuild the cached grid when its size OR its device no longer
        # matches the input; the cache is plain tensors, which Module.to()
        # does not move.
        cached_device = getattr(self, "_grid_device", None)
        if grid_size != self.grid_size or cached_device != x_in.device:
            self.compute_grid_offsets(
                grid_size, cuda=x_in.is_cuda, device=x_in.device
            )

        pred_boxes = self.transform_outputs(prediction)

        output = torch.cat(
            (
                pred_boxes.view(batch_size, -1, 4),
                obj_score.view(batch_size, -1, 1),
                pred_cls.view(batch_size, -1, self.num_classes),
            ),
            -1,
        )
        return output

    def compute_grid_offsets(self, grid_size, cuda=True, device=None):
        """Cache the cell offsets and anchors for a ``grid_size`` grid.

        Args:
            grid_size: Number of cells along each side of the grid.
            cuda: Used only when ``device`` is None; selects ``cuda:0`` if
                CUDA is available, otherwise the CPU.
            device: Device on which to create the cached tensors.
        """
        if device is None:
            use_cuda = cuda and torch.cuda.is_available()
            device = torch.device("cuda:0" if use_cuda else "cpu")

        self.grid_size = grid_size
        self.stride = self.img_dim / self.grid_size

        cells = torch.arange(grid_size, device=device)
        # grid_x[0, 0, i, j] == j and grid_y[0, 0, i, j] == i.
        self.grid_x = cells.repeat(1, 1, grid_size, 1).type(torch.float32)
        self.grid_y = (
            cells.repeat(1, 1, grid_size, 1).transpose(3, 2).type(torch.float32)
        )
        self._grid_device = self.grid_x.device

        # Express the pixel-sized anchors in grid-cell units.
        scaled_anchors = [
            (a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors
        ]
        self.scaled_anchors = torch.tensor(scaled_anchors, device=device)

        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

    def transform_outputs(self, prediction):
        """Turn raw box attributes into boxes scaled by the layer stride.

        Args:
            prediction: Tensor whose last axis starts with the raw
                (x, y, w, h) values.

        Returns:
            Tensor with a last axis of size 4: centre x, centre y, width and
            height, multiplied by ``self.stride``.
        """
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height

        # zeros_like already matches the dtype and device of its argument.
        pred_boxes = torch.zeros_like(prediction[..., :4])
        # NOTE(review): `.data` detaches these values from autograd, so the
        # decoded boxes carry no gradient; confirm this is intended when the
        # boxes feed a training loss.
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        return pred_boxes * self.stride

使用辅助函数parse_model_config解析配置文件,并查看前两个配置块。
from myutils import parse_model_config

# Parse the YOLOv3 configuration and show its first two blocks.
path2config = "./config/yolov3.cfg"
blocks_list = parse_model_config(path2config)
# A bare expression only displays a value inside a notebook; print() shows
# the blocks when this runs as a script as well.
print(blocks_list[:2])
基于解析的配置文件创建PyTorch模块,调用 create_layers 辅助函数进行转换并获取 PyTorch 模块的列表
from myutils import create_layers

# Convert the parsed blocks into PyTorch modules, then show the module list
# followed by the hyper-parameters, one per line.
hy_pa, m_l = create_layers(blocks_list)
print(m_l, hy_pa, sep="\n")
- from torch import nn
-
class Darknet(nn.Module):
    """YOLOv3 network assembled from a Darknet configuration file.

    Args:
        config_path: Path of the ``.cfg`` file describing the network.
        img_size: Stored as ``self.img_size``; not used by ``forward``.
    """

    def __init__(self, config_path, img_size=416):
        super().__init__()
        self.blocks_list = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_layers(self.blocks_list)
        self.img_size = img_size

    def forward(self, x):
        """Run the network.

        Args:
            x: Input image batch.

        Returns:
            A tuple ``(yolo_out_cat, yolo_outputs)``: the outputs of all YOLO
            blocks concatenated along dimension 1, and the list of the
            individual outputs.
        """
        # Every block's output is kept so route/shortcut blocks can refer
        # back to earlier layers by index.
        layer_outputs, yolo_outputs = [], []

        # blocks_list[0] holds the hyper-parameters and has no module.
        for block, module in zip(self.blocks_list[1:], self.module_list):
            block_type = block["type"]

            if block_type in ("convolutional", "upsample", "maxpool"):
                x = module(x)
            elif block_type == "shortcut":
                # Residual connection: previous output plus the referenced one.
                layer_ind = int(block["from"])
                x = layer_outputs[-1] + layer_outputs[layer_ind]
            elif block_type == "yolo":
                x = module[0](x)
                yolo_outputs.append(x)
            elif block_type == "route":
                # Concatenate the referenced outputs along the channel axis.
                sources = [int(l_i) for l_i in block["layers"].split(",")]
                x = torch.cat([layer_outputs[ind] for ind in sources], 1)

            layer_outputs.append(x)

        yolo_out_cat = torch.cat(yolo_outputs, 1)
        return yolo_out_cat, yolo_outputs
-
# Build the network from the config file, move it to the target device and
# show its structure.
model = Darknet(path2config)
model = model.to(device)
print(model)

# Create a random dummy image batch of shape 1x3x416x416 on the target device.
dummy_img = torch.rand(1, 3, 416, 416).to(device)
# Run a forward pass without tracking gradients.
with torch.no_grad():
    # Call the module itself rather than .forward() so that any registered
    # hooks run as well.
    dummy_out_cat, dummy_out = model(dummy_img)
    # Shape of the concatenated YOLO output.
    print(dummy_out_cat.shape)
    # Shape of each individual YOLO output.
    print(dummy_out[0].shape, dummy_out[1].shape, dummy_out[2].shape)
YOLO通常使用组合损失函数:边界框坐标使用均方误差损失,目标置信度和类别使用二元交叉熵损失,最后将各项相加。
def get_loss_batch(output, targets, params_loss, opt=None):
    """Compute the combined YOLO loss of one batch and optionally optimise.

    Args:
        output: Sequence of YOLO outputs, each of shape
            ``(batch, num_anchors * grid * grid, 5 + num_classes)``.
        targets: Ground-truth tensor passed on to ``get_yolo_targets``.
        params_loss: Dictionary with the keys ``ignore_thres``,
            ``scaled_anchors``, ``mse_loss``, ``bce_loss``, ``num_yolos``,
            ``num_anchors``, ``obj_scale`` and ``noobj_scale``.
        opt: Optional optimiser; when given, one optimisation step is taken
            on the summed loss.

    Returns:
        The loss summed over all YOLO outputs, as a Python float.
    """
    # Loss settings.
    ignore_thres = params_loss["ignore_thres"]
    scaled_anchors = params_loss["scaled_anchors"]
    mse_loss = params_loss["mse_loss"]
    bce_loss = params_loss["bce_loss"]

    # YOLO head settings.
    num_yolos = params_loss["num_yolos"]
    num_anchors = params_loss["num_anchors"]
    obj_scale = params_loss["obj_scale"]
    noobj_scale = params_loss["noobj_scale"]

    loss = 0.0
    for yolo_ind in range(num_yolos):
        yolo_out = output[yolo_ind]
        batch_size, num_bbxs, _ = yolo_out.shape

        # num_bbxs == num_anchors * grid_size ** 2.  Rounding protects
        # against a float square root landing just below the integer.
        grid_size = int(round((num_bbxs / num_anchors) ** 0.5))

        # Reshape to (batch, num_anchors, grid, grid, 5 + num_classes).
        yolo_out = yolo_out.view(batch_size, num_anchors, grid_size, grid_size, -1)

        # Predicted boxes, converted to the same encoding as the targets.
        # NOTE(review): the YOLO layer in this file returns boxes multiplied
        # by its stride, while the anchors here are in grid units; confirm
        # the two are on the same scale.
        pred_boxes = yolo_out[:, :, :, :, :4]
        x, y, w, h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
        # Predicted objectness score.
        pred_conf = yolo_out[:, :, :, :, 4]
        # Predicted class probabilities.
        pred_cls_prob = yolo_out[:, :, :, :, 5:]

        yolo_targets = get_yolo_targets({
            "pred_cls_prob": pred_cls_prob,
            "pred_boxes": pred_boxes,
            "targets": targets,
            "anchors": scaled_anchors[yolo_ind],
            "ignore_thres": ignore_thres,
        })

        # Index with boolean masks; uint8 mask indexing is deprecated.
        obj_mask = yolo_targets["obj_mask"].bool()
        noobj_mask = yolo_targets["noobj_mask"].bool()
        # Target box encoding.
        tx = yolo_targets["tx"]
        ty = yolo_targets["ty"]
        tw = yolo_targets["tw"]
        th = yolo_targets["th"]
        # Target classes and target objectness.
        tcls = yolo_targets["tcls"]
        t_conf = yolo_targets["t_conf"]

        # Box regression loss, only where an object is assigned.
        loss_x = mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = mse_loss(h[obj_mask], th[obj_mask])

        # Objectness loss, weighted separately for object / no-object cells.
        loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
        loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
        loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj
        # Classification loss.
        loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])

        loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Back-propagate and update the weights only when an optimiser is given.
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()

    return loss.item()

def transform_bbox(bbox, anchors):
    """Encode boxes as in-cell offsets and log size ratios to the anchors.

    Args:
        bbox: Tensor of shape ``(batch, num_anchors, grid, grid, 4)`` holding
            x, y, w, h on its last axis.
        anchors: Tensor of shape ``(num_anchors, 2)`` holding anchor widths
            and heights.

    Returns:
        A tuple ``(x, y, w, h)`` of tensors of shape
        ``(batch, num_anchors, grid, grid)``.
    """
    x = bbox[:, :, :, :, 0]
    y = bbox[:, :, :, :, 1]
    w = bbox[:, :, :, :, 2]
    h = bbox[:, :, :, :, 3]

    # -1 lets the anchor axis follow the number of anchors supplied instead
    # of being fixed at three.
    anchor_w = anchors[:, 0].view((1, -1, 1, 1))
    anchor_h = anchors[:, 1].view((1, -1, 1, 1))

    # Keep only the fractional part of the centre coordinates.
    # NOTE(review): this is the in-cell offset only when bbox is expressed
    # in grid-cell units; confirm the caller's units.
    x = x - x.floor()
    y = y - y.floor()
    # Log ratio of each size to its anchor; the epsilon avoids log(0).
    w = torch.log(w / anchor_w + 1e-16)
    h = torch.log(h / anchor_h + 1e-16)
    return x, y, w, h
-
def get_yolo_targets(params):
    """Build the training targets for one YOLO output.

    Args:
        params: Dictionary with the keys
            ``pred_boxes`` (tensor ``(batch, num_anchors, grid, grid, 4)``),
            ``pred_cls_prob`` (tensor whose last axis has one entry per class),
            ``targets`` (tensor with one row per ground-truth box: batch
            index, class label, then x, y, w, h),
            ``anchors`` (tensor ``(num_anchors, 2)``) and
            ``ignore_thres`` (IoU above which a non-best anchor is excluded
            from the no-object loss).

    Returns:
        Dictionary with boolean masks ``obj_mask`` / ``noobj_mask``, float
        tensors ``tx``, ``ty``, ``tw``, ``th``, ``tcls``, and ``t_conf``
        (``obj_mask`` as float).
    """
    pred_boxes = params["pred_boxes"]
    pred_cls_prob = params["pred_cls_prob"]
    target = params["targets"]
    anchors = params["anchors"]
    ignore_thres = params["ignore_thres"]

    batch_size = pred_boxes.size(0)
    num_anchors = pred_boxes.size(1)
    grid_size = pred_boxes.size(2)
    num_cls = pred_cls_prob.size(-1)

    # Allocate on the predictions' device rather than a module-level global.
    device = pred_boxes.device

    size_t = (batch_size, num_anchors, grid_size, grid_size)
    # Boolean masks; indexing with uint8 masks is deprecated.
    obj_mask = torch.zeros(size_t, device=device, dtype=torch.bool)
    noobj_mask = torch.ones(size_t, device=device, dtype=torch.bool)
    tx = torch.zeros(size_t, device=device, dtype=torch.float32)
    ty = torch.zeros(size_t, device=device, dtype=torch.float32)
    tw = torch.zeros(size_t, device=device, dtype=torch.float32)
    th = torch.zeros(size_t, device=device, dtype=torch.float32)
    tcls = torch.zeros(size_t + (num_cls,), device=device, dtype=torch.float32)

    # Scale the target boxes to grid units.
    # (assumes the box columns are normalised to [0, 1]; TODO confirm)
    target_bboxes = target[:, 2:] * grid_size
    t_xy = target_bboxes[:, :2]
    t_wh = target_bboxes[:, 2:]
    t_x, t_y = t_xy.t()
    t_w, t_h = t_wh.t()

    # Cell containing each box centre.  Clamping keeps a centre lying
    # exactly on the far edge inside the grid instead of indexing past it.
    grid_i, grid_j = t_xy.long().clamp(0, grid_size - 1).t()

    # IoU of every anchor with every target, by width/height only, and the
    # best-matching anchor per target.
    iou_with_anchors = [get_iou_WH(anchor, t_wh) for anchor in anchors]
    iou_with_anchors = torch.stack(iou_with_anchors)
    best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)

    batch_inds, target_labels = target[:, :2].long().t()
    # The best anchor in the target's cell is responsible for the object.
    obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = True
    noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = False

    # Anchors that overlap a target well enough are not penalised as
    # background either.
    for ind, iou_wa in enumerate(iou_with_anchors.t()):
        noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = False

    # Centre offsets inside the cell.
    tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
    ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()

    # Log ratio of the target size to the matched anchor size.
    anchor_w = anchors[best_anchor_ind][:, 0]
    tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)

    anchor_h = anchors[best_anchor_ind][:, 1]
    th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)

    # One-hot class target.
    tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1

    output = {
        "obj_mask": obj_mask,
        "noobj_mask": noobj_mask,
        "tx": tx,
        "ty": ty,
        "tw": tw,
        "th": th,
        "tcls": tcls,
        "t_conf": obj_mask.float(),
    }
    return output
-
def get_iou_WH(wh1, wh2):
    """Return the IoU of one box size with each of several box sizes.

    Only widths and heights are compared, so the intersection is the
    product of the smaller width and the smaller height.

    Args:
        wh1: Indexable pair (width, height) of tensors.
        wh2: Tensor with one (width, height) row per box.

    Returns:
        Tensor with one IoU value per row of ``wh2``.
    """
    box_w, box_h = wh1[0], wh1[1]
    # Transpose so that row 0 holds all widths and row 1 all heights.
    others = wh2.t()
    other_w, other_h = others[0], others[1]

    overlap = torch.min(box_w, other_w) * torch.min(box_h, other_h)
    # The epsilon keeps the union non-zero for degenerate boxes.
    total = (box_w * box_h + 1e-16) + other_w * other_h - overlap
    return overlap / total

在训练数据上训练模型,并在验证数据上对其进行评估。训练过程遵循标准的小批量梯度下降流程(下文使用Adam优化器)。
def loss_epoch(model, params_loss, dataset_dl, sanity_check=False, opt=None):
    """Run one pass over a data loader and return the average loss.

    Args:
        model: Network returning ``(concatenated_output, yolo_outputs)``.
        params_loss: Loss settings forwarded to ``get_loss_batch``.
        dataset_dl: Data loader yielding ``(images, targets, _)`` batches and
            exposing ``.dataset``.
        sanity_check: When ``True``, stop after the first batch.
        opt: Optional optimiser forwarded to ``get_loss_batch``.

    Returns:
        The summed batch losses divided by the number of samples in
        ``dataset_dl.dataset``.

    Raises:
        ZeroDivisionError: If the dataset is empty.
    """
    running_loss = 0.0
    len_data = len(dataset_dl.dataset)

    for xb, yb, _ in dataset_dl:
        yb = yb.to(device)
        # Only the per-head outputs are needed for the loss.
        _, output = model(xb.to(device))
        running_loss += get_loss_batch(output, yb, params_loss, opt)
        if sanity_check is True:
            break

    # NOTE(review): with sanity_check the single-batch loss is still divided
    # by the full dataset size; confirm this is intended.
    return running_loss / float(len_data)
-
- import copy
def train_val(model, params):
    """Train and validate ``model``, keeping the best validation weights.

    Args:
        model: Network to train.
        params: Dictionary with the keys ``num_epochs``, ``params_loss``,
            ``optimizer``, ``train_dl``, ``val_dl``, ``sanity_check``,
            ``lr_scheduler`` and ``path2weights``.

    Returns:
        A tuple ``(model, loss_history)``: the model loaded with the weights
        that gave the lowest validation loss, and a dictionary holding the
        per-epoch ``"train"`` and ``"val"`` losses.
    """
    num_epochs = params["num_epochs"]
    params_loss = params["params_loss"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    sanity_check = params["sanity_check"]
    lr_scheduler = params["lr_scheduler"]
    path2weights = params["path2weights"]

    loss_history = {"train": [], "val": []}
    best_weights = copy.deepcopy(model.state_dict())
    lowest_val_loss = float('inf')
    last_epoch = num_epochs - 1

    for epoch in range(num_epochs):
        lr_before = get_lr(opt)
        print('Epoch {}/{}, current lr={}'.format(epoch, last_epoch, lr_before))

        # Training pass (the optimiser is handed over, so weights update).
        model.train()
        train_loss = loss_epoch(model, params_loss, train_dl, sanity_check, opt)
        loss_history["train"].append(train_loss)
        print("train loss: %.6f" %(train_loss))

        # Validation pass, without gradients and without an optimiser.
        model.eval()
        with torch.no_grad():
            val_loss = loss_epoch(model, params_loss, val_dl, sanity_check)
        loss_history["val"].append(val_loss)
        print("val loss: %.6f" %(val_loss))

        # Keep an in-memory copy and an on-disk copy of the best weights.
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("Copied best model weights!")

        # When the scheduler lowers the learning rate, continue from the
        # best weights seen so far.
        lr_scheduler.step(val_loss)
        if lr_before != get_lr(opt):
            print("Loading best model weights!")
            model.load_state_dict(best_weights)
        print("-"*10)

    model.load_state_dict(best_weights)
    return model, loss_history
-
def get_lr(opt):
    """Return the learning rate of the optimiser's first parameter group.

    Returns ``None`` when the optimiser has no parameter groups.
    """
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

import os

from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = optim.Adam(model.parameters(), lr=1e-3)
# Halve the learning rate after 20 epochs without improvement.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases; confirm
# against the installed version.
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=20, verbose=1)

path2models = "./models/"
# makedirs with exist_ok replaces the check-then-create sequence and also
# creates missing parent directories.
os.makedirs(path2models, exist_ok=True)

# scaled_anchors is only set on a YOLO layer once a forward pass has run,
# so the dummy forward pass must have been executed before this point.
# Modules 82, 94 and 106 are presumably the three YOLO heads of yolov3.cfg;
# verify against the config in use.
scaled_anchors = [model.module_list[82][0].scaled_anchors,
                  model.module_list[94][0].scaled_anchors,
                  model.module_list[106][0].scaled_anchors]

mse_loss = nn.MSELoss(reduction="sum")
bce_loss = nn.BCELoss(reduction="sum")
params_loss = {
    "scaled_anchors": scaled_anchors,
    "ignore_thres": 0.5,
    "mse_loss": mse_loss,
    "bce_loss": bce_loss,
    "num_yolos": 3,
    "num_anchors": 3,
    "obj_scale": 1,
    "noobj_scale": 100,
}

params_train = {
    "num_epochs": 5,
    "optimizer": opt,
    "params_loss": params_loss,
    "train_dl": train_dl,
    "val_dl": val_dl,
    # Only one batch per epoch is processed while this flag is True.
    "sanity_check": True,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models + "weights.pt",
}
model, loss_hist = train_val(model, params_train)

将训练后的权重加载到模型中
path2weights = "./models/weights.pt"
# map_location lets a checkpoint saved on another device (for example a GPU)
# load on the device used in this session.
model.load_state_dict(torch.load(path2weights, map_location=device))
# Take one validation sample and show it with its ground-truth boxes.
img, tg, _ = coco_val[11]
print(img.shape)
print(tg.shape)
show_img_bbox(img, tg)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。