当前位置:   article > 正文

【PyTorch】基于YOLO的多目标检测项目(二)

【PyTorch】基于YOLO的多目标检测项目(二)

【PyTorch】基于YOLO的多目标检测项目(一)

【PyTorch】基于YOLO的多目标检测项目(二)

YOLO-v3网络由步长(stride)为2的卷积层、跳跃连接层和上采样层组成,没有池化层。网络接收一幅416 * 416的图像作为输入,并在三个不同尺度上各给出一个YOLO输出。

目录

准备配置文件

搭建YOLO模型 

搭建PyTorch模块

搭建DarkNet模型

定义损失函数

训练模型

部署模型


准备配置文件

新建一个py文件,写入以下代码并命名为myutils.py,作为辅助构建模型的工具模块(负责解析配置文件并创建网络层)。

  1. import torch
  2. from torch import nn
  3. device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  4. def parse_model_config(path2file):
  5. cfg_file = open(path2file, 'r')
  6. lines = cfg_file.read().split('\n')
  7. lines = [x for x in lines if x and not x.startswith('#')]
  8. lines = [x.rstrip().lstrip() for x in lines]
  9. blocks_list = []
  10. for line in lines:
  11. if line.startswith('['):
  12. blocks_list.append({})
  13. blocks_list[-1]['type'] = line[1:-1].rstrip()
  14. else:
  15. key, value = line.split("=")
  16. value = value.strip()
  17. blocks_list[-1][key.rstrip()] = value.strip()
  18. return blocks_list
  19. def create_layers(blocks_list):
  20. hyperparams = blocks_list[0]
  21. channels_list = [int(hyperparams["channels"])]
  22. module_list = nn.ModuleList()
  23. for layer_ind, layer_dict in enumerate(blocks_list[1:]):
  24. modules = nn.Sequential()
  25. if layer_dict["type"] == "convolutional":
  26. filters = int(layer_dict["filters"])
  27. kernel_size = int(layer_dict["size"])
  28. pad = (kernel_size - 1) // 2
  29. bn=layer_dict.get("batch_normalize",0)
  30. conv2d= nn.Conv2d(
  31. in_channels=channels_list[-1],
  32. out_channels=filters,
  33. kernel_size=kernel_size,
  34. stride=int(layer_dict["stride"]),
  35. padding=pad,
  36. bias=not bn)
  37. modules.add_module("conv_{0}".format(layer_ind), conv2d)
  38. if bn:
  39. bn_layer = nn.BatchNorm2d(filters,momentum=0.9, eps=1e-5)
  40. modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)
  41. if layer_dict["activation"] == "leaky":
  42. activn = nn.LeakyReLU(0.1)
  43. modules.add_module("leaky_{0}".format(layer_ind), activn)
  44. elif layer_dict["type"] == "upsample":
  45. stride = int(layer_dict["stride"])
  46. upsample = nn.Upsample(scale_factor = stride)
  47. modules.add_module("upsample_{}".format(layer_ind), upsample)
  48. elif layer_dict["type"] == "shortcut":
  49. backwards=int(layer_dict["from"])
  50. filters = channels_list[1:][backwards]
  51. modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())
  52. elif layer_dict["type"] == "route":
  53. layers = [int(x) for x in layer_dict["layers"].split(",")]
  54. filters = sum([channels_list[1:][l] for l in layers])
  55. modules.add_module("route_{}".format(layer_ind), EmptyLayer())
  56. elif layer_dict["type"] == "yolo":
  57. anchors = [int(a) for a in layer_dict["anchors"].split(",")]
  58. anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
  59. mask = [int(m) for m in layer_dict["mask"].split(",")]
  60. anchors = [anchors[i] for i in mask]
  61. num_classes = int(layer_dict["classes"])
  62. img_size = int(hyperparams["height"])
  63. yolo_layer = YOLOLayer(anchors, num_classes, img_size)
  64. modules.add_module("yolo_{}".format(layer_ind), yolo_layer)
  65. module_list.append(modules)
  66. channels_list.append(filters)
  67. return hyperparams, module_list
  68. class EmptyLayer(nn.Module):
  69. def __init__(self):
  70. super(EmptyLayer, self).__init__()
  71. class YOLOLayer(nn.Module):
  72. def __init__(self, anchors, num_classes, img_dim=416):
  73. super(YOLOLayer, self).__init__()
  74. self.anchors = anchors
  75. self.num_anchors = len(anchors)
  76. self.num_classes = num_classes
  77. self.img_dim = img_dim
  78. self.grid_size = 0
  79. def forward(self, x_in):
  80. batch_size = x_in.size(0)
  81. grid_size = x_in.size(2)
  82. devide=x_in.device
  83. prediction=x_in.view(batch_size, self.num_anchors,
  84. self.num_classes + 5, grid_size, grid_size)
  85. prediction=prediction.permute(0, 1, 3, 4, 2)
  86. prediction=prediction.contiguous()
  87. obj_score = torch.sigmoid(prediction[..., 4])
  88. pred_cls = torch.sigmoid(prediction[..., 5:])
  89. if grid_size != self.grid_size:
  90. self.compute_grid_offsets(grid_size, cuda=x_in.is_cuda)
  91. pred_boxes=self.transform_outputs(prediction)
  92. output = torch.cat(
  93. (
  94. pred_boxes.view(batch_size, -1, 4),
  95. obj_score.view(batch_size, -1, 1),
  96. pred_cls.view(batch_size, -1, self.num_classes),
  97. ), -1,)
  98. return output
  99. def compute_grid_offsets(self, grid_size, cuda=True):
  100. self.grid_size = grid_size
  101. self.stride = self.img_dim / self.grid_size
  102. self.grid_x = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1 ).type(torch.float32)
  103. self.grid_y = torch.arange(grid_size, device=device).repeat(1, 1, grid_size, 1).transpose(3, 2).type(torch.float32)
  104. scaled_anchors=[(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]
  105. self.scaled_anchors=torch.tensor(scaled_anchors,device=device)
  106. self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
  107. self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
  108. def transform_outputs(self,prediction):
  109. device=prediction.device
  110. x = torch.sigmoid(prediction[..., 0]) # Center x
  111. y = torch.sigmoid(prediction[..., 1]) # Center y
  112. w = prediction[..., 2] # Width
  113. h = prediction[..., 3] # Height
  114. pred_boxes = torch.zeros_like(prediction[..., :4]).to(device)
  115. pred_boxes[..., 0] = x.data + self.grid_x
  116. pred_boxes[..., 1] = y.data + self.grid_y
  117. pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
  118. pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
  119. return pred_boxes * self.stride

搭建YOLO模型 

使用辅助函数parse_model_config解析配置文件,并查看解析结果中的前两个块。

  1. from myutils import parse_model_config
  2. path2config="./config/yolov3.cfg"
  3. blocks_list = parse_model_config(path2config)
  4. blocks_list[:2]

搭建PyTorch模块

基于解析的配置文件创建PyTorch模块,调用 create_layers 辅助函数进行转换并获取 PyTorch 模块的列表

  1. from myutils import create_layers
  2. hy_pa, m_l= create_layers(blocks_list)
  3. print(m_l)
  4. print(hy_pa)

搭建DarkNet模型

  1. from torch import nn
  2. class Darknet(nn.Module):
  3. def __init__(self, config_path, img_size=416):
  4. super(Darknet, self).__init__()
  5. self.blocks_list = parse_model_config(config_path)
  6. self.hyperparams, self.module_list = create_layers(self.blocks_list)
  7. self.img_size = img_size
  8. def forward(self, x):
  9. img_dim = x.shape[2]
  10. layer_outputs, yolo_outputs = [], []
  11. for block, module in zip(self.blocks_list[1:], self.module_list):
  12. if block["type"] in ["convolutional", "upsample", "maxpool"]:
  13. x = module(x)
  14. elif block["type"] == "shortcut":
  15. layer_ind = int(block["from"])
  16. x = layer_outputs[-1] + layer_outputs[layer_ind]
  17. elif block["type"] == "yolo":
  18. x= module[0](x)
  19. yolo_outputs.append(x)
  20. elif block["type"] == "route":
  21. x = torch.cat([layer_outputs[int(l_i)]
  22. for l_i in block["layers"].split(",")], 1)
  23. layer_outputs.append(x)
  24. yolo_out_cat = torch.cat(yolo_outputs, 1)
  25. return yolo_out_cat, yolo_outputs
  26. model = Darknet(path2config).to(device)
  27. print(model)

  1. # 创建一个随机的dummy_img,大小为1x3x416x416,并将其移动到指定的设备上
  2. dummy_img=torch.rand(1,3,416,416).to(device)
  3. # 在不计算梯度的情况下,执行模型的前向传播
  4. with torch.no_grad():
  5. # 获取模型的前向传播结果
  6. dummy_out_cat, dummy_out=model.forward(dummy_img)
  7. # 打印dummy_out_cat的形状
  8. print(dummy_out_cat.shape)
  9. # 打印dummy_out中每个元素的形状
  10. print(dummy_out[0].shape,dummy_out[1].shape,dummy_out[2].shape)

定义损失函数

YOLO通常使用组合损失函数:边界框坐标(x、y、w、h)的均方误差损失、目标置信度的二元交叉熵损失以及类别的二元交叉熵损失,三者相加得到总损失。

  1. def get_loss_batch(output,targets, params_loss, opt=None):
  2. # 获取损失函数的参数
  3. ignore_thres=params_loss["ignore_thres"]
  4. scaled_anchors= params_loss["scaled_anchors"]
  5. mse_loss= params_loss["mse_loss"]
  6. bce_loss= params_loss["bce_loss"]
  7. # 获取yolo的参数
  8. num_yolos=params_loss["num_yolos"]
  9. num_anchors= params_loss["num_anchors"]
  10. obj_scale= params_loss["obj_scale"]
  11. noobj_scale= params_loss["noobj_scale"]
  12. # 初始化损失
  13. loss=0.0
  14. for yolo_ind in range(num_yolos):
  15. # 获取yolo的输出
  16. yolo_out=output[yolo_ind]
  17. batch_size, num_bbxs, _=yolo_out.shape
  18. # 获取网格大小
  19. gz_2=num_bbxs/num_anchors
  20. grid_size=int(np.sqrt(gz_2))
  21. # 将yolo的输出reshape为(batch_size,num_anchors,grid_size,grid_size,-1)
  22. yolo_out=yolo_out.view(batch_size,num_anchors,grid_size,grid_size,-1)
  23. # 获取预测的边界框
  24. pred_boxes=yolo_out[:,:,:,:,:4]
  25. x,y,w,h= transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
  26. # 获取预测的置信度
  27. pred_conf=yolo_out[:,:,:,:,4]
  28. # 获取预测的类别概率
  29. pred_cls_prob=yolo_out[:,:,:,:,5:]
  30. # 获取yolo的目标
  31. yolo_targets = get_yolo_targets({
  32. "pred_cls_prob": pred_cls_prob,
  33. "pred_boxes":pred_boxes,
  34. "targets": targets,
  35. "anchors": scaled_anchors[yolo_ind],
  36. "ignore_thres": ignore_thres,
  37. })
  38. # 获取目标掩码
  39. obj_mask=yolo_targets["obj_mask"]
  40. noobj_mask=yolo_targets["noobj_mask"]
  41. # 获取目标的x,y,w,h
  42. tx=yolo_targets["tx"]
  43. ty=yolo_targets["ty"]
  44. tw=yolo_targets["tw"]
  45. th=yolo_targets["th"]
  46. # 获取目标的类别
  47. tcls=yolo_targets["tcls"]
  48. # 获取目标的置信度
  49. t_conf=yolo_targets["t_conf"]
  50. # 计算x,y,w,h的损失
  51. loss_x = mse_loss(x[obj_mask], tx[obj_mask])
  52. loss_y = mse_loss(y[obj_mask], ty[obj_mask])
  53. loss_w = mse_loss(w[obj_mask], tw[obj_mask])
  54. loss_h = mse_loss(h[obj_mask], th[obj_mask])
  55. # 计算置信度的损失
  56. loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
  57. loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
  58. loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj
  59. # 计算类别的损失
  60. loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])
  61. # 累加损失
  62. loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
  63. # 如果有优化器,则进行反向传播和优化
  64. if opt is not None:
  65. opt.zero_grad()
  66. loss.backward()
  67. opt.step()
  68. # 返回损失
  69. return loss.item()
  1. def transform_bbox(bbox, anchors):
  2. # 将bbox的x、y、w、h分别赋值给x、y、w、h
  3. x=bbox[:,:,:,:,0]
  4. y=bbox[:,:,:,:,1]
  5. w=bbox[:,:,:,:,2]
  6. h=bbox[:,:,:,:,3]
  7. # 将anchors的w、h分别赋值给anchor_w、anchor_h
  8. anchor_w = anchors[:, 0].view((1, 3, 1, 1))
  9. anchor_h = anchors[:, 1].view((1, 3, 1, 1))
  10. # 将x、y分别减去其向下取整的值
  11. x=x-x.floor()
  12. y=y-y.floor()
  13. # 将w、h分别除以anchor_w、anchor_h,并取对数
  14. w= torch.log(w / anchor_w + 1e-16)
  15. h= torch.log(h / anchor_h + 1e-16)
  16. return x, y, w, h
  17. def get_yolo_targets(params):
  18. # 获取预测框、预测类别概率、目标、锚点、忽略阈值
  19. pred_boxes=params["pred_boxes"]
  20. pred_cls_prob=params["pred_cls_prob"]
  21. target=params["targets"]
  22. anchors=params["anchors"]
  23. ignore_thres=params["ignore_thres"]
  24. # 获取批量大小、锚点数量、网格大小、类别数量
  25. batch_size = pred_boxes.size(0)
  26. num_anchors = pred_boxes.size(1)
  27. grid_size = pred_boxes.size(2)
  28. num_cls = pred_cls_prob.size(-1)
  29. # 定义目标张量的形状
  30. sizeT=batch_size, num_anchors, grid_size, grid_size
  31. # 定义目标张量,用于存储目标框的掩码
  32. obj_mask = torch.zeros(sizeT,device=device,dtype=torch.uint8)
  33. # 定义目标张量,用于存储非目标框的掩码
  34. noobj_mask = torch.ones(sizeT,device=device,dtype=torch.uint8)
  35. # 定义目标张量,用于存储目标框的x坐标
  36. tx = torch.zeros(sizeT, device=device, dtype=torch.float32)
  37. # 定义目标张量,用于存储目标框的y坐标
  38. ty= torch.zeros(sizeT, device=device, dtype=torch.float32)
  39. # 定义目标张量,用于存储目标框的宽度
  40. tw= torch.zeros(sizeT, device=device, dtype=torch.float32)
  41. # 定义目标张量,用于存储目标框的高度
  42. th= torch.zeros(sizeT, device=device, dtype=torch.float32)
  43. # 定义目标张量的形状
  44. sizeT=batch_size, num_anchors, grid_size, grid_size, num_cls
  45. # 定义目标张量,用于存储目标类别
  46. tcls= torch.zeros(sizeT, device=device, dtype=torch.float32)
  47. # 将目标框的坐标乘以网格大小
  48. target_bboxes = target[:, 2:] * grid_size
  49. # 获取目标框的xy坐标
  50. t_xy = target_bboxes[:, :2]
  51. # 获取目标框的wh坐标
  52. t_wh = target_bboxes[:, 2:]
  53. # 获取目标框的x坐标
  54. t_x, t_y = t_xy.t()
  55. # 获取目标框的宽度
  56. t_w, t_h = t_wh.t()
  57. # 获取目标框的网格坐标
  58. grid_i, grid_j = t_xy.long().t()
  59. # 计算每个锚点与目标框的iou
  60. iou_with_anchors=[get_iou_WH(anchor, t_wh) for anchor in anchors]
  61. # 将iou转换为张量
  62. iou_with_anchors = torch.stack(iou_with_anchors)
  63. # 获取iou最大的锚点索引
  64. best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)
  65. # 获取目标框的batch索引和类别标签
  66. batch_inds, target_labels = target[:, :2].long().t()
  67. # 将目标框的掩码设置为1
  68. obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 1
  69. # 将非目标框的掩码设置为0
  70. noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 0
  71. # 将大于忽略阈值的iou对应的非目标框掩码设置为0
  72. for ind, iou_wa in enumerate(iou_with_anchors.t()):
  73. noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = 0
  74. # 将目标框的x坐标减去网格的整数部分
  75. tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
  76. # 将目标框的y坐标减去网格的整数部分
  77. ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()
  78. # 获取最佳锚点的宽度
  79. anchor_w=anchors[best_anchor_ind][:, 0]
  80. # 将目标框的宽度除以锚点的宽度,并取对数
  81. tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)
  82. # 获取最佳锚点的高度
  83. anchor_h=anchors[best_anchor_ind][:, 1]
  84. # 将目标框的高度除以锚点的高度,并取对数
  85. th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)
  86. # 将目标类别设置为1
  87. tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1
  88. # 返回目标张量
  89. output={
  90. "obj_mask" : obj_mask,
  91. "noobj_mask" : noobj_mask,
  92. "tx": tx,
  93. "ty": ty,
  94. "tw": tw,
  95. "th": th,
  96. "tcls": tcls,
  97. "t_conf": obj_mask.float(),
  98. }
  99. return output
  100. def get_iou_WH(wh1, wh2):
  101. # 将wh2转置
  102. wh2 = wh2.t()
  103. # 获取wh1的宽度和高度
  104. w1, h1 = wh1[0], wh1[1]
  105. # 获取wh2的宽度和高度
  106. w2, h2 = wh2[0], wh2[1]
  107. # 计算交集面积
  108. inter_area = torch.min(w1, w2) * torch.min(h1, h2)
  109. # 计算并集面积
  110. union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
  111. # 返回交集面积与并集面积的比值
  112. return inter_area / union_area

训练模型

在训练数据上训练模型,并在验证数据上对其进行评估。训练过程遵循标准的基于梯度下降的优化流程(下面的代码使用Adam优化器,并配合ReduceLROnPlateau调整学习率)。

  1. def loss_epoch(model,params_loss,dataset_dl,sanity_check=False,opt=None):
  2. running_loss=0.0
  3. len_data=len(dataset_dl.dataset)
  4. running_metrics= {}
  5. for xb, yb,_ in dataset_dl:
  6. yb=yb.to(device)
  7. _,output=model(xb.to(device))
  8. loss_b=get_loss_batch(output,yb, params_loss,opt)
  9. running_loss+=loss_b
  10. if sanity_check is True:
  11. break
  12. loss=running_loss/float(len_data)
  13. return loss
  14. import copy
  15. def train_val(model, params):
  16. num_epochs=params["num_epochs"]
  17. params_loss=params["params_loss"]
  18. opt=params["optimizer"]
  19. train_dl=params["train_dl"]
  20. val_dl=params["val_dl"]
  21. sanity_check=params["sanity_check"]
  22. lr_scheduler=params["lr_scheduler"]
  23. path2weights=params["path2weights"]
  24. loss_history={
  25. "train": [],
  26. "val": [],
  27. }
  28. best_model_wts = copy.deepcopy(model.state_dict())
  29. best_loss=float('inf')
  30. for epoch in range(num_epochs):
  31. current_lr=get_lr(opt)
  32. print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, current_lr))
  33. model.train()
  34. train_loss=loss_epoch(model,params_loss,train_dl,sanity_check,opt)
  35. loss_history["train"].append(train_loss)
  36. print("train loss: %.6f" %(train_loss))
  37. model.eval()
  38. with torch.no_grad():
  39. val_loss=loss_epoch(model,params_loss,val_dl,sanity_check)
  40. loss_history["val"].append(val_loss)
  41. print("val loss: %.6f" %(val_loss))
  42. if val_loss < best_loss:
  43. best_loss = val_loss
  44. best_model_wts = copy.deepcopy(model.state_dict())
  45. torch.save(model.state_dict(), path2weights)
  46. print("Copied best model weights!")
  47. lr_scheduler.step(val_loss)
  48. if current_lr != get_lr(opt):
  49. print("Loading best model weights!")
  50. model.load_state_dict(best_model_wts)
  51. print("-"*10)
  52. model.load_state_dict(best_model_wts)
  53. return model, loss_history
  54. def get_lr(opt):
  55. for param_group in opt.param_groups:
  56. return param_group['lr']
  1. from torch import optim
  2. from torch.optim.lr_scheduler import ReduceLROnPlateau
  3. opt = optim.Adam(model.parameters(), lr=1e-3)
  4. lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)
  5. path2models= "./models/"
  6. if not os.path.exists(path2models):
  7. os.mkdir(path2models)
  8. scaled_anchors=[model.module_list[82][0].scaled_anchors,
  9. model.module_list[94][0].scaled_anchors,
  10. model.module_list[106][0].scaled_anchors]
  11. mse_loss = nn.MSELoss(reduction="sum")
  12. bce_loss = nn.BCELoss(reduction="sum")
  13. params_loss={
  14. "scaled_anchors" : scaled_anchors,
  15. "ignore_thres": 0.5,
  16. "mse_loss": mse_loss,
  17. "bce_loss": bce_loss,
  18. "num_yolos": 3,
  19. "num_anchors": 3,
  20. "obj_scale": 1,
  21. "noobj_scale": 100,
  22. }
  23. params_train={
  24. "num_epochs": 5,
  25. "optimizer": opt,
  26. "params_loss": params_loss,
  27. "train_dl": train_dl,
  28. "val_dl": val_dl,
  29. "sanity_check": True,
  30. "lr_scheduler": lr_scheduler,
  31. "path2weights": path2models+"weights.pt",
  32. }
  33. model,loss_hist=train_val(model,params_train)

部署模型

将训练后的权重加载到模型中 

  1. path2weights="./models/weights.pt"
  2. model.load_state_dict(torch.load(path2weights))

  1. img,tg,_=coco_val[11]
  2. print(img.shape)
  3. print(tg.shape)
  4. show_img_bbox(img,tg)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/黑客灵魂/article/detail/920524
推荐阅读
相关标签
  

闽ICP备14008679号