【PyTorch】基于YOLO的多目标检测项目（二）

作者：黑客灵魂 | 2024-08-02 23:40:06

踩

【PyTorch】基于YOLO的多目标检测项目（一）

【PyTorch】基于YOLO的多目标检测项目（二）

YOLO-v3网络由步长（stride）为2的卷积层、跳跃连接层和上采样层组成，不包含池化层。网络接收一幅416×416的图像作为输入，并在三个不同尺度上各给出一个YOLO输出。

准备配置文件

新建一个py文件并写入以下代码，命名为myutils.py，作为辅助工具模块，用于解析配置文件并辅助构建模型。


import torch
from torch import nn
 
 
# Default compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
def parse_model_config(path2file):
    """Parse a Darknet-style ``.cfg`` file into a list of block dictionaries.

    Every ``[section]`` header starts a new dictionary whose ``"type"`` entry
    is the section name; each following ``key=value`` line is stored in that
    dictionary with both key and value kept as stripped strings.

    Args:
        path2file: Path to the configuration file.

    Returns:
        A list of dictionaries, one per section, in file order.

    Raises:
        ValueError: If a ``key=value`` line appears before the first section
            header, or if a non-header line contains no ``=``.
    """
    # The context manager guarantees the file handle is closed.
    with open(path2file, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    # Strip first, then filter: this also drops indented comments and
    # whitespace-only lines, which would otherwise reach the parser below.
    stripped = (line.strip() for line in raw_lines)
    lines = [line for line in stripped if line and not line.startswith('#')]

    blocks_list = []
    for line in lines:
        if line.startswith('['):
            blocks_list.append({'type': line[1:-1].strip()})
            continue
        if not blocks_list:
            raise ValueError(
                "option line found before any [section] header: {!r}".format(line))
        # Split on the first '=' only so that values may contain '='.
        key, value = line.split("=", 1)
        blocks_list[-1][key.rstrip()] = value.strip()

    return blocks_list
 
 
def create_layers(blocks_list):
    """Build the PyTorch modules described by parsed Darknet config blocks.

    Args:
        blocks_list: Output of ``parse_model_config``. The first entry holds
            the network hyper-parameters (``channels`` and ``height`` are read
            here); every following entry describes one layer.

    Returns:
        A ``(hyperparams, module_list)`` tuple where ``hyperparams`` is the
        first block and ``module_list`` is an ``nn.ModuleList`` holding one
        ``nn.Sequential`` per layer block, in order.
    """
    hyperparams = blocks_list[0]
    # channels_list[0] is the input channel count; entry i + 1 is the output
    # channel count of layer i, so channels_list[1:][k] addresses layer k.
    channels_list = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for layer_ind, layer_dict in enumerate(blocks_list[1:]):
        modules = nn.Sequential()
        layer_type = layer_dict["type"]
        # Layers that do not set a new channel count (upsample, yolo, ...)
        # pass the previous layer's count through unchanged.
        filters = channels_list[-1]

        if layer_type == "convolutional":
            filters = int(layer_dict["filters"])
            kernel_size = int(layer_dict["size"])
            pad = (kernel_size - 1) // 2
            # Config values are strings: convert so that "0" really means off
            # (a non-empty string such as "0" would otherwise be truthy).
            bn = int(layer_dict.get("batch_normalize", 0))

            conv2d = nn.Conv2d(
                in_channels=channels_list[-1],
                out_channels=filters,
                kernel_size=kernel_size,
                stride=int(layer_dict["stride"]),
                padding=pad,
                bias=not bn,  # the batch-norm layer supplies the shift term
            )
            modules.add_module("conv_{0}".format(layer_ind), conv2d)

            if bn:
                # NOTE(review): PyTorch weights the *current batch* statistics
                # by `momentum`, so 0.9 makes the running stats track the
                # latest batch closely - confirm this is intended.
                bn_layer = nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)
                modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)

            if layer_dict["activation"] == "leaky":
                activn = nn.LeakyReLU(0.1)
                modules.add_module("leaky_{0}".format(layer_ind), activn)

        elif layer_type == "upsample":
            stride = int(layer_dict["stride"])
            upsample = nn.Upsample(scale_factor=stride)
            modules.add_module("upsample_{}".format(layer_ind), upsample)

        elif layer_type == "shortcut":
            # The element-wise sum itself is done by the model's forward pass;
            # only a placeholder module is registered here.
            backwards = int(layer_dict["from"])
            filters = channels_list[1:][backwards]
            modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "route":
            # Output channels are the sum over the referenced layers.
            layers = [int(x) for x in layer_dict["layers"].split(",")]
            filters = sum(channels_list[1:][l] for l in layers)
            modules.add_module("route_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "yolo":
            # Flat "w0,h0,w1,h1,..." list -> (w, h) pairs, then keep only the
            # anchors selected by this head's mask.
            anchors = [int(a) for a in layer_dict["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            mask = [int(m) for m in layer_dict["mask"].split(",")]
            anchors = [anchors[i] for i in mask]

            num_classes = int(layer_dict["classes"])
            img_size = int(hyperparams["height"])

            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module("yolo_{}".format(layer_ind), yolo_layer)

        module_list.append(modules)
        channels_list.append(filters)

    return hyperparams, module_list
 
 
 
class EmptyLayer(nn.Module):
    """Parameter-free placeholder module.

    ``create_layers`` registers one of these for every "route" and "shortcut"
    block so that the module list stays aligned with the config blocks; the
    class defines no ``forward`` of its own.
    """

    def __init__(self):
        super().__init__()
        
        
class YOLOLayer(nn.Module):
    """YOLO detection head: decodes a raw feature map into box predictions.

    Args:
        anchors: Sequence of ``(width, height)`` anchor pairs for this head.
        num_classes: Number of object classes.
        img_dim: Input image size used to derive the stride of the grid.
    """

    def __init__(self, anchors, num_classes, img_dim=416):
        super().__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.img_dim = img_dim
        # 0 never matches a real grid, so the offsets are built on first use.
        self.grid_size = 0

    def forward(self, x_in):
        """Decode ``x_in`` into boxes, objectness and class scores.

        Args:
            x_in: Tensor viewed as ``(batch, num_anchors * (num_classes + 5),
                grid, grid)``.

        Returns:
            Tensor of shape ``(batch, num_anchors * grid * grid,
            num_classes + 5)`` laid out as ``[x, y, w, h, objectness,
            class scores...]`` along the last dimension.
        """
        batch_size = x_in.size(0)
        grid_size = x_in.size(2)

        prediction = x_in.view(
            batch_size, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        # Move the per-box attributes to the last dimension.
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        obj_score = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Rebuild the cached offsets when the grid size changes or when the
        # input lives on a different device than the cached tensors.
        if grid_size != self.grid_size or self.grid_x.device != x_in.device:
            self.compute_grid_offsets(grid_size, cuda=x_in.is_cuda, device=x_in.device)

        pred_boxes = self.transform_outputs(prediction)

        output = torch.cat(
            (
                pred_boxes.view(batch_size, -1, 4),
                obj_score.view(batch_size, -1, 1),
                pred_cls.view(batch_size, -1, self.num_classes),
            ),
            -1,
        )
        return output

    def compute_grid_offsets(self, grid_size, cuda=True, device=None):
        """Cache the cell offsets and stride-scaled anchors for ``grid_size``.

        Args:
            grid_size: Number of cells along one side of the grid.
            cuda: Used only when ``device`` is None: selects ``cuda:0`` if
                True and CUDA is available, otherwise the CPU.
            device: Device on which to create the cached tensors.
        """
        if device is None:
            device = torch.device(
                "cuda:0" if cuda and torch.cuda.is_available() else "cpu")

        self.grid_size = grid_size
        self.stride = self.img_dim / self.grid_size

        # grid_x[0, 0, r, c] == c and grid_y[0, 0, r, c] == r.
        self.grid_x = torch.arange(
            grid_size, dtype=torch.float32, device=device).repeat(1, 1, grid_size, 1)
        self.grid_y = self.grid_x.transpose(3, 2)

        # Express the anchors in grid-cell units.
        scaled_anchors = [(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]
        self.scaled_anchors = torch.tensor(scaled_anchors, device=device)

        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

    def transform_outputs(self, prediction):
        """Convert raw box attributes into boxes scaled by the stride.

        NOTE(review): the raw values are detached from the autograd graph
        (the original code used ``.data``), so no gradient flows through the
        returned boxes - confirm this is intended for the box-regression loss.
        """
        raw = prediction.detach()
        center_x = torch.sigmoid(raw[..., 0]) + self.grid_x
        center_y = torch.sigmoid(raw[..., 1]) + self.grid_y
        width = torch.exp(raw[..., 2]) * self.anchor_w
        height = torch.exp(raw[..., 3]) * self.anchor_h

        pred_boxes = torch.stack((center_x, center_y, width, height), dim=-1)
        return pred_boxes * self.stride

搭建YOLO模型

解析配置文件：使用辅助函数parse_model_config读取配置文件，并查看解析结果的前两项。


from myutils import parse_model_config
 
# Location of the YOLO-v3 network definition file.
path2config="./config/yolov3.cfg"
# Parse the cfg file into a list of per-block dictionaries.
blocks_list = parse_model_config(path2config)
# Notebook-style display of the first two parsed blocks.
blocks_list[:2]

搭建PyTorch模块

基于解析的配置文件创建PyTorch模块，调用 create_layers 辅助函数进行转换并获取 PyTorch 模块的列表


from myutils import create_layers
 
# Build the PyTorch modules from the parsed blocks; returns (hyper-parameters, module list).
hy_pa, m_l= create_layers(blocks_list)
print(m_l)
print(hy_pa)

搭建DarkNet模型


from torch import nn
 
class Darknet(nn.Module):
    """Network assembled from a Darknet configuration file.

    Args:
        config_path: Path to the ``.cfg`` file describing the network.
        img_size: Stored on the instance as ``img_size``; not otherwise used
            by this class.
    """

    def __init__(self, config_path, img_size=416):
        super().__init__()
        self.blocks_list = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_layers(self.blocks_list)
        self.img_size = img_size

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            A ``(yolo_out_cat, yolo_outputs)`` tuple: the outputs of all
            "yolo" blocks concatenated along dimension 1, and the list of the
            individual outputs.
        """
        # Every block's output is kept because "shortcut" and "route" blocks
        # refer back to earlier outputs by index.
        layer_outputs, yolo_outputs = [], []

        # blocks_list[0] holds the hyper-parameters, so it has no module.
        for block, module in zip(self.blocks_list[1:], self.module_list):
            block_type = block["type"]
            if block_type in ("convolutional", "upsample", "maxpool"):
                x = module(x)
            elif block_type == "shortcut":
                # Residual connection: previous output plus the referenced one.
                x = layer_outputs[-1] + layer_outputs[int(block["from"])]
            elif block_type == "yolo":
                x = module[0](x)
                yolo_outputs.append(x)
            elif block_type == "route":
                # Concatenate the referenced outputs along dimension 1.
                x = torch.cat(
                    [layer_outputs[int(l_i)] for l_i in block["layers"].split(",")], 1)
            layer_outputs.append(x)

        yolo_out_cat = torch.cat(yolo_outputs, 1)
        return yolo_out_cat, yolo_outputs
    
# Instantiate the network from the cfg file and move it to the selected device.
model = Darknet(path2config).to(device)
print(model)


# Sanity-check the network with a random 1x3x416x416 batch on the target device.
dummy_img=torch.rand(1,3,416,416).to(device)
# No gradients are needed for a shape check.
with torch.no_grad():
    # Call the module itself rather than .forward() so that nn.Module hooks run.
    dummy_out_cat, dummy_out=model(dummy_img)
    # Shape of the concatenated predictions from all YOLO heads.
    print(dummy_out_cat.shape)
    # Shapes of the three individual YOLO head outputs.
    print(dummy_out[0].shape,dummy_out[1].shape,dummy_out[2].shape)

定义损失函数

YOLO通常使用组合损失函数：将边界框坐标（x、y、w、h）的回归损失、目标置信度损失和类别损失相加。


def get_loss_batch(output,targets, params_loss, opt=None):
    """Compute the combined YOLO loss for one batch and optionally take a step.

    Args:
        output: Sequence of per-head predictions, each of shape
            ``(batch, num_anchors * grid * grid, 5 + num_classes)``.
        targets: Ground-truth tensor passed through to ``get_yolo_targets``.
        params_loss: Dict with the keys ``ignore_thres``, ``scaled_anchors``,
            ``mse_loss``, ``bce_loss``, ``num_yolos``, ``num_anchors``,
            ``obj_scale`` and ``noobj_scale``.
        opt: Optional optimizer; when given, gradients are zeroed, the loss is
            back-propagated and one optimizer step is taken.

    Returns:
        The batch loss summed over all YOLO heads, as a Python float.
    """
    # Local import: no numeric library is imported at file level in this file.
    import math

    # Loss settings.
    ignore_thres = params_loss["ignore_thres"]
    scaled_anchors = params_loss["scaled_anchors"]
    mse_loss = params_loss["mse_loss"]
    bce_loss = params_loss["bce_loss"]

    # YOLO head settings.
    num_yolos = params_loss["num_yolos"]
    num_anchors = params_loss["num_anchors"]
    obj_scale = params_loss["obj_scale"]
    noobj_scale = params_loss["noobj_scale"]

    loss = 0.0
    for yolo_ind in range(num_yolos):
        yolo_out = output[yolo_ind]
        batch_size, num_bbxs, _ = yolo_out.shape

        # Each head emits num_anchors * grid * grid boxes; recover the grid
        # side, rounding so float error cannot truncate it by one.
        grid_size = int(round(math.sqrt(num_bbxs / num_anchors)))

        # (batch, anchors * grid * grid, C) -> (batch, anchors, grid, grid, C)
        yolo_out = yolo_out.view(batch_size, num_anchors, grid_size, grid_size, -1)

        # Split into boxes, objectness and class probabilities.
        pred_boxes = yolo_out[..., :4]
        x, y, w, h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
        pred_conf = yolo_out[..., 4]
        pred_cls_prob = yolo_out[..., 5:]

        yolo_targets = get_yolo_targets({
            "pred_cls_prob": pred_cls_prob,
            "pred_boxes": pred_boxes,
            "targets": targets,
            "anchors": scaled_anchors[yolo_ind],
            "ignore_thres": ignore_thres,
        })

        obj_mask = yolo_targets["obj_mask"]
        noobj_mask = yolo_targets["noobj_mask"]
        tx = yolo_targets["tx"]
        ty = yolo_targets["ty"]
        tw = yolo_targets["tw"]
        th = yolo_targets["th"]
        tcls = yolo_targets["tcls"]
        t_conf = yolo_targets["t_conf"]

        # Box-coordinate losses, only on cells responsible for an object.
        loss_x = mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = mse_loss(h[obj_mask], th[obj_mask])

        # Objectness loss, weighted separately for object / no-object cells.
        loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
        loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
        loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj

        # Classification loss on object cells.
        loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])

        loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Back-propagate and update the weights only when an optimizer is given.
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()

    # float() accepts both a tensor and the initial 0.0 (when num_yolos == 0).
    return float(loss)


def transform_bbox(bbox, anchors):
    """Convert decoded boxes back into the form the loss compares to targets.

    Args:
        bbox: Tensor indexed as ``(batch, anchor, row, col, 4)`` whose last
            dimension holds ``x, y, w, h``.
        anchors: Tensor of shape ``(num_anchors, 2)`` holding anchor
            ``(width, height)`` pairs.

    Returns:
        ``(x, y, w, h)``: ``x`` and ``y`` are the fractional parts of the
        centre coordinates; ``w`` and ``h`` are the logarithms of the box size
        relative to the matching anchor.
    """
    x = bbox[..., 0]
    y = bbox[..., 1]
    w = bbox[..., 2]
    h = bbox[..., 3]

    # -1 infers the anchor count instead of hard-coding three anchors.
    anchor_w = anchors[:, 0].view((1, -1, 1, 1))
    anchor_h = anchors[:, 1].view((1, -1, 1, 1))

    # Keep only the offset of the centre inside its grid cell.
    x = x - x.floor()
    y = y - y.floor()
    # The epsilon keeps log() finite for zero-sized boxes.
    w = torch.log(w / anchor_w + 1e-16)
    h = torch.log(h / anchor_h + 1e-16)
    return x, y, w, h
 
def get_yolo_targets(params):
    """Build the per-cell training targets for one YOLO head.

    Args:
        params: Dict with the keys
            ``pred_boxes`` (``(batch, anchors, grid, grid, 4)``),
            ``pred_cls_prob`` (``(batch, anchors, grid, grid, classes)``),
            ``targets`` (one row per box: batch index, class label, then
            x, y, w, h - presumably normalised to [0, 1]; verify against the
            dataset),
            ``anchors`` (``(anchors, 2)`` tensor in grid units) and
            ``ignore_thres`` (IoU above which a non-best anchor is excluded
            from the no-object loss).

    Returns:
        Dict with boolean ``obj_mask`` / ``noobj_mask``, float targets ``tx``,
        ``ty``, ``tw``, ``th``, one-hot ``tcls`` and ``t_conf``
        (``obj_mask`` as float).
    """
    pred_boxes = params["pred_boxes"]
    pred_cls_prob = params["pred_cls_prob"]
    target = params["targets"]
    anchors = params["anchors"]
    ignore_thres = params["ignore_thres"]

    batch_size = pred_boxes.size(0)
    num_anchors = pred_boxes.size(1)
    grid_size = pred_boxes.size(2)
    num_cls = pred_cls_prob.size(-1)

    # Allocate on the predictions' device instead of a module-level global.
    dev = pred_boxes.device
    size_t = (batch_size, num_anchors, grid_size, grid_size)

    # Boolean masks: indexing with uint8 masks is deprecated in PyTorch.
    obj_mask = torch.zeros(size_t, device=dev, dtype=torch.bool)
    noobj_mask = torch.ones(size_t, device=dev, dtype=torch.bool)
    tx = torch.zeros(size_t, device=dev, dtype=torch.float32)
    ty = torch.zeros(size_t, device=dev, dtype=torch.float32)
    tw = torch.zeros(size_t, device=dev, dtype=torch.float32)
    th = torch.zeros(size_t, device=dev, dtype=torch.float32)
    tcls = torch.zeros(size_t + (num_cls,), device=dev, dtype=torch.float32)

    # Scale the target boxes to grid units.
    target_bboxes = target[:, 2:] * grid_size
    t_xy = target_bboxes[:, :2]
    t_wh = target_bboxes[:, 2:]
    t_x, t_y = t_xy.t()
    t_w, t_h = t_wh.t()

    # Cell containing each box centre; clamped so a centre lying exactly on
    # the right/bottom border still maps to a valid cell.
    grid_i, grid_j = t_xy.long().clamp(0, grid_size - 1).t()

    # IoU (by width/height only) of every anchor with every target box,
    # shape (num_anchors, num_targets); keep the best anchor per target.
    iou_with_anchors = torch.stack([get_iou_WH(anchor, t_wh) for anchor in anchors])
    best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)

    batch_inds, target_labels = target[:, :2].long().t()

    # The best anchor in the target's cell is responsible for the object.
    obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 1
    noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = 0

    # Anchors that overlap a target well enough are not penalised as
    # background even though they are not the best match.
    for ind, iou_wa in enumerate(iou_with_anchors.t()):
        noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = 0

    # Centre offsets inside the cell.
    tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
    ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()

    # Log-ratio of the box size to the matched anchor size.
    anchor_w = anchors[best_anchor_ind][:, 0]
    tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)
    anchor_h = anchors[best_anchor_ind][:, 1]
    th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)

    # One-hot class target.
    tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1

    output = {
        "obj_mask": obj_mask,
        "noobj_mask": noobj_mask,
        "tx": tx,
        "ty": ty,
        "tw": tw,
        "th": th,
        "tcls": tcls,
        "t_conf": obj_mask.float(),
    }
    return output
 
def get_iou_WH(wh1, wh2):
    """Return the IoU of one (width, height) pair against a batch of pairs.

    Only sizes are compared: the overlap is the product of the smaller width
    and the smaller height.

    Args:
        wh1: Tensor holding a single ``(width, height)`` pair.
        wh2: Tensor of shape ``(n, 2)`` holding ``n`` pairs.

    Returns:
        Tensor of ``n`` IoU values.
    """
    columns = wh2.t()
    boxes_w, boxes_h = columns[0], columns[1]
    ref_w, ref_h = wh1[0], wh1[1]

    overlap = torch.min(ref_w, boxes_w) * torch.min(ref_h, boxes_h)
    # The epsilon keeps the denominator non-zero for degenerate sizes.
    combined = (ref_w * ref_h + 1e-16) + boxes_w * boxes_h - overlap
    return overlap / combined

训练模型

在训练数据上训练模型，并在验证数据上对其进行评估。训练过程遵循标准的小批量梯度下降流程，本文使用Adam优化器，并配合ReduceLROnPlateau学习率调度。


def loss_epoch(model,params_loss,dataset_dl,sanity_check=False,opt=None):
    """Run one pass over ``dataset_dl`` and return the average loss per sample.

    Args:
        model: Network returning ``(concatenated_output, per_head_outputs)``.
        params_loss: Loss settings forwarded to ``get_loss_batch``.
        dataset_dl: Data loader yielding ``(images, targets, _)`` batches.
        sanity_check: When exactly ``True``, stop after the first batch.
        opt: Optional optimizer forwarded to ``get_loss_batch``.

    Returns:
        The accumulated batch losses divided by the dataset length.
    """
    num_samples = len(dataset_dl.dataset)
    total_loss = 0.0

    for images, targets, _ in dataset_dl:
        targets = targets.to(device)
        # Only the per-head outputs are needed for the loss.
        _, head_outputs = model(images.to(device))
        total_loss += get_loss_batch(head_outputs, targets, params_loss, opt)
        if sanity_check is True:
            break

    return total_loss / float(num_samples)
 
import copy
def train_val(model, params):
    """Train ``model`` and validate it after every epoch, keeping the best weights.

    Args:
        model: Network to train.
        params: Dict with the keys ``num_epochs``, ``params_loss``,
            ``optimizer``, ``train_dl``, ``val_dl``, ``sanity_check``,
            ``lr_scheduler`` and ``path2weights``.

    Returns:
        ``(model, loss_history)``: the model loaded with the weights that
        gave the lowest validation loss, and a dict with the per-epoch
        ``"train"`` and ``"val"`` losses.
    """
    num_epochs = params["num_epochs"]
    loss_params = params["params_loss"]
    optimizer = params["optimizer"]
    train_loader = params["train_dl"]
    val_loader = params["val_dl"]
    sanity_check = params["sanity_check"]
    scheduler = params["lr_scheduler"]
    weights_path = params["path2weights"]

    loss_history = {"train": [], "val": []}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    for epoch in range(num_epochs):
        lr_at_start = get_lr(optimizer)
        print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, lr_at_start))

        # Training pass: the optimizer is handed down so the weights update.
        model.train()
        train_loss = loss_epoch(model, loss_params, train_loader, sanity_check, optimizer)
        loss_history["train"].append(train_loss)
        print("train loss: %.6f" %(train_loss))

        # Validation pass: no optimizer and no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss = loss_epoch(model, loss_params, val_loader, sanity_check)
        loss_history["val"].append(val_loss)
        print("val loss: %.6f" %(val_loss))

        # Snapshot and persist the weights whenever validation improves.
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), weights_path)
            print("Copied best model weights!")

        # If the scheduler changed the learning rate, restart from the best
        # weights seen so far.
        scheduler.step(val_loss)
        if lr_at_start != get_lr(optimizer):
            print("Loading best model weights!")
            model.load_state_dict(best_model_wts)
        print("-"*10)

    model.load_state_dict(best_model_wts)
    return model, loss_history
 
def get_lr(opt):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']


from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
 
import os

opt = optim.Adam(model.parameters(), lr=1e-3)
# Halve the learning rate after 20 epochs without validation improvement.
lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)

# Create the output folder; exist_ok avoids the exists()/mkdir() race.
path2models= "./models/"
os.makedirs(path2models, exist_ok=True)

# Collect the scaled anchors of every YOLO head by block type rather than by
# hard-coded layer index. The attribute is set by the layer's forward pass,
# so the model must have been run at least once before this point.
scaled_anchors = [
    module[0].scaled_anchors
    for block, module in zip(model.blocks_list[1:], model.module_list)
    if block["type"] == "yolo"
]

# Summed (not averaged) losses; the epoch loop divides by the dataset size.
mse_loss = nn.MSELoss(reduction="sum")
bce_loss = nn.BCELoss(reduction="sum")
params_loss={
    "scaled_anchors" : scaled_anchors,
    "ignore_thres": 0.5,
    "mse_loss": mse_loss,
    "bce_loss": bce_loss,
    "num_yolos": len(scaled_anchors),
    "num_anchors": 3,
    "obj_scale": 1,
    "noobj_scale": 100,
}

params_train={
    "num_epochs": 5,
    "optimizer": opt,
    "params_loss": params_loss,
    "train_dl": train_dl,
    "val_dl": val_dl,
    # True makes each epoch stop after a single batch (quick smoke test).
    "sanity_check": True,
    "lr_scheduler": lr_scheduler,
    "path2weights": os.path.join(path2models, "weights.pt"),
}
model,loss_hist=train_val(model,params_train)

部署模型

将训练后的权重加载到模型中


path2weights="./models/weights.pt"
# map_location remaps the stored tensors to the current device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load(path2weights, map_location=device))


# Fetch sample 11 from the validation dataset: image, targets and a third
# value that is ignored here.
img,tg,_=coco_val[11]
print(img.shape)
print(tg.shape)
# Draw the image with its boxes. coco_val and show_img_bbox are not defined in
# this part of the article - presumably they come from part one; verify.
show_img_bbox(img,tg)

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/黑客灵魂/article/detail/920524