In deep learning, the YOLO (You Only Look Once) family of models is popular for its excellent real-time object-detection performance. With the spread of the ONNX (Open Neural Network Exchange) format, converting a YOLOv5 model to ONNX so that it can run seamlessly across platforms and frameworks has become a key step toward more flexible and efficient deployment. This article walks you through the whole process of running object detection with a YOLOv5 ONNX model, from environment setup to actual inference.
Preparation
A YOLOv5 ONNX model file
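If you do not yet have an ONNX file, the official yolov5-master repository can export one from a .pt checkpoint once the dependencies used below are installed (`pip install onnxruntime-gpu opencv-python torch torchvision pyyaml`, or plain `onnxruntime` on CPU-only machines). The usual route is `python export.py --weights best.pt --include onnx`; the sketch below calls the same entry point from Python. It assumes it runs inside a yolov5-master checkout containing best.pt, and parameter names can differ between yolov5 versions:

```python
# Sketch: export a YOLOv5 .pt checkpoint to ONNX with yolov5's export.py.
# Assumes the current directory is a yolov5-master checkout with best.pt;
# writes best.onnx next to the checkpoint.
from export import run  # yolov5-master/export.py

run(weights='best.pt', include=('onnx',), imgsz=(640, 640))
```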
The code is as follows.

yolov5-interface.py:
```python
import os
import random
import time

import cv2
import torch
import onnxruntime

from tool import *


def onnx_load(w):
    """Create an ONNX Runtime session for weights `w`, preferring CUDA."""
    cuda = torch.cuda.is_available()
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
    session = onnxruntime.InferenceSession(w, providers=providers)
    output_names = [x.name for x in session.get_outputs()]
    print('ONNX output names:', output_names)
    return session, output_names


if __name__ == '__main__':
    # w = 'yolov5s.onnx'
    w = 'best.onnx'

    image_dir = './images'
    imgsz = [640, 640]
    session, output_names = onnx_load(w)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # fall back to CPU
    image_list = os.listdir(image_dir)
    random.shuffle(image_list)
    for image_item in image_list:
        start_time = time.time()
        path = os.path.join(image_dir, image_item)
        im0 = cv2.imread(path)                       # BGR, HWC
        im, org_data = data_process_cv2(im0, imgsz)  # letterbox + normalize -> NCHW float32
        y = session.run(output_names, {session.get_inputs()[0].name: im})
        pred = torch.from_numpy(y[0]).to(device)
        pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
        print('spend time: {0} ms'.format((time.time() - start_time) * 1000))
        res_img = post_process_yolov5(pred[0], org_data)
        cv2.imshow('res', res_img)
        cv2.waitKey(0)
```
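This script assumes the usual YOLOv5 detection export: a single output of shape (1, N, 5 + nc), where each of the N candidates is (cx, cy, w, h, objectness, nc class scores); that is the layout non_max_suppression in tool.py expects. A quick way to confirm what your own best.onnx produces, as a minimal sketch assuming the file sits in the working directory:

```python
# Sanity-check the exported model's input/output layout.
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession('best.onnx', providers=['CPUExecutionProvider'])
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # NCHW, values in [0, 1]
outputs = session.run(None, {session.get_inputs()[0].name: dummy})
print([o.shape for o in outputs])  # e.g. [(1, 25200, 85)] for an 80-class 640x640 export
```

For a stock 640x640 export, N = 25200 comes from the three detection strides: 3 x (80² + 40² + 20²) anchor positions.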

tool.py:
```python
import time

import cv2
import numpy as np
import torch
import torchvision
import yaml


def resize_image_cv2(image, size):
    """Letterbox `image` into a gray canvas of `size` = (w, h), preserving aspect ratio."""
    ih, iw, ic = image.shape
    w, h = size

    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)

    image = cv2.resize(image, (nw, nh))
    new_image = np.ones((h, w, 3), dtype='uint8') * 128  # gray padding
    start_h = (h - nh) / 2
    start_w = (w - nw) / 2

    end_h = h - start_h
    end_w = w - start_w

    new_image[int(start_h):int(end_h), int(start_w):int(end_w)] = image

    return new_image, nw, nh


def xywh2xyxy(x):
    """Convert (center_x, center_y, w, h) boxes to (x1, y1, x2, y2)."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y


def box_iou(box1, box2, eps=1e-7):
    """Pairwise IoU between two sets of xyxy boxes; returns an (N, M) matrix."""
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)


def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.35,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nm=0,  # number of masks
):
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[2] - nm - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    mi = 5 + nc  # mask start index
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain, process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        mask = x[:, mi:]  # zero columns if no masks

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = x[:, 5:mi].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy
        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)
        if (time.time() - t) > time_limit:
            # LOGGER.warning(f'WARNING NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded
    return output


def data_process_cv2(frame, input_shape):
    """Letterbox, scale to [0, 1] and reorder HWC BGR -> NCHW float32."""
    image_data, nw, nh = resize_image_cv2(frame, (input_shape[1], input_shape[0]))
    org_data = image_data.copy()
    np_data = np.array(image_data, np.float32)
    np_data = np_data / 255
    image_data = np.expand_dims(np.transpose(np_data, (2, 0, 1)), 0)
    image_data = np.ascontiguousarray(image_data)

    return image_data, org_data


def post_process_yolov5(det, im, label_path='coco128.yaml'):
    """Rescale detections to the image and draw labelled boxes on it."""
    if len(det):
        det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im.shape).round()
        names = yaml_load(label_path)['names']
        colors = Colors()
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)
            label = names[c]
            box_label(im, xyxy, label, color=colors(c, True))
    return im


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Rescale xyxy boxes from img1_shape (letterboxed) back to img0_shape."""
    if ratio_pad is None:
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    boxes[..., [0, 2]] -= pad[0]
    boxes[..., [1, 3]] -= pad[1]
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes


def clip_boxes(boxes, shape):
    """Clamp xyxy boxes to image bounds; `shape` is (h, w, ...)."""
    if isinstance(boxes, torch.Tensor):
        boxes[..., 0].clamp_(0, shape[1])
        boxes[..., 1].clamp_(0, shape[0])
        boxes[..., 2].clamp_(0, shape[1])
        boxes[..., 3].clamp_(0, shape[0])
    else:
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])


def yaml_load(file='coco128.yaml'):
    """Load a YAML file (e.g. the class-name list) ignoring encoding errors."""
    with open(file, errors='ignore') as f:
        return yaml.safe_load(f)


class Colors:
    # Ultralytics color palette https://ultralytics.com/
    def __init__(self):
        """
        Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB.
        Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`.
        """
        hexs = (
            "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231", "48F90A", "92CC17",
            "3DDB86", "1A9334", "00D4BB", "2C99A8", "00C2FF", "344593", "6473FF",
            "0018EC", "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",
        )
        self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index."""
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):
        """Converts hex color codes to RGB values (i.e. default PIL order)."""
        return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))


def box_label(im, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
    """Draw one xyxy box with an optional filled label on `im` (in place)."""
    lw = 2  # line width
    p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
    cv2.rectangle(im, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
    if label:
        tf = max(lw - 1, 1)  # font thickness
        w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
        outside = p1[1] - h >= 3  # does the label fit above the box?
        p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
        cv2.rectangle(im, p1, p2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                    0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)


def is_ascii(s) -> bool:
    """True if `s` (converted to str) contains only ASCII characters."""
    # Convert list, tuple, None, etc. to string
    s = str(s)

    # Check if the string is composed of only ASCII characters
    return all(ord(c) < 128 for c in s)
```
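To make the letterbox arithmetic in resize_image_cv2 concrete, consider a hypothetical 1280x720 frame mapped into the 640x640 canvas: the frame is scaled by min(640/1280, 640/720) = 0.5 to 640x360, then centered with 140 rows of gray padding above and below:

```python
# Worked example of the letterbox math in resize_image_cv2
# for a hypothetical 1280x720 frame and a 640x640 canvas.
iw, ih = 1280, 720                                # source width/height
w, h = 640, 640                                   # target canvas
scale = min(w / iw, h / ih)                       # 0.5 -- fit without distortion
nw, nh = int(iw * scale), int(ih * scale)         # 640, 360
start_w, start_h = (w - nw) // 2, (h - nh) // 2   # 0, 140 -- top-left of the pasted image
print(scale, (nw, nh), (start_w, start_h))
```

Note that in this pipeline post_process_yolov5 draws on the letterboxed image itself (org_data), so scale_boxes reduces to rounding and clipping (gain = 1, pad = 0); if you instead want boxes on the original frame, pass the unpadded im0 shape so the gain and padding are actually undone.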

The source for these helpers can be found in yolov5-master. Note that post_process_yolov5 reads class names from coco128.yaml by default, so place that file (it ships under data/ in yolov5-master) next to the scripts, or point label_path at your own label file.