赞
踩
来源:CSDN—makcooo
地址:https://blog.csdn.net/qq_44756223/article/details/107727863
01
配置
Ubuntu 16.04
python 3.6
onnx 1.6
pytorch 1.5
pycuda 2019.1.2
torchvision 0.1.8
建议详读,先安装好环境:
https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#import_onnx_python
02
步骤
这边用的是Darknet生成的PyTorch模型
- import torch
- from torch.autograd import Variable
- import onnx
-
-
-
-
# Names recorded for the graph's input and output tensors in the exported ONNX file.
input_name = ['input']
output_name = ['output']

# Dummy tensor used only to trace the network during export (1 x 3 x 544 x 544).
# A plain tensor is used because `Variable` has been a no-op wrapper since
# PyTorch 0.4; the name also avoids shadowing the built-in `input`.
dummy_input = torch.randn(1, 3, 544, 544).cuda()

# x.model is the author's own model (generated from Darknet). It must be moved
# to the GPU because the dummy input lives there; otherwise export fails with
# a weight/input device-type mismatch.
model = x.model.cuda()

# Alternatively, load a previously saved model:
# model = torch.load('', map_location="cuda:0")
torch.onnx.export(model, dummy_input, 'model.onnx', input_names=input_name,
                  output_names=output_name, verbose=True)
其中
- #model = x.model.cuda()
- #若是不添加cuda()
- model = x.model
出现报错
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
# Re-load the exported file and run the ONNX checker on it; the success line is
# only printed if the checker did not raise.
# A separate name is used so the PyTorch `model` is not rebound.
onnx_model = onnx.load("model.onnx")
onnx.checker.check_model(onnx_model)
print("==> Passed")
- import pycuda.autoinit
- import numpy as np
- import pycuda.driver as cuda
- import tensorrt as trt
- import torch
- import os
- import time
- from PIL import Image
- import cv2
- import torchvision
-
-
filename = '000000.jpg'  # test image passed to get_img_np_nchw
max_batch_size = 1
# Must point at the file written by torch.onnx.export in the export step.
onnx_model_path = 'model.onnx'

TRT_LOGGER = trt.Logger()  # This logger is required to build an engine
-
-
-
-
def get_img_np_nchw(filename, input_size=(1920, 1080)):
    """Load an image and return it as a normalized NCHW array of batch size 1.

    The image is read with OpenCV, converted from BGR to RGB, resized,
    scaled to [0, 1] and normalized per channel with a fixed mean/std.

    Args:
        filename: Path of the image file to read.
        input_size: Target size handed to ``cv2.resize`` as (width, height).
            It must match the input size of the model the array is fed to
            (e.g. ``(544, 544)`` for a model exported with a 544x544 input);
            a mismatch makes the host-to-device copy fail later.

    Returns:
        A float64 array of shape (1, 3, height, width).

    Raises:
        OSError: If the image cannot be read.
    """
    image = cv2.imread(filename)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('Could not read image file {}'.format(filename))

    image_cv = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_cv = cv2.resize(image_cv, input_size)

    # Per-channel (R, G, B) mean and standard deviation used for normalization.
    miu = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Scale to [0, 1], then normalize; broadcasting applies miu/std along the
    # trailing channel axis of the HWC image.
    img_np = (np.array(image_cv, dtype=float) / 255. - miu) / std

    # HWC -> CHW, made contiguous so a later flatten keeps channel-major order.
    img_np_t = np.ascontiguousarray(img_np.transpose(2, 0, 1))
    return np.expand_dims(img_np_t, axis=0)
-
-
class HostDeviceMem(object):
    """Pair of buffers for one engine binding: host (CPU) and device (GPU)."""

    def __init__(self, host_mem, device_mem):
        """Store the buffer pair.

        Within this context, host_mem means the CPU memory and device_mem
        means the GPU memory.
        """
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        """Return a readable dump of both buffers."""
        return "Host:\n{}\nDevice:\n{}".format(self.host, self.device)

    def __repr__(self):
        """Use the same text as ``__str__``."""
        return self.__str__()
-
-
-
-
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    Args:
        engine: The TensorRT engine whose bindings are iterated.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)``: two lists of
        ``HostDeviceMem`` (input and output bindings), the list of device
        buffer addresses as ints in binding order, and a new CUDA stream.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        # Number of elements: volume of the binding shape times the max batch size.
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
-
-
-
-
def get_engine(max_batch_size=1, onnx_file_path="", engine_file_path="",
               fp16_mode=False, int8_mode=False, save_engine=False,
               ):
    """Load a serialized TensorRT engine if one exists, otherwise build it from ONNX.

    Args:
        max_batch_size: Maximum batch size set on the builder.
        onnx_file_path: Path of the ONNX model used when building.
        engine_file_path: Path of the serialized engine; read if it exists,
            written after building when ``save_engine`` is true.
        fp16_mode: Value assigned to the builder's ``fp16_mode``.
        int8_mode: INT8 building is not implemented; a true value raises.
        save_engine: Whether to serialize a freshly built engine to
            ``engine_file_path``.

    Returns:
        The deserialized or newly built engine.

    Raises:
        NotImplementedError: If an engine must be built and ``int8_mode`` is true.
        FileNotFoundError: If an engine must be built and the ONNX file is missing.
        RuntimeError: If the ONNX file cannot be parsed or the build fails.
    """

    def build_engine(max_batch_size, save_engine):
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        if int8_mode:
            # To be updated
            raise NotImplementedError

        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError('ONNX file {} not found'.format(onnx_file_path))

        # The ONNX parser only accepts networks created with an explicit batch dimension.
        EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(EXPLICIT_BATCH) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:

            builder.max_workspace_size = 1 << 30  # Your workspace size
            builder.max_batch_size = max_batch_size
            builder.fp16_mode = fp16_mode  # Default: False
            builder.int8_mode = int8_mode  # Default: False

            # Parse model file
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                # Parse exactly once: a second read() of the same handle
                # returns no data, so re-parsing would always report failure.
                parsed = parser.parse(model.read())

            if not parsed:
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                print("===========Parsing fail!!!!=================")
                # Building from a network that failed to parse cannot succeed.
                raise RuntimeError(
                    'Failed to parse ONNX file {}'.format(onnx_file_path))
            print('Completed parsing of ONNX file')

            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

            engine = builder.build_cuda_engine(network)
            if engine is None:
                raise RuntimeError(
                    'Failed to build an engine from {}'.format(onnx_file_path))
            print("Completed creating Engine")

            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, load it instead of building a new one.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine(max_batch_size, save_engine)
-
-
-
-
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one asynchronous inference pass and return the host output buffers.

    Args:
        context: Execution context created from the engine.
        bindings: Device buffer addresses, in binding order.
        inputs: Buffer pairs (``.host`` / ``.device``) for the input bindings.
        outputs: Buffer pairs (``.host`` / ``.device``) for the output bindings.
        stream: CUDA stream on which copies and execution are enqueued.
        batch_size: Batch size passed to ``execute_async``.

    Returns:
        A list with the ``.host`` buffer of every output, in order.
    """
    # Transfer data from CPU to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    # NOTE(review): the engine here is built from an explicit-batch network;
    # confirm whether execute_async_v2 should be used with the installed
    # TensorRT version.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream so the host buffers are complete before returning.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
-
-
-
-
def postprocess_the_outputs(h_outputs, shape_of_output):
    """Reshape a flat host output buffer to the model's output shape.

    Args:
        h_outputs: Array holding the raw output values.
        shape_of_output: Target shape; its element count must equal the
            size of ``h_outputs``.

    Returns:
        ``h_outputs`` reshaped to ``shape_of_output``.

    Raises:
        ValueError: If the sizes are incompatible.
    """
    return h_outputs.reshape(shape_of_output)
-
-
-
-
-
-
# Preprocess the test image; the same float32 array feeds TensorRT and PyTorch.
img_np_nchw = get_img_np_nchw(filename)
img_np_nchw = img_np_nchw.astype(dtype=np.float32)

# These two modes are dependent on hardwares
fp16_mode = False
int8_mode = False
trt_engine_path = './model_fp16_{}_int8_{}.trt'.format(fp16_mode, int8_mode)
# Build an engine
engine = get_engine(max_batch_size, onnx_model_path, trt_engine_path, fp16_mode, int8_mode)
# Create the context for this engine
context = engine.create_execution_context()
# Allocate buffers for input and output
inputs, outputs, bindings, stream = allocate_buffers(engine)  # input, output: host # bindings

# Do inference
# Output shape used to reshape the flat result; change it to your own model's
# output shape.
shape_of_output = (max_batch_size, 1000)
# Load data to the buffer. The flattened size must equal the size of the
# engine's input binding, otherwise the host-to-device copy fails.
inputs[0].host = img_np_nchw.reshape(-1)

# inputs[1].host = ... for multiple input
t1 = time.time()
trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)  # numpy data
t2 = time.time()
feat = postprocess_the_outputs(trt_outputs[0], shape_of_output)

print('TensorRT ok')
# Replace `model` with your own model; this is torchvision's ResNet-50, whose
# pretrained weights are downloaded from the network.
model = torchvision.models.resnet50(pretrained=True).cuda()
resnet_model = model.eval()

input_for_torch = torch.from_numpy(img_np_nchw).cuda()
# Inference only: no autograd bookkeeping is needed.
with torch.no_grad():
    # CUDA work is queued asynchronously, so synchronize before reading the
    # clock; otherwise only the kernel launch time is measured.
    torch.cuda.synchronize()
    t3 = time.time()
    feat_2 = resnet_model(input_for_torch)
    torch.cuda.synchronize()
    t4 = time.time()
feat_2 = feat_2.cpu().numpy()
print('Pytorch ok!')

mse = np.mean((feat - feat_2) ** 2)
print("Inference time with the TensorRT engine: {}".format(t2 - t1))
print("Inference time with the PyTorch model: {}".format(t4 - t3))
print('MSE Error = {}'.format(mse))

print('All completed!')

报错:
In node -1 (importModel): INVALID_VALUE: Assertion failed: !_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag."
解决:
- def build_engine(max_batch_size, save_engine):
-
- EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
- with trt.Builder(TRT_LOGGER) as builder, \
- builder.create_network(EXPLICIT_BATCH) as network, \
- trt.OnnxParser(network, TRT_LOGGER) as parser:
报错:
- Traceback (most recent call last):
- line 126, in <listcomp>
- [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
- pycuda._driver.LogicError: cuMemcpyHtoDAsync failed: invalid argument
解决:
- def get_img_np_nchw(filename):
- image = cv2.imread(filename)
- image_cv = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
- image_cv = cv2.resize(image_cv, (1920, 1080))
输入的检测图像需要 resize 成模型输入的尺寸(此处为 544×544,与导出 ONNX 时使用的输入尺寸一致)
改为
- def get_img_np_nchw(filename):
- image = cv2.imread(filename)
- image_cv = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
- image_cv = cv2.resize(image_cv, (544,544))
报错
- line 139, in postprocess_the_outputs
- h_outputs = h_outputs.reshape(*shape_of_output)
- ValueError: cannot reshape array of size 5780 into shape (1,1000)
解决:
- #shape_of_output = (max_batch_size, 1000)
- #修改成自己模型output的大小
- shape_of_output = (1,20,17,17)
- 猜您喜欢:
- 戳我,查看GAN的系列专辑~!
- 一顿午饭外卖,成为CV视觉前沿弄潮儿!
-
- CVPR 2022 | 25+方向、最新50篇GAN论文
-
- ICCV 2021 | 35个主题GAN论文汇总
-
- 超110篇!CVPR 2021最全GAN论文梳理
-
- 超100篇!CVPR 2020最全GAN论文梳理
-
- 拆解组新的GAN:解耦表征MixNMatch
-
- StarGAN第2版:多域多样性图像生成
- 附下载 | 《可解释的机器学习》中文版
-
- 附下载 |《TensorFlow 2.0 深度学习算法实战》
-
- 附下载 |《计算机视觉中的数学方法》分享
-
- 《基于深度学习的表面缺陷检测方法综述》
-
- 《零样本图像分类综述: 十年进展》
-
- 《基于深度神经网络的少样本学习综述》

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。