赞
踩
前言:1. 关于onnx的介绍请自行查阅相关资料;以下内容以已经成功得到onnx文件为前提。
2. 为什么使用tensorrt框架加速,请看这篇:《不同的部署框架的区别python与C++的效率区别、模型部署/ONNXRuntime/tensorrt》。
TX2在装系统时是默认安装tensorrt的,位置在/usr/src/tensorrt中,如图

我们只需要为自己的虚拟环境和系统自带的tensorrt建立软链接即可;具体方法见《Jetson Nano、TX2等 conda 虚拟环境中使用TensorRT、gi等》。

建立好软链接后,就可以根据我们的onnx文件生成tensorrt加速后的trt文件了。
进入到bin文件夹中,在该文件夹中打开终端
输入:
./trtexec --onnx=/home/zeh/Desktop/FPI/sim_file/net_cg.onnx --saveEngine=/home/zeh/Desktop/FPI/sim_file/net_cg.trt --workspace=1024
相当于是 ./trtexec --onnx=要加速的onnx文件的路径 --saveEngine=生成的trt加速后的文件保存路径 --workspace=1024(设置工作空间大小,单位为MB;不加可能会报错)
如:
成功截图:成功生成trt文件。

生成trt文件后就可以在tx2上进行推理了。下面是我的推理代码,包括predict和inference两个文件。
predict.py
import cv2
import tensorrt as trt
import numpy as np
import inference as inference_utils # TRT/TF inference wrappers
import torch
import os
import time
import exifread,re,json,requests,math,tqdm
from make_dataset import SiamUAV_test
# from tool import get_logger
if __name__ == "__main__":
    # 1. Build the inference wrapper.
    # Precision (in bits) -> TRT Engine datatype
    TRT_PRECISION_TO_DATATYPE = {
        16: trt.DataType.HALF,
        32: trt.DataType.FLOAT,
    }
    # Key 16 selects trt.DataType.HALF (use 32 for trt.DataType.FLOAT).
    trt_engine_datatype = TRT_PRECISION_TO_DATATYPE[16]
    # batch size = 1
    max_batch_size = 1
    engine_file_path = "/home/zeh/Desktop/FPI/sim_file/net_cg.trt"
    onnx_file_path = "/home/zeh/Desktop/FPI/sim_file/net_cg.onnx"
    trt_inference_wrapper = inference_utils.TRTInference(
        engine_file_path, onnx_file_path,
        trt_engine_datatype, max_batch_size,
    )

    # 2. Load your own dataset.
    test_dir = "/home/zeh/Desktop/fpi_picture"
    dataset_test = SiamUAV_test(test_dir)
    dataloaders = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False,
        num_workers=0, pin_memory=True,
    )

    for uav, satellite, X, Y, uav_path, sa_path in dataloaders:
        # Dataset-specific preprocessing: the wrapper takes numpy arrays.
        z = uav.numpy()
        x = satellite.numpy()

        # Run inference; this model takes two inputs.
        trt_outputs = trt_inference_wrapper.infer(z, x)

        # Post-process the inference result. Only the first output is used,
        # so convert just that array instead of the whole list of outputs
        # (converting a list of ndarrays is slow and fails when the outputs
        # have different sizes). torch.tensor copies the data.
        # Named pred_map rather than `map` so the builtin is not shadowed.
        pred_map = torch.tensor(trt_outputs[0])
        pred_map = torch.reshape(pred_map, (384, 384))
        pred_map = torch.sigmoid(pred_map)
inference.py
import os
import sys
import time
# from PIL import Image
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
class HostDeviceMem(object):
    """Pairs a host buffer with the device allocation that mirrors it."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine):
    """Allocate host/device buffers for every binding of a TensorRT engine.

    Args:
        engine: deserialized TensorRT engine; iterating it yields its bindings.

    Returns:
        tuple: (inputs, outputs, bindings, stream) where inputs and outputs
            are lists of HostDeviceMem, bindings is the list of device
            addresses (as ints) in engine binding order, and stream is a new
            cuda.Stream.

    Raises:
        ValueError: if a binding reports a negative (dynamic) dimension, since
            a fixed-size buffer cannot be derived from it.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        # Query the shape once and reuse it for validation and sizing.
        shape = engine.get_binding_shape(binding)
        if any(dim < 0 for dim in shape):
            raise ValueError(
                "Binding {!r} has a dynamic shape {}; cannot size its buffer".format(
                    binding, tuple(shape)))
        size = trt.volume(shape) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
def load_engine(trt_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        trt_path (str): path of the serialized engine file.

    Returns:
        The deserialized engine.

    Raises:
        OSError: if the file cannot be opened or read.
        RuntimeError: if the runtime could not deserialize the file contents.
    """
    # Deserialize the engine
    with open(trt_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    # Fail here with a clear message instead of letting a None engine
    # surface later as an unrelated error.
    if engine is None:
        raise RuntimeError(
            "Failed to deserialize TensorRT engine from {}".format(trt_path))
    return engine
class TRTInference(object):
    """Manages TensorRT objects for model inference."""

    def __init__(self, trt_engine_path, onnx_model_path,
                 trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
        """Initializes TensorRT objects needed for model inference.

        Args:
            trt_engine_path (str): path of the serialized TensorRT engine
                file to load
            onnx_model_path (str): path of the .onnx model; kept for
                interface compatibility, not read by this constructor
            trt_engine_datatype (trt.DataType): requested precision; only
                printed here
            batch_size (int): max batch size; only printed here
        """
        # Display requested engine settings to stdout
        print("TensorRT inference engine settings:")
        print(" * Inference precision - {}".format(trt_engine_datatype))
        print(" * Max batch size - {}\n".format(batch_size))

        # Load the serialized engine from file
        print("Loading cached TensorRT engine from {}".format(
            trt_engine_path))
        self.trt_engine = load_engine(trt_engine_path)

        # This allocates memory for network inputs/outputs on both CPU and GPU
        self.inputs, self.outputs, self.bindings, self.stream = allocate_buffers(self.trt_engine)

        # Execution context is needed for inference
        self.context = self.trt_engine.create_execution_context()

    def infer(self, full_img1, full_img2):
        """Runs the model on a pair of inputs.

        Args:
            full_img1: array-like data for the engine's first input
            full_img2: array-like data for the engine's second input

        Returns:
            list: one flat numpy array per engine output. Each array is a
                copy, so it stays valid after later calls to infer().
        """
        # Inputs are used as given (no normalization is applied here); they
        # are only converted to C-contiguous float32 arrays.
        scale_img1 = np.array(full_img1, dtype=np.float32, order='C')
        scale_img2 = np.array(full_img2, dtype=np.float32, order='C')

        # Copy the flattened data into the host input buffers
        # (self.inputs was returned earlier by allocate_buffers())
        np.copyto(self.inputs[0].host, scale_img1.ravel())
        np.copyto(self.inputs[1].host, scale_img2.ravel())

        # Measure the inference time to output it to the user
        inference_start_time = time.time()
        # Fetch output from the model
        trt_outputs = do_inference(
            self.context, bindings=self.bindings, inputs=self.inputs,
            outputs=self.outputs, stream=self.stream)
        # Stop the clock before printing so the print is not counted.
        elapsed_ms = int(round((time.time() - inference_start_time) * 1000))

        print("network output shape:{}".format(trt_outputs[0].shape))
        # Output inference time
        print("TensorRT inference time: {} ms".format(elapsed_ms))

        # do_inference returns the reusable host buffers themselves; copy them
        # so the next call does not overwrite results the caller still holds.
        return [output.copy() for output in trt_outputs]
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the GPU, run the engine, and copy outputs back.

    Args:
        context: TensorRT execution context used to launch inference.
        bindings: list of device buffer addresses passed to the context.
        inputs: objects exposing `.host` and `.device` input buffers.
        outputs: objects exposing `.host` and `.device` output buffers.
        stream: CUDA stream on which copies and execution are enqueued.
        batch_size (int): batch size forwarded to `execute_async`.

    Returns:
        list: the `.host` buffer of every output, in order. These are the
            same buffer objects held by `outputs`, not copies.
    """
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。