TensorRT Python
Note: I have not tested this code myself, and TensorRT appears to be Linux-oriented.

Onnx To Tensorrt

When you install TensorRT, a set of official samples is downloaded along with it. Taking TensorRT 6 as an example, under samples/python/introductory_parser_samples there is a file named onnx_resnet50.py, which demonstrates converting resnet50.onnx into a TensorRT engine. Using that file as a starting point, this post walks through converting your own ONNX model to a TensorRT engine.
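The walkthrough assumes you already have an ONNX file (result.onnx here). If you are starting from a Keras model, one possible export path is tf2onnx; the sketch below is illustrative only, and the model file name and input signature are hypothetical placeholders, not from the original post:

```python
# Illustrative sketch: export a Keras model to ONNX with tf2onnx.
# "my_model.h5" and the input signature are hypothetical; adjust to your model.
import tensorflow as tf
import tf2onnx

model = tf.keras.models.load_model("my_model.h5")
spec = (tf.TensorSpec((1, 512, 512, 1), tf.float32, name="input"),)
tf2onnx.convert.from_keras(model, input_signature=spec, opset=11,
                           output_path="result.onnx")
```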
With the ONNX file in hand, here is the full script.
```python
from PIL import Image
import numpy as np
import pycuda.autoinit  # added: creates the CUDA context that pycuda requires
import pycuda.driver as cuda
import time
import tensorrt as trt

import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common


class ModelData(object):
    MODEL_PATH = "result.onnx"
    INPUT_SHAPE = (1, 512, 512)
    # We can convert TensorRT data types to numpy types with trt.nptype()
    DTYPE = trt.float32


# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


# Allocate host and device buffers, and create a stream.
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream


def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()


# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        # Explicitly mark the last layer's output as the network output
        # (see the explanation below the listing).
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        return builder.build_cuda_engine(network)


def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS))
        image_arr = np.reshape(image_arr, image_arr.shape + (1,))
        image_arr = image_arr.transpose([2, 0, 1])
        image_arr = image_arr.astype(trt.nptype(ModelData.DTYPE))
        image_arr = image_arr.ravel()
        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy it to the pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image


def main():
    onnx_model_file = 'result.onnx'
    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            starttime = time.time()
            for i in range(100):
                # Load a normalized test case into the host input page-locked buffer.
                test_image = 'test.jpg'
                test_case = load_normalized_test_case(test_image, h_input)
                # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
                # probability that the image corresponds to that label.
                do_inference(context, h_input, d_input, h_output, d_output, stream)
                # print('ok')
            endtime = time.time()
            pertime = (endtime - starttime) / 100
            print('per-image cost: ' + str(pertime))


if __name__ == '__main__':
    main()
```
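Building the engine from ONNX can take a while, so in practice it is common to serialize the engine once and reload it on later runs. Here is a minimal sketch of that pattern (my addition, not part of the original post); result.trt is an arbitrary file name:

```python
# Sketch (my addition): build once, cache the serialized engine on disk.
with build_engine_onnx('result.onnx') as engine:
    with open('result.trt', 'wb') as f:
        f.write(engine.serialize())

# Later runs: deserialize the cached engine instead of rebuilding from ONNX.
with open('result.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
```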
Most of the code above comes straight from onnx_resnet50.py, so you can compare the two side by side. The main changes are:
(1) The initialization section: the model's input size, i.e. the size of the image fed to the network after preprocessing.
(2) The build_engine_onnx function. Below is a comparison of my modified version against the original function from onnx_resnet50.py.
```python
# Modified version
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        # Explicitly mark the last layer's output as the network output.
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        return builder.build_cuda_engine(network)
```

```python
# Original version
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network)
```
The main change is the two lines added inside the with block, after the ONNX file has been parsed:

```python
last_layer = network.get_layer(network.num_layers - 1)
network.mark_output(last_layer.get_output(0))
```
Without this change, the build fails with the following error:

[TensorRT] ERROR: Network must have at least one output

The added lines explicitly mark the output of the network.
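A related pitfall worth guarding against: parser.parse() returns False on failure rather than raising, and with a silently empty network the mark_output call above crashes with a confusing message. A sketch of a more defensive parse step (my addition, not in the original post), dropped into build_engine_onnx in place of the plain parse call:

```python
# Defensive parsing (my addition): surface parser errors instead of
# failing later at network.mark_output on an empty network.
with open(model_file, 'rb') as model:
    if not parser.parse(model.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        return None
```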
That is the full record of deploying a Keras model with ONNX + TensorRT; I hope it spares you some detours. As for speed, I only converted the model without any further optimization and still saw roughly a 50% speedup. I will keep optimizing and may add a before/after speed comparison later.
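For reference, one common next optimization step (not applied in this post) is letting the builder generate FP16 kernels where the GPU supports them. With the TensorRT 6-era builder API this is a one-flag change inside build_engine_onnx:

```python
# Possible optimization (my addition): enable FP16 where supported.
# Uses the TensorRT 6-era builder flags.
builder.max_workspace_size = common.GiB(1)
if builder.platform_has_fast_fp16:
    builder.fp16_mode = True
```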
Source: blog.csdn.net, author: 网奇. Copyright belongs to the original author; contact the author for reprint permission.
Original link: blog.csdn.net/jacke121/article/details/116572083