ONNX to TensorRT



  1. from PIL import Image
  2. import numpy as np
  3. import pycuda.driver as cuda
  4. import time
  5. import tensorrt as trt
  6. import sys, os
  7. sys.path.insert(1, os.path.join(sys.path[0], ".."))
  8. import common
  9. class ModelData(object):
  10. MODEL_PATH = "result.onnx"
  11. INPUT_SHAPE = (1, 512, 512)
  12. # We can convert TensorRT data types to numpy types with trt.nptype()
  13. DTYPE = trt.float32
  14. # You can set the logger severity higher to suppress messages (or lower to display more messages).
  15. TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
  16. # Allocate host and device buffers, and create a stream.
  17. def allocate_buffers(engine):
  18. # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
  19. h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
  20. h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
  21. # Allocate device memory for inputs and outputs.
  22. d_input = cuda.mem_alloc(h_input.nbytes)
  23. d_output = cuda.mem_alloc(h_output.nbytes)
  24. # Create a stream in which to copy inputs/outputs and run inference.
  25. stream = cuda.Stream()
  26. return h_input, d_input, h_output, d_output, stream
  27. def do_inference(context, h_input, d_input, h_output, d_output, stream):
  28. # Transfer input data to the GPU.
  29. cuda.memcpy_htod_async(d_input, h_input, stream)
  30. # Run inference.
  31. context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
  32. # Transfer predictions back from the GPU.
  33. cuda.memcpy_dtoh_async(h_output, d_output, stream)
  34. # Synchronize the stream
  35. stream.synchronize()
  36. # The Onnx path is used for Onnx models.
  37. def build_engine_onnx(model_file):
  38. with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
  39. builder.max_workspace_size = common.GiB(1)
  40. # Load the Onnx model and parse it in order to populate the TensorRT network.
  41. with open(model_file, 'rb') as model:
  42. parser.parse(model.read())
  43. last_layer = network.get_layer(network.num_layers - 1)
  44. network.mark_output(last_layer.get_output(0))
  45. returnresult=builder.build_cuda_engine(network)
  46. return returnresult
  47. def load_normalized_test_case(test_image, pagelocked_buffer):
  48. # Converts the input image to a CHW Numpy array
  49. def normalize_image(image):
  50. # Resize, antialias and transpose the image to CHW.
  51. c, h, w = ModelData.INPUT_SHAPE
  52. image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS))
  53. image_arr = np.reshape(image_arr, image_arr.shape + (1,))
  54. image_arr=image_arr.transpose([2, 0, 1])
  55. image_arr=image_arr.astype(trt.nptype(ModelData.DTYPE))
  56. image_arr=image_arr.ravel()
  57. # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
  58. return (image_arr / 255.0 - 0.45) / 0.225
  59. # Normalize the image and copy to pagelocked memory.
  60. np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
  61. return test_image
  62. def main():
  63. onnx_model_file='result.onnx'
  64. # Build a TensorRT engine.
  65. with build_engine_onnx(onnx_model_file) as engine:
  66. # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
  67. # Allocate buffers and create a CUDA stream.
  68. h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
  69. with engine.create_execution_context() as context:
  70. # Load a normalized test case into the host input page-locked buffer.
  71. starttime=time.time()
  72. for i in range(100):
  73. test_image ='test.jpg'
  74. test_case = load_normalized_test_case(test_image, h_input)
  75. # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
  76. # probability that the image corresponds to that label
  77. do_inference(context, h_input, d_input, h_output, d_output, stream)
  78. #print('ok')
  79. endtime=time.time()
  80. pertime=(endtime-starttime)/100
  81. print('perimg cost'+str(pertime))




  1. #修改后
  2. def build_engine_onnx(model_file):
  3. with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
  4. builder.max_workspace_size = common.GiB(1)
  5. # Load the Onnx model and parse it in order to populate the TensorRT network.
  6. with open(model_file, 'rb') as model:
  7. parser.parse(model.read())
  8. last_layer = network.get_layer(network.num_layers - 1)
  9. network.mark_output(last_layer.get_output(0))
  10. returnresult=builder.build_cuda_engine(network)
  11. return returnresult
  12. #原函数
  13. def build_engine_onnx(model_file):
  14. with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
  15. builder.max_workspace_size = common.GiB(1)
  16. # Load the Onnx model and parse it in order to populate the TensorRT network.
  17. with open(model_file, 'rb') as model:
  18. parser.parse(model.read())
  19. returnresult=builder.build_cuda_engine(network)
  20. return returnresult


  1. last_layer = network.get_layer(network.num_layers - 1)
  2. network.mark_output(last_layer.get_output(0))


Error reported when building the engine without the two lines above:

[TensorRT] ERROR: Network must have at least one output



