A C++ Implementation of ONNXRuntime Inference on Linux
I noticed that many ONNXRuntime users would like code samples that compile and run on Linux, so I uploaded this GitHub repository: onnxruntime-inference-examples-cxx-for-linux
The code structure of the original ONNX Runtime samples, onnxruntime-inference-examples, is preserved. For simplicity, this project keeps only the C++-related parts.
I. How to Build
1. Prerequisites
- Linux Ubuntu/CentOS
- cmake (version >= 3.13)
- libpng 1.6
You can get a prebuilt libpng library here: libpng.zip
2. Install ONNX Runtime
Download a prebuilt package
You can download prebuilt onnxruntime packages from https://github.com/microsoft/onnxruntime/releases/, e.g. onnxruntime-linux-x64-***.tgz for Linux (or onnxruntime-win-x64-***.zip for Windows), and extract it to a folder of your choice.
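For example, on Linux the archive can be extracted like this (the *** stands for the release version you downloaded):
tar -zxvf onnxruntime-linux-x64-***.tgz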
3. Build the samples
3.1 On Windows
Open the Developer Command Prompt for your version of Visual Studio, change the current directory to samples\c_cxx, then run:
mkdir build && cd build
cmake .. -DLIBPNG_ROOTDIR=\path\to\your\libpng\binary -DONNXRUNTIME_ROOTDIR=\path\to\your\ort_install
If you do not use libpng, you can omit the "-DLIBPNG_ROOTDIR=…" argument.
If onnxruntime is installed in its default location, you can omit "-DONNXRUNTIME_ROOTDIR=…".
You can append "-Donnxruntime_USE_CUDA=ON" or "-Donnxruntime_USE_DML=ON" to the cmake command if your onnxruntime binaries were built with CUDA or DirectML support, respectively.
3.2 On Linux
Run the same cmake command as above (using forward slashes in the paths) to generate a Makefile, then type make to build the C++ code:
make
The executables can be found in each project's folder under the build directory.
II. Data Preparation
You can download the MNIST model here (mnist), or get it from the resources folder (resources/MNIST) in my GitHub repository.
Then prepare some images with handwritten digits.
Requirements:
- Grayscale image in PNG format
- 28x28 pixels
For example: an image of the digit 1 (image not reproduced here).
You can create such an image with any paint tool, e.g. Microsoft Paint.
III. Run
Execute from the command line:
./MNIST_cxx <model_path> <input_image_path> [cpu|cuda|dml]
To run with CUDA or DirectML, pass it as the third argument; CPU is used by default.
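For example (the model and image paths below are illustrative):
./MNIST_cxx ./mnist.onnx ./1.png
./MNIST_cxx ./mnist.onnx ./1.png cuda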
The MNIST_cxx code is shown below; see MNIST.cc for the full source.
#include <iostream>
#include <string>
#include <cstring>   // memset, strcmp
#include <cstdlib>   // malloc, free, abort
#include <assert.h>
#include <png.h>
#include <array>
#include <vector>
#include <cmath>
#include <algorithm>
#include "onnxruntime_c_api.h"
#include "providers.h"
const OrtApi* g_ort = NULL;
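// Helper macro: if an ONNX Runtime call fails, print its error message,
// release the status object, and abort the process.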
#define ORT_ABORT_ON_ERROR(expr) \
do { \
OrtStatus* onnx_status = (expr); \
if (onnx_status != NULL) { \
const char* msg = g_ort->GetErrorMessage(onnx_status); \
std::cout << msg << std::endl; \
g_ort->ReleaseStatus(onnx_status); \
abort(); \
} \
} while (0)
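// Numerically stable softmax: subtract the row max before exponentiating
// to avoid overflow, then normalize in place.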
template <typename T>
static void softmax(T& input) {
float rowmax = *std::max_element(input.begin(), input.end());
std::vector<float> y(input.size());
float sum = 0.0f;
for (size_t i = 0; i != input.size(); ++i) {
sum += y[i] = std::exp(input[i] - rowmax);
}
for (size_t i = 0; i != input.size(); ++i) {
input[i] = y[i] / sum;
}
}
/**
 * Convert BGR (HWC) pixel data to the model's float input: black pixels map to
 * 1.0f and everything else to 0.0f. Also prints an ASCII preview of the image.
 */
static void get_output_data(const png_byte* input, size_t h, size_t w, float** output, size_t* output_count) {
size_t stride = h * w;
*output_count = stride * 1;
float* output_data = (float*)malloc(*output_count * sizeof(float));
for (size_t i = 0; i != stride; ++i) {
output_data[i] = input[i * 3] == 0 ? 1.0f : 0.0f;
std::string temp = input[i * 3] == 0 ? "**" : "--";
std::cout << temp ;
if ((i + 1) % w == 0) { std::cout << std::endl; };
}
*output = output_data;
}
/**
 * Read a 28x28 PNG file and convert it to the model's float input.
 * \param out should be freed by caller after use
 * \param output_count Array length of the `out` param
 */
static int read_png_file(const char* input_file, size_t* height, size_t* width, float** out, size_t* output_count) {
png_image image; /* The control structure used by libpng */
/* Initialize the 'png_image' structure. */
memset(&image, 0, (sizeof image));
image.version = PNG_IMAGE_VERSION;
if (png_image_begin_read_from_file(&image, input_file) == 0) {
return -1;
}
png_bytep buffer;
image.format = PNG_FORMAT_BGR;
size_t input_data_length = PNG_IMAGE_SIZE(image);
if (input_data_length != 28 * 28 * 3) {
std::cout << "input_data_length: " << input_data_length << std::endl;
return -1;
}
buffer = (png_bytep)malloc(input_data_length);
memset(buffer, 0, input_data_length);
if (png_image_finish_read(&image, NULL /*background*/, buffer, 0 /*row_stride*/, NULL /*colormap*/) == 0) {
free(buffer);
return -1;
}
get_output_data(buffer, image.height, image.width, out, output_count);
free(buffer);
*width = image.width;
*height = image.height;
return 0;
}
static void usage() { std::cout << "usage: <model_path> <input_file> [cpu|cuda|dml]" << std::endl; }
int run_inference(OrtSession* session, const ORTCHAR_T* input_file) {
size_t input_height;
size_t input_width;
float* model_input;
size_t model_input_ele_count;
const char* input_file_p = input_file;
if (read_png_file(input_file_p, &input_height, &input_width, &model_input, &model_input_ele_count) != 0) {
return -1;
}
if (input_height != 28 || input_width != 28) {
std::cout << "please resize to image to 28x28" << std::endl;
free(model_input);
return -1;
}
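// Wrap the pixel buffer in a CPU tensor of shape {1, 1, 28, 28} (NCHW).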
OrtMemoryInfo* memory_info;
ORT_ABORT_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
const int64_t input_shape[] = {1, 1, 28, 28};
const size_t input_shape_len = sizeof(input_shape) / sizeof(input_shape[0]);
const size_t model_input_len = model_input_ele_count * sizeof(float);
OrtValue* input_tensor = NULL;
ORT_ABORT_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, model_input, model_input_len, input_shape,
input_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
&input_tensor));
assert(input_tensor != NULL);
int is_tensor;
ORT_ABORT_ON_ERROR(g_ort->IsTensor(input_tensor, &is_tensor));
assert(is_tensor);
const char* input_names[] = {"Input3"};
const char* output_names[] = {"Plus214_Output_0"};
std::array<float, 10> results_{};
int result_{0};
const int64_t output_shape[] = {1, 10};
const size_t output_shape_len = sizeof(output_shape) / sizeof(output_shape[0]);
OrtValue* output_tensor = NULL;
// Bind the output tensor to results_ so Run() writes the 10 logits directly into it.
ORT_ABORT_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, results_.data(), results_.size() * sizeof(float), output_shape, output_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &output_tensor));
// memory_info is needed to create both tensors, so release it only afterwards.
g_ort->ReleaseMemoryInfo(memory_info);
ORT_ABORT_ON_ERROR(
g_ort->Run(session, NULL, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor));
assert(output_tensor != NULL);
ORT_ABORT_ON_ERROR(g_ort->IsTensor(output_tensor, &is_tensor));
assert(is_tensor);
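// Convert the raw logits to probabilities and pick the most likely digit.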
softmax(results_);
for (size_t i = 0; i < results_.size(); i++) {
std::cout << i << " : " << results_[i] << std::endl;
}
result_ = std::distance(results_.begin(), std::max_element(results_.begin(), results_.end()));
std::cout << "Result : " << result_ << std::endl;
int ret = 0;
g_ort->ReleaseValue(output_tensor);
g_ort->ReleaseValue(input_tensor);
free(model_input);
return ret;
}
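// Sanity check: the MNIST model has exactly one input and one output.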
void verify_input_output_count(OrtSession* session) {
size_t count;
ORT_ABORT_ON_ERROR(g_ort->SessionGetInputCount(session, &count));
assert(count == 1);
ORT_ABORT_ON_ERROR(g_ort->SessionGetOutputCount(session, &count));
assert(count == 1);
}
#ifdef USE_CUDA
void enable_cuda(OrtSessionOptions* session_options) {
ORT_ABORT_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
}
#endif
#ifdef USE_DML
void enable_dml(OrtSessionOptions* session_options) {
ORT_ABORT_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_DML(session_options, 0));
}
#endif
int main(int argc, char* argv[]) {
if (argc < 3) {
usage();
return -1;
}
g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
if (!g_ort) {
std::cout << "Failed to init ONNX Runtime engine." << std::endl;
return -1;
}
ORTCHAR_T* model_path = argv[1];
ORTCHAR_T* input_file = argv[2];
ORTCHAR_T* execution_provider = (argc >= 4) ? argv[3] : NULL;
OrtEnv* env;
ORT_ABORT_ON_ERROR(g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));
OrtSessionOptions* session_options;
ORT_ABORT_ON_ERROR(g_ort->CreateSessionOptions(&session_options));
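// Choose the execution provider from the optional third argument (CPU is the default).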
if (execution_provider)
{
if (strcmp(execution_provider, ORT_TSTR("cpu")) == 0) {
// Nothing; this is the default
} else if (strcmp(execution_provider, ORT_TSTR("cuda")) == 0) {
#ifdef USE_CUDA
enable_cuda(session_options);
#else
puts("CUDA is not enabled in this build.");
return -1;
#endif
} else if (strcmp(execution_provider, ORT_TSTR("dml")) == 0) {
#ifdef USE_DML
enable_dml(session_options);
#else
puts("DirectML is not enabled in this build.");
return -1;
#endif
} else {
usage();
puts("Invalid execution provider option.");
return -1;
}
}
OrtSession* session;
ORT_ABORT_ON_ERROR(g_ort->CreateSession(env, model_path, session_options, &session));
verify_input_output_count(session);
int ret = run_inference(session, input_file);
g_ort->ReleaseSessionOptions(session_options);
g_ort->ReleaseSession(session);
g_ort->ReleaseEnv(env);
if (ret != 0) {
std::cout << "fail." << std::endl;
}
return ret;
}
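Note that the input/output names "Input3" and "Plus214_Output_0" above are hardcoded for the stock MNIST model. If you use a different model, you can query the actual names through the C API; here is a minimal sketch, assuming the g_ort and session variables set up in MNIST.cc above:
// Query the model's actual input/output names instead of hardcoding them.
OrtAllocator* allocator;
ORT_ABORT_ON_ERROR(g_ort->GetAllocatorWithDefaultOptions(&allocator));
char* input_name;
ORT_ABORT_ON_ERROR(g_ort->SessionGetInputName(session, 0, allocator, &input_name));
char* output_name;
ORT_ABORT_ON_ERROR(g_ort->SessionGetOutputName(session, 0, allocator, &output_name));
std::cout << "input: " << input_name << ", output: " << output_name << std::endl;
// Names returned by the session must be freed with the same allocator.
allocator->Free(allocator, input_name);
allocator->Free(allocator, output_name);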