C++加载TensorRT引擎并调用深度学习模型的方法

使用TensorRT来调用训练好的模型并输出结果是一个高效的推理过程，特别是在需要低延迟和高吞吐量的应用场景中。以下是一个基本的步骤指南，展示了如何在C++中使用TensorRT进行推理。

步骤1：准备环境

安装TensorRT：确保你已经安装了NVIDIA TensorRT库。
准备模型：确保你的训练好的模型已经转换为TensorRT支持的格式，通常是一个.engine文件。你可以使用trtexec、onnx-tensorrt、TensorFlow-TensorRT等工具将模型转换为TensorRT引擎。注意：序列化后的引擎与生成它时所用的TensorRT版本和GPU型号绑定，更换环境后通常需要重新生成。

步骤2：编写C++代码

以下是一个简单的C++代码示例，演示如何加载TensorRT引擎并执行推理。

cpp复制代码

|---|-------------------------------------------------------------------------------------------------------------------------|
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <cuda_runtime_api.h>

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
| | |
| | // Logger for TensorRT info/warning/errors |
| | class Logger : public nvinfer1::ILogger { |
| | public: |
| | void log(Severity severity, const char* msg) noexcept override { |
| | // Filter out info-level messages |
| | if (severity != Severity::kINFO) |
| | std::cout << msg << std::endl; |
| | } |
| | }; |
| | |
/// Reads the entire contents of a file into memory as raw bytes.
///
/// @param filepath Path of the file to read; it is opened in binary mode.
/// @return The bytes of the file (an empty vector for an empty file).
/// @throws std::runtime_error if the file cannot be opened, its size cannot
///         be determined, or fewer bytes than expected could be read.
std::vector<char> readFile(const std::string& filepath) {
    // Open positioned at the end so tellg() reports the file size.
    std::ifstream file(filepath, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        throw std::runtime_error("Unable to open file " + filepath);
    }

    // tellg() yields -1 on failure; converting that straight to size_t would
    // request an enormous allocation, so validate it first.
    const std::streamoff endOffset = file.tellg();
    if (endOffset < 0) {
        throw std::runtime_error("Unable to determine size of file " + filepath);
    }
    const auto size = static_cast<std::size_t>(endOffset);

    file.seekg(0, std::ios::beg);
    std::vector<char> buffer(size);
    if (size > 0 && !file.read(buffer.data(), static_cast<std::streamsize>(size))) {
        throw std::runtime_error("Unable to read file " + filepath);
    }
    return buffer;
}
| | |
| | void inference(const std::string& enginePath, const std::vector<float>& inputData) { |
| | // Logger |
| | Logger logger; |
| | |
| | // Read the engine file |
| | std::vector<char> engineData = readFile(enginePath); |
| | std::istringstream engineStream(std::string(engineData.begin(), engineData.end())); |
| | |
| | // Deserialize the engine |
| | IRuntime* runtime = createInferRuntime(logger); |
| | ICudaEngine* engine = runtime->deserializeCudaEngine(engineStream); |
| | |
| | // Create execution context |
| | IExecutionContext* context = engine->createExecutionContext(); |
| | |
| | // Allocate GPU memory |
| | void* buffers[2]; |
| | cudaMalloc(&buffers[0], inputData.size() * sizeof(float)); // Input buffer |
| | float* outputData = nullptr; |
| | cudaMalloc(&buffers[1], engine->getBindingDimensions(1).d[0] * sizeof(float)); // Output buffer |
| | |
| | // Copy input data to GPU |
| | cudaMemcpy(buffers[0], inputData.data(), inputData.size() * sizeof(float), cudaMemcpyHostToDevice); |
| | |
| | // Set dynamic input dimensions if needed (omitting for simplicity) |
| | |
| | // Run inference |
| | context->enqueue(batchSize, buffers, 0, nullptr); |
| | |
| | // Synchronize the stream |
| | cudaStreamSynchronize(context->getStream()); |
| | |
| | // Copy the output data to the host |
| | outputData = new float[engine->getBindingDimensions(1).d[0]]; |
| | cudaMemcpy(outputData, buffers[1], engine->getBindingDimensions(1).d[0] * sizeof(float), cudaMemcpyDeviceToHost); |
| | |
| | // Print the output data (or process it as needed) |
| | std::cout << "Output data: "; |
| | for (int i = 0; i < engine->getBindingDimensions(1).d[0]; ++i) { |
| | std::cout << outputData[i] << " "; |
| | } |
| | std::cout << std::endl; |
| | |
| | // Clean up |
| | delete[] outputData; |
| | cudaFree(buffers[0]); |
| | cudaFree(buffers[1]); |
| | context->destroy(); |
| | engine->destroy(); |
| | runtime->destroy(); |
| | } |
| | |
| | int main() { |
| | // Path to the TensorRT engine file |
| | std::string enginePath = "your_model.engine"; |
| | |
| | // Example input data (must match the model's input dimensions) |
| | std::vector<float> inputData = { /* Populate with your input data */ }; |
| | |
| | // Run inference |
| | try { |
| | inference(enginePath, inputData); |
| | } catch (const std::exception& ex) { |
| | std::cerr << "Error: " << ex.what() << std::endl; |
| | return EXIT_FAILURE; |
| | } |
| | |
| | return EXIT_SUCCESS; |
| | } |

注意事项

输入数据：确保输入数据的维度和类型与你的模型匹配。
动态维度：如果你的模型包含动态输入维度，需要在创建执行上下文之后、执行推理之前为这些维度设置具体取值（例如在TensorRT 8.x中调用context->setBindingDimensions）。
错误处理：实际代码中应包含更多的错误处理逻辑，以应对各种可能的异常情况。
优化：TensorRT提供了多种优化选项，例如使用FP16进行推理以减少内存带宽和计算需求，你可以根据需求进行调整。

编译和运行

确保你的编译命令链接了TensorRT和CUDA库。例如：

sh复制代码

|---|-----------------------------------------------------------------------------------------------|
# Build: TensorRT's runtime API lives in libnvinfer (there is no separate
# libnvinfer_runtime), and the CUDA runtime is libcudart. Adjust the -I/-L
# paths if CUDA or TensorRT are installed somewhere else on your system.
g++ -std=c++17 -o tensorrt_inference tensorrt_inference.cpp \
    -I/usr/local/cuda/include -L/usr/local/cuda/lib64 \
    -lnvinfer -lcudart
# Run the resulting binary.
./tensorrt_inference

希望这个示例能帮助你理解如何在C++中使用TensorRT进行推理。