//!
//! \brief Initializes members of the params struct using the command line args
//!
samplesCommon::CaffeSampleParams initializeSampleParams(const samplesCommon::Args& args)
{
    samplesCommon::CaffeSampleParams params;
    if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
    {
        params.dataDirs.push_back("data/mnist/");
        params.dataDirs.push_back("data/samples/mnist/");
    }
    else // Use the data directory provided by the user
    {
        params.dataDirs = args.dataDirs;
    }
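For context, this helper is called from the sample's main() once the command-line arguments have been parsed. Below is a minimal sketch of that call site, assuming the samplesCommon::Args and parseArgs helpers from the samples' common code; the real main() does more (logging, result reporting, and so on).

#include "argsParser.h" // samplesCommon::Args / parseArgs / CaffeSampleParams from the samples' common code
#include <cstdlib>

int main(int argc, char** argv)
{
    samplesCommon::Args args;
    // Fills args.dataDirs, args.runInFp16, args.runInInt8, ... from the command line
    if (!samplesCommon::parseArgs(args, argc, argv))
    {
        return EXIT_FAILURE;
    }
    samplesCommon::CaffeSampleParams params = initializeSampleParams(args);
    // ... construct the sample object with params, then call build() and infer()
    return EXIT_SUCCESS;
}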
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the MNIST network by parsing the caffe model and builds
//!          the engine that will be used to run MNIST (mEngine)
//!
//! \return true if the engine was created successfully and false otherwise
//!
bool SampleMNIST::build()
{
    // Create an IBuilder; gLogger is passed in so the builder can print its messages
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }
    // Create an empty network; it is populated later in constructNetwork
    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
    if (!network)
    {
        return false;
    }
    // Create a builder configuration object
    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }
    // Create a caffe parser object; constructNetwork uses it to parse the model into the network
    auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
    if (!parser)
    {
        return false;
    }
    // Parse the caffe model and convert it into the network definition
    if (!constructNetwork(parser, network))
    {
        return false;
    }
    // Set the batch size, workspace, precision flags, etc.
    builder->setMaxBatchSize(mParams.batchSize);
    config->setFlag(BuilderFlag::kGPU_FALLBACK);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
    }
    // CUDA stream used for profiling by the builder.
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return false;
    }
    config->setProfileStream(*profileStream);
    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        return false;
    }
    SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
    if (!runtime)
    {
        return false;
    }
    // Build the TensorRT engine by deserializing the plan
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }
//!
//! \brief Uses a caffe parser to create the MNIST Network and marks the
//!        output layers
//!
//! \param network Pointer to the network that will be populated with the MNIST network
//!
//! \param builder Pointer to the engine builder
//!
bool SampleMNIST::constructNetwork(
    SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser, SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    // Parsing fills in the network definition, including the weights
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
        mParams.prototxtFileName.c_str(), mParams.weightsFileName.c_str(), *network, nvinfer1::DataType::kFLOAT);

    // Mark the output tensors
    for (auto& s : mParams.outputTensorNames)
    {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }

    // add mean subtraction to the beginning of the network
    nvinfer1::Dims inputDims = network->getInput(0)->getDimensions();
    // Read the mean values from the binaryproto file
    mMeanBlob
        = SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>(parser->parseBinaryProto(mParams.meanFileName.c_str()));
    nvinfer1::Weights meanWeights{nvinfer1::DataType::kFLOAT, mMeanBlob->getData(), inputDims.d[1] * inputDims.d[2]};
    // For this sample, a large range based on the mean data is chosen and applied to the head of the network.
    // After the mean subtraction occurs, the range is expected to be between -127 and 127, so the rest of the network
    // is given a generic range.
    // The preferred method is to use scales computed based on a representative data set
    // and apply each one individually based on the tensor. The range here is large enough for the
    // network, but is chosen for example purposes only.
    // The raw input values are in [0, 256]; after mean subtraction they fall roughly into [-127, 127].
    float maxMean
        = samplesCommon::getMaxValue(static_cast<const float*>(meanWeights.values), samplesCommon::volume(inputDims));

    // Add a constant layer to the network holding the 1x28x28 mean values
    auto mean = network->addConstant(nvinfer1::Dims3(1, inputDims.d[1], inputDims.d[2]), meanWeights);
    if (!mean->getOutput(0)->setDynamicRange(-maxMean, maxMean))
    {
        return false;
    }
    if (!network->getInput(0)->setDynamicRange(-maxMean, maxMean))
    {
        return false;
    }
    // Add an element-wise layer that subtracts the mean from the input
    auto meanSub = network->addElementWise(*network->getInput(0), *mean->getOutput(0), ElementWiseOperation::kSUB);
    if (!meanSub->getOutput(0)->setDynamicRange(-maxMean, maxMean))
    {
        return false;
    }
    // Replace the first layer's input with the output of the mean-subtraction layer
    network->getLayer(0)->setInput(0, *meanSub->getOutput(0));
    // Give the rest of the network a generic dynamic range
    samplesCommon::setAllDynamicRanges(network.get(), 127.0f, 127.0f);
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates
//!          the buffer, sets inputs, executes the engine, and verifies the output.
//!
bool SampleMNIST::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
    // Create the execution context
    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }
    // Pick a random digit to try to infer
    srand(time(NULL));
    const int digit = rand() % 10;
    // Read the input data into the managed buffers
    // There should be just 1 input tensor
    // i.e. write the image for `digit` into the buffer named mParams.inputTensorNames[0]
    ASSERT(mParams.inputTensorNames.size() == 1);
    if (!processInput(buffers, mParams.inputTensorNames[0], digit))
    {
        return false;
    }
    // Create CUDA stream for the execution of this inference.
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    // Asynchronously copy data from host input buffers to device input buffers
    buffers.copyInputToDeviceAsync(stream);
    // Asynchronously enqueue the inference work
    if (!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream, nullptr))
    {
        return false;
    }
    // Asynchronously copy data from device output buffers to host output buffers
    buffers.copyOutputToHostAsync(stream);
    // Wait for the work in the stream to complete
    CHECK(cudaStreamSynchronize(stream));
    // Check and print the output of the inference
    // There should be just one output tensor
    // i.e. read the tensor named mParams.outputTensorNames[0] from the buffers and check it against `digit`
    ASSERT(mParams.outputTensorNames.size() == 1);
    bool outputCorrect = verifyOutput(buffers, mParams.outputTensorNames[0], digit);
//!
//! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer
//!
bool SampleMNIST::processInput(
    const samplesCommon::BufferManager& buffers, const std::string& inputTensorName, int inputFileIdx) const
{
    const int inputH = mInputDims.d[1]; // mInputDims is {1, 28, 28}
    const int inputW = mInputDims.d[2];
    // Read a random digit file
    srand(unsigned(time(nullptr)));
    std::vector<uint8_t> fileData(inputH * inputW);
    // Read the PGM image
    readPGMFile(locateFile(std::to_string(inputFileIdx) + ".pgm", mParams.dataDirs), fileData.data(), inputH, inputW);
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the MNIST network by using the API to create a model and builds
//!          the engine that will be used to run MNIST (mEngine)
//!
//! \return true if the engine was created successfully and false otherwise
//!
bool SampleMNISTAPI::build()
{
    // Load the weights from the *.wts file
    mWeightMap = loadWeights(locateFile(mParams.weightsFile, mParams.dataDirs));
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }
    const auto explicitBatchFlag
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatchFlag));
    if (!network)
    {
        return false;
    }
    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }
    // The difference is in here: the API calls are used to build the network structure and load the weights
    auto constructed = constructNetwork(builder, network, config);
    if (!constructed)
    {
        return false;
    }
    ASSERT(network->getNbInputs() == 1);
    auto inputDims = network->getInput(0)->getDimensions();
    ASSERT(inputDims.nbDims == 4);
    ASSERT(network->getNbOutputs() == 1);
    auto outputDims = network->getOutput(0)->getDimensions();
    ASSERT(outputDims.nbDims == 4);
//!
//! \brief Uses the API to create the MNIST Network
//!
//! \param network Pointer to the network that will be populated with the MNIST network
//!
//! \param builder Pointer to the engine builder
//!
bool SampleMNISTAPI::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config)
{
    // Create input tensor of shape { 1, 1, 28, 28 }
    // Dims4{1, 1, mParams.inputH, mParams.inputW} describes a blob with batch size 1, 1 channel,
    // and height/width of INPUT_H x INPUT_W
    ITensor* data = network->addInput(
        mParams.inputTensorNames[0].c_str(), DataType::kFLOAT, Dims4{1, 1, mParams.inputH, mParams.inputW});
    ASSERT(data);
    // Add convolution layer with 20 outputs and a 5x5 filter.
    // 20 is the number of kernels, Dims{2, {5, 5}} is the kernel size, and mWeightMap["conv1filter"] /
    // mWeightMap["conv1bias"] hold the weight and bias values
    IConvolutionLayer* conv1 = network->addConvolutionNd(
        *scale_1->getOutput(0), 20, Dims{2, {5, 5}}, mWeightMap["conv1filter"], mWeightMap["conv1bias"]);
    ASSERT(conv1);
    conv1->setStride(DimsHW{1, 1}); // stride 1; all of these values can be read from mnist.caffemodel
    // Add max pooling layer with stride of 2x2 and kernel size of 2x2.
    IPoolingLayer* pool1 = network->addPoolingNd(*conv1->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}});
    ASSERT(pool1);
    pool1->setStride(DimsHW{2, 2});
    // Add second convolution layer with 50 outputs and a 5x5 filter.
    IConvolutionLayer* conv2 = network->addConvolutionNd(
        *pool1->getOutput(0), 50, Dims{2, {5, 5}}, mWeightMap["conv2filter"], mWeightMap["conv2bias"]);
    ASSERT(conv2);
    conv2->setStride(DimsHW{1, 1});
    // Add second max pooling layer with stride of 2x2 and kernel size of 2x2.
    IPoolingLayer* pool2 = network->addPoolingNd(*conv2->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}});
    ASSERT(pool2);
    pool2->setStride(DimsHW{2, 2});
    // Utility to use MatMul as an FC layer
    auto addMatMulasFCLayer = [&network](ITensor* input, int32_t const outputs, Weights& filterWeights,
                                  Weights& biasWeights) -> ILayer* {
        Dims inputDims = input->getDimensions();
        int32_t const m = inputDims.d[0];
        int32_t const k
            = std::accumulate(inputDims.d + 1, inputDims.d + inputDims.nbDims, 1, std::multiplies<int32_t>());
        int32_t const n = static_cast<int32_t>(filterWeights.count / static_cast<int64_t>(k));
        ASSERT(static_cast<int64_t>(n) * static_cast<int64_t>(k) == filterWeights.count);
        ASSERT(static_cast<int64_t>(n) == biasWeights.count);
        ASSERT(n == outputs);
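The excerpt stops before the lambda's body. Assuming it follows the usual MatMul-plus-bias pattern for emulating a fully connected layer, the remainder could look roughly like the sketch below (this is my own sketch, not the sample's verbatim code): flatten the input to an m x k matrix, multiply it by the transposed n x k filter held in a constant layer, then broadcast-add the bias.

#include "NvInfer.h"

// Sketch: a fully connected layer expressed as Shuffle + MatMul + ElementWise(kSUM)
nvinfer1::ILayer* matMulAsFC(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor& input, int32_t m, int32_t k,
    int32_t n, nvinfer1::Weights filterWeights, nvinfer1::Weights biasWeights)
{
    using namespace nvinfer1;
    // Flatten the input to an m x k matrix
    IShuffleLayer* reshape = network.addShuffle(input);
    reshape->setReshapeDimensions(Dims{2, {m, k}});

    // The n x k filter becomes a constant tensor; transposing it in the MatMul
    // gives the (m x k) * (k x n) = m x n fully connected output
    IConstantLayer* filterConst = network.addConstant(Dims{2, {n, k}}, filterWeights);
    IMatrixMultiplyLayer* mm = network.addMatrixMultiply(
        *reshape->getOutput(0), MatrixOperation::kNONE, *filterConst->getOutput(0), MatrixOperation::kTRANSPOSE);

    // Broadcast-add the 1 x n bias row to every output row
    IConstantLayer* biasConst = network.addConstant(Dims{2, {1, n}}, biasWeights);
    return network.addElementWise(*mm->getOutput(0), *biasConst->getOutput(0), ElementWiseOperation::kSUM);
}

This is also why the lambda computes m, k and n up front: they fix the shapes of the reshape and of the two constant tensors.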
    // CUDA stream used for profiling by the builder.
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return false;
    }
    config->setProfileStream(*profileStream);
    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        return false;
    }
    SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
    if (!runtime)
    {
        return false;
    }
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the Onnx MNIST network by parsing the Onnx model and builds
//!          the engine that will be used to run MNIST (mEngine)
//!
//! \return true if the engine was created successfully and false otherwise
//!
bool SampleOnnxMNIST::build()
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network)
    {
        return false;
    }
    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }
    // As in the first sample, but this time with the ONNX parser (note the different arguments)
    auto parser
        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
    if (!parser)
    {
        return false;
    }
    // Again the difference lies in this network-construction function; its body is listed below
    auto constructed = constructNetwork(builder, network, config, parser);
    if (!constructed)
    {
        return false;
    }
    // CUDA stream used for profiling by the builder.
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return false;
    }
    config->setProfileStream(*profileStream);
    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
    if (!plan)
    {
        return false;
    }
    SampleUniquePtr<IRuntime> runtime{createInferRuntime(sample::gLogger.getTRTLogger())};
    if (!runtime)
    {
        return false;
    }
    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }
//!
//! \brief Uses an ONNX parser to create the Onnx MNIST Network and marks the
//!        output layers
//!
//! \param network Pointer to the network that will be populated with the Onnx MNIST network
//!
//! \param builder Pointer to the engine builder
//!
bool SampleOnnxMNIST::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvonnxparser::IParser>& parser)
{
    // As in the first sample, the ONNX parser is used to populate the TensorRT network
    auto parsed = parser->parseFromFile(locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(),
        static_cast<int>(sample::gLogger.getReportableSeverity()));
    if (!parsed)
    {
        return false;
    }
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        samplesCommon::setAllDynamicRanges(network.get(), 127.0f, 127.0f);
    }
// Parameters
// deploy      The plain text, prototxt file used to define the network configuration.
// model       The binaryproto Caffe model that contains the weights associated with the network.
// network     Network in which the CaffeParser will fill the layers.
// weightType  The type to which the weights will be transformed.
With that, you end up with a fully populated network and can go ahead and build the engine. So far everything seems quite pleasant…
In practice, however, TRT is far from complete. Many TensorFlow operations, for instance, are simply not supported, so the file you feed in often cannot be parsed at all (one of the most familiar frustrations across deep learning frameworks). In that case we have to populate the network ourselves, which means calling TRT's lower-level interfaces to build the model structure, much as you would in Caffe or TensorFlow.
// Parameters
// input          The input tensor to the convolution.
// nbOutputMaps   The number of output feature maps for the convolution.
// kernelSize     The HW-dimensions of the convolution kernel.
// kernelWeights  The kernel weights for the convolution.
// biasWeights    The optional bias weights for the convolution.
These parameters mean essentially the same thing as in other deep learning frameworks, so there is not much to explain: you just wrap your data in TRT's data structures. The one thing that differs from building a network for training is that the model parameters have to be filled in up front, because TRT is an inference framework and the weights are already known and fixed. In practice this means reading an already-trained model, parsing its parameter file yourself, and packing the values into TRT's data types, as the sketch below illustrates.
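Here is a minimal sketch (my own, not taken from the samples) of wrapping parsed weight data in nvinfer1::Weights and handing it to one of the add* calls. The two vectors stand in for values you have extracted from a trained model file yourself; the layer shape mirrors the conv1 layer shown earlier.

#include "NvInfer.h"
#include <vector>

// Sketch: wrap trained parameters in Weights and add a convolution with them
nvinfer1::ILayer* addConv1(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor& input,
    const std::vector<float>& kernelData, const std::vector<float>& biasData)
{
    using namespace nvinfer1;
    // Weights does not copy: the pointers must stay valid until the engine has been built
    Weights kernel{DataType::kFLOAT, kernelData.data(), static_cast<int64_t>(kernelData.size())};
    Weights bias{DataType::kFLOAT, biasData.data(), static_cast<int64_t>(biasData.size())};

    // 20 output maps, 5x5 kernel, stride 1 (same shape as conv1 above)
    IConvolutionLayer* conv = network.addConvolutionNd(input, 20, Dims{2, {5, 5}}, kernel, bias);
    if (conv)
    {
        conv->setStrideNd(Dims{2, {1, 1}});
    }
    return conv;
}

Note that Weights only stores a pointer, which is why the samples keep the parsed values alive in a member such as mWeightMap until the engine build finishes.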
The reason I call TRT's network-building interface only "relatively" rich is that even with these low-level interfaces many operations still cannot be expressed; there is simply no corresponding add* method, never mind the custom layers that real-world workloads often need. For these cases TRT provides the plugin interface, which lets you define your own add*-style operation. The workflow is: derive from nvinfer1::IPluginV2 and implement the custom layer's computation in CUDA; derive from nvinfer1::IPluginCreator to write its creator class, overriding the virtual createPlugin method; and finally call the REGISTER_TENSORRT_PLUGIN macro to register the plugin, after which it is ready to use. (I won't walk through the plugin interface's member functions one by one here.)
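A full plugin implementation is too long to reproduce here, but the usage side is short. The sketch below shows how a plugin that has already been registered with REGISTER_TENSORRT_PLUGIN is looked up in the global registry and inserted into a network. The name "MyPlugin" and version "1" are hypothetical and must match whatever your IPluginCreator reports; lifetime handling of the returned plugin object is omitted.

#include "NvInfer.h"

// Sketch: look up a registered plugin creator and add the plugin to a network
nvinfer1::ILayer* addMyPlugin(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor& input)
{
    using namespace nvinfer1;
    IPluginCreator* creator = getPluginRegistry()->getPluginCreator("MyPlugin", "1");
    if (!creator)
    {
        return nullptr; // the plugin was never registered
    }
    // No extra attributes in this sketch; real plugins usually pass PluginFields here
    PluginFieldCollection fc{};
    fc.nbFields = 0;
    fc.fields = nullptr;
    IPluginV2* plugin = creator->createPlugin("my_plugin_instance", &fc);
    if (!plugin)
    {
        return nullptr;
    }
    ITensor* inputs[] = {&input};
    return network.addPluginV2(inputs, 1, *plugin);
}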
class Dims
{
public:
    static const int MAX_DIMS = 8; //!< The maximum number of dimensions supported for a tensor.
    int nbDims;                    //!< The number of dimensions.
    int d[MAX_DIMS];               //!< The extent of each dimension.
    DimensionType type[MAX_DIMS];  //!< The type of each dimension.
};
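As a quick illustration of how Dims is used, here is a minimal standalone sketch (volumeOf is a local helper, not part of the TensorRT API):

#include "NvInfer.h"
#include <cstdint>
#include <iostream>

// Local helper: total number of elements described by a Dims (product of the extents)
static int64_t volumeOf(const nvinfer1::Dims& dims)
{
    int64_t v = 1;
    for (int32_t i = 0; i < dims.nbDims; ++i)
    {
        v *= dims.d[i];
    }
    return v;
}

int main()
{
    nvinfer1::Dims3 chw{1, 28, 28};          // CHW dims of one MNIST image: nbDims == 3, d == {1, 28, 28}
    std::cout << volumeOf(chw) << std::endl; // prints 784
    return 0;
}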