赞
踩
目录
TensorRT 的 “hello world” 程序 sampleMNIST 是众多 TensorRT 初学者很好的起点。本文旨在详细分析 sampleMNIST 的代码,从实践出发帮助理解 TensorRT 的相关概念、与 CUDA 的关系,以及核心 API 的使用。
sampleMNIST 的 GitHub 代码参考链接:https://github.com/NVIDIA/TensorRT/blob/release/6.0/samples/opensource/sampleMNIST/sampleMNIST.cpp
程序的主要流程分为四个阶段:main 与程序输入参数初始化 → 网络构建 → 网络推理 → 释放资源结束。下面逐个阶段分析代码。
- //! Prints the command-line usage text of the sample to std::cout.
- //! Describes the --help, --datadir, --useDLACore, --int8 and --fp16 options.
- void printHelpInfo()
- {
- std::cout
- << "Usage: ./sample_mnist [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]\n";
- std::cout << "--help Display help information\n";
- std::cout << "--datadir Specify path to a data directory, overriding the default. This option can be used "
- "multiple times to add multiple directories. If no data directories are given, the default is to use "
- "(data/samples/mnist/, data/mnist/)"
- << std::endl;
- std::cout << "--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
- "where n is the number of DLA engines on the platform."
- << std::endl;
- std::cout << "--int8 Run in Int8 mode.\n";
- std::cout << "--fp16 Run in FP16 mode.\n";
- }
-
- int main(int argc, char** argv)
- {
- samplesCommon::Args args;
- bool argsOK = samplesCommon::parseArgs(args, argc, argv);

- //! Builds the CaffeSampleParams used by the sample from the parsed command-line arguments.
- //! \param args Parsed command-line arguments (data directories, DLA core, int8/fp16 switches).
- //! \return Parameters holding the data directories, model file names, tensor names,
- //! batch size and precision/DLA settings.
- samplesCommon::CaffeSampleParams initializeSampleParams(const samplesCommon::Args& args)
- {
- samplesCommon::CaffeSampleParams params;
- if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
- {
- params.dataDirs.push_back("data/mnist/");
- params.dataDirs.push_back("data/samples/mnist/");
- }
- else //!< Use the data directory provided by the user
- {
- params.dataDirs = args.dataDirs;
- }
-
- // Resolve the three model files against params.dataDirs.
- // NOTE(review): locateFile is defined elsewhere; presumably it searches the directories in order -- confirm.
- params.prototxtFileName = locateFile("mnist.prototxt", params.dataDirs);
- params.weightsFileName = locateFile("mnist.caffemodel", params.dataDirs);
- params.meanFileName = locateFile("mnist_mean.binaryproto", params.dataDirs);
- // One input tensor ("data"), one output tensor ("prob"), batch size 1.
- params.inputTensorNames.push_back("data");
- params.batchSize = 1;
- params.outputTensorNames.push_back("prob");
- // Forward the DLA core and precision switches from the command line unchanged.
- params.dlaCore = args.useDLACore;
- params.int8 = args.runInInt8;
- params.fp16 = args.runInFp16;
-
- return params;
- }
-
- ......
-
- int main(int argc, char** argv)
- {
- ......
- samplesCommon::CaffeSampleParams params = initializeSampleParams(args);

- class SampleMNIST
- {
- template <typename T>
- using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;
-
- public:
- SampleMNIST(const samplesCommon::CaffeSampleParams& params)
- : mParams(params)
-
- ......
-
- int main(int argc, char** argv)
- {
- ......
-
- SampleMNIST sample(params);
- gLogInfo << "Building and running a GPU inference engine for MNIST" << std::endl;

- int main(int argc, char** argv)
- {
- ......
-
- if (!sample.build())
- {
- return gLogger.reportFail(sampleTest);
- }
通过 SampleMNIST 对象来创建 MNIST 深度学习网络。下面开始详细分析网络构建阶段的 build 方法。
- bool SampleMNIST::build()
- {
- auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));
- if (!builder)
- {
- return false;
- }
-
- auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
- if (!network)
- {
- return false;
- }
-
- auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
- if (!config)
- {
- return false;
- }
-
- auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
- if (!parser)
- {
- return false;
- }
-
- constructNetwork(parser, network);

- //! Populates the network definition from the Caffe model and prepends a mean-subtraction step.
- //! \param parser Caffe parser used to read the prototxt, the weights and the mean binaryproto file.
- //! \param network Network definition that receives the parsed layers and the added layers.
- void SampleMNIST::constructNetwork(
- SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser, SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
- {
- // Parse the prototxt and caffemodel into *network, requesting kFLOAT as the data type.
- // NOTE(review): the returned blobNameToTensor pointer is used below without a null check.
- const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
- mParams.prototxtFileName.c_str(), mParams.weightsFileName.c_str(), *network, nvinfer1::DataType::kFLOAT);
-
- // Mark every tensor named in mParams.outputTensorNames as a network output.
- // NOTE(review): the result of find() is dereferenced without a null check.
- for (auto& s : mParams.outputTensorNames)
- {
- network->markOutput(*blobNameToTensor->find(s.c_str()));
- }
-
- // add mean subtraction to the beginning of the network
- nvinfer1::Dims inputDims = network->getInput(0)->getDimensions();
- // The parsed mean blob is stored in the member mMeanBlob; meanWeights below points at its data.
- mMeanBlob
- = SampleUniquePtr<nvcaffeparser1::IBinaryProtoBlob>(parser->parseBinaryProto(mParams.meanFileName.c_str()));
- // The weight count is d[1] * d[2] of the input dimensions.
- nvinfer1::Weights meanWeights{nvinfer1::DataType::kFLOAT, mMeanBlob->getData(), inputDims.d[1] * inputDims.d[2]};
- // For this sample, a large range based on the mean data is chosen and applied to the head of the network.
- // After the mean subtraction occurs, the range is expected to be between -127 and 127, so the rest of the network
- // is given a generic range.
- // The preferred method is use scales computed based on a representative data set
- // and apply each one individually based on the tensor. The range here is large enough for the
- // network, but is chosen for example purposes only.
- // NOTE(review): the length passed here is volume(inputDims), while meanWeights was declared with
- // d[1] * d[2] elements; the two presumably agree only when d[0] == 1 -- confirm.
- float maxMean
- = samplesCommon::getMaxValue(static_cast<const float*>(meanWeights.values), samplesCommon::volume(inputDims));
-
- // Add the mean as a constant layer of shape (1, d[1], d[2]) and set [-maxMean, maxMean] as the
- // dynamic range of the mean, the network input and the subtraction result.
- auto mean = network->addConstant(nvinfer1::Dims3(1, inputDims.d[1], inputDims.d[2]), meanWeights);
- mean->getOutput(0)->setDynamicRange(-maxMean, maxMean);
- network->getInput(0)->setDynamicRange(-maxMean, maxMean);
- // meanSub = input - mean (element-wise kSUB).
- auto meanSub = network->addElementWise(*network->getInput(0), *mean->getOutput(0), ElementWiseOperation::kSUB);
- meanSub->getOutput(0)->setDynamicRange(-maxMean, maxMean);
- // Rewire layer 0 so that its input 0 is the mean-subtracted tensor instead of the raw network input.
- network->getLayer(0)->setInput(0, *meanSub->getOutput(0));
- // Apply the generic 127 scale mentioned in the comment above through the samplesCommon helper.
- samplesCommon::setAllTensorScales(network.get(), 127.0f, 127.0f);
- }

- bool SampleMNIST::build()
- {
- ......
- builder->setMaxBatchSize(mParams.batchSize);
- config->setMaxWorkspaceSize(16_MiB);
- config->setFlag(BuilderFlag::kGPU_FALLBACK);
- config->setFlag(BuilderFlag::kSTRICT_TYPES);
- if (mParams.fp16)
- {
- config->setFlag(BuilderFlag::kFP16);
- }
- if (mParams.int8)
- {
- config->setFlag(BuilderFlag::kINT8);
- }
-
- samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
-
- mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
- builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
-
- if (!mEngine)
- return false;
-
- assert(network->getNbInputs() == 1);
- mInputDims = network->getInput(0)->getDimensions();
- assert(mInputDims.nbDims == 3);
-
- return true;
- }

- bool SampleMNIST::infer()
- {
- // Create RAII buffer manager object
- samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);
-
- auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
- if (!context)
- {
- return false;
- }
-
- // Pick a random digit to try to infer
- srand(time(NULL));
- const int digit = rand() % 10;
-
- // Read the input data into the managed buffers
- // There should be just 1 input tensor
- assert(mParams.inputTensorNames.size() == 1);
- if (!processInput(buffers, mParams.inputTensorNames[0], digit))
- {
- return false;
- }
-
- .....
-
-
- int main(int argc, char** argv)
- {
-
- ......
-
- if (!sample.infer())
- {
- return gLogger.reportFail(sampleTest);
- }

- //! Function object that allocates device memory with cudaMalloc.
- class DeviceAllocator
- {
- public:
- //! Allocates \p size bytes and stores the address in \p *ptr.
- //! \return true if cudaMalloc returned cudaSuccess, false otherwise.
- bool operator()(void** ptr, size_t size) const
- {
- return cudaMalloc(ptr, size) == cudaSuccess;
- }
- };
-
- //! Function object that releases memory with cudaFree.
- class DeviceFree
- {
- public:
- //! Passes \p ptr to cudaFree; the return code of cudaFree is ignored.
- void operator()(void* ptr) const
- {
- cudaFree(ptr);
- }
- };
-
- ......
-
- //! Function object that allocates host memory with malloc.
- class HostAllocator
- {
- public:
- //! Allocates \p size bytes with malloc and stores the address in \p *ptr.
- //! \return true if the stored pointer is non-null, false otherwise.
- bool operator()(void** ptr, size_t size) const
- {
- *ptr = malloc(size);
- return *ptr != nullptr;
- }
- };
-
- //! Function object that releases host memory with free.
- class HostFree
- {
- public:
- //! Passes \p ptr to free.
- void operator()(void* ptr) const
- {
- free(ptr);
- }
- };

- //! Creates one host buffer and one device buffer for every binding of the engine.
- //! \param engine Engine whose bindings are enumerated; stored in mEngine.
- //! \param batchSize Batch size; multiplied into each buffer's element count when no context is given.
- //! \param context Optional execution context; when non-null, binding dimensions are read from it
- //! instead of from the engine and the batch size is not multiplied in.
- BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine, const int& batchSize,
- const nvinfer1::IExecutionContext* context = nullptr)
- : mEngine(engine)
- , mBatchSize(batchSize)
- {
- // Create host and device buffers
- for (int i = 0; i < mEngine->getNbBindings(); i++)
- {
- auto dims = context ? context->getBindingDimensions(i) : mEngine->getBindingDimensions(i);
- // Element count starts at 1 when a context is given, otherwise at the batch size.
- size_t vol = context ? 1 : static_cast<size_t>(mBatchSize);
- nvinfer1::DataType type = mEngine->getBindingDataType(i);
- int vecDim = mEngine->getBindingVectorizedDim(i);
- if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
- {
- // Vectorized binding: round the vectorized dimension up to a whole number of vectors,
- // then count scalarsPerVec scalars for each vector.
- int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
- dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
- vol *= scalarsPerVec;
- }
- vol *= samplesCommon::volume(dims);
- // Build the host/device buffer pair with the same element count and data type.
- std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
- manBuf->deviceBuffer = DeviceBuffer(vol, type);
- manBuf->hostBuffer = HostBuffer(vol, type);
- // Record the device pointer at position i, then hand ownership of the pair to mManagedBuffers.
- mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
- mManagedBuffers.emplace_back(std::move(manBuf));
- }
- }

- bool SampleMNIST::infer()
- {
-
- ......
-
- // Pick a random digit to try to infer
- srand(time(NULL));
- const int digit = rand() % 10;
-
- // Read the input data into the managed buffers
- // There should be just 1 input tensor
- assert(mParams.inputTensorNames.size() == 1);
- if (!processInput(buffers, mParams.inputTensorNames[0], digit))
- {
- return false;
- }
-
- ......
-
- //! Reads the digit image "<inputFileIdx>.pgm", logs it as ASCII art and copies it into the host input buffer.
- //! \param buffers Buffer manager that provides the host buffer for the input tensor.
- //! \param inputTensorName Name of the input tensor whose host buffer is filled.
- //! \param inputFileIdx Index used to build the file name of the PGM image to load.
- //! \return Always true in the code shown here.
- bool SampleMNIST::processInput(
- const samplesCommon::BufferManager& buffers, const std::string& inputTensorName, int inputFileIdx) const
- {
- // Height and width are taken from entries 1 and 2 of the stored input dimensions.
- const int inputH = mInputDims.d[1];
- const int inputW = mInputDims.d[2];
-
- // Read a random digit file
- // NOTE(review): nothing in this function calls rand() after this reseed; the file that is read
- // is selected by inputFileIdx.
- srand(unsigned(time(nullptr)));
- std::vector<uint8_t> fileData(inputH * inputW);
- readPGMFile(locateFile(std::to_string(inputFileIdx) + ".pgm", mParams.dataDirs), fileData.data(), inputH, inputW);
-
- // Print ASCII representation of digit
- gLogInfo << "Input:\n";
- for (int i = 0; i < inputH * inputW; i++)
- {
- // fileData[i] / 26 maps a byte value 0..255 to an index 0..9 into the 10-character ramp;
- // a newline is emitted after every inputW pixels.
- gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ? "" : "\n");
- }
- gLogInfo << std::endl;
-
- // NOTE(review): the returned pointer is written through below without a null check.
- float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(inputTensorName));
-
- // Convert each 8-bit pixel to float; the values are stored unscaled.
- for (int i = 0; i < inputH * inputW; i++)
- {
- hostInputBuffer[i] = float(fileData[i]);
- }
-
- return true;
- }

- //! Returns the result of getBuffer(false, tensorName), i.e. the device-side lookup for \p tensorName.
- void* getDeviceBuffer(const std::string& tensorName) const
- {
- return getBuffer(false, tensorName);
- }
-
-
- //! Returns the result of getBuffer(true, tensorName), i.e. the host-side lookup for \p tensorName.
- void* getHostBuffer(const std::string& tensorName) const
- {
- return getBuffer(true, tensorName);
- }
-
- ......
-
- //! Looks up the managed buffer that belongs to the binding named \p tensorName.
- //! \param isHost true to return the host buffer pointer, false to return the device buffer pointer.
- //! \param tensorName Binding name passed to the engine's getBindingIndex.
- //! \return The buffer's data pointer, or nullptr when getBindingIndex returns -1.
- void* getBuffer(const bool isHost, const std::string& tensorName) const
- {
- int index = mEngine->getBindingIndex(tensorName.c_str());
- if (index == -1)
- return nullptr;
- return (isHost ? mManagedBuffers[index]->hostBuffer.data() : mManagedBuffers[index]->deviceBuffer.data());
- }

- bool SampleMNIST::infer()
- {
- ......
-
- // Create CUDA stream for the execution of this inference.
- cudaStream_t stream;
- CHECK(cudaStreamCreate(&stream));
-
- // Asynchronously copy data from host input buffers to device input buffers
- buffers.copyInputToDeviceAsync(stream);
-
- ......
- //! Copies the input bindings from their host buffers to their device buffers with cudaMemcpyAsync.
- //! \param stream CUDA stream on which the copies are issued (defaults to 0).
- void copyInputToDeviceAsync(const cudaStream_t& stream = 0)
- {
- // copyInput = true, deviceToHost = false, async = true
- memcpyBuffers(true, false, true, stream);
- }
-
- ......
-
- //! Copies either all input bindings or all output bindings between host and device buffers.
- //! \param copyInput true to copy the input bindings, false to copy the non-input bindings.
- //! \param deviceToHost true to copy device -> host, false to copy host -> device.
- //! \param async true to use cudaMemcpyAsync on \p stream, false to use cudaMemcpy.
- //! \param stream CUDA stream used for the asynchronous copies (defaults to 0).
- void memcpyBuffers(const bool copyInput, const bool deviceToHost, const bool async, const cudaStream_t& stream = 0)
- {
- for (int i = 0; i < mEngine->getNbBindings(); i++)
- {
- // Destination and source are the two buffers of the pair; the direction decides which is which.
- void* dstPtr
- = deviceToHost ? mManagedBuffers[i]->hostBuffer.data() : mManagedBuffers[i]->deviceBuffer.data();
- const void* srcPtr
- = deviceToHost ? mManagedBuffers[i]->deviceBuffer.data() : mManagedBuffers[i]->hostBuffer.data();
- // The byte count is taken from the host buffer for both directions.
- const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
- const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
- // Copy only the bindings whose input/output kind matches copyInput.
- if ((copyInput && mEngine->bindingIsInput(i)) || (!copyInput && !mEngine->bindingIsInput(i)))
- {
- if (async)
- CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
- else
- CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
- }
- }
- }

- bool SampleMNIST::infer()
- {
- ......
-
- // Asynchronously enqueue the inference work
- if (!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream, nullptr))
- {
- return false;
- }
- // Asynchronously copy data from device output buffers to host output buffers
- buffers.copyOutputToHostAsync(stream);
-
- // Wait for the work in the stream to complete
- cudaStreamSynchronize(stream);
-
- // Release stream
- cudaStreamDestroy(stream);
-
- // Check and print the output of the inference
- // There should be just one output tensor
- assert(mParams.outputTensorNames.size() == 1);
- bool outputCorrect = verifyOutput(buffers, mParams.outputTensorNames[0], digit);
-
- return outputCorrect;
- }

- //! Logs a histogram of the ten output values and checks the predicted digit.
- //! \param buffers Buffer manager that provides the host buffer for the output tensor.
- //! \param outputTensorName Name of the output tensor to read.
- //! \param groundTruthDigit The digit the prediction is compared against.
- //! \return true when the index of the largest value equals \p groundTruthDigit and that value is above 0.9.
- bool SampleMNIST::verifyOutput(
- const samplesCommon::BufferManager& buffers, const std::string& outputTensorName, int groundTruthDigit) const
- {
- // NOTE(review): the pointer is read below without a null check; the loop reads kDIGITS floats from it.
- const float* prob = static_cast<const float*>(buffers.getHostBuffer(outputTensorName));
-
- // Print histogram of the output distribution
- gLogInfo << "Output:\n";
- // val / idx track the largest value seen so far and its index; both start at 0.
- float val{0.0f};
- int idx{0};
- const int kDIGITS = 10;
-
- for (int i = 0; i < kDIGITS; i++)
- {
- if (val < prob[i])
- {
- val = prob[i];
- idx = i;
- }
-
- // One '*' per 0.1 of the value, rounded to the nearest integer via floor(x * 10 + 0.5).
- gLogInfo << i << ": " << std::string(int(std::floor(prob[i] * 10 + 0.5f)), '*') << "\n";
- }
- gLogInfo << std::endl;
-
- return (idx == groundTruthDigit && val > 0.9f);
- }

- //! Shuts down the protobuf library used by the Caffe parser.
- //! \return Always true.
- bool SampleMNIST::teardown()
- {
- //! Clean up the libprotobuf files as the parsing is complete
- //! \note It is not safe to use any other part of the protocol buffers library after
- //! ShutdownProtobufLibrary() has been called.
- nvcaffeparser1::shutdownProtobufLibrary();
- return true;
- }
-
- ......
-
- int main(int argc, char** argv)
- {
- .......
-
- if (!sample.teardown())
- {
- return gLogger.reportFail(sampleTest);
- }
-
- return gLogger.reportPass(sampleTest);
- }

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。