TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h, caffeWeightFactory.cpp Source Code Study (Part 3)
Preface
Following on from the previous two posts, this post continues to cover the functions related to trtcaffe::BlobProto.
TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.cpp
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "caffeMacros.h"
#include "caffeWeightFactory.h"
#include "half.h"
using namespace nvinfer1;
using namespace nvcaffeparser1;
//...
/*
trtcaffe::BlobProto
Defined in TensorRT/parsers/caffe/proto/trtcaffe.proto
message BlobProto {
    optional BlobShape shape = 7;
    repeated float data = 5 [packed = true]; // always float
    repeated float diff = 6 [packed = true];
    repeated double double_data = 8 [packed = true]; // always double
    repeated double double_diff = 9 [packed = true];
    // New raw storage (faster and takes 1/2 of space for FP16)
    optional Type raw_data_type = 10;
    optional Type raw_diff_type = 11;
    optional bytes raw_data = 12 [packed = false]; // may hold half, single, or double precision values
    optional bytes raw_diff = 13 [packed = false];
    // 4D dimensions -- deprecated. Use "shape" instead.
    optional int32 num = 1 [default = 0];
    optional int32 channels = 2 [default = 0];
    optional int32 height = 3 [default = 0];
    optional int32 width = 4 [default = 0];
}
*/
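The accessors used further below, such as has_raw_data(), data_size(), and double_data(i), are not hand-written anywhere in the parser: protoc generates them from the message definition above (into a generated header, typically trtcaffe.pb.h). The following is only a simplified, self-contained stand-in I wrote to show roughly what that generated API looks like; the real generated class derives from google::protobuf::Message, uses protobuf's own containers, and has many more methods.

#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for the protoc-generated BlobProto accessors (not the real generated code)
enum Type { DOUBLE = 0, FLOAT = 1, FLOAT16 = 2 }; // stand-in for trtcaffe::Type

class BlobProtoSketch
{
public:
    // optional bytes raw_data = 12;
    bool has_raw_data() const { return hasRawData; }
    const std::string& raw_data() const { return rawData; }

    // optional Type raw_data_type = 10;
    bool has_raw_data_type() const { return hasRawDataType; }
    Type raw_data_type() const { return rawDataType; }

    // repeated float data = 5;
    int data_size() const { return static_cast<int>(dataField.size()); }
    float data(int index) const { return dataField[index]; }

    // repeated double double_data = 8;
    int double_data_size() const { return static_cast<int>(doubleData.size()); }
    double double_data(int index) const { return doubleData[index]; }

private:
    bool hasRawData = false;
    bool hasRawDataType = false;
    std::string rawData;
    Type rawDataType = FLOAT;
    std::vector<float> dataField;
    std::vector<double> doubleData;
};

int main()
{
    BlobProtoSketch blob;
    std::cout << blob.has_raw_data() << " " << blob.data_size() << std::endl; // 0 0
    return 0;
}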
/*
If the data stored in blobMsg (raw_data, data, or double_data) is already of the requested type,
a pointer directly into blobMsg is returned. Otherwise a new block of memory is allocated to hold
the data converted to `type`, and that block is recorded in the vector tmpAllocs.
The return value is a pair of the block's starting address and its element count.
If type, count, or raw_data/data/double_data is not as expected, std::make_pair(nullptr, 0UL) is returned.
*/
// The size returned here is the number of array entries, not bytes
std::pair<const void*, size_t> CaffeWeightFactory::getBlobProtoData(const trtcaffe::BlobProto& blobMsg,
                                                                    trtcaffe::Type type, std::vector<void*>& tmpAllocs)
{
    // NVCaffe new binary format. It may carry any type.
    // Where is trtcaffe::BlobProto::has_raw_data() defined? (It is an accessor generated by protoc, in the generated trtcaffe.pb.h.)
    if (blobMsg.has_raw_data())
    {
        // trtcaffe::BlobProto::has_raw_data_type() is likewise a protoc-generated accessor
        assert(blobMsg.has_raw_data_type());
        // Only the case where raw_data's type equals `type` is handled here; the mismatched cases are handled further below
        if (blobMsg.raw_data_type() == type)
        {
            // A pointer to the first byte of raw_data, together with its element count
            return std::make_pair(&blobMsg.raw_data().front(),
                                  blobMsg.raw_data().size() / sizeOfCaffeType(type));
        }
    }
    // Old BVLC format.
    // Fetch the data from `data` (single precision)
    if (blobMsg.data_size() > 0 && type == trtcaffe::FLOAT)
    {
        // trtcaffe::BlobProto::data_size() is also a protoc-generated accessor
        return std::make_pair(&blobMsg.data().Get(0), blobMsg.data_size());
    }
    // Converting to the target type otherwise
    // trtcaffe::BlobProto::double_data_size() is also a protoc-generated accessor
    // count is the element count of raw_data (in units of raw_data_type), or data_size, or double_data_size
    const int count = blobMsg.has_raw_data()
        ? blobMsg.raw_data().size() / sizeOfCaffeType(blobMsg.raw_data_type())
        : (blobMsg.data_size() > 0 ? blobMsg.data_size() : blobMsg.double_data_size());
    if (count > 0)
    {
        void* new_memory = malloc(count * sizeOfCaffeType(type));
        // Record the newly allocated block in tmpAllocs
        tmpAllocs.push_back(new_memory);
        if (type == trtcaffe::FLOAT)
        {
            // Convert the data to single precision and store it where dst (i.e. new_memory) points
            auto* dst = reinterpret_cast<float*>(new_memory);
            // Fetch the data from raw_data
            if (blobMsg.has_raw_data())
            {
                // raw_data holds half-precision values
                if (blobMsg.raw_data_type() == trtcaffe::FLOAT16)
                {
                    // Reinterpret the pointer as float16* and access the elements one by one
                    const auto* src = reinterpret_cast<const float16*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        // Convert each element to single precision
                        dst[i] = float(src[i]);
                    }
                }
                // raw_data holds double-precision values
                else if (blobMsg.raw_data_type() == trtcaffe::DOUBLE)
                {
                    // Reinterpret the pointer as double* and access the elements one by one
                    const auto* src = reinterpret_cast<const double*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        // Convert each element to single precision
                        dst[i] = float(src[i]);
                    }
                }
                // The case where raw_data is already single precision was handled in the "NVCaffe new binary format" branch above
            }
            // Fetch the data from double_data
            else if (blobMsg.double_data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    // Convert the data from double to single precision
                    // BlobProto::double_data is always double
                    dst[i] = float(blobMsg.double_data(i));
                }
            }
            // The case where data is single precision was handled in the "Old BVLC format" branch above
            return std::make_pair(new_memory, count);
        }
        if (type == trtcaffe::FLOAT16)
        {
            // Convert the data to half precision and store it where dst (i.e. new_memory) points
            auto* dst = reinterpret_cast<float16*>(new_memory);
            // Fetch the data from raw_data
            if (blobMsg.has_raw_data())
            {
                if (blobMsg.raw_data_type() == trtcaffe::FLOAT)
                {
                    const auto* src = reinterpret_cast<const float*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        /*
                        float16
                        Defined in TensorRT/parsers/common/half.h
                        typedef half_float::half float16;
                        */
                        dst[i] = float16(src[i]);
                    }
                }
                else if (blobMsg.raw_data_type() == trtcaffe::DOUBLE)
                {
                    const auto* src = reinterpret_cast<const double*>(&blobMsg.raw_data().front());
                    for (int i = 0; i < count; ++i)
                    {
                        dst[i] = float16(float(src[i]));
                    }
                }
            }
            // Fetch the data from `data`
            else if (blobMsg.data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    // BlobProto::data is always float
                    dst[i] = float16(blobMsg.data(i));
                }
            }
            // Fetch the data from double_data
            else if (blobMsg.double_data_size() == count)
            {
                for (int i = 0; i < count; ++i)
                {
                    dst[i] = float16(float(blobMsg.double_data(i)));
                }
            }
            return std::make_pair(new_memory, count);
        }
        // The case where type is DOUBLE does not seem to be handled?
    }
    return std::make_pair(nullptr, 0UL);
}
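To make the control flow above more concrete, here is a minimal usage sketch of my own (not part of the TensorRT sources). It assumes the protoc-generated header is available as trtcaffe.pb.h, that caffeWeightFactory.h can be included directly, and that the static member is publicly accessible; it stores three doubles in double_data and asks for them back as single-precision floats.

#include <cstdlib>
#include <iostream>
#include <vector>

#include "caffeWeightFactory.h" // assumed include path
#include "trtcaffe.pb.h"        // protoc-generated header, assumed name

int main()
{
    trtcaffe::BlobProto blobMsg;
    // Old BVLC-style double storage: three double-precision values
    blobMsg.add_double_data(1.0);
    blobMsg.add_double_data(2.0);
    blobMsg.add_double_data(3.0);

    // getBlobProtoData allocates a float buffer, converts the doubles into it,
    // and records the allocation in tmpAllocs so the caller can free it later
    std::vector<void*> tmpAllocs;
    auto result = nvcaffeparser1::CaffeWeightFactory::getBlobProtoData(
        blobMsg, trtcaffe::FLOAT, tmpAllocs);

    const float* values = static_cast<const float*>(result.first);
    for (size_t i = 0; i < result.second; ++i)
    {
        std::cout << values[i] << std::endl; // 1 2 3
    }

    // The converted buffer was malloc'd inside getBlobProtoData; free it here
    for (void* p : tmpAllocs)
    {
        std::free(p);
    }
    return 0;
}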
// Determine and return the type of the data stored in blobMsg
trtcaffe::Type CaffeWeightFactory::getBlobProtoDataType(const trtcaffe::BlobProto& blobMsg)
{
    // The data in blobMsg may live in raw_data, double_data, or data
    // raw_data can hold any type; raw_data_type tells which one
    if (blobMsg.has_raw_data())
    {
        assert(blobMsg.has_raw_data_type());
        return blobMsg.raw_data_type();
    }
    // double_data is always double
    if (blobMsg.double_data_size() > 0)
    {
        return trtcaffe::DOUBLE;
    }
    // data is always float
    return trtcaffe::FLOAT;
}
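A small sketch (same assumptions as the previous example) showing the priority order implemented above: raw_data_type wins whenever raw_data is present, then double_data implies DOUBLE, and everything else falls back to FLOAT.

#include <iostream>
#include <string>

#include "caffeWeightFactory.h" // assumed include path
#include "trtcaffe.pb.h"        // protoc-generated header, assumed name

int main()
{
    using nvcaffeparser1::CaffeWeightFactory;

    trtcaffe::BlobProto doubleMsg;
    doubleMsg.add_double_data(3.14);
    // double_data present -> DOUBLE
    std::cout << (CaffeWeightFactory::getBlobProtoDataType(doubleMsg) == trtcaffe::DOUBLE) << std::endl;

    trtcaffe::BlobProto rawMsg;
    rawMsg.set_raw_data_type(trtcaffe::FLOAT16);
    rawMsg.set_raw_data(std::string(4, '\0')); // two dummy FP16 values
    // raw_data present -> whatever raw_data_type says
    std::cout << (CaffeWeightFactory::getBlobProtoDataType(rawMsg) == trtcaffe::FLOAT16) << std::endl;

    trtcaffe::BlobProto emptyMsg;
    // neither raw_data nor double_data -> FLOAT by default
    std::cout << (CaffeWeightFactory::getBlobProtoDataType(emptyMsg) == trtcaffe::FLOAT) << std::endl;

    return 0;
}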
0UL
getBlobProtoData uses the constant 0UL; see the earlier post "C 常數0L,0LL,0UL,0.0f,0.0L" for details.
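A tiny self-contained example of why the suffix matters when the type is deduced, for example by std::make_pair: without a suffix the literal 0 is an int, while 0UL is an unsigned long, which matches size_t on typical LP64 platforms.

#include <type_traits>
#include <utility>

int main()
{
    // Without a suffix, 0 is deduced as int
    auto p1 = std::make_pair(nullptr, 0);
    static_assert(std::is_same<decltype(p1.second), int>::value, "deduced as int");

    // With the UL suffix, 0UL is deduced as unsigned long,
    // which is what size_t is on typical LP64 platforms
    auto p2 = std::make_pair(nullptr, 0UL);
    static_assert(std::is_same<decltype(p2.second), unsigned long>::value, "deduced as unsigned long");

    return 0;
}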
NVCaffe vs. old BVLC
getBlobProtoData distinguishes between two formats: NVCaffe and old BVLC.
Caffe is a deep-learning framework made with flexibility, speed,
and modularity in mind.
NVCaffe is an NVIDIA-maintained fork of BVLC Caffe
tuned for NVIDIA GPUs, particularly in multi-GPU configurations.
In other words, BVLC Caffe is the original Caffe, and NVCaffe is NVIDIA's fork of it, tuned specifically for NVIDIA GPUs.
reinterpret_cast
In getBlobProtoData, reinterpret_cast is used to convert a void* to a float*:
void* new_memory = malloc(count * sizeOfCaffeType(type));
//...
auto* dst = reinterpret_cast<float*>(new_memory);
However, according to "Should I use static_cast or reinterpret_cast when casting a void* to whatever", in this situation the safer static_cast can be used instead; reinterpret_cast is not needed.
CaffeWeightFactory::checkForNans also uses reinterpret_cast, to convert a const void* to a const T*:
//const void* values
const T* v = reinterpret_cast<const T*>(values);
According to "How to convert const void* to unsigned char*?", static_cast can be used in this situation as well.
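A short self-contained comparison of the two casts: both compile for a void* round-trip and behave identically here, but static_cast is the more restrictive, intention-revealing choice.

#include <cstdlib>

int main()
{
    void* raw = std::malloc(4 * sizeof(float)); // like new_memory in getBlobProtoData

    // Both casts are legal for void* -> object pointer and do the same thing here,
    // but static_cast is the conventional choice for this round-trip
    float* viaReinterpret = reinterpret_cast<float*>(raw);
    float* viaStatic = static_cast<float*>(raw);

    viaStatic[0] = 1.0f;
    (void) viaReinterpret;

    std::free(raw);
    return 0;
}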
static member function
getBlobProtoDataType, sizeOfCaffeType, and getBlobProtoData are static member functions. A static member function defined in a class belongs to the class itself rather than to the instances created from it.
Why define them as static instead of as ordinary member functions? One likely reason is that they do not use any non-static member variables, so declaring them static makes that independence explicit and lets them be called without an instance.
Follow-up: CaffeWeightFactory::checkForNans does not seem to use any non-static member variables either, so why is it not defined as static?
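A minimal sketch (with a made-up class, not from TensorRT) of what "belongs to the class rather than to an instance" means: a static member function can be called without an object and cannot access non-static members.

#include <iostream>

// Hypothetical class, only to illustrate static member functions
class WeightFactoryDemo
{
public:
    // Belongs to the class itself: callable without an instance,
    // and has no 'this', so it cannot touch non-static members like mCount
    static int sizeOfType(int typeId)
    {
        // return mCount;   // would not compile: no object to read mCount from
        return typeId == 0 ? 4 : 2;
    }

    int count() const { return mCount; } // ordinary member function: needs an instance

private:
    int mCount = 0;
};

int main()
{
    std::cout << WeightFactoryDemo::sizeOfType(0) << std::endl; // called with no instance
    return 0;
}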
References
Should I use static_cast or reinterpret_cast when casting a void* to whatever
How to convert const void* to unsigned char*?
Can static function access non static variables in C++?
TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h, caffeWeightFactory.cpp Source Code Study (Part 1)
TensorRT/parsers/caffe/caffeWeightFactory/caffeWeightFactory.h, caffeWeightFactory.cpp Source Code Study (Part 2)