系列文章目录
MNN createFromBuffer(一)
MNN createRuntime(二)
MNN createSession 之 Schedule(三)
MNN createSession 之创建流水线后端(四)
MNN Session 之维度计算(五)
MNN Session 之几何计算(六)
MNN Session 之 CPU 算子(七)
MNN Session 之 Vulkan 算子(八)
MNN 执行推理(九)
文章目录
- 系列文章目录
- 1、createSession
- 1.1 createMultiPathSession
- 1.1.1 Session::resize
- 1.1.1.1 Pipeline::encode
- 1.1.1.1.1 GeometryComputerUtils::shapeComputeAndGeometryTransform
- 1.1.1.1.1.1 SizeComputer::computeOutputSize
- 1.1.1.1.1.1.1 SizeComputerSuite::search
- 1.1.1.1.1.1.1.1 维度计算初始化与注册
- 1.1.1.1.1.1.1.1.1 注册维度计算
- 1.1.1.1.1.1.2 SizeComputer::onComputeSize
- 1.1.1.1.2 Pipeline::UnitInfo::setUp
- 1.1.1.1.2.1 SizeComputer::computeFlops
- 1.1.1.1.2.1.1 SizeComputer::onComputeFlops
- 1.1.1.2 Command 命令
- 1.1.1.3 CommandBuffer
1、createSession
依据 ScheduleConfig 和 RuntimeInfo 创建会话。
// source/core/Interpreter.cpp
// Creates a session for a single schedule configuration by wrapping it in a
// one-element list and delegating to createMultiPathSession.
Session* Interpreter::createSession(const ScheduleConfig& config, const RuntimeInfo& runtime) {
    const std::vector<ScheduleConfig> singlePath{config};
    return createMultiPathSession(singlePath, runtime);
}
1.1 createMultiPathSession
// source/core/Interpreter.cpp
// Creates a session from a list of schedule configurations (excerpt; the
// elided parts are marked with "// ..."). In the visible portion the new
// session is resized right away when the schedule reports it as valid for
// resize and the net is configured with Session_Input_Inside and
// Session_Resize_Direct.
Session* Interpreter::createMultiPathSession(const std::vector<ScheduleConfig>& configs, const RuntimeInfo& runtime) {
// ...
// Raw pointer obtained from newSession.get(); this is the value returned to the caller.
auto result = newSession.get();
auto validForResize = info.validForResize;
if (validForResize && mNet->modes.inputMode == Session_Input_Inside && mNet->modes.resizeMode == Session_Resize_Direct) {
// NOTE(review): the ErrorCode returned by resize() is not checked here.
result->resize();
}
// ...
return result;
}
1.1.1 Session::resize
// source/core/Session.cpp
/**
 * Brings the session up to date after an input change.
 *
 * Step 1 (only when mNeedResize is set): encode every pipeline.
 * Step 2 (only when mNeedMalloc is set): allocate memory for every pipeline
 *         and, in Session_Memory_Collect mode, ask each runtime to collect garbage.
 *
 * @return the first non-NO_ERROR code reported by a pipeline, otherwise NO_ERROR.
 */
ErrorCode Session::resize() {
#ifdef LOG_VERBOSE
    for (auto& entry : mInputs) {
        auto& inputTensor = entry.second;
        MNN_PRINT("before resize, input name:%s, ptr:%p, hostPtr:%p, shape:", entry.first.c_str(), inputTensor, inputTensor->host<void>());
        inputTensor->printShape();
        MNN_PRINT("\n");
    }
#endif
    const bool permitCodegen = (Interpreter::Session_Codegen_Enable == mCodegenMode);
    bool firstMalloc         = false;

    if (mNeedResize) {
        const bool debug = (Interpreter::Session_Debug == mCallBackMode);
        // mPipelines is a std::vector<std::shared_ptr<Pipeline>>
        for (auto& pipeline : mPipelines) {
            const auto code = pipeline->encode(debug, permitCodegen);
            if (code != NO_ERROR) {
                return code;
            }
        }
        mNeedResize = false;
        mNeedMalloc = true;
        firstMalloc = true;
    }

    if (mNeedMalloc) {
        // Keep mNeedResize raised while allocating, so that runSession can
        // detect a failed allocation.
        mNeedResize = true;
        // Turn Pipeline to Command Buffer and Malloc resource
        // TODO: Separate Schedule and Malloc
        const bool forbidReplace = permitCodegen || (nullptr != mInfo.constReplaceBackend);
        for (auto& pipeline : mPipelines) {
            const auto code = pipeline->allocMemory(firstMalloc, forbidReplace);
            if (code != NO_ERROR) {
                return code;
            }
        }
        if (Interpreter::Session_Memory_Collect == mMemoryUsageMode) {
#ifdef LOG_VERBOSE
            float memory = 0.0f;
#endif
            for (auto& runtimeEntry : mRuntime.first) {
                runtimeEntry.second->onGabageCollect(0);
#ifdef LOG_VERBOSE
                memory += runtimeEntry.second->onGetMemoryInMB();
#endif
            }
#ifdef LOG_VERBOSE
            FUNC_PRINT_ALL(memory, f);
#endif
        }
        mNeedMalloc = false;
        mNeedResize = false;
    }

#ifdef LOG_VERBOSE
    MNN_PRINT("session after resize\n");
    for (auto& entry : mOutputs) {
        auto& outputTensor = entry.second;
        MNN_PRINT("output name:%s, ptr:%p,shape:", entry.first.c_str(), outputTensor);
        outputTensor->printShape();
        MNN_PRINT("\n");
    }
#endif
    return NO_ERROR;
}
1.1.1.1 Pipeline::encode
// source/core/Pipeline.cpp
// typedef std::pair<BackendCache, std::vector<OpCacheInfo>> PipelineInfo;
//
// struct BackendCache {
// Backend::Info info;
// BackendConfig config;
// std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>> cache;
// bool needComputeShape = true;
// bool needComputeGeometry = true;
// bool reportError = true;
// std::map<Tensor*, TENSORCACHE> inputTensorCopyCache;
// };
//
// /** pipeline info */
// struct OpCacheInfo {
// /** op */
// const Op* op;
// /** input tensors */
// std::vector<Tensor*> inputs;
// /** output tensors */
// std::vector<Tensor*> outputs;
// /** schedule type*/
// Schedule::Type type = Schedule::Type::SEPARATE;
//
// /**Command buffer for cache*/
// CommandBuffer cacheBuffer;
//
// /**Command buffer for execute*/
// CommandBuffer executeBuffer;
//
// std::map<const Op*, std::shared_ptr<Execution>> executionCache;
// };
//
/**
 * Builds the executable command list of every op in this pipeline.
 *
 * - When geometry computation is not needed (static model) each op is copied
 *   into a single command.
 * - Otherwise shape computation and geometry transform are run through
 *   GeometryComputerUtils::shapeComputeAndGeometryTransform (not available
 *   when MNN_BUILD_MINI is defined).
 * - For quantized models on the listed backends, quantAttr is propagated
 *   between tensors and FloatToInt8 / Int8ToFloat cast commands are inserted.
 * - Finally mFlops is recomputed; with supportDebug a UnitInfo is attached to
 *   every command as well.
 *
 * @param supportDebug  attach per-command debug info (UnitInfo).
 * @param permitCodegen forwarded to the geometry transform.
 * @return NO_ERROR, or the error reported by the geometry transform.
 */
ErrorCode Pipeline::encode(bool supportDebug, bool permitCodegen) {
    // mInfo.first.cache is a std::pair<std::shared_ptr<Backend>, std::shared_ptr<Backend>>
    // mBackend: the created backend (e.g. VulkanBackend)
    auto& mBackend = mInfo.first.cache.first;
    // mBackupBackend: the created backup (default) backend (e.g. CPUBackend)
    auto& mBackupBackend = mInfo.first.cache.second;
    // Static Model just copy info to command buffer
    // mInfo.first is a BackendCache
    if (!mInfo.first.needComputeGeometry) {
        for (int i=0; i<mInfo.second.size(); ++i) {
            auto& info = mInfo.second[i];
            SharedPtr<Command> cmd = new Command;
            cmd->op = info.op;
            if (cmd->op->type() == OpType_Raster) {
                // Compability for Origin Static Model
                cmd->outputs = info.outputs;
                if (TensorUtils::getDescribe(info.outputs[0])->regions.empty() && info.inputs.size() > 0 && TensorUtils::getDescribe(info.inputs[0])->regions.size() > 0) {
                    TensorUtils::getDescribe(info.outputs[0])->regions = std::move(TensorUtils::getDescribe(info.inputs[0])->regions);
                    TensorUtils::setRasterInputs(cmd.get());
                } else {
                    cmd->inputs = info.inputs;
                }
            } else {
                cmd->inputs = info.inputs;
                cmd->outputs = info.outputs;
            }
            info.executeBuffer.command = {cmd};
        }
    } else {
#ifndef MNN_BUILD_MINI
        // mContext is a GeometryComputer::Context
        mContext.clear();
        /** Size Compute and compute Const Begin */
        auto res = GeometryComputerUtils::shapeComputeAndGeometryTransform(mInfo.second, mContext, mInfo.first.cache.second, mUseGeometry, false, permitCodegen);
        if (res != NO_ERROR) {
            return res;
        }
#endif
    }
    // Propagate Scale and insert new command
    if (mIsQuantModel && (mBackend->type() == MNN_FORWARD_CPU || mBackend->type() == MNN_FORWARD_CPU_EXTENSION || mBackend->type() == MNN_FORWARD_CUDA || mBackend->type() == MNN_FORWARD_NN || mBackend->type() == MNN_FORWARD_OPENCL)) {
        // get propagate map
        using PropagateMap = std::map<const MNN::Tensor*, std::set<const MNN::Tensor*>>;
        PropagateMap forwardMap, backwardMap;
        auto insertPropagateMap = [](PropagateMap& propagateMap, const Tensor* s, const Tensor* t) {
            if (propagateMap.find(s) == propagateMap.end()) {
                propagateMap[s] = std::set<const Tensor*>({t});
            } else {
                propagateMap[s].insert(t);
            }
        };
        // Op types through which quantAttr is propagated between inputs and output.
        std::set<OpType> propagateOpTypes = { OpType_Raster, OpType_ReLU, OpType_ReLU6, OpType_Pooling,
                                              OpType_Interp, OpType_CropAndResize, OpType_ROIPooling, OpType_Gather,
                                              OpType_GatherV2, OpType_ScatterNd};
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            for (const auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                const auto type = cmd.op->type();
                const auto output = cmd.outputs[0];
                if (propagateOpTypes.find(type) != propagateOpTypes.end()) {
                    for (auto t : cmd.inputs) {
                        insertPropagateMap(forwardMap, t, output);
                        insertPropagateMap(backwardMap, output, t);
                    }
                }
            }
        }
        auto getStart = [&forwardMap, &backwardMap](bool forward) {
            auto& propagateMap = forward ? forwardMap : backwardMap;
            auto& antiMap = forward ? backwardMap : forwardMap;
            // delete N->1 Map of Op
            for (const auto& iter : antiMap) {
                if (iter.second.size() > 1) {
                    for (auto t : iter.second) {
                        auto res = propagateMap.find(t);
                        if (res != propagateMap.end()) {
                            propagateMap.erase(res);
                        }
                    }
                }
            }
            // start = keys that never appear as a propagation target
            std::set<const Tensor*> root, leaf, start;
            for (const auto& iter : propagateMap) {
                root.insert(iter.first);
                for (auto t : iter.second) {
                    leaf.insert(t);
                }
            }
            std::set_difference(root.begin(), root.end(), leaf.begin(), leaf.end(), std::inserter(start, start.begin()));
            return start;
        };
        auto forwardStart = getStart(true);
        auto backwardStart = getStart(false);
        // propagate scale
        auto propagateScale = [](PropagateMap& propagateMap, std::set<const Tensor*>& start) {
            std::function<bool(const Tensor*)> scalePropagate = [&propagateMap, &scalePropagate](const Tensor* t) {
                if (TensorUtils::getDescribe(t)->quantAttr.get() == nullptr) {
                    return false;
                }
                if (propagateMap.find(t) == propagateMap.end()) {
                    return false;
                }
                bool change = false;
                for (auto x : propagateMap[t]) {
                    if (TensorUtils::getDescribe(x)->quantAttr != TensorUtils::getDescribe(t)->quantAttr) {
                        TensorUtils::getDescribe(x)->quantAttr = TensorUtils::getDescribe(t)->quantAttr;
                        change = true;
                    }
                    change |= scalePropagate(x);
                }
                return change;
            };
            bool change = false;
            for (auto t : start) {
                change |= scalePropagate(t);
            }
            return change;
        };
        // At most three rounds; stop as soon as neither direction changes anything.
        for (int i = 0; i < 3 && (propagateScale(forwardMap, forwardStart) || propagateScale(backwardMap, backwardStart)); i++) {
        }
        // Insert cast
        // NOTE(review): cachedCastTensor is looked up in makeCommand but nothing in
        // this function ever inserts into it - confirm whether that is intended.
        std::map<const Tensor*, Tensor*> cachedCastTensor;
        for (auto& info : mInfo.second) {
            auto bufferCommand = std::move(info.executeBuffer.command);
            bool hasConvert = false;
            for (auto cmdP : bufferCommand) {
                auto& cmd = *cmdP;
                auto& outputs = cmd.outputs;
                auto& inputs = cmd.inputs;
                auto opType = cmd.op->type();
                // Check if need use quant op
                DataType runType = DataType_DT_FLOAT;
                bool useQuant = false;
                if (outputs.size() == 1) {
                    // Quant: output and all input has quantAttr and op support
                    if (TensorUtils::getDescribe(outputs[0])->quantAttr != nullptr) {
                        useQuant = _supportQuant(cmd.op, inputs, outputs, mBackend->type());
                    }
                    if (useQuant) {
                        for (auto t : inputs) {
                            if (TensorUtils::getDescribe(t)->quantAttr == nullptr) {
                                useQuant = false;
                                break;
                            }
                        }
                    }
                }
                if (useQuant) {
                    runType = DataType_DT_INT8;
                }
                for (auto o : outputs) {
                    auto quan = TensorUtils::getDescribe(o)->quantAttr;
                    if (nullptr != quan) {
                        TensorUtils::getDescribe(o)->type = runType;
                    }
                }
                // Creates a wrap tensor plus a FloatToInt8 / Int8ToFloat command that
                // converts `input` to `runType`; returns the wrap tensor.
                auto makeCommand = [&cachedCastTensor, &info](CommandBuffer& cmdBuffer, Tensor* input, DataType runType) {
                    if (cachedCastTensor.find(input) != cachedCastTensor.end()) {
                        return cachedCastTensor[input];
                    }
                    std::shared_ptr<Tensor> wrapTensor(new Tensor);
                    TensorUtils::copyShape(input, wrapTensor.get(), true);
                    TensorUtils::setLinearLayout(wrapTensor.get());
                    auto des = TensorUtils::getDescribe(wrapTensor.get());
                    auto originDes = TensorUtils::getDescribe(input);
                    if (originDes->quantAttr != nullptr) {
                        des->quantAttr.reset(new QuantAttr);
                        *des->quantAttr = *originDes->quantAttr;
                        des->type = runType;
                    }
                    // extras keeps the wrap tensor alive
                    cmdBuffer.extras.emplace_back(wrapTensor);
                    SharedPtr<Command> command(new Command);
                    command->inputs = {input};
                    command->outputs = {wrapTensor.get()};
                    info.cacheBuffer.hasWrap = true;
                    flatbuffers::FlatBufferBuilder builder;
                    OpBuilder opB(builder);
                    if (runType == DataType_DT_INT8) {
                        opB.add_type(OpType_FloatToInt8);
                    } else {
                        opB.add_type(OpType_Int8ToFloat);
                    }
                    builder.Finish(opB.Finish());
                    // The command owns the flatbuffer storage that backs command->op.
                    command->buffer.reset(new BufferStorage);
                    command->buffer->storage = builder.ReleaseRaw(command->buffer->allocated_size, command->buffer->offset);
                    command->op = flatbuffers::GetRoot<Op>(command->buffer->buffer());
                    info.executeBuffer.command.emplace_back(std::move(command));
                    return wrapTensor.get();
                };
                // judge is it need CastWrap
                if (OpType_Raster == opType) {
                    for (int v=0; v<cmd.inputs.size(); ++v) {
                        auto input = cmd.inputs[v];
                        bool needCast = CPUBackend::getDataType(input) != runType;
                        if (needCast) {
                            cmd.inputs[v] = makeCommand(info.executeBuffer, input, runType);
                        }
                    }
                } else {
                    for (int i = 0; i < cmd.inputs.size(); i++) {
                        if (OpCommonUtils::opNeedContent(cmd.op, i) && inputs[i]->getType() != halide_type_of<int>()) {
                            bool needCast = CPUBackend::getDataType(inputs[i]) != runType;
                            if (needCast) {
                                cmd.inputs[i] = makeCommand(info.executeBuffer, inputs[i], runType);
                            }
                        }
                    }
                }
                // The original command goes after any cast commands created for it.
                info.executeBuffer.command.emplace_back(cmdP);
            }
        }
    }
    /** Prepare DebugInfo*/
    // Reset on every encode so that repeated calls (one per resize) do not keep
    // adding to the previous total in the non-debug branch.
    mFlops = 0.0f;
    if (supportDebug) {
        int totalIndex = 0;
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            int index = 0;
            for (auto& cmdP : buffer.command) {
                auto& cmd = *cmdP;
                cmd.info.reset(new UnitInfo);
                static_cast<UnitInfo*>(cmd.info.get())->setUp(cmd, index++, info.op, totalIndex++);
                mFlops += cmd.info->flops();
            }
        }
    }
#ifndef MNN_BUILD_MINI
    else {
        for (auto& info : mInfo.second) {
            auto& buffer = info.executeBuffer;
            for (auto& cmdP : buffer.command) {
                mFlops += SizeComputer::computeFlops(cmdP->op, cmdP->inputs, cmdP->outputs);
            }
        }
    }
#endif
    return NO_ERROR;
}
1.1.1.1.1 GeometryComputerUtils::shapeComputeAndGeometryTransform
// source/geometry/GeometryComputerUtils.cpp
// /** pipeline info */
// struct OpCacheInfo {
// /** op */
// const Op* op;
// /** input tensors */
// std::vector<Tensor*> inputs;
// /** output tensors */
// std::vector<Tensor*> outputs;
// /** schedule type*/
// Schedule::Type type = Schedule::Type::SEPARATE;
//
// /**Command buffer for cache*/
// CommandBuffer cacheBuffer;
//
// /**Command buffer for execute*/
// CommandBuffer executeBuffer;
//
// std::map<const Op*, std::shared_ptr<Execution>> executionCache;
// };
//
/**
 * Runs shape inference and geometry transform over all ops of a pipeline.
 *
 * Pass 1: for every op, reset its execute buffer and mutable outputs, compute
 *         output shapes (unless skipShapeCompute), and for CONSTANT ops build
 *         and execute their commands on backupBackend right away.
 * Pass 2: for every non-constant op with non-zero-shape outputs, run the
 *         geometry computer and turn the result into raster commands.
 * With MNN_BUILD_CODEGEN and permitCodegen, opFuse is applied afterwards.
 *
 * @param infos            per-op cache entries; their buffers are rewritten.
 * @param geoContext       geometry context shared with the caller.
 * @param backupBackend    backend used to execute constant ops.
 * @param compileType      compiler type used to look up geometry computers.
 * @param skipShapeCompute skip SizeComputer::computeOutputSize when true.
 * @param permitCodegen    enable op fusion (MNN_BUILD_CODEGEN builds only).
 * @return NO_ERROR on success; COMPUTE_SIZE_ERROR, NOT_SUPPORT, NO_EXECUTION
 *         or OUT_OF_MEMORY on the corresponding failure.
 */
ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
    std::vector<Schedule::OpCacheInfo>& infos,
    GeometryComputer::Context& geoContext,
    std::shared_ptr<Backend> backupBackend,
    Runtime::CompilerType compileType,
    bool skipShapeCompute,
    bool permitCodegen) {
    /** Size Compute and compute Const Begin */
    GeometryComputer::Context ctx(backupBackend);
    // Size Compute and compute Const
    // infos is the op cache (171 entries in the model analysed by the article)
    for (int i=0; i<infos.size(); ++i) {
        // info is an OpCacheInfo
        auto& info = infos[i];
        auto& cmdBufferVir = info.executeBuffer;
        auto& tempBuffer = info.cacheBuffer;
        // TODO: Optimize
        cmdBufferVir.command.clear();
        cmdBufferVir.extras.clear();
        for (auto t : info.outputs) {
            if (!TensorUtils::getDescribe(t)->isMutable) {
                continue;
            }
            auto usage = TensorUtils::getDescribe(t)->usage;
            auto type = TensorUtils::getDescribe(t)->memoryType;
            MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_OUTSIDE);
            MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_HOST);
            if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1) {
                // The describe content is shared: give this tensor a fresh one.
                TensorUtils::getDescribeOrigin(t)->mContent = new Tensor::InsideDescribe::NativeInsideDescribe;
                t->buffer().dim = TensorUtils::getDescribe(t)->dims;
                TensorUtils::getDescribe(t)->usage = usage;
            } else {
                // Neither a constant op nor a trainable tensor
                if (info.type != Schedule::CONSTANT && usage != Tensor::InsideDescribe::TRAINABLE) {
                    TensorUtils::getDescribeOrigin(t)->mContent->setBackend(nullptr);
                    // TODO: If output is static and length larger than new size, don't clear mem
                    TensorUtils::getDescribeOrigin(t)->mContent->mem.reset(nullptr);
                }
            }
        }
        if (!skipShapeCompute) {
            auto res = SizeComputer::computeOutputSize(info.op, info.inputs, info.outputs);
            if (!res) {
                if (info.op->name() != nullptr) {
                    MNN_ERROR("Compute Shape Error for %s\n", info.op->name()->c_str());
                } else {
                    MNN_ERROR("Compute Shape Error for %d\n", info.op->type());
                }
                return COMPUTE_SIZE_ERROR;
            }
            // FIXME: Find better way to may compability for old model
            /**
             For Convolution of 2D / 3D Tensor(Dense / 1D Convolution)
             Because of old code, we will acces dim[2] / dim[3] to get width and height
             Set the lenght to 1 for compability
             */
            for (auto t : info.outputs) {
                TensorUtils::adjustTensorForCompability(t);
            }
        }
        if (info.type == Schedule::CONSTANT) {
            if (_hasZeroShapeOutput(info)) {
                continue;
            }
            ctx.clear();
            auto geo = GeometryComputer::search(info.op->type(), Runtime::Compiler_Loop);
            {
                auto res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer);
                if (!res) {
                    tempBuffer.command.clear();
                    tempBuffer.extras.clear();
                    res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer);
                }
                if (!res) {
                    // The op name is optional; fall back to the op type like the
                    // shape-error message above instead of dereferencing nullptr.
                    if (info.op->name() != nullptr) {
                        MNN_ERROR("Const Folder Error in geometry for %s\n", info.op->name()->c_str());
                    } else {
                        MNN_ERROR("Const Folder Error in geometry for %d\n", info.op->type());
                    }
                    return NOT_SUPPORT;
                }
            }
            GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferVir, ctx);
            for (auto t : info.outputs) {
                ctx.getRasterCacheCreateRecursive(t, cmdBufferVir);
            }
            // Create, resize and execute every command of the constant op now.
            for (auto& cp : cmdBufferVir.command) {
                auto& c = *cp;
                if (nullptr == c.execution) {
                    c.execution.reset(backupBackend->onCreate(c.inputs, c.outputs, c.op));
                }
                auto exe = c.execution;
                if (nullptr == exe.get()) {
                    if (info.op->name() != nullptr) {
                        MNN_ERROR("Const Folder Error for %s\n", info.op->name()->c_str());
                    } else {
                        MNN_ERROR("Const Folder Error for %d\n", info.op->type());
                    }
                    return NO_EXECUTION;
                }
                for (auto t : c.outputs) {
                    auto des = TensorUtils::getDescribe(t);
                    TensorUtils::setLinearLayout(t);
                    auto res = backupBackend->onAcquireBuffer(t, Backend::STATIC);
                    if (!res) {
                        return OUT_OF_MEMORY;
                    }
                    des->setBackend(backupBackend.get());
                }
                backupBackend->onResizeBegin();
                auto code = exe->onResize(c.inputs, c.outputs);
                if (NO_ERROR != code) {
                    return NOT_SUPPORT;
                }
                code = backupBackend->onResizeEnd();
                if (NO_ERROR != code) {
                    return NOT_SUPPORT;
                }
                code = exe->onExecute(c.inputs, c.outputs);
                if (NO_ERROR != code) {
                    return NOT_SUPPORT;
                }
            }
            // Clear const command
            ctx.pushCache(cmdBufferVir);
            cmdBufferVir.command.clear();
            cmdBufferVir.extras.clear();
        }
    }
    /** Size Compute and compute Const End */
    /** Geometry Transform */
    for (int i=0; i<infos.size(); ++i) {
        auto& info = infos[i];
        auto& cmdBufferReal = info.executeBuffer;
        auto& tempBuffer = info.cacheBuffer;
        // TODO: Optimize
        if (info.type == Schedule::CONSTANT) {
            continue;
        }
        if (_hasZeroShapeOutput(info)) {
            continue;
        }
        auto geo = GeometryComputer::search(info.op->type(), compileType);
        {
            bool res = false;
            // onRecompute is only tried when the cached buffer carries no cast wrap.
            if (!tempBuffer.hasWrap) {
                res = geo->onRecompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer);
            }
            if (!res) {
                tempBuffer.command.clear();
                tempBuffer.extras.clear();
                res = geo->onCompute(info.op, info.inputs, info.outputs, geoContext, tempBuffer);
            }
            if (!res) {
                return NOT_SUPPORT;
            }
            tempBuffer.hasWrap = false;
            GeometryComputerUtils::makeRaster(tempBuffer, cmdBufferReal, geoContext);
            for (auto t : info.outputs) {
                auto des = TensorUtils::getDescribe(t);
                if (des->usage == Tensor::InsideDescribe::OUTPUT || des->usage == Tensor::InsideDescribe::TRAINABLE) {
                    // For output and trainable value, must directly compute the tensor
                    geoContext.getRasterCacheCreateRecursive(t, cmdBufferReal);
                }
            }
        }
    }
#ifdef MNN_BUILD_CODEGEN
    if(permitCodegen) {
        // The guard used to be spelled LOG_VERPOSE, which is never defined; the
        // rest of the code base uses LOG_VERBOSE.
#ifdef LOG_VERBOSE
        MNN_PRINT("infos : [\n");
        for (auto& info : infos) {
            auto& cmds = info.executeBuffer.command;
            for (auto cmd : cmds) {
                MNN_PRINT("\t%s", EnumNameOpType(cmd->op->type()));
                if(cmd->op->type() == OpType_BinaryOp) {
                    MNN_PRINT(" %d ", cmd->op->main_as_BinaryOp()->opType());
                }
                if(cmd->op->type() == OpType_UnaryOp) {
                    MNN_PRINT(" %d ", cmd->op->main_as_UnaryOp()->opType());
                }
                MNN_PRINT("\n");
            }
        }
        MNN_PRINT("]\n");
        MNN_PRINT("==================== opFuse ====================\n");
#endif
        opFuse(infos, geoContext.forwardType(), geoContext.precisionType());
#ifdef LOG_VERBOSE
        MNN_PRINT("infos : [\n");
        for (auto& info : infos) {
            auto& cmds = info.executeBuffer.command;
            for (auto cmd : cmds) {
                MNN_PRINT("\t%s\n", EnumNameOpType(cmd->op->type()));
            }
        }
        MNN_PRINT("]\n");
#endif
    }
#endif
    return NO_ERROR;
}
1.1.1.1.1.1 SizeComputer::computeOutputSize
// source/shape/SizeComputer.cpp
/**
 * Computes the shapes (and type / dimension format) of `outputs` for `op`.
 *
 * - Loop ops (OpType_While carrying a LoopParam) take their output description
 *   from the op's extraTensorInfos.
 * - Other control-flow ops and inputs with a negative length are rejected.
 * - Otherwise the SizeComputer registered for the op type is used when present.
 * - With no op (copy op) or no registered computer, each output copies the
 *   description of the input at the same index, provided the counts allow it.
 *
 * @param op      the op, or nullptr for a copy op.
 * @param inputs  input tensors.
 * @param outputs output tensors whose buffers are written.
 * @return true when the output description was produced, false otherwise.
 */
bool SizeComputer::computeOutputSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                     const std::vector<Tensor*>& outputs) {
    auto computeFactory = SizeComputerSuite::get();
    // When op is nullptr, it means a copy op
    if (nullptr != op) {
        // For Loop Op
        if (op->type() == OpType_While && op->main_type() == OpParameter_LoopParam) {
            auto loop = op->main_as_LoopParam();
            if (loop->extraTensorInfos() == nullptr) {
                return false;
            }
            MNN_ASSERT(loop->extraTensorInfos()->size() == outputs.size());
            for (int i=0; i<outputs.size(); ++i) {
                auto des = loop->extraTensorInfos()->GetAs<TensorDescribe>(i);
                MNN_ASSERT(des->blob() != nullptr);
                auto blob = des->blob();
                TensorUtils::getDescribe(outputs[i])->dimensionFormat = blob->dataFormat();
                outputs[i]->setType(blob->dataType());
                if (blob->dims() != nullptr) {
                    auto dims = blob->dims()->data();
                    outputs[i]->buffer().dimensions = blob->dims()->size();
                    for (int j=0; j<blob->dims()->size(); ++j) {
                        outputs[i]->setLength(j, dims[j]);
                    }
                } else {
                    outputs[i]->buffer().dimensions = 0;
                }
            }
            return true;
        }
        // Don't support compute shape for control flow op
        if (op->type() == OpType_While || op->type() == OpType_If) {
            return false;
        }
        // Check -1 input
        for (auto& t : inputs) {
            for (int i=0; i < t->dimensions(); ++i) {
                if (t->length(i) < 0) {
                    return false;
                }
            }
        }
        auto computer = computeFactory->search(op->type());
        if (nullptr != computer) {
            bool ret = computer->onComputeSize(op, inputs, outputs);
#ifdef MNN_DEBUG_TENSOR_SIZE
            _printShape(op, inputs, outputs);
#endif
            return ret;
        }
    }
    // Default Set to the same
    if (inputs.size() >= 1 && (outputs.size() == 1 || outputs.size() == inputs.size())) {
        if (inputs[0] == outputs[0]) {
            return true;
        }
        for (int i=0; i<outputs.size(); ++i) {
            const auto& ib = inputs[i]->buffer();
            auto& ob = outputs[i]->buffer();
            memcpy(ob.dim, ib.dim, sizeof(halide_dimension_t) * ib.dimensions);
            ob.dimensions = ib.dimensions;
            ob.type = ib.type;
            TensorUtils::getDescribe(outputs[i])->dimensionFormat = TensorUtils::getDescribe(inputs[i])->dimensionFormat;
        }
#ifdef MNN_DEBUG_TENSOR_SIZE
        _printShape(op, inputs, outputs);
#endif
        return true;
    }
    // Not Support
    if (nullptr == op) {
        // A copy op has no type or name to report; do not dereference it.
        MNN_PRINT("Can't compute size for copy op (op is nullptr)\n");
        return false;
    }
    MNN_PRINT("Can't compute size for %d, name=%s\n", op->type(), op->name() ? op->name()->c_str() : "");
    return false;
}
1.1.1.1.1.1.1 SizeComputerSuite::search
// source/shape/SizeComputer.cpp
// Looks up the SizeComputer registered for the given op type.
// The stored slot value is returned as is, so a slot holding nullptr yields nullptr.
SizeComputer* SizeComputerSuite::search(OpType name) {
    return mRegistry[name];
}
1.1.1.1.1.1.1.1 维度计算初始化与注册
维度计算的初始化与注册,是在 registerBackend 函数中通过调用 SizeComputerSuite::init() 来实现的。
// Flag handed to std::call_once below so that the registration lambda runs once.
static std::once_flag s_flag;
// Performs one-time global registration (excerpt; elided parts are marked
// with "// ..."). The visible part initialises the shape computers and the
// geometry computers.
void registerBackend() {
std::call_once(s_flag, [&]() {
// ...
// Creates the SizeComputerSuite singleton and registers all shape computers.
SizeComputerSuite::init();
GeometryComputer::init();
// ...
});
}
SizeComputerSuite::init() 实现如下:
// source/shape/SizeComputer.cpp
// Creates the global SizeComputerSuite on first use: sizes the registry to one
// slot per op type (OpType_MAX + 1), zeroes every slot, then registers all
// shape computers. Does nothing when the instance already exists.
void SizeComputerSuite::init() {
    if (gInstance != nullptr) {
        return;
    }
    gInstance      = new SizeComputerSuite;
    auto& registry = gInstance->mRegistry;
    registry.resize(OpType_MAX + 1);
    ::memset(registry.data(), 0, registry.size() * sizeof(SizeComputer*));
    registerShapeOps();
}
1.1.1.1.1.1.1.1.1 注册维度计算
registerShapeOps 用来注册维度计算,它通过调用一个个维度计算函数来实现注册。
// source/shape/ShapeRegister.cpp
// Registers the shape computers by calling their registration functions one
// after another (excerpt; the remaining calls are elided). These functions
// follow the ___<name>__<op>__ naming produced by the REGISTER_SHAPE macro.
void registerShapeOps() {
___ShapeSizeComputer__OpType_Shape__();
___ShapeRasterComputer__OpType_Raster__();
// ...
}
如 ___ShapeSizeComputer__OpType_Shape__ 这样的函数是通过 REGISTER_SHAPE 宏定义的:
// source/shape/SizeComputer.hpp
// Defines a function named ___<name>__<op>__ that allocates a `name` instance
// and inserts it into the SizeComputerSuite singleton under op type `op`.
// The instance is created with `new` and handed to the suite; no matching
// delete is visible in this excerpt.
#define REGISTER_SHAPE(name, op) \
void ___##name##__##op##__() { \
name* _temp = new name; \
SizeComputerSuite* ts = SizeComputerSuite::get(); \
ts->insert(_temp, op); \
}
其实现代码如下:
// source/shape/ShapeShape.cpp
// Shape inference for OpType_Shape: the output is a 1-D int32 tensor whose
// length equals the number of dimensions of the first input.
class ShapeSizeComputer : public SizeComputer {
    virtual bool onComputeSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                               const std::vector<Tensor*>& outputs) const override {
        MNN_ASSERT(1 <= inputs.size());
        MNN_ASSERT(1 == outputs.size());
        auto source        = inputs[0];
        auto result        = outputs[0];
        auto& resultBuffer = result->buffer();

        resultBuffer.dimensions = 1;
        result->setType(DataType_DT_INT32);
        const auto defaultFormat                         = op->defaultDimentionFormat();
        TensorUtils::getDescribe(result)->dimensionFormat = defaultFormat;

        const auto sourceFormat = TensorUtils::getDescribe(source)->dimensionFormat;
        // For compability: an NC4HW4 input under an NHWC default format always reports 4 dims.
        const bool forceFourDims =
            (sourceFormat == MNN_DATA_FORMAT_NC4HW4) && (defaultFormat == MNN_DATA_FORMAT_NHWC);
        resultBuffer.dim[0].extent = forceFourDims ? 4 : source->buffer().dimensions;
        return true;
    }
};
REGISTER_SHAPE(ShapeSizeComputer, OpType_Shape);
REGISTER_SHAPE(ShapeSizeComputer, OpType_Shape) 宏扩展如下:
// REGISTER_SHAPE(ShapeSizeComputer, OpType_Shape)
void ___ShapeSizeComputer__OpType_Shape__() {
ShapeSizeComputer* _temp = new ShapeSizeComputer;
SizeComputerSuite* ts = SizeComputerSuite::get();
ts->insert(_temp, op);
}
ts->insert 的实现代码如下:
// source/shape/SizeComputer.cpp
// Stores `t` in the registry slot of op type `type`, replacing whatever the
// slot held before.
void SizeComputerSuite::insert(SizeComputer* t, OpType type) {
    auto& slot = mRegistry[type];
    slot       = t;
}
由上可见,扩展后的代码正是一个函数,其把维度计算类(ShapeSizeComputer)注册到 mRegistry 中,函数名 ___ShapeSizeComputer__OpType_Shape__ 呼应了 registerShapeOps 函数中的调用。mRegistry 呼应了 SizeComputerSuite::search 函数的实现。
1.1.1.1.1.1.2 SizeComputer::onComputeSize
SizeComputer::computeOutputSize 函数中的 onComputeSize 如下:
auto computer = computeFactory->search(op->type());
if (nullptr != computer) {
bool ret = computer->onComputeSize(op, inputs, outputs);
search 函数找到对应的维度计算实现(如:ConvolutionSizeComputer),然后调用其方法 onComputeSize。
备注:维度计算的所有实现都是基于 SizeComputer 基类的,实际运行中根据 op->type() 类型,调用不同的维度计算子类
如下为 ConvolutionSizeComputer::onComputeSize 的实现:
// source/shape/ShapeConvolution.cpp
// Infers the output shape of a convolution.
// Kernel size and output count come from the op's Convolution2DCommon, or from
// inputs[1] when several inputs are given and the op's output count is 0.
// Output width/height follow the pad mode (SAME, VALID, or explicit padding).
// Returns false for inputs with <= 1 dimension and for a single-input
// OpType_Convolution whose channel counts do not match.
virtual bool onComputeSize(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                           const std::vector<Tensor*>& outputs) const override {
    MNN_ASSERT(inputs.size() >= 1);
    MNN_ASSERT(1 == outputs.size());
    const Convolution2DCommon* common = loadCommon(op);
    int kernelX      = common->kernelX();
    int kernelY      = common->kernelY();
    auto outputCount = common->outputCount();
    if (inputs.size() > 1 && outputCount == 0) {
        // From TF's multi input convolution
        const auto weight = inputs[1];
        outputCount       = weight->length(0);
        kernelX           = weight->length(3);
        kernelY           = weight->length(2);
    }
    // Effective kernel extent once dilation is applied.
    const int dilatedKernelW = common->dilateX() * (kernelX - 1) + 1;
    const int dilatedKernelH = common->dilateY() * (kernelY - 1) + 1;
    int outW = 1;
    int outH = 1;

    auto input = inputs[0];
    if (input->dimensions() <= 1) {
        // Convolution is not valid for dimension <= 1
        return false;
    }

    const auto inputCount = common->inputCount();
    const bool depthwiseMatch = inputCount == common->outputCount() &&
                                inputCount == common->group() &&
                                inputCount == input->channel();
    const bool commonChannelMatch =
        inputCount == inputs[0]->channel() ||                // real relationship in express
        (inputCount * common->group() == input->channel());  // standard definition of group convolution
    const bool valid = inputCount == 0 || depthwiseMatch || commonChannelMatch;
    // For Tensorflow Group Convolution, the inputCount is the size of filter's input count
    if (inputs.size() == 1 && !valid && OpType_Convolution == op->type()) {
        input->printShape();
        MNN_ERROR(
            "Error for compute convolution shape, inputCount:%d, outputCount:%d, KH:%d, KW:%d, group:%d\ninputChannel: %d, batch:%d, width:%d, height:%d. "
            "Input data channel may be mismatch with filter channel count\n",
            common->inputCount(), outputCount, kernelY, kernelX, common->group(),
            input->channel(), input->batch(), input->width(), input->height());
        return false;
    }

    switch (common->padMode()) {
        case PadMode_SAME: {
            // Tensorflow padding mode SAME
            outW = ceil(static_cast<float>(input->width()) / static_cast<float>(common->strideX()));
            outH = ceil(static_cast<float>(input->height()) / static_cast<float>(common->strideY()));
            break;
        }
        case PadMode_VALID: {
            // Tensorflow padding mode VALID
            outW = ceil(static_cast<float>(input->width() - dilatedKernelW + 1) / static_cast<float>(common->strideX()));
            outH = ceil(static_cast<float>(input->height() - dilatedKernelH + 1) / static_cast<float>(common->strideY()));
            break;
        }
        default: {
            // Pad_Caffe means User setted padding
            if (nullptr != common->pads()) {
                MNN_ASSERT(common->pads()->size() >= 4);
                const auto padValues   = common->pads()->data();
                const int paddedWidth  = input->width() + padValues[1] + padValues[3];
                const int paddedHeight = input->height() + padValues[0] + padValues[2];
                outW = paddedWidth < dilatedKernelW ? 0 : (paddedWidth - dilatedKernelW) / common->strideX() + 1;
                outH = paddedHeight < dilatedKernelH ? 0 : (paddedHeight - dilatedKernelH) / common->strideY() + 1;
            } else {
                // NOTE(review): unlike the pads() branch above, this branch has no
                // "padded size < kernel" guard - confirm whether that is intended.
                const int paddedWidth  = input->width() + common->padX() * 2;
                const int paddedHeight = input->height() + common->padY() * 2;
                outW = (paddedWidth - dilatedKernelW) / common->strideX() + 1;
                outH = (paddedHeight - dilatedKernelH) / common->strideY() + 1;
            }
            break;
        }
    }

    auto& outputBuffer      = outputs[0]->buffer();
    outputBuffer.dimensions = input->buffer().dimensions;
    auto format             = TensorUtils::getDescribe(input)->dimensionFormat;
    outputBuffer.type       = input->getType();
    const auto conv2D = op->main_as_Convolution2D();
    if (conv2D && conv2D->symmetricQuan() && conv2D->symmetricQuan()->outputDataType() != DataType_DT_INT8) {
        // Output type taken from the op's symmetric quantization parameters.
        outputs[0]->setType(conv2D->symmetricQuan()->outputDataType());
    }

    outputBuffer.dim[0].extent = input->buffer().dim[0].extent;
    // Channel is axis 3 for NHWC, axis 1 for every other format.
    const bool channelLast = (MNN_DATA_FORMAT_NHWC == format);
    const int channelAxis  = channelLast ? 3 : 1;
    const int heightAxis   = channelLast ? 1 : 2;
    const int widthAxis    = channelLast ? 2 : 3;
    outputBuffer.dim[channelAxis].extent = outputCount;
    outputBuffer.dim[heightAxis].extent  = outH;
    outputBuffer.dim[widthAxis].extent   = outW;

    TensorUtils::getDescribe(outputs[0])->dimensionFormat = format;
    return true;
}
1.1.1.1.2 Pipeline::UnitInfo::setUp
在函数 Pipeline::encode 中调用 Pipeline::UnitInfo::setUp 函数的代码如下:
static_cast<UnitInfo*>(cmd.info.get())->setUp(cmd, index++, info.op, totalIndex++);
其实现代码如下:
// source/core/Pipeline.cpp
void Pipeline::UnitInfo::setUp(const Command& command, int index, const Op* originOp, int totalIndex) {
if (nullptr != command.op->name()) {
mContent->name = command.op->name()->str();
} else {
if (nullptr != originOp && nullptr != originOp->name()) {
char buffer[20];
sprintf(buffer, "%d", index);
mContent->name = originOp->name()->str() + "_raster_" + buffer;
} else {
char buffer[20];
sprintf(buffer, "_raster_%d", totalIndex);
mContent->name = buffer;
}
}
#ifdef MNN_OP_SEPERATE
if (command.op->type() == OpType_UnaryOp) {
mContent->type = EnumNameUnaryOpOperation(command.op->main_as_UnaryOp()->opType());
} else if (command.op->type() == OpType_BinaryOp) {
mContent->type = EnumNameBinaryOpOperation((BinaryOpOperation)(command.op->main_as_BinaryOp()->opType()));
} else if (command.op->type() == OpType_Reduction) {
mContent->type = EnumNameReductionType(command.op->main_as_ReductionParam()->operation());
} else {
mContent->type = EnumNameOpType(command.op->type());
}
#else
mContent->type = EnumNameOpType(command.op->type());
#endif
#ifndef MNN_BUILD_MINI
mContent->flops = SizeComputer::computeFlops(command.op, command.inputs, command.outputs);
#endif
}
1.1.1.1.2.1 SizeComputer::computeFlops
// source/shape/SizeComputer.cpp
// Estimates the cost of one op, scaled by 1 / (1024 * 1024).
// Order of preference:
//   1. the SizeComputer registered for the op type;
//   2. for loop ops (OpType_While carrying a LoopParam): the sum over all region
//      commands of size[0] * size[1] * size[2], times the loop count;
//   3. otherwise the total element count of the outputs.
float SizeComputer::computeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                 const std::vector<Tensor*>& outputs) {
    auto registered = SizeComputerSuite::get()->search(op->type());
    if (nullptr != registered) {
        return registered->onComputeFlops(op, inputs, outputs);
    }

    const bool isLoop = (op->type() == OpType_While) && (op->main_type() == OpParameter_LoopParam);
    if (isLoop) {
        float total     = 0.0f;
        auto loopParam  = op->main_as_LoopParam();
        auto commands   = loopParam->commands();
        if (nullptr != commands) {
            const auto commandCount = commands->size();
            for (int idx = 0; idx < commandCount; ++idx) {
                auto region = commands->GetAs<RegionCommand>(idx);
                auto extent = region->size()->data();
                total += (float)extent[0] * (float)extent[1] * (float)extent[2];
            }
        }
        total *= (float)loopParam->loopNumber();
        return total / 1024.0f / 1024.0f;
    }

    float total = 0.0f;
    for (size_t idx = 0; idx < outputs.size(); ++idx) {
        total += (float)outputs[idx]->elementSize() / 1024.0f / 1024.0f;
    }
    return total;
}
1.1.1.1.2.1.1 SizeComputer::onComputeFlops
在函数 SizeComputer::computeFlops 中调用 SizeComputer::onComputeFlops 函数的代码如下:
// Excerpt from SizeComputer::computeFlops: look up the SizeComputer registered
// for this op type and, when one exists, delegate to its virtual onComputeFlops.
auto computeFactory = SizeComputerSuite::get();
auto computer = computeFactory->search(op->type());
if (nullptr != computer) {
return computer->onComputeFlops(op, inputs, outputs);
}
onComputeFlops 是一个虚函数,computer->onComputeFlops 是一次多态调用,其基类为 SizeComputer。我们选择一个具体实现 ConvolutionSizeComputer 进行分析,其实现代码如下:
// source/shape/ShapeConvolution.cpp
// Size computer for convolution ops (excerpt showing only onComputeFlops).
class ConvolutionSizeComputer : public SizeComputer {
    // Estimates convolution cost as
    //   outputSize * kernelW * kernelH * (inputChannels * outputChannels / group) / FLOPS_M
    // where outputSize = output width * height * batch.
    virtual float onComputeFlops(const MNN::Op* op, const std::vector<Tensor*>& inputs,
                                 const std::vector<Tensor*>& outputs) const override {
        const Convolution2DCommon* layer = loadCommon(op);
        auto kw    = layer->kernelX();
        auto kh    = layer->kernelY();
        auto group = layer->group();
        auto ic    = inputs[0]->channel();
        auto oc    = outputs[0]->channel();
        auto oSize = outputs[0]->width() * outputs[0]->height() * outputs[0]->batch();
        if (op->type() == OpType_QuantizedDepthwiseConv2D) {
            // Depthwise: one group per input channel.
            group = ic;
        }
        if (layer->inputCount() != ic && layer->inputCount() > 0) {
            // Derive the group count from the actual input channel count.
            group = ic / layer->inputCount();
        }
        // A group of 0 is treated as 1 to avoid dividing by zero.
        auto flops = (float)oSize * kw * kh * (ic * oc / (group == 0 ? 1 : group)) / FLOPS_M;
        return flops;
    }
};
1.1.1.2 Command 命令
// source/core/Command.hpp
// One executable unit in a CommandBuffer: an op together with its tensors and
// the objects attached to it. Reference counted through RefCount (held via
// SharedPtr<Command>).
struct Command : public RefCount {
// The op this command runs.
const Op* op;
// NOTE(review): workInputs / workOutputs are not used in the code shown here;
// presumably backend-side working tensors - confirm against Pipeline.
std::vector<Tensor*> workInputs;
std::vector<Tensor*> workOutputs;
// Input and output tensors of the op.
std::vector<Tensor*> inputs;
std::vector<Tensor*> outputs;
// Storage backing `op` when the op was built at runtime (e.g. the inserted
// FloatToInt8 / Int8ToFloat cast commands).
std::shared_ptr<BufferStorage> buffer;
// Execution created by a backend for this command; may be null until created.
std::shared_ptr<Execution> execution;
// Debug description; Pipeline::encode sets it to a UnitInfo when debug is enabled.
std::shared_ptr<OperatorInfo> info;
#ifdef MNN_BUILD_CODEGEN
// NOTE(review): presumably consumed by the codegen op fusion - confirm.
bool canVectorize = false;
#endif
};
1.1.1.3 CommandBuffer
// source/core/Command.hpp
// An ordered list of commands plus the extra tensors they rely on.
struct CommandBuffer {
// Commands in execution order.
std::vector<SharedPtr<Command>> command;
// Tensors owned by this buffer (e.g. the wrap tensors created for cast
// commands); holding them here keeps them alive.
std::vector<std::shared_ptr<Tensor>> extras;
// Set to true when a cast wrap command has been inserted; the geometry
// transform skips onRecompute while it is set and then clears it.
bool hasWrap = false;
};
☆