MNN学习笔记

平丘月初

已于 2022-06-16 11:49:35 修改

阅读量567

点赞数

分类专栏： MNN 文章标签： mnn 学习 microsoft

于 2022-06-09 11:21:10 首次发布

本文链接：https://blog.csdn.net/u011994454/article/details/125199270

版权

MNN 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

// Expands to the GCC/Clang attribute that gives the annotated symbol "default" visibility.
#define MNN_PUBLIC __attribute__((visibility("default")))

程序调用某函数A，A函数存在于两个动态链接库liba.so,libb.so中，并且程序执行需要链接这两个库，此时程序调用的A函数到底是来自于a还是b呢？
这取决于链接时的顺序，比如先链接liba.so，这时候通过liba.so的导出符号表就可以找到函数A的定义，并加入到符号表中，链接libb.so的时候，符号表中已经存在函数A，就不会再更新符号表，所以调用的始终是liba.so中的A函数。
为了避免这种混乱，所以使用

__attribute__((visibility("default")))  // default: the symbol is visible outside its own shared object
__attribute__((visibility("hidden")))  // hidden: the symbol is not visible outside its own shared object

visibility用于设置动态链接库中函数的可见性，将变量或函数设置为hidden，则该符号仅在本so中可见，对其他库不可见。

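_MSC_VER是MSVC编译器的内置宏，其值表示编译器的版本。在程序中使用_MSC_VER宏，可以根据编译器及其版本让编译器有选择性地编译一段程序。由于该宏只由MSVC（以及与其兼容的编译器，如clang-cl）定义，它常被用来区分MSVC与GCC/Clang等其他编译器，从而间接区分windows和linux平台；需要注意的是，在windows上使用MinGW等非MSVC编译器时不会定义_MSC_VER，若要严格判断操作系统，应使用_WIN32、__linux__等宏。Linux和Windows是两大编程平台，这两个系统的C++程序不一定能够兼容：在linux平台下能够运行的程序不一定能在windows下运行，在windows平台下的C++程序也不一定能在Linux下运行。因此如果希望写出来的程序对两个平台都兼容，就需要在写代码的时候通过条件编译进行手动控制。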

// Compile one of two code paths depending on whether the compiler defines _MSC_VER.
// The fallback branch must be a plain `#else`: `#else if` is not a directive, the
// trailing `if` is just a stray token that compilers ignore with a warning.
#ifdef _MSC_VER
	// to do: code for MSVC
#else
	// to do: code for every other compiler
#endif

_MSC_VER这个名称中，MS是Microsoft的简写。MSC就是Microsoft的C编译器。VER是Version的简写。因此_MSC_VER的意思就是Microsoft的C编译器的版本。微软不同时期，编译器有不同的版本。

MSC    1.0   _MSC_VER == 100
MSC    2.0   _MSC_VER == 200
MSC    3.0   _MSC_VER == 300
MSC    4.0   _MSC_VER == 400
MSC    5.0   _MSC_VER == 500
MSC    6.0   _MSC_VER == 600
MSC    7.0   _MSC_VER == 700
MSVC++ 1.0   _MSC_VER == 800
MSVC++ 2.0   _MSC_VER == 900
MSVC++ 4.0   _MSC_VER == 1000 (Developer Studio 4.0)
MSVC++ 4.2   _MSC_VER == 1020 (Developer Studio 4.2)
MSVC++ 5.0   _MSC_VER == 1100 (Visual Studio 97 version 5.0)
MSVC++ 6.0   _MSC_VER == 1200 (Visual Studio 6.0 version 6.0)
MSVC++ 7.0   _MSC_VER == 1300 (Visual Studio .NET 2002 version 7.0)
MSVC++ 7.1   _MSC_VER == 1310 (Visual Studio .NET 2003 version 7.1)
MSVC++ 8.0   _MSC_VER == 1400 (Visual Studio 2005 version 8.0)
MSVC++ 9.0   _MSC_VER == 1500 (Visual Studio 2008 version 9.0)
MSVC++ 10.0  _MSC_VER == 1600 (Visual Studio 2010 version 10.0)
MSVC++ 11.0  _MSC_VER == 1700 (Visual Studio 2012 version 11.0)
MSVC++ 12.0  _MSC_VER == 1800 (Visual Studio 2013 version 12.0)
MSVC++ 14.0  _MSC_VER == 1900 (Visual Studio 2015 version 14.0)
MSVC++ 14.1  _MSC_VER == 1910 (Visual Studio 2017 version 15.0)
MSVC++ 14.11 _MSC_VER == 1911 (Visual Studio 2017 version 15.3)
MSVC++ 14.12 _MSC_VER == 1912 (Visual Studio 2017 version 15.5)
MSVC++ 14.13 _MSC_VER == 1913 (Visual Studio 2017 version 15.6)
MSVC++ 14.14 _MSC_VER == 1914 (Visual Studio 2017 version 15.7)
MSVC++ 14.15 _MSC_VER == 1915 (Visual Studio 2017 version 15.8)
MSVC++ 14.16 _MSC_VER == 1916 (Visual Studio 2017 version 15.9)
MSVC++ 14.2  _MSC_VER == 1920 (Visual Studio 2019 Version 16.0)
MSVC++ 14.21 _MSC_VER == 1921 (Visual Studio 2019 Version 16.1)
MSVC++ 14.22 _MSC_VER == 1922 (Visual Studio 2019 Version 16.2)

在程序中加入_MSC_VER宏可以根据编译器版本让编译器有选择性地编译一段程序。例如，一个版本的编译器生成的 lib 文件可能无法被另一个版本的编译器使用，那么在写程序的时候，可以通过 #if / #elif 等预处理条件指令，根据编译器版本选择链接对应版本编译器产生的 lib 文件。

// Select the SomeLib import library that matches the compiler version and the build type.
// Conditions are tested from the newest compiler to the oldest, so the first match wins.
// The chain must use `#elif`: `#else if <cond>` is parsed as a plain `#else` (the condition
// is ignored), and a second `#else` in the same chain is a hard preprocessing error.
#if _MSC_VER >= 1400 // vc8 (and every later version, e.g. vc9)
	#ifdef _DEBUG
		#pragma comment(lib, "SomeLib-vc8-d.lib")
	#else
		#pragma comment(lib, "SomeLib-vc8-r.lib")
	#endif
#elif _MSC_VER >= 1310 // vc71
	#ifdef _DEBUG
		#pragma comment(lib, "SomeLib-vc71-d.lib")
	#else
		#pragma comment(lib, "SomeLib-vc71-r.lib")
	#endif
#elif _MSC_VER >= 1200 // vc6
	#ifdef _DEBUG
		#pragma comment(lib, "SomeLib-vc6-d.lib")
	#else
		#pragma comment(lib, "SomeLib-vc6-r.lib")
	#endif
#endif

BackendConfig定义

namespace MNN {
/**
 * Options passed to a backend: memory, power and precision preferences plus a
 * user-defined context. Every preference defaults to its *_Normal value.
 */
struct BackendConfig {
	enum MemoryMode { Memory_Normal = 0, Memory_High, Memory_Low };
	/** memory preference. */
	MemoryMode memory = Memory_Normal;
	enum PowerMode { Power_Normal = 0, Power_High, Power_Low };
	/** power preference. */
	PowerMode power = Power_Normal;
	// Enumerators are separated by commas; a ';' inside the list is a syntax error.
	enum PrecisionMode { Precision_Normal = 0, Precision_High, Precision_Low };
	/** precision preference. */
	PrecisionMode precision = Precision_Normal;
	/** user defined context */
	union {
		// Both members share storage; only sharedContext carries the default initializer.
		void* sharedContext = nullptr;
		size_t flags; // Valid for CPU Backend
	};
};
} // namespace MNN

Backend定义

/**
 * Abstract backend interface. A concrete backend creates Execution objects for ops
 * (onCreate), is notified around resize and execute phases, and acquires, releases
 * and copies tensor buffers. The forward type passed to the constructor is stored in mType.
 */
class Backend: public NonCopyable {
public:
	/** info used to create backend */
	struct Info {
		/** forward type. */
		MNNForwardType type = MNN_FORWARD_CPU;
		/** numThread for CPU. gpuMode for GPU only. tuning/memory Mode setting. */
		union {
			// numThread and gpuMode share the same storage; the default value is 4.
			int numThread = 4;
			int gpuMode;
		};
		/** user data. */
		BackendConfig* user = NULL;
		enum Mode {
			DIRECT = 0, // The Op will be run in execution->onExecute
			INDIRECT = 1 // The Op will be recorded. Run in onExecuteBegin and wait in onExecuteEnd
		};
		/** execution mode, DIRECT by default. */
		Mode mode = DIRECT;
	};
	
	/** storage kinds accepted by onAcquireBuffer / onReleaseBuffer. */
	enum StorageType {
		STATIC, /** use NOT reusable memory */
		DYNAMIC, /** use reusable memory */
		DYNAMIC_SEPERATE /** use NOT reusable memory */
	};

public:
	/** construct a backend of the given forward type. */
	Backend(MNNForwardType type): mType(type) {}
	virtual ~Backend() = default;
public:
	/** measure the cost for op with input and output tensors. The default implementation returns (0.0f, false). */
	virtual std::pair<float, bool> onMeasure(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op){
		return std::make_pair(0.0f, false);
	}
	/** create execution for op with input and output tensors. */
	virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op) = 0;
	/** callback before resize ops. No-op by default. */
	virtual void onResizeBegin() {}
	/** callback after resize ops. No-op by default. */
	virtual void onResizeEnd() {}
	/** callback before executing ops. */
	virtual void onExecuteBegin() const = 0;
	/** callback after executing ops. */
	virtual void onExecuteEnd() const = 0;
public:
	/** allocate buffer of tensor for given storage type. */
	virtual bool onAcquireBuffer(const Tensor* tensor, StorageType storageType) = 0;
	/** release buffer of tensor for given storage type. */
	virtual bool onReleaseBuffer(const Tensor* tensor, StorageType storageType) = 0;
	/** clear all dynamic buffers. */
	virtual bool onClearBuffer() = 0;
	/** copy buffer from tensor to tensor. */
	virtual void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const = 0;
	/** get runtime datatype. The default implementation returns rtype unchanged. */
	virtual halide_type_t getRunType(const MNN::Op* op, halide_type_t qtype, halide_type_t  rtype) {return rtype; }
public:
	/** get gpu Tensor map host ptr / unmap. The defaults return nullptr and false respectively. */
	virtual void* onMapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* srcTensor){return nullptr;}
	virtual bool onUnmapTensor(Tensor::MapType mtype, Tensor::DimensionType dtype, const Tensor* dstTensor, void* mapPtr){return false;}

private:
	/** forward type given at construction; never changes afterwards. */
	const MNNForwardType mType;	
};

/** Each backend belongs to a runtime. The runtime creates backends and exposes resource and cache hooks. */
class Runtime: public NonCopyable {
public:
	/** Origin Op -> (Compiler) -> New Op -> Backend
	Default use Compiler_Geometry, Origin Op -> Compiler_Geometry -> Little Op
	For several Backend, we can't use Geometry to decompose origin op, then it set Compiler_Origin
	*/
	enum CompilerType {
		Compiler_Geometry = 0,
		Compiler_Origin = 1,
		Compiler_Loop = 2,
	};
	// NOTE(review): the comment above names Compiler_Geometry as the default, but this
	// default implementation returns Compiler_Loop - confirm which one is intended.
	virtual CompilerType onGetCompilerType() const { return Compiler_Loop; }
	virtual ~Runtime() = default;
	/** create backend */
	virtual Backend* onCreate(const BackendConfig* config = nullptr) const = 0;
	/** clear unused resource, clear level: 0 - 100, bigger means clear more, smaller means cache more. */
	virtual void onGabageCollect(int level) = 0;
	/** Measure the memory it used in MB. The default implementation returns 0.0f. */
	virtual float onGetMemoryInMB() { return 0.0f; }
	/** set cache from a buffer of the given size. The default implementation ignores the buffer and returns true. */
	virtual bool onSetCache(const void* buffer, size_t size) {
		// default cache valid, avoid being reset
		return true;
	}
	/** get cache as a (pointer, size) pair. The default implementation returns (nullptr, 0). */
	virtual std::pair<const void*, size_t> onGetCache() {
		return std::make_pair(nullptr, 0);
	}
};

问：BackendConfig类和Backend类定义了op运行时的计算设备，以及对功耗，内存，计算精度的配置。每个Backend都从属于一个Runtime类；但Execution类在定义时，也需要以backend类作为构造参数，所以Execution类和Runtime类，两者之间是什么关系？cpu相关的op基本都继承了Execution类。

/** abstract execution */
class Execution: public NonCopyable {
public:
	/** initializer: backend that execution will running on. */
	Execution() = delete;
	Execution(Backend *backend): mBackEnd(backend) {}
	/** deinitializer. */
	virtual ~Execution() = default;
	
	/** response shape change of input or output tensors. */
	virtual ErrorCode onResize(const std::vector<Tensor*> &inputs, const std::vector<Tensor*> &outputs){
		return NO_ERROR;
	}
	/** perform execution. */
	virtual ErrorCode onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor*> &outputs) = 0;
	/** clone execution, new execution will share weight from this execution */
	virtual bool onClone(Backend* bn, const Op* op, Execution** dst) { return false; }
};

CoreFunctions定义

namespace MNN {
/** Table of core functions; the part shown here holds CPU feature flags, both false by default. */
struct CoreFunctions {
	// cpu feature
	bool supportFp16arith{false};
	bool supportSDot{false};

	/** MatMul Pack and Functions */
};
} // namespace MNN

MNNPackC4Common函数解析
目标是将输入tensor从NCHW变为NC4HW4，从而可以使用SIMD，并且降低cache miss。

/**
 * Pack planar channel data into blocks of 4 interleaved channels.
 *
 * For every group of 4 source channels, the output stores the 4 channel values of
 * position 0, then the 4 values of position 1, and so on. When depth is not a
 * multiple of 4, the missing channels of the last block are written as 0.
 *
 * @param dst        output buffer; block z starts at dst + z * areaOffset[1] * 4.
 * @param src        input buffer; channel c starts at src + c * areaOffset[0].
 * @param area       number of positions copied per channel (presumably H*W - confirm with caller).
 * @param depth      number of source channels.
 * @param areaOffset areaOffset[0] is the source channel stride, areaOffset[1] the
 *                   destination stride counted in 4-element groups.
 */
template<typename T>
void MNNPackC4Common(T* dst, const T* src, size_t area, size_t depth, int* areaOffset) {
	const int depthC4 = static_cast<int>(depth / 4); // number of complete 4-channel blocks
	const int remain = static_cast<int>(depth) - depthC4 * 4; // channels left for the partial block
	const T* srcChannel[4];
	const T* srcOffset = src;
	for (int z = 0; z < depthC4; ++z) {
		auto dstZ = dst + z * areaOffset[1] * 4;
		// One pointer per channel of the current block.
		for (int y = 0; y < 4; ++y) {
			srcChannel[y] = srcOffset + areaOffset[0] * y;
		}
		// Position-major copy: read the same position from the 4 channels in turn.
		for (size_t x = 0; x < area; ++x) {
			for (int y = 0; y < 4; ++y) {
				*dstZ++ = srcChannel[y][x];
			}
		}
		srcOffset += areaOffset[0] * 4;
	}
	if (remain > 0) {
		auto dstZ = dst + depthC4 * areaOffset[1] * 4;
		for (int y = 0; y < remain; ++y) {
			srcChannel[y] = srcOffset + areaOffset[0] * y;
		}
		for (size_t x = 0; x < area; ++x) {
			for (int y = 0; y < remain; ++y) {
				*dstZ++ = srcChannel[y][x];
			}
			// Zero-fill the (4 - remain) channels that do not exist in the source.
			for (int y = remain; y < 4; ++y) {
				*dstZ++ = 0;
			}
		}
	}
}

与PackC4对应的是UnPackC4，将NC4HW4转换回NCHW

Precision_Low相关代码

// Fragment of a backend-creation routine (the enclosing function is not shown here).
#ifdef MNN_USE_ARMV82
	// Only compiled when MNN_USE_ARMV82 is defined: return an Arm82Backend when the core
	// functions report fp16 arithmetic support and low precision was requested.
	auto core = MNNGetCoreFunctions();
	if (core->supportFp16arith && precision == BackendConfig::Precision_Low){
		return new Arm82Backend(this);
	}
#endif
#ifdef MNN_SUPPORT_BF16
	// Only compiled when MNN_SUPPORT_BF16 is defined: return a BF16Backend when low precision
	// was requested and BF16Functions::get() yields a non-null/true value.
	if (precision == BackendConfig::Precision_Low && BF16Functions::get()){
		return new BF16Backend(this);
	}
#endif

平丘月初

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
MNN学习笔记

程序调用某函数A，A函数存在于两个动态链接库liba.so,libb.so中，并且程序执行需要链接这两个库，此时程序调用的A函数到底是来自于a还是b呢？这取决于链接时的顺序，比如先链接liba.so，这时候通过liba.so的导出符号表就可以找到函数A的定义，并加入到符号表中，链接libb.so的时候，符号表中已经存在函数A，就不会再更新符号表，所以调用的始终是liba.so中的A函数。为了避免这种混乱，所以使用visibility用于设置动态链接库中函数的可见性，将变量或函数设置为hidden，则该
复制链接

扫一扫