![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/9ee41d2775cdc328bbc38c572f0f342b.jpeg#pic_center)
1. 目的
背景: cpuinfo 库提供了 CPU 信息的查询, 涵盖了 x86 和 arm 等常见的指令集架构, 可以用来验证自行实现的 CPU 基础功能函数的正确性。自行实现 CPU 基础功能, 则是为了简化功能, 同时提供原汁原味的实现, 而不是照抄开源代码; “原汁原味”指的是有自己的思考过程。
cpuinfo 库是 pytorch 官方维护的一个基于 C 语言实现的库。什么意思呢?它的接口是 C 风格的, 实现也是 C 风格的,好处是比较容易看懂代码, 坏处是比较啰嗦,尤其是每次调用的最开始、结束都要手动初始化和反初始化:
cpuinfo_initialize()
cpuinfo_deinitialize()
一旦忘记初始化或反初始化就容易得到报错。为了方便使用, 不妨用 C++ 封装 cpuinfo.h 里面的 API 函数, 提供一个 `class CpuInfo`, 在它的构造函数和析构函数中执行初始化和反初始化; 而对于调用 CpuInfo 类的人来说, 完全不用知晓这些工作。
最终的代码截图如下:
2. 设计
比较 naive 的方式是找到每个 API 函数, 然后逐一拷贝到 `cpuinfo.hpp` 文件中, 逐一编写函数来封装调用。缺点: 手工操作过多, 容易写错; 如果想整体修改(例如整体增加换行符)也很麻烦。
凡是容易出错的地方,一定会出错!
由于一共涉及到大约 50 个 `cpuinfo_` 开头的函数, 考虑用 Python 扫描 `cpuinfo.h` 文件, 自动生成 `CpuInfo` 类, 并自动写入到 `cpuinfo.hpp` 文件。这个做法的特点也是很明晰的:
- 如果 pytorch 官方更新了 cpuinfo 库, 增加或删除了 API, 或修改了 API 的参数列表、返回值类型,可以自动重新生成,避免了潜在的手工操作
- 需要对 Python 有一定熟练度, 能够正确解析原本的每个 API, 包括解析出:
  - 返回值类型
  - 函数名称
  - 参数列表
这其实有点像编译原理小作业中,实现一个简陋的 parser 的过程了。
具体设计:
- CodeWriter 类: 负责写代码, 包括tab、空格、保存文件等
- 解析 C API 的函数: 解析出返回值类型、函数名称、参数列表
- 具体调用的函数: 逐个API解析
- 特殊处理: 对于构造函数和析构函数, 不需要也不能写 return 和返回值类型
3. 生成器: Python 代码
Python 代码:
# Author: ChrisZZ <imzhuo@foxmail.com>
# Homepage: https://github.com/zchrissirhcz
class CodeWriter(object):
    """Accumulates lines of generated source text and tracks block indentation.

    Lines are buffered in ``self.lines`` and only hit the disk when
    ``save()`` is called.

    Args:
        indent_len: number of spaces that make up one indentation level.
    """

    def __init__(self, indent_len):
        self.lines = []
        self.indent_num = 0
        self.indent_len = indent_len

    def write(self, content):
        """Buffer one line, prefixed with the current indentation.

        An empty ``content`` is stored as a truly empty line so that blank
        lines inside an indented block do not carry trailing whitespace.
        """
        if content:
            padding = (self.indent_len * self.indent_num) * ' '
            self.lines.append(padding + content)
        else:
            self.lines.append("")

    def save(self, filename):
        """Write every buffered line to ``filename``, one per line."""
        # Explicit encoding: the result must not depend on the locale.
        with open(filename, "w", encoding="utf-8") as fout:
            fout.writelines(line + "\n" for line in self.lines)

    def tab(self):
        """Increase the indentation by one level."""
        self.indent_num += 1

    def backspace(self):
        """Decrease the indentation by one level; a no-op at level zero."""
        if self.indent_num > 0:
            self.indent_num -= 1
# Location of the cpuinfo public header that is scanned for API prototypes.
header_path = "/home/zz/work/cpuinfo/include/cpuinfo.h"

# s1 collects every line that mentions CPUINFO_ABI surrounded by spaces,
# except lines containing "define" (which skips the macro's own definition).
s1 = []
with open(header_path, "r", encoding="utf-8") as fin:
    # Iterate the file object directly instead of loading all lines at once.
    for line in fin:
        line = line.rstrip()
        if " CPUINFO_ABI " in line and "define" not in line:
            s1.append(line)
class Param(object):
    """One parameter of a parsed function declaration.

    Attributes:
        type_str: the parameter's type as text.
        name: the parameter's identifier.
    """

    def __init__(self, type_str, name):
        self.type_str = type_str
        self.name = name
class FunctionDeclaration(object):
    """Parses a single-line prototype ``<return type> <delimeter> <name>(<params>);``.

    Attributes:
        return_type: text before the delimiter, surrounding whitespace removed.
        c_function_name: the function name as written in the header.
        cpp_function_name: ``c_function_name`` without a leading ``cpuinfo_``.
        param_lst_str: raw text between the parentheses, stripped.
        param_lst: list of ``Param``; empty when the parameter list is
            ``void`` or blank.

    Raises:
        IndexError: if ``delimeter`` does not occur in ``line``.
    """

    def __init__(self, line: str, delimeter="CPUINFO_ABI"):
        items = line.split(delimeter, 1)
        self.return_type = items[0].strip()

        # Drop the terminating ';' (if any) before splitting name from parameters.
        prototype = items[1].strip().rstrip(';').rstrip()
        name, _, rest = prototype.partition('(')
        self.c_function_name = name.strip()

        # Only cut the prefix when it is really there.
        prefix = "cpuinfo_"
        if self.c_function_name.startswith(prefix):
            self.cpp_function_name = self.c_function_name[len(prefix):]
        else:
            self.cpp_function_name = self.c_function_name

        # Everything up to the last ')' is the parameter list.
        self.param_lst_str = rest.rpartition(')')[0].strip()

        self.param_lst = []
        if self.param_lst_str in ("", "void"):
            return
        for param_kv in self.param_lst_str.split(','):
            # split() without arguments ignores the space that follows each comma.
            tokens = param_kv.split()
            if not tokens:
                continue
            param_name = tokens[-1]
            param_type_str = ' '.join(tokens[:-1])
            # `const char *name`: the stars belong to the type, not to the name,
            # otherwise a forwarded call would pass `*name`.
            stars = len(param_name) - len(param_name.lstrip('*'))
            if stars:
                param_type_str += '*' * stars
                param_name = param_name[stars:]
            self.param_lst.append(Param(param_type_str, param_name))
# Emit tests/cpuinfo.hpp: a CpuInfo class with one forwarding member function
# per prototype collected in s1.
w = CodeWriter(4)
w.write("#include <cpuinfo.h>")
w.write("")
w.write("class CpuInfo")
w.write("{")
w.write("public:")
w.tab()
for line in s1:
    fd = FunctionDeclaration(line)
    takes_no_args = (fd.param_lst_str == "void")

    # cpuinfo_initialize / cpuinfo_deinitialize become the constructor and the
    # destructor, which have neither a return type nor a `return` statement.
    if fd.c_function_name == "cpuinfo_initialize":
        declaration = "CpuInfo()"
        require_return = False
    elif fd.c_function_name == "cpuinfo_deinitialize":
        declaration = "~CpuInfo()"
        require_return = False
    else:
        params = "" if takes_no_args else fd.param_lst_str
        declaration = "{:s} {:s}({:s})".format(
            fd.return_type, fd.cpp_function_name, params)
        require_return = True

    # Forward the parameters by name, in declaration order.
    args = "" if takes_no_args else ", ".join(param.name for param in fd.param_lst)
    call = "{:s}{:s}({:s});".format(
        "return " if require_return else "", fd.c_function_name, args)

    w.write(declaration)
    w.write('{')
    w.tab()
    w.write(call)
    w.backspace()
    w.write('}')
w.backspace()
w.write("};")
w.save("tests/cpuinfo.hpp")
4. 结果: C++ 代码
生成的 cpuinfo.hpp
C++ 代码:
#include <cpuinfo.h>
// Thin C++ wrapper around the cpuinfo C API.
// The constructor calls cpuinfo_initialize() and the destructor calls
// cpuinfo_deinitialize(); every other member forwards its arguments unchanged
// to the cpuinfo_* function of the same name and returns that function's result.
// NOTE(review): no copy operations are declared, so a copied object also runs
// cpuinfo_deinitialize() in its destructor -- confirm that this is acceptable.
class CpuInfo
{
public:
// NOTE(review): any status returned by cpuinfo_initialize() is discarded here;
// confirm whether callers need to know that initialization failed.
CpuInfo()
{
cpuinfo_initialize();
}
~CpuInfo()
{
cpuinfo_deinitialize();
}
// Accessors without arguments that return a pointer from the matching
// cpuinfo_get_* function.
const struct cpuinfo_processor* get_processors()
{
return cpuinfo_get_processors();
}
const struct cpuinfo_core* get_cores()
{
return cpuinfo_get_cores();
}
const struct cpuinfo_cluster* get_clusters()
{
return cpuinfo_get_clusters();
}
const struct cpuinfo_package* get_packages()
{
return cpuinfo_get_packages();
}
const struct cpuinfo_uarch_info* get_uarchs()
{
return cpuinfo_get_uarchs();
}
const struct cpuinfo_cache* get_l1i_caches()
{
return cpuinfo_get_l1i_caches();
}
const struct cpuinfo_cache* get_l1d_caches()
{
return cpuinfo_get_l1d_caches();
}
const struct cpuinfo_cache* get_l2_caches()
{
return cpuinfo_get_l2_caches();
}
const struct cpuinfo_cache* get_l3_caches()
{
return cpuinfo_get_l3_caches();
}
const struct cpuinfo_cache* get_l4_caches()
{
return cpuinfo_get_l4_caches();
}
// Accessors taking an index, forwarded as-is to the matching cpuinfo_get_*
// function; no range check is performed in this wrapper.
const struct cpuinfo_processor* get_processor(uint32_t index)
{
return cpuinfo_get_processor(index);
}
const struct cpuinfo_core* get_core(uint32_t index)
{
return cpuinfo_get_core(index);
}
const struct cpuinfo_cluster* get_cluster(uint32_t index)
{
return cpuinfo_get_cluster(index);
}
const struct cpuinfo_package* get_package(uint32_t index)
{
return cpuinfo_get_package(index);
}
const struct cpuinfo_uarch_info* get_uarch(uint32_t index)
{
return cpuinfo_get_uarch(index);
}
const struct cpuinfo_cache* get_l1i_cache(uint32_t index)
{
return cpuinfo_get_l1i_cache(index);
}
const struct cpuinfo_cache* get_l1d_cache(uint32_t index)
{
return cpuinfo_get_l1d_cache(index);
}
const struct cpuinfo_cache* get_l2_cache(uint32_t index)
{
return cpuinfo_get_l2_cache(index);
}
const struct cpuinfo_cache* get_l3_cache(uint32_t index)
{
return cpuinfo_get_l3_cache(index);
}
const struct cpuinfo_cache* get_l4_cache(uint32_t index)
{
return cpuinfo_get_l4_cache(index);
}
// Accessors returning a uint32_t from the matching cpuinfo_get_*_count /
// cpuinfo_get_max_cache_size function.
uint32_t get_processors_count()
{
return cpuinfo_get_processors_count();
}
uint32_t get_cores_count()
{
return cpuinfo_get_cores_count();
}
uint32_t get_clusters_count()
{
return cpuinfo_get_clusters_count();
}
uint32_t get_packages_count()
{
return cpuinfo_get_packages_count();
}
uint32_t get_uarchs_count()
{
return cpuinfo_get_uarchs_count();
}
uint32_t get_l1i_caches_count()
{
return cpuinfo_get_l1i_caches_count();
}
uint32_t get_l1d_caches_count()
{
return cpuinfo_get_l1d_caches_count();
}
uint32_t get_l2_caches_count()
{
return cpuinfo_get_l2_caches_count();
}
uint32_t get_l3_caches_count()
{
return cpuinfo_get_l3_caches_count();
}
uint32_t get_l4_caches_count()
{
return cpuinfo_get_l4_caches_count();
}
uint32_t get_max_cache_size()
{
return cpuinfo_get_max_cache_size();
}
// Forwarders for the cpuinfo_get_current_* functions.
const struct cpuinfo_processor* get_current_processor()
{
return cpuinfo_get_current_processor();
}
const struct cpuinfo_core* get_current_core()
{
return cpuinfo_get_current_core();
}
uint32_t get_current_uarch_index()
{
return cpuinfo_get_current_uarch_index();
}
// default_uarch_index is passed through unchanged to the C function.
uint32_t get_current_uarch_index_with_default(uint32_t default_uarch_index)
{
return cpuinfo_get_current_uarch_index_with_default(default_uarch_index);
}
};