title: 基于ANTLR4 C++(cmake)解析设备树文件
top: 41
date: 2024-05-22 09:28:25
tags:
- ANTLR4
- C++
- 设备树
categories:
- ANTLR4
- C++
什么是设备树?
初始化并引导一个计算机系统需要多种软件模块相互协作。例如Bootloaders和hypervisors(固件)程序, 它们在把控制权交给操作系统软件之前可能需要在系统硬件上做低级的初始化工作。同时,bootloaders,hypervisors能够依次加载系统并把控制权交给操作系统, 使得软件之间的交互变得标准化、接口化并且易用。在这个文档中引导程序被用作泛指那些初始化系统状态并加载执行操作系统的程序。boot引导程序主要包括:固件、bootloaders和hypervisors。一个client程序主要包括:bootloader、hypervisors、操作系统和一些其他特别目的的程序。一个软件包可能包含boot程序和client程序。
设备树规范提供了一个完整的boot引导程序到client程序的接口定义和广泛且多样系统开发的最小集合。
设备树文件样例
// template.dts
// Version tag: the parser's `top` rule expects this statement first.
/dts-v1/;
// The next line is a commented-out memreserve example (line comment).
//memreserve/ <address> <length>;
// The block below is a commented-out node (block comment).
/*
mem {
mem;
};*/
// First root node. A second root follows later in this file and is merged
// into this one after parsing.
/ {
// &intc refers to the node labelled `intc:` further down.
interrupt-parent = <&intc>;
compatible = "linux,dummy-virt";
#size-cells = <0x02>;
#address-cells = <0x02>;
psci {
cpu_on = <0xc4000003>;
// Comma-separated list of string values.
compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
cpu_suspend = <0xc4000001>;
migrate = <0xc4000005>;
cpu_off = <0x84000002>;
method = "hvc";
// Comma-separated list of cell groups mixing label references and numbers.
assigned-clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&v2m_sysctl 3>, <&v2m_sysctl 3>;
};
// Empty node: no properties and no children.
mem {
};
memory {
reg = <0x00 0x80000000 0x00 0x10000000>;
device_type = "memory";
};
uart {
compatible = "arm,pl011", "arm,primecell";
clock-names = "uartclk", "apb_pclk";
clocks = <&apb_pclk>, <&apb_pclk>;
interrupts = <0x00 0x01 0x04>;
reg = <0x00 0x9000000 0x00 0x1000>;
};
// `intc:` is a label placed in front of the node name.
intc: intc {
// Property without a value.
interrupt-controller;
#size-cells = <0x02>;
ranges;
compatible = "arm,cortex-a15-gic";
#interrupt-cells = <0x03>;
reg = <0x00 0x8000000 0x00 0x10000>, <0x00 0x8010000 0x00 0x10000>;
#address-cells = <0x02>;
// Nested child node.
v2m {
msi-controller;
compatible = "arm,gic-v2m-frame";
reg = <0x00 0x8020000 0x00 0x1000>;
};
};
timer {
// String containing an escaped NUL between two compatible names.
compatible = "arm,armv8-timer\0arm,armv7-timer";
interrupts = <0x01 0x0d 0x104>, <0x01 0x0e 0x104>, <0x01 0x0b 0x104>, <0x01 0x0a 0x104>;
always-on;
};
ethernet: ethernet {
compatible = "smsc,lan9118", "smsc,lan9115";
reg-io-width = <4>;
reg = <0x0 0xC0000000 0x0 0x10000>;
// Property names may contain a comma.
smsc,irq-active-high;
interrupts = <0 4 4>;
phy-mode = "mii";
smsc,irq-push-pull;
};
chosen {
stdout-path = "/uart";
rng-seed = <0x35d0afb0 0x45d1f049 0x9b6bc5af 0x61073667 0xfa7f51b8 0xa46898c7 0xf96fbe17 0x2093044>;
kaslr-seed = <0xe61a0cad 0xcc2e0aab>;
};
// Only partially defined here; the second root below adds more to `cpus`.
cpus {
#size-cells = <0x00>;
};
};
// Second root node: exercises merging of several roots. `cpus` already
// exists in the first root, so its contents are combined with the ones here.
/ {
cpus {
#size-cells = <0x00>;
#address-cells = <0x01>;
// Deeply nested nodes closed on a single line.
cpu-map {
socket0 {
cluster0 {
core0 {
cpu = <&cpu_0>;
};};};};
// Labelled node whose name carries a unit address (`@0`).
cpu_0: cpu@0 {
reg = <0x00>;
// Label placed in front of a property.
info: compatible = "arm,cortex-a53";
device_type = "cpu";
// Byte string with a label in the middle of the bytes.
extra_1 = [ab cd ef byte4: 00 ff fe];
// Labels before a cell list and between cells.
extra_2 = reglabel: <0 sizelabel: 0x1000000>;
};};};
- 设备树文件注释方式同C语言。
- 一份设备树文件中,如果同时记录了多个根(`/`),解析完成后需将多个根的数据合并,相同路径下的相同属性键对应的值进行覆盖。
- 详细的设备树格式定义可见参考链接[1]。
ANTLR4描述(g4文件)
DTSLexer.g4
// Lexer for device tree source text.
//
// The default mode recognises structural punctuation, directives, labels and
// names. The openers '<', '[', '"', '//' and '/*' are skipped and each pushes
// a dedicated mode; the matching closer is skipped too and pops the mode, so
// the parser never sees any of these delimiters.
lexer grammar DTSLexer;
// Structural punctuation.
LC : '{';
RC : '}';
LP : '(';
RP : ')';
SC : ';';
EQ : '=';
CO : ',';
SL : '/';
// Mode openers (all skipped).
BEGIN_ADDRESS : '<' -> skip, pushMode(ADDRESS);
BEGIN_HEX_MODE : '[' -> skip, pushMode(HEX_MODE);
BEGIN_DEFINE_VAL : '"' -> skip, pushMode(DEFINE_VAL);
BEGIN_LINE_COMMENT : '//' -> skip, pushMode(LINE_COMMENT);
BEGIN_MUL_COMMENT : '/*' ->skip, pushMode(MUL_COMMENT);
WS : [ \t\r\n]+ -> skip ;
// Directives written between slashes, e.g. /memreserve/ and /dts-v1/.
MEMRESERVE : SL 'memreserve' SL;
// NOTE(review): INCLUDE is declared but no rule in this grammar refers to it.
INCLUDE : 'include' ;
VERSION : SL 'dts-v' [0-9]+ SL;
// A label is a name immediately followed by ':'.
LABEL : LABEL_NAME CL;
// Node name: starts with a letter, optionally followed by '@' and a unit address.
// NODE_NAME is listed before PROPERTY_NAME, so text that both rules match with
// the same length (e.g. a property key made of letters and '-') is expected to
// come out as NODE_NAME -- ANTLR prefers the earlier rule on ties.
NODE_NAME : CHAR+(CHAR|[0-9]|[,._+]|'-')* AT (CHAR|[0-9]|[,._+]|'-')+
| CHAR+(CHAR|[0-9]|[,._+]|'-')*
;
// Property name: additionally allows '?' and '#', and may start with any of
// the listed characters.
PROPERTY_NAME : (CHAR|[0-9]|[,._+?#]|'-')+ ;
fragment CHAR : [a-zA-Z] ;
// Number literals: 0x.. hex, optional-0d decimal, 0o.. octal, 0b.. binary.
fragment HEX : '0' [Xx] [0-9a-fA-F]+ ;
fragment DEC : ('0' [Dd])?[0-9]+ ;
fragment OCT : '0' [Oo] [0-7]+ ;
fragment BIN : '0' [Bb] [0-1]+ ;
fragment NUM : HEX | DEC | OCT | BIN ;
// NOTE(review): STR, NAME_ADDR and HA are not referenced by any rule below.
fragment STR : [\u0021\u0023-\u003a\u003c-\u007e]+ ;
fragment NAME_ADDR : CHAR+(CHAR|[0-9]|[,._+]|'-')* ;
fragment AT : '@';
fragment HA : '#';
fragment CL : ':';
fragment NODE_PATH : (SL NODE_NAME)+ ;
// NOTE(review): the range stops at \u007e, so characters above it (for example
// non-English text inside a comment) match no rule in the comment modes.
fragment ASCII : [\u0000-\u007e] ;
fragment LABEL_NAME : CHAR+(CHAR|[0-9]|'_')* ;
// ADDRESS
// Content between '<' and '>': cells, label/path references and operators.
mode ADDRESS;
WS_1 : [ \t\r\n]+ -> skip ;
PL : '+';
MI : '-';
AS : '*';
DI : '/' ;
MO : '%';
AM : '&';
LABEL_1 : LABEL ;
// A cell is a number, a label reference (&label) or a path reference (&{/path}).
// NOTE(review): the `[0-9]+` alternative looks redundant because DEC already
// accepts plain digits.
ADDRESS_VALUE : NUM+ | [0-9]+
| AM LABEL_NAME
| AM LC NODE_PATH RC
;
END_ADDRESS : '>' -> skip, popMode ;
// HEX_MODE
// Content between '[' and ']': hex digit groups, optionally labelled.
mode HEX_MODE;
WS_2 : WS -> skip;
LABEL_2 : LABEL ;
HEX_NUM : [0-9a-fA-F]+ ;
END_HEX_MODE : ']' -> skip, popMode ;
// DEFINE_VAL
// Content between double quotes.
// NOTE(review): PROPERTY_VALUE excludes the space character (\u0020) and this
// mode has no whitespace rule, so a quoted string containing a space does not
// appear to be matched by any rule here. An empty string "" yields no
// PROPERTY_VALUE token at all.
mode DEFINE_VAL;
LABEL_3 : LABEL ;
PROPERTY_VALUE : [\u0021\u0023-\u007e]+ ;
END_DEFINE_VAL : '"' -> skip, popMode ;
// LINE_COMMENT
// Everything up to the next '\n' is discarded one character at a time.
// END_LINE_COMMENT must stay above SKIP_ALL: both match a single '\n'.
mode LINE_COMMENT;
END_LINE_COMMENT : '\n' -> skip, popMode ;
SKIP_ALL : ASCII -> skip ;
// MUL_COMMENT
// Everything up to the closing '*/' is discarded.
mode MUL_COMMENT;
END_MUL_COMMENT : '*/' -> skip, popMode;
SKIP_ALL_1 : ASCII -> skip ;
DTSParser.g4
// Parser for device tree source text, built on the token vocabulary of DTSLexer.
parser grammar DTSParser;
options {
tokenVocab = DTSLexer;
}
// A file is: one version statement, an optional memreserve statement, then
// one or more top-level nodes up to end of input.
top : version_stat memory_stat? node+ EOF ;
version_stat : VERSION SC ;
// NOTE(review): ADDRESS_VALUE is only produced inside '<' ... '>' by the lexer,
// so this rule presumably expects the two numbers wrapped in angle brackets --
// confirm against the intended /memreserve/ syntax.
memory_stat : MEMRESERVE ADDRESS_VALUE ADDRESS_VALUE SC ;
// A node is a key, a brace-delimited body and a terminating ';'.
node : node_key LC node_value* RC SC ;
// Either the root '/' or an optionally labelled node name.
node_key : SL
| labels* NODE_NAME
;
// A node body holds properties and nested nodes in any order.
node_value : stat
| node
;
// A property: optional labels, a key, and an optional '= value'.
stat : labels* property_key (EQ property_value)? SC ;
// The lexer may classify a property key as NODE_NAME, so both are accepted.
property_key : PROPERTY_NAME
| NODE_NAME
;
// Comma-separated values, each optionally surrounded by labels; a trailing
// comma is tolerated.
property_value : labels* value_type labels* (CO labels* value_type labels*)* CO? ;
// One value: a string, a run of cells, or a run of hex bytes. The enclosing
// delimiters are skipped by the lexer, so only the contents appear here.
value_type : PROPERTY_VALUE
| (labels* ADDRESS_VALUE)+
| (labels* HEX_NUM)+
;
// A label token from any lexer mode.
labels : LABEL | LABEL_1 | LABEL_2 | LABEL_3 ;
- 值得注意的是,我们忽略了设备树语法中的 `#include "file"`,该类语句为预处理命令,需要单独解析处理,这里暂且忽略了。
基于visitor接口的C++实现
// myVisitor.h
#pragma once
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include "DTSParserBaseVisitor.h"
/// One node of the parsed device tree.
///
/// A Node owns every child pointer stored in `nodes` and deletes them in its
/// destructor. Copying is therefore disabled: a copy would hold the same raw
/// pointers and both objects would delete them.
struct Node
{
    /// Property name -> values of that property.
    std::map<std::string, std::vector<std::string>> property;
    /// Child node name -> owned child node.
    std::map<std::string, Node *> nodes;

    Node() = default;
    Node(const Node &) = delete;
    Node &operator=(const Node &) = delete;

    /// Deletes all children (recursively, through their own destructors).
    ~Node()
    {
        for (auto &pair : nodes)
        {
            delete pair.second;
            pair.second = nullptr;
        }
    }

    /// Writes the properties, then the children, of this node to std::cout.
    /// Each property is printed as "\n <name> : {<v1> <v2> }, " and each child
    /// as "\n<name> : {" + child output + "\n} ".
    void print() const
    {
        for (const auto &pair : property)
        {
            std::cout << '\n' << " " << pair.first << " : {";
            for (const auto &var : pair.second)
            {
                std::cout << var << " ";
            }
            std::cout << "}, ";
        }
        for (const auto &pair : nodes)
        {
            std::cout << '\n' << pair.first << " : {";
            // A null child has nothing to print; skip it instead of crashing.
            if (pair.second != nullptr)
            {
                pair.second->print();
            }
            std::cout << '\n' << "} ";
        }
    }
};
/// Parse-tree visitor that converts a DTSParser tree into Node objects.
///
/// The visitor owns the root nodes it collects and deletes them in its
/// destructor, so it must not be copied (a copy would delete the same roots).
class MyVisitor : public DTSParserBaseVisitor
{
public:
    MyVisitor() = default;
    MyVisitor(const MyVisitor &) = delete;
    MyVisitor &operator=(const MyVisitor &) = delete;
    ~MyVisitor() override;

    /// Returns the property key text as std::string.
    std::any visitProperty_key(DTSParser::Property_keyContext *ctx) override;
    /// Returns one value as std::string, with labels removed.
    std::any visitValue_type(DTSParser::Value_typeContext *ctx) override;
    /// Returns all values of a property as std::vector<std::string>.
    std::any visitProperty_value(DTSParser::Property_valueContext *ctx) override;
    /// Returns std::pair<std::string, std::vector<std::string>> (key, values).
    std::any visitStat(DTSParser::StatContext *ctx) override;
    /// Returns the node name (or "/") as std::string.
    std::any visitNode_key(DTSParser::Node_keyContext *ctx) override;
    /// Returns std::pair<std::string, Node *> (name, newly allocated node).
    std::any visitNode(DTSParser::NodeContext *ctx) override;
    /// Visits every top-level node and stores it in `roots`.
    std::any visitTop(DTSParser::TopContext *ctx) override;

    /// Returns a copy of the root pointer list; the visitor keeps ownership.
    std::vector<Node *> get_roots();
    /// Prints every collected root to std::cout.
    void print();

private:
    /// Root nodes collected by visitTop, owned by this visitor.
    std::vector<Node *> roots;
};
// myVisitor.cpp
#include "myVisitor.h"
/// Yields the raw text of the property key as a std::string.
std::any MyVisitor::visitProperty_key(DTSParser::Property_keyContext *ctx)
{
    std::string key_text = ctx->getText();
    return key_text;
}
/// Joins the text of every non-label child into one std::string.
/// Each piece is prefixed with a single space, so the result starts with " ".
std::any MyVisitor::visitValue_type(DTSParser::Value_typeContext *ctx)
{
    const std::vector<DTSParser::LabelsContext *> label_nodes = ctx->labels();
    std::string joined;
    for (auto &child : ctx->children)
    {
        const bool is_label =
            std::find(label_nodes.begin(), label_nodes.end(), child) != label_nodes.end();
        if (!is_label)
        {
            joined += " " + child->getText();
        }
    }
    return joined;
}
/// Collects the values of one property.
///
/// @param ctx the property_value parse-tree node
/// @return std::vector<std::string> with one entry per value_type child, in
///         source order
///
/// Only the value_type children are visited. Labels and comma separators are
/// skipped by construction, so a string value whose text is exactly "," is no
/// longer mistaken for a separator and dropped.
std::any MyVisitor::visitProperty_value(DTSParser::Property_valueContext *ctx)
{
    std::vector<std::string> vals;
    const std::vector<DTSParser::Value_typeContext *> values = ctx->value_type();
    vals.reserve(values.size());
    for (auto *value : values)
    {
        vals.emplace_back(std::any_cast<std::string>(visit(value)));
    }
    return vals;
}
/// Turns one property statement into a (key, values) pair.
/// The value list stays empty when the statement has no "= value" part.
std::any MyVisitor::visitStat(DTSParser::StatContext *ctx)
{
    std::pair<std::string, std::vector<std::string>> entry;
    entry.first = std::any_cast<std::string>(visit(ctx->property_key()));

    const bool has_value = ctx->children.size() > 2 && ctx->property_value();
    if (has_value)
    {
        entry.second = std::any_cast<std::vector<std::string>>(visit(ctx->property_value()));
    }
    return entry;
}
/// Yields the text of the last child of the node key as a std::string;
/// any labels in front of the name are thereby ignored.
std::any MyVisitor::visitNode_key(DTSParser::Node_keyContext *ctx)
{
    const auto &parts = ctx->children;
    return parts.back()->getText();
}
/// Builds a Node from one node of the parse tree.
///
/// @param ctx the node parse-tree element
/// @return std::pair<std::string, Node *>: the node name and a newly
///         allocated Node that the caller takes ownership of
/// @throws std::bad_any_cast if a child yields neither a property nor a node
///
/// Every child is visited exactly once and the result type is inspected with
/// the pointer form of std::any_cast. The previous try/catch approach visited
/// each nested node twice, leaking the first copy and doubling the work at
/// every nesting level.
std::any MyVisitor::visitNode(DTSParser::NodeContext *ctx)
{
    using Property = std::pair<std::string, std::vector<std::string>>;
    using Child = std::pair<std::string, Node *>;

    std::string key{std::any_cast<std::string>(visit(ctx->node_key()))};
    Node *node = new Node();
    for (auto *entry : ctx->node_value())
    {
        std::any result = visit(entry);
        if (auto *prop = std::any_cast<Property>(&result))
        {
            node->property[prop->first] = prop->second;
        }
        else if (auto *child = std::any_cast<Child>(&result))
        {
            // A repeated child name replaces the earlier node (last one wins,
            // as before); free the replaced node instead of leaking it.
            Node *&slot = node->nodes[child->first];
            if (slot != child->second)
            {
                delete slot;
            }
            slot = child->second;
        }
        else
        {
            // Same exception type as before, but without leaking `node`.
            delete node;
            throw std::bad_any_cast();
        }
    }
    return Child{key, node};
}
/// Visits each top-level node and appends the resulting Node to `roots`.
std::any MyVisitor::visitTop(DTSParser::TopContext *ctx)
{
    for (auto &root_ctx : ctx->node())
    {
        std::any visited = visit(root_ctx);
        Node *root = std::any_cast<std::pair<std::string, Node *>>(visited).second;
        roots.emplace_back(root);
    }
    return NULL;
}
/// Prints every root in collection order, ending each with a line break.
void MyVisitor::print()
{
    for (std::size_t index = 0; index < roots.size(); ++index)
    {
        roots[index]->print();
        std::cout << std::endl;
    }
}
std::vector<Node *> MyVisitor::get_roots()
{
return roots;
}
/// Releases every root node collected during the visit.
MyVisitor::~MyVisitor()
{
    for (std::size_t index = 0; index < roots.size(); ++index)
    {
        delete roots[index];
        roots[index] = nullptr;
    }
}
main.cpp
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include "antlr4-runtime.h"
#include "DTSLexer.h"
#include "DTSParser.h"
#include "myVisitor.h"
using namespace antlr4;
const static char Delimiter = '/';
/// Merges `cur` into `base`.
///
/// Properties of `cur` overwrite same-named properties of `base`. Children
/// present in both are merged recursively. Children present only in `cur` are
/// moved into `base`: ownership is transferred and they are removed from
/// `cur`, so each Node keeps exactly one owner and is deleted exactly once.
///
/// @param base node that receives the data; nothing happens if null
/// @param cur  node whose data is merged in; nothing happens if null or if it
///             is the same object as `base`
void recursive_merge(Node* base, Node* cur)
{
    if (base == nullptr || cur == nullptr || base == cur)
    {
        return;
    }
    for (const auto& [name, values] : cur->property)
    {
        base->property[name] = values;
    }
    // Merge children that exist on both sides first; `cur` keeps owning its
    // copies of those.
    for (const auto& [name, child] : cur->nodes)
    {
        const auto existing = base->nodes.find(name);
        if (existing != base->nodes.end())
        {
            recursive_merge(existing->second, child);
        }
    }
    // std::map::merge splices over only the entries whose key is absent from
    // `base`, removing them from `cur` at the same time.
    base->nodes.merge(cur->nodes);
}
/// Folds every root after the first into the first root and returns it.
/// Returns nullptr when `roots` is empty.
Node* merge_roots(std::vector<Node*> roots)
{
    if (roots.empty())
    {
        return nullptr;
    }
    Node* merged = roots.front();
    for (std::size_t index = 1; index < roots.size(); ++index)
    {
        recursive_merge(merged, roots[index]);
    }
    return merged;
}
/// Parses a device tree source file and prints the merged tree to stdout.
///
/// @param argc number of command-line arguments
/// @param argv argv[1], when given, is the path of the file to parse;
///             otherwise the original sample path is used
/// @return 0 on success; 1 if the file cannot be opened, the parser reports
///         syntax errors, or no root node was found
int main(int argc, const char **argv) {
    const std::string path =
        (argc > 1) ? argv[1] : "D:/documents/lang/C/antlr4/dts/template_arm64.dts";
    std::ifstream fr(path);
    if (!fr.is_open())
    {
        std::cerr << "cannot open device tree file: " << path << '\n';
        return 1;
    }
    std::stringstream buffer;
    buffer << fr.rdbuf();
    std::string text = buffer.str();
    // std::cout << "text: " << text <<std::endl;
    ANTLRInputStream input(text);
    DTSLexer lexer(&input);
    CommonTokenStream tokens(&lexer);
    DTSParser parser(&tokens);
    tree::ParseTree* tree = parser.top();
    // A tree built by error recovery may lack children the visitor relies on.
    if (parser.getNumberOfSyntaxErrors() > 0)
    {
        std::cerr << "syntax errors in device tree file: " << path << '\n';
        return 1;
    }
    // The visitor owns the root nodes and `node` below aliases one of them.
    // NOTE(review): the visitor is deliberately left alive until process exit;
    // destroying it is only safe if merged roots never share child nodes.
    auto* visitor = new MyVisitor();
    visitor->visit(tree);
    // visitor->print();
    Node* node = merge_roots(visitor->get_roots());
    if (node == nullptr)
    {
        std::cerr << "no root node found in: " << path << '\n';
        return 1;
    }
    node->print();
    return 0;
}
CMakeLists.txt
# Build script for the dts2map executable (ANTLR4-generated parser + visitor).
cmake_minimum_required(VERSION 3.23)
project(dts2map)
# NOTE(review): the compiler is selected after project(); CMake normally picks
# the compiler during project(), so this line may have no effect -- confirm.
set(CMAKE_CXX_COMPILER cl)
# C++17 is needed for std::any used by the visitor.
set(CMAKE_CXX_STANDARD 17)
# NOTE(review): the generator platform is usually chosen on the command line
# (-A x64) rather than set from project code -- confirm this is honoured.
set(CMAKE_GENERATOR_PLATFORM x64)
message(STATUS "file path: ${CMAKE_CURRENT_LIST_DIR}")
# Collect every .cpp file next to this CMakeLists.txt.
file(GLOB CPPLIST ${CMAKE_CURRENT_LIST_DIR}/*.cpp)
message(STATUS "cpps:: ${CPPLIST}")
# Machine-specific locations of the ANTLR4 C++ runtime headers and library.
include_directories(D:/documents/lang/C/antlr4/antlr-runtime/src)
link_directories(D:/documents/lang/C/antlr4/antlr-runtime)
# NOTE(review): if main.cpp lives in this directory it is already part of
# CPPLIST, so it is presumably listed twice here.
add_executable(dts2map main.cpp ${CPPLIST})
target_link_libraries(dts2map antlr4-runtime-static.lib)
# Defined because the static ANTLR4 runtime library is linked.
target_compile_definitions(dts2map PUBLIC ANTLR4CPP_STATIC)
- ANTLR4 环境配置、antlr4-runtime-static.lib 来源可见参考链接[3]。
- DTSLexer.cpp、DTSParser.cpp、DTSParserBaseVisitor.cpp、DTSParserVisitor.cpp 文件由命令 `lr2c -visitor -no-listener` 生成。
- 这里编的是静态库,编译时需添加编译宏 `ANTLR4CPP_STATIC`;动态库应该是可以默认不添加宏的。
设备树文件的最终解析输出
#address-cells : { 0x02 },
#size-cells : { 0x02 },
compatible : { linux,dummy-virt },
interrupt-parent : { &intc },
chosen : {
kaslr-seed : { 0xe61a0cad 0xcc2e0aab },
rng-seed : { 0x35d0afb0 0x45d1f049 0x9b6bc5af 0x61073667 0xfa7f51b8 0xa46898c7 0xf96fbe17 0x2093044 },
stdout-path : { /uart },
}
cpus : {
#address-cells : { 0x01 },
#size-cells : { 0x00 },
cpu-map : {
socket0 : {
cluster0 : {
core0 : {
cpu : { &cpu_0 },
}
}
}
}
cpu@0 : {
compatible : { arm,cortex-a53 },
device_type : { cpu },
extra_1 : { ab cd ef 00 ff fe },
extra_2 : { 0 0x1000000 },
reg : { 0x00 },
}
}
ethernet : {
compatible : { smsc,lan9118 smsc,lan9115 },
interrupts : { 0 4 4 },
phy-mode : { mii },
reg : { 0x0 0xC0000000 0x0 0x10000 },
reg-io-width : { 4 },
smsc,irq-active-high : {},
smsc,irq-push-pull : {},
}
intc : {
#address-cells : { 0x02 },
#interrupt-cells : { 0x03 },
#size-cells : { 0x02 },
compatible : { arm,cortex-a15-gic },
interrupt-controller : {},
ranges : {},
reg : { 0x00 0x8000000 0x00 0x10000 0x00 0x8010000 0x00 0x10000 },
v2m : {
compatible : { arm,gic-v2m-frame },
msi-controller : {},
reg : { 0x00 0x8020000 0x00 0x1000 },
}
}
mem : {
}
memory : {
device_type : { memory },
reg : { 0x00 0x80000000 0x00 0x10000000 },
}
psci : {
assigned-clocks : { &v2m_sysctl 0 &v2m_sysctl 1 &v2m_sysctl 3 &v2m_sysctl 3 },
compatible : { arm,psci-1.0 arm,psci-0.2 arm,psci },
cpu_off : { 0x84000002 },
cpu_on : { 0xc4000003 },
cpu_suspend : { 0xc4000001 },
method : { hvc },
migrate : { 0xc4000005 },
}
timer : {
always-on : {},
compatible : { arm,armv8-timer\0arm,armv7-timer },
interrupts : { 0x01 0x0d 0x104 0x01 0x0e 0x104 0x01 0x0b 0x104 0x01 0x0a 0x104 },
}
uart : {
clock-names : { uartclk apb_pclk },
clocks : { &apb_pclk &apb_pclk },
compatible : { arm,pl011 arm,primecell },
interrupts : { 0x00 0x01 0x04 },
reg : { 0x00 0x9000000 0x00 0x1000 },
}
参考链接
[1] 设备树文件规范
[2] Linux设备树语法规范
[3] ANTLR4 Cpp环境配置与使用