关于怎么造xml解析这个轮子
问题分析
这个解析库只是基础版,只处理最基本的情况,并没有覆盖 XML 文件的所有写法。它适用于类似游戏 mod 中 XML 解析这类场景:此时我们可以自行规定 XML 的书写规范,好处是与通用解析器相比,针对固定格式编写的解析器性能更好。
实现
读取文件
// Reads the XML file in chunks of FILE_SIZE bytes and hands each chunk to
// dealBuffer() together with the element stack.
FILE* file = nullptr;
errno_t err = fopen_s(&file, _file_name, "rb");
if(err != 0 || file == nullptr){
    // TODO: log that the file could not be opened
    return;
}
char* buffer = new char[FILE_SIZE];
size_t read_size = 0;
// Stack handed to dealBuffer(); seeded with the root item (presumably it
// tracks the currently open elements - confirm in dealBuffer()).
std::stack<ZDSJ::XmlItem*> stack;
stack.push(this->m_root);
// Loop on the number of bytes actually read instead of on feof(): feof() only
// turns true after a read has already hit end-of-file, and it never turns true
// after a read error, so testing it up front can pass an empty chunk to
// dealBuffer() or spin forever.
while((read_size = fread_s(buffer, FILE_SIZE, 1, FILE_SIZE, file)) > 0){
    // A true result from dealBuffer() ends reading early.
    if(this->dealBuffer(buffer, read_size, stack)){
        break;
    }
}
// Label kept in case code outside this excerpt jumps to the cleanup.
end:
delete[] buffer;
fclose(file);
这一部分没什么好说的,正常读一下文件就行。主要在如何处理数据上。
数据处理
这部分主要是处理读取的文件数据,并转为树形结构,下图为示意图:
对应的xml结构就是
<root>
<name></name>
<url></url>
<logo><img></img></logo>
</root>
分析文件结构可以明显看出,可以用正则表达式对字符串进行处理,从而获取每个标签的起始与结束位置。
但这里没有用正则,原因有两个
- c++正则regex库只接收std::string类型,string类型在创建时会拷贝一份字符串,造成额外开销。
- 要获取标签内的属性,还需要再次读取标签内容;也就是说,算上正则匹配,单个标签会被遍历两遍。
这里做了个类似于状态机的机制处理
状态机
图就不详细画了,类似于这种结构,每个状态对应一种处理方式,满足条件时转为其他状态。
实现方式
// States of the tag-parsing state machine. The values are the same ones the
// compiler would assign implicitly (0..9), spelled out for readability.
enum Deal_State{
    Nothing     = 0, // not entered by any handler visible next to this enum
    TagBegin    = 1, // entered when '<' is seen
    Description = 2, // entered when '?' is seen; left at the next '>'
    GetTagName  = 3, // collecting the characters of a tag name
    SkipSpace   = 4, // skipping spaces and line breaks; also the start state
    GetKey      = 5, // collecting an attribute key up to '='
    GetValue    = 6, // collecting an attribute value
    TagEnd      = 7, // '>' reached; the driving loop stops here
    Transfer    = 8, // entered when a backslash is seen inside a value
    CloseTag    = 9  // entered when '/' is seen; left at the next '>'
};
先用枚举定义出各个状态,随后构建状态类。从上图可以看出,处理当前状态时需要知道上一个状态的信息,因此状态类要同时保存当前状态和上一个状态。
// Holder for the parser's state machine: stores the current state, the
// previous state, and one handler per state.
class State{
public:
// Signature of a state handler: takes a Deal_State and returns a Deal_State.
using callbackType = Deal_State(Deal_State);
State();
State(Deal_State _state);
// Associates _callback with _state (definition not shown here; presumably it
// is stored in m_callback - confirm).
void setCallBack(Deal_State _state, std::function<callbackType> _callback);
// Runs the handler of the current state, passing it the previous state, makes
// the handler's result the current state, and returns the current state.
Deal_State operator()();
// Switches to _state and remembers the state that was current before;
// assigning the state that is already current changes nothing.
void operator=(Deal_State _state);
~State();
private:
// Current state.
Deal_State m_state;
// State that was current before the most recent change.
Deal_State m_last_state;
// Handler looked up by state when the machine is run.
std::map<Deal_State, std::function<callbackType>> m_callback;
};
callback用来设置不同状态的处理方式,返回值为下一步将要进入的状态。
// Runs one step of the state machine.
//
// Looks up the handler registered for the current state, calls it with the
// previous state, and assigns the handler's result as the new current state
// (through operator=, which also records the previous state).
//
// Returns the current state after the step.
ZDSJ::Deal_State ZDSJ::State::operator()(){
    const auto handler = this->m_callback.find(this->m_state);
    // A state may have no handler registered, or an empty one. Dereferencing
    // end() is undefined behaviour and calling an empty std::function throws,
    // so in that case leave the state untouched and report it as is.
    if(handler == this->m_callback.end() || !handler->second){
        return this->m_state;
    }
    *this = handler->second(this->m_last_state);
    return this->m_state;
}
重载括号运算符,用来运行状态机。
// Switches the machine to _state. Only a real change of state is recorded:
// re-assigning the current state leaves both the current and the previous
// state as they were.
void ZDSJ::State::operator=(ZDSJ::Deal_State _state){
    if(this->m_state != _state){
        this->m_last_state = this->m_state;
        this->m_state = _state;
    }
}
重载赋值运算符,保存上一步状态。
// The machine starts out in SkipSpace.
ZDSJ::State state(ZDSJ::SkipSpace);
// TagBegin: picks the next state from the current character.
state.setCallBack(ZDSJ::TagBegin, [&](ZDSJ::Deal_State) -> ZDSJ::Deal_State{
    const auto current = _buffer[tag_index];
    if(current == '/'){
        return ZDSJ::CloseTag;
    }
    if(current == ' '){
        return ZDSJ::SkipSpace;
    }
    if(current == '?'){
        return ZDSJ::Description;
    }
    // Any other character starts a tag name: create the item and begin
    // recording the name at this position.
    item = new ZDSJ::XmlItem();
    name_key_ptr = _buffer + tag_index;
    name_key_len = 1;
    return ZDSJ::GetTagName;
});
// Description: every character is ignored until '>' is reached; at that point
// the current item is set to the root and the machine goes to TagEnd.
state.setCallBack(ZDSJ::Description, [&](ZDSJ::Deal_State) -> ZDSJ::Deal_State {
    if(_buffer[tag_index] != '>'){
        return ZDSJ::Description;
    }
    item = const_cast<ZDSJ::XmlItem*>(this->m_root);
    return ZDSJ::TagEnd;
});
// GetTagName: extends the recorded name until a space or '>' ends it, then
// stores the name on the current item and resets the name bookkeeping.
state.setCallBack(ZDSJ::GetTagName, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State{
    if(item == nullptr){
        // TODO: error handling
        return ZDSJ::TagEnd;
    }
    const auto current = _buffer[tag_index];
    const bool name_finished = (current == ' ' || current == '>');
    if(!name_finished){
        ++name_key_len;
        return ZDSJ::GetTagName;
    }
    item->name(name_key_ptr, name_key_len);
    name_key_ptr = nullptr;
    name_key_len = 0;
    // A space leads on to SkipSpace; '>' ends the tag.
    return current == ' ' ? ZDSJ::SkipSpace : ZDSJ::TagEnd;
});
// SkipSpace: skips whitespace and decides what the next meaningful character
// starts. Structural characters ('?', '/', '<', '>') map directly to a state;
// any other non-whitespace character is interpreted according to the state the
// machine was in before it entered SkipSpace (_last_state).
state.setCallBack(ZDSJ::SkipSpace, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State{
    ZDSJ::Deal_State result = ZDSJ::SkipSpace;
    switch (_buffer[tag_index])
    {
    case '?':
        result = ZDSJ::Description;
        break;
    case '/':
        result = ZDSJ::CloseTag;
        break;
    case '<':
        result = ZDSJ::TagBegin;
        break;
    case '>':
        result = ZDSJ::TagEnd;
        break;
    // Whitespace is skipped. Tab is included so that it is not mistaken for
    // the first character of a name, key or value.
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        break;
    default:
        switch (_last_state)
        {
        case ZDSJ::TagBegin:
            // First character of a tag name after "< ".
            item = new ZDSJ::XmlItem();
            result = ZDSJ::GetTagName;
            name_key_ptr = _buffer + tag_index;
            name_key_len = 1;
            break;
        case ZDSJ::GetTagName:
            // First character of an attribute key after the tag name.
            result = ZDSJ::GetKey;
            name_key_ptr = _buffer + tag_index;
            name_key_len = 1;
            break;
        case ZDSJ::SkipSpace:
            break;
        case ZDSJ::GetKey:
            // First character after '=': start recording the value. The quote
            // counter starts at 1, so the next '"' seen in GetValue closes it.
            result = ZDSJ::GetValue;
            value_ptr = _buffer + tag_index;
            value_len = 1;
            quotation_marks_stack = 1;
            break;
        case ZDSJ::GetValue:
            // First character of the next attribute key after a value.
            result = ZDSJ::GetKey;
            name_key_ptr = _buffer + tag_index;
            name_key_len = 1;
            break;
        default:
            break;
        }
        break;
    }
    return result;
});
// GetKey: '=' ends the key and moves on to SkipSpace; a space is ignored;
// every other character extends the recorded key by one.
state.setCallBack(ZDSJ::GetKey, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State{
    const auto current = _buffer[tag_index];
    if(current == '='){
        return ZDSJ::SkipSpace;
    }
    if(current != ' '){
        ++name_key_len;
    }
    return ZDSJ::GetKey;
});
// GetValue: consumes one character of an attribute value. value_len is
// incremented at the end of every call, whichever branch ran.
state.setCallBack(ZDSJ::GetValue, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State{
ZDSJ::Deal_State result = ZDSJ::GetValue;
switch (_buffer[tag_index])
{
// '<' and '>' adjust a nesting counter; a '>' that drives the counter below
// zero ends the tag.
case '<':
++angle_brackets_stack;
break;
case '>':
--angle_brackets_stack;
if(angle_brackets_stack < 0){
result = ZDSJ::TagEnd;
}
break;
case '\"':
++quotation_marks_stack;
// When the quote counter reaches 2, store the attribute on the current item.
// value_ptr + 1 / value_len - 1 skip the first recorded character (presumably
// the opening quote - confirm where value_ptr is set) and exclude the current
// quote, which has not been counted yet.
if (quotation_marks_stack == 2) {
item->property(name_key_ptr, name_key_len, value_ptr + 1, value_len - 1);
// Set to -1 so that the unconditional ++value_len below leaves it at 0.
value_len = -1;
name_key_len = 0;
result = ZDSJ::SkipSpace;
quotation_marks_stack = 0;
}
break;
case '\\':
// NOTE(review): no callback for ZDSJ::Transfer is registered in the code
// visible alongside this handler - confirm one exists.
result = ZDSJ::Transfer;
break;
case ' ':
// NOTE(review): in this handler quotation_marks_stack is reset to 0 as soon
// as it reaches 2, so this branch only runs if other code leaves the counter
// at 2 - confirm whether it is reachable.
if(quotation_marks_stack == 2){
item->property(name_key_ptr, name_key_len, value_ptr + 1, value_len - 1);
value_len = -1;
name_key_len = 0;
result = ZDSJ::SkipSpace;
quotation_marks_stack = 0;
}
break;
default:
break;
}
++value_len;
return result;
});
// CloseTag: stays in CloseTag until '>' is reached, then goes to TagEnd.
state.setCallBack(ZDSJ::CloseTag, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State {
    return _buffer[tag_index] == '>' ? ZDSJ::TagEnd : ZDSJ::CloseTag;
});
state.setCallBack(ZDSJ::TagEnd, [&](ZDSJ::Deal_State _last_state) -> ZDSJ::Deal_State{
return ZDSJ::TagEnd;
});
while(tag_index < _size){
if(ZDSJ::TagEnd == state()){
break;
}
++tag_index;
}
通过设置回调,可以为不同状态定义各自的处理方式,从而完成对文件数据的处理。这种写法不仅扩展性好,可读性也较好;如果改为在 while 循环里写 switch,则很容易出现难以阅读和维护的多层嵌套。