tinyxml2源码分析-2

最新推荐文章于 2023-05-12 16:09:43 发布

走调的鱼

最新推荐文章于 2023-05-12 16:09:43 发布

阅读量1.1k

点赞数 4

分类专栏： tinyxml2

本文链接：https://blog.csdn.net/qiukongjian/article/details/52663946

版权

tinyxml2 专栏收录该内容

5 篇文章 3 订阅

订阅专栏

</pre><p>为避免对std::string的依赖，自己编写一个StrPair，如下(这不是主代码，直接上最终代码)：</p><p></p><pre name="code" class="cpp">class StrPair
{
    // A minimal string holder described by a [_start, _end) character range
    // plus a set of bit flags. Depending on the flags the range is either
    // borrowed (Set / SetInternedStr) or heap-owned (SetStr, NEEDS_DELETE).
    // Copying is disabled; the declared TransferTo() is the intended way to
    // move contents between instances.
public:
    // Public processing flags. They are passed to Set()/ParseText() and are
    // consumed by GetStr() the first time the string is read.
    enum {
        NEEDS_ENTITY_PROCESSING			= 0x01,	// decode "&...;" sequences in GetStr()
        NEEDS_NEWLINE_NORMALIZATION		= 0x02,	// fold CR, CR LF and LF CR into LF in GetStr()
        NEEDS_WHITESPACE_COLLAPSING     = 0x04,	// run CollapseWhitespace() in GetStr()

        // Convenience combinations for the different kinds of parsed text.
        TEXT_ELEMENT		            = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
        TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
        ATTRIBUTE_NAME		            = 0,
        ATTRIBUTE_VALUE		            = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
        ATTRIBUTE_VALUE_LEAVE_ENTITIES  = NEEDS_NEWLINE_NORMALIZATION,
        COMMENT							= NEEDS_NEWLINE_NORMALIZATION
    };

    // Starts out empty: no flags and null range pointers.
    StrPair() : _flags( 0 ), _start( 0 ), _end( 0 ) {}
    ~StrPair();

    // Points this pair at the caller's buffer [start, end) without copying.
    // Any previous contents are released via Reset(). NEEDS_FLUSH is added so
    // that the first GetStr() call terminates and processes the text in place.
    void Set( char* start, char* end, int flags ) {
        TIXMLASSERT( start );
        TIXMLASSERT( end );
        Reset();
        _start  = start;
        _end    = end;
        _flags  = flags | NEEDS_FLUSH;
    }

    // Returns the NUL-terminated string, processing it first if still pending.
    const char* GetStr();

    // True when the range is empty (both pointers equal, including both null).
    bool Empty() const {
        return _start == _end;
    }

    // Stores a pointer to a string owned elsewhere; nothing is copied and no
    // flag is set, so Reset() will not delete it.
    // NOTE(review): _end stays null after this call while GetStr() asserts
    // that _end is non-null - confirm how interned strings are read back.
    void SetInternedStr( const char* str ) {
        Reset();
        _start = const_cast<char*>(str);
    }

    // Stores a heap-allocated private copy of str (marked NEEDS_DELETE).
    void SetStr( const char* str, int flags=0 );

    // Scans for endTag and records the text that precedes it.
    char* ParseText( char* in, const char* endTag, int strFlags );
    // Records the XML name that starts at in.
    char* ParseName( char* in );

    // Hands the contents over to other (definition not shown in this listing).
    void TransferTo( StrPair* other );
    // Frees the buffer if it is owned and returns to the empty state.
	void Reset();

private:
    // Trims the ends and squeezes inner whitespace runs to one space, in place.
    void CollapseWhitespace();

    // Internal bookkeeping flags, kept outside the range of the public ones.
    enum {
        NEEDS_FLUSH = 0x100,	// set by Set(); cleared by the first GetStr()
        NEEDS_DELETE = 0x200	// set by SetStr(); Reset() must delete[] _start
    };

    int     _flags;	// bitwise OR of the public and internal flags
    char*   _start;	// first character of the string
    char*   _end;	// one past the last character

    StrPair( const StrPair& other );	// not supported
    void operator=( StrPair& other );	// not supported, use TransferTo()
};

代码很容易读懂：一个string类的基本要求——构造、Set、Get，但是禁止了拷贝，改用函数TransferTo来转移内容。有点费解的是_flags成员变量：StrPair持有的指针在有些情况下不需要delete，这就是枚举NEEDS_DELETE、NEEDS_FLUSH的来源。所有的标志量都按bit定义，所以操作_flags时使用位运算。

Reset:

// Releases the buffer when this pair owns it (NEEDS_DELETE) and puts the
// object back into the empty state: no flags, null range pointers.
void StrPair::Reset()
{
    const bool ownsBuffer = ( _flags & NEEDS_DELETE ) != 0;
    if ( ownsBuffer ) {
        delete [] _start;
    }
    _start = 0;
    _end = 0;
    _flags = 0;
}

SetStr：先调用Reset，将原来的字符串释放(如果它带有NEEDS_DELETE标志)，然后调用strlen获取长度，分配内存并拷贝(含结尾的'\0')，最后在传入的flags上加上NEEDS_DELETE标志。

// Replaces the current contents with a heap-allocated copy of `str`.
// The copy includes the terminating NUL, and the pair is marked NEEDS_DELETE
// so that Reset() frees it later. `flags` are OR-ed into the stored flags.
void StrPair::SetStr( const char* str, int flags )
{
    TIXMLASSERT( str );
    Reset();
    TIXMLASSERT( _start == 0 );

    const size_t byteCount = strlen( str ) + 1;	// characters plus the NUL
    char* const copy = new char[ byteCount ];
    memcpy( copy, str, byteCount );

    _start = copy;
    _end   = copy + ( byteCount - 1 );
    _flags = flags | NEEDS_DELETE;
}

ParseText：所谓的ParseText，其实就是个find：从p开始查找endTag，找到后调用Set记录[start, p)这段文本及其标志，并返回endTag之后的位置；找不到则返回0。

// Scans forward from `p` for the first occurrence of `endTag`.
// On success the text before the tag is recorded through Set() with
// `strFlags`, and the position just past the tag is returned.
// Returns 0 when the input ends before the tag is found.
char* StrPair::ParseText( char* p, const char* endTag, int strFlags )
{
    TIXMLASSERT( endTag && *endTag );

    char* const textBegin = p;
    const char firstTagChar = *endTag;
    const size_t tagLength = strlen( endTag );

    for ( ; *p; ++p ) {
        // Cheap first-character test before the full comparison.
        if ( *p != firstTagChar ) {
            continue;
        }
        if ( strncmp( p, endTag, tagLength ) != 0 ) {
            continue;
        }
        Set( textBegin, p, strFlags );
        return p + tagLength;
    }
    return 0;
}

IsNameStartChar:

    // True when `ch` may begin a name: an ASCII letter, ':' or '_'.
    // Every byte >= 128 is accepted as well; this is a heuristic that avoids
    // implementing a Unicode-aware isalpha().
    inline static bool IsNameStartChar( unsigned char ch ) {
        const bool nonAscii = ch >= 128;
        return nonAscii || isalpha( ch ) || ch == ':' || ch == '_';
    }

IsNameChar:

    // True when `ch` may appear after the first character of a name:
    // anything IsNameStartChar() accepts, plus ASCII digits, '.' and '-'.
    inline static bool IsNameChar( unsigned char ch ) {
        if ( IsNameStartChar( ch ) ) {
            return true;
        }
        if ( isdigit( ch ) ) {
            return true;
        }
        return ch == '.' || ch == '-';
    }

ParseName:搜索名称

// Records the name that begins at `p`: one name-start character followed by
// any number of name characters. Returns the position just past the name,
// or 0 when `p` is null, empty, or does not begin with a name-start character.
char* StrPair::ParseName( char* p )
{
    const bool hasInput = p && *p;
    if ( !hasInput || !XMLUtil::IsNameStartChar( *p ) ) {
        return 0;
    }

    char* const nameBegin = p;
    for ( ++p; *p && XMLUtil::IsNameChar( *p ); ++p ) {
        // Advance across the remaining name characters.
    }

    Set( nameBegin, p, 0 );
    return p;
}

CollapseWhitespace:就是把多余的空格合并为一个...

// Collapses whitespace in place: leading and trailing whitespace is removed
// and every interior run of whitespace becomes a single space character.
// The result is NUL-terminated; _end is not touched.
void StrPair::CollapseWhitespace()
{
    // Adjusting _start would cause undefined behavior on delete[]
    TIXMLASSERT( ( _flags & NEEDS_DELETE ) == 0 );

    // Drop leading whitespace by moving the start of the string forward.
    _start = XMLUtil::SkipWhiteSpace( _start );
    if ( *_start == 0 ) {
        return;
    }

    const char* src = _start;	// next character to read
    char* dst = _start;		// next position to write

    while ( *src != 0 ) {
        if ( XMLUtil::IsWhiteSpace( *src ) ) {
            src = XMLUtil::SkipWhiteSpace( src );
            if ( *src == 0 ) {
                // The run reached the end of the string: emit nothing, which
                // trims the trailing whitespace.
                break;
            }
            *dst++ = ' ';
        }
        *dst++ = *src++;
    }
    *dst = 0;
}

GetStr：NEEDS_FLUSH表示字符串还没有被处理过，第一次取值时需要在结尾(_end处)写入'\0'，并根据不同的标志处理字符串：规范化换行符、处理xml转义字符、合并空白。

const char* StrPair::GetStr()
{
    TIXMLASSERT( _start );
    TIXMLASSERT( _end );
    if ( _flags & NEEDS_FLUSH ) {
        *_end = 0;
        _flags ^= NEEDS_FLUSH;//清除标记

        if ( _flags ) {
            const char* p = _start;	// the read pointer
            char* q = _start;	// the write pointer

            while( p < _end ) {
                if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == CR ) {
                    if ( *(p+1) == LF ) {
                        p += 2;<span style="font-family: Arial, Helvetica, sans-serif;">// \r\n 替换为\n</span>
                    }
                    else {
                        ++p;<span style="font-family: Arial, Helvetica, sans-serif;">// </span><span style="font-family: Arial, Helvetica, sans-serif;">\r</span><span style="font-family: Arial, Helvetica, sans-serif;"> </span><span style="font-family: Arial, Helvetica, sans-serif;">替换为\n</span>
                    }
                    *q++ = LF;
                }
                else if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == LF ) {
                    if ( *(p+1) == CR ) {//\n\r替换为\n
                        p += 2;
                    }
                    else {
                        ++p;
                    }
                    *q++ = LF;
                }

<span>		</span>//对&#dddd;类型的unicode转义
                else if ( (_flags & NEEDS_ENTITY_PROCESSING) && *p == '&' ) {
                    if ( *(p+1) == '#' ) {
                        const int buflen = 10;
                        char buf[buflen] = { 0 };
                        int len = 0;
                        char* adjusted = const_cast<char*>( XMLUtil::GetCharacterRef( p, buf, &len ) );
                        if ( adjusted == 0 ) {
                            *q = *p;
                            ++p;
                            ++q;
                        }
                        else {
                            TIXMLASSERT( 0 <= len && len <= buflen );
                            TIXMLASSERT( q + len <= adjusted );
                            p = adjusted;
                            memcpy( q, buf, len );
                            q += len;
                        }
                    }
                    else {
                        bool entityFound = false;//循环遍历转义字符全局变量，如果字符串相等，则转义
                        for( int i = 0; i < NUM_ENTITIES; ++i ) {
                            const Entity& entity = entities[i];
                            if ( strncmp( p + 1, entity.pattern, entity.length ) == 0
                                    && *( p + entity.length + 1 ) == ';' ) {
                                // Found an entity - convert.
                                *q = entity.value;
                                ++q;
                                p += entity.length + 2;
                                entityFound = true;
                                break;
                            }
                        }
                        if ( !entityFound ) {
                            // fixme: treat as error?找不到转义字符，继续
                            ++p;
                            ++q;
                        }
                    }
                }
                else {
                    *q = *p;
                    ++p;
                    ++q;
                }
            }
            *q = 0;
        }
        // The loop below has plenty going on, and this
        // is a less useful mode. Break it out.
        if ( _flags & NEEDS_WHITESPACE_COLLAPSING ) {
            CollapseWhitespace();
        }
        _flags = (_flags & NEEDS_DELETE);
    }
    TIXMLASSERT( _start );
    return _start;
}

entities是转义字符定义

// One named XML entity. The field order is relied upon by the aggregate
// initialisers of the `entities` table.
struct Entity {
    const char* pattern;	// entity name without the leading '&' and trailing ';'
    int length;			// number of characters in `pattern`
    char value;			// the character the entity is replaced with
};

// Table of the named entities that StrPair::GetStr() decodes; each row is
// { name, name length, replacement character }.
// DOUBLE_QUOTE and SINGLE_QUOTE are defined outside this listing -
// presumably '"' and '\''; confirm against their definitions.
static const int NUM_ENTITIES = 5;
static const Entity entities[NUM_ENTITIES] = {
    { "quot", 4,	DOUBLE_QUOTE },
    { "amp", 3,		'&'  },
    { "apos", 4,	SINGLE_QUOTE },
    { "lt",	2, 		'<'	 },
    { "gt",	2,		'>'	 }
};

在我们实现其他子类时，先考虑一下内存分配，内存池，我们先定义内存池接口：

// Abstract interface of a memory pool. Everything is pure virtual; the
// XMLDocument members of type MemPoolT<SIZE> are used through this interface
// (for example via XMLAttribute::_memPool).
class MemPool
{
public:
	MemPool() {}
	virtual ~MemPool() {}
	// Size of one pool item; compared against sizeof(XMLAttribute) before
	// an attribute is allocated.
	virtual int ItemSize() const = 0;
	// Returns storage for one item; callers construct the object in it with
	// placement new.
	virtual void* Alloc() = 0;
	// Gives an item's storage back; callers run the destructor first.
	virtual void Free(void*) = 0;
	// Called once a newly allocated item has been created and linked.
	// NOTE(review): the exact bookkeeping is implementation-defined and not
	// shown here - confirm in MemPoolT.
	virtual void SetTracked() = 0;
	// NOTE(review): presumably releases all pool memory - confirm in MemPoolT.
	virtual void Clear() = 0;
};

接下来是内存池放在哪里：毫无悬念，只能放在XMLDocument，因为对于一个xml文件，只有一个XMLDocument。既然这样，我们先定义XMLDocument；读取文件的部分放在后续编写，这里先保留接口。先实现向下转型，定义RootElement，然后就是各个节点的New函数。其中的MemPoolT是对MemPool的实现，这里先不展开，后面会分析其代码。

// The document node: the only node type with a public constructor. It owns
// the memory pools and declares the New*() factories for the other node
// types. XMLElement is a friend because it allocates attributes directly
// from _attributePool.
class XMLDocument : public XMLNode
{
	friend class XMLElement;
public:
	/// Constructor.
	/// NOTE(review): the effect of processEntities and of the Whitespace
	/// mode is implemented outside this listing - confirm there.
	XMLDocument(bool processEntities = true, Whitespace = PRESERVE_WHITESPACE);
	~XMLDocument();

	/// Downcast helper: a document always returns itself. The assertion
	/// checks that the inherited _document pointer refers to this object.
	virtual XMLDocument* ToDocument()				{
		TIXMLASSERT(this == _document);
		return this;
	}
	virtual const XMLDocument* ToDocument() const	{
		TIXMLASSERT(this == _document);
		return this;
	}

	/// Load an XML file; the result is reported as an XMLError
	/// (interface only, implemented later).
	XMLError LoadFile(const char* filename);

	/// Save the document to a file (interface only, implemented later).
	/// NOTE(review): `compact` presumably selects compact output - confirm.
	XMLError SaveFile(const char* filename, bool compact = false);

	/// The root element is simply the first child element of the document.
	XMLElement* RootElement()				{
		return FirstChildElement();
	}
	const XMLElement* RootElement() const	{
		return FirstChildElement();
	}

	// Factory functions. The constructors of the node classes are not
	// public, so nodes are created through the document.
	XMLElement* NewElement(const char* name);

	XMLComment* NewComment(const char* comment);

	XMLText* NewText(const char* text);

	XMLDeclaration* NewDeclaration(const char* text = 0);

	XMLUnknown* NewUnknown(const char* text);

	/// Delete a node (definition not shown in this listing).
	void DeleteNode(XMLNode* node);
	/// Clear the document, resetting it to the initial state.
	void Clear();
private:
	XMLDocument(const XMLDocument&);	// not supported
	void operator=(const XMLDocument&);	// not supported
	XMLError    _errorID;
	// One pool per object size. _attributePool is used by
	// XMLElement::FindOrCreateAttribute; the use of the other pools is
	// outside this listing.
	MemPoolT< sizeof(XMLElement) >	 _elementPool;
	MemPoolT< sizeof(XMLAttribute) > _attributePool;
	MemPoolT< sizeof(XMLText) >		 _textPool;
	MemPoolT< sizeof(XMLComment) >	 _commentPool;
};

接下来我们实现XMLText

// A text node. It is either ordinary text or a CDATA section, distinguished
// by the _isCData flag. The constructor is protected, so instances are
// created by the friend class XMLDocument.
class XMLText : public XMLNode
{
	friend class XMLDocument;
public:
	/// Downcast helper: a text node returns itself.
	virtual XMLText* ToText(){
		return this;
	}
	virtual const XMLText* ToText() const{
		return this;
	}
	/// Declare whether this should be CDATA or standard text.
	void SetCData(bool isCData)	{
		_isCData = isCData;
	}
	/// Returns true if this is a CDATA text element.
	bool CData() const{
		return _isCData;
	}
protected:
	// New text nodes start out as ordinary (non-CDATA) text.
	XMLText(XMLDocument* doc) : XMLNode(doc), _isCData(false){
	}
	virtual ~XMLText(){
	}
private:
	bool _isCData;	// true for CDATA, false for ordinary text
	XMLText(const XMLText&);	// not supported
	XMLText& operator=(const XMLText&);	// not supported
};

首先实现向下转型函数，XMLText有两种类型，一种是CData类型，一种是普通类型，这里需要一个标志来区分_isCData，所以有SetCData和CData接口。这里构造函数仍为protected，是因为该类型需要在XMLDocument中new出来。好像少了Text内容，这时候我们是不是应该增加一个成员变量来实现呢，比如std::string _strValue;考虑到所有子类都有自己的内容，所以把内容放在基类，

给XMLNode增加protected成员变量和相关接口函数。Value的意义对不同的节点是不同的，具体如下：

Document:空(返回NULL，而不是空字符串)
Element: Element的名字
Comment: 注释文字
Unknown: the tag contents
Text: 文本

// Members added to XMLNode so that every node type shares one value slot.
// The meaning depends on the node type: null for a document, the name for an
// element, the text for comment/unknown/text nodes.
// _value is mutable, so it can be modified from const member functions such
// as Value() (StrPair::GetStr() is non-const and processes text lazily).
mutable StrPair	_value;

    // Returns the node's value string.
    const char* Value() const;
    // Sets the node's value.
    // NOTE(review): staticMem presumably means `val` outlives the node and
    // can be referenced without copying - confirm in the definition.
    void SetValue( const char* val, bool staticMem=false );

接下来的似乎不言而喻的

// A comment node. It adds no data of its own; the comment text is held in
// the value inherited from XMLNode. The constructor is protected, so
// instances are created by the friend class XMLDocument.
class XMLComment : public XMLNode
{
	friend class XMLDocument;
public:
	/// Downcast helper: a comment node returns itself.
	virtual XMLComment*	ToComment()					{
		return this;
	}
	virtual const XMLComment* ToComment() const		{
		return this;
	}
protected:
	XMLComment(XMLDocument* doc);
	virtual ~XMLComment();
private:
	XMLComment(const XMLComment&);	// not supported
	XMLComment& operator=(const XMLComment&);	// not supported
};
// A declaration node. It adds no data of its own beyond what XMLNode
// provides. The constructor is protected, so instances are created by the
// friend class XMLDocument.
class XMLDeclaration : public XMLNode
{
	friend class XMLDocument;
public:
	/// Downcast helper: a declaration node returns itself.
	virtual XMLDeclaration*	ToDeclaration()					{
		return this;
	}
	virtual const XMLDeclaration* ToDeclaration() const		{
		return this;
	}
protected:
	XMLDeclaration(XMLDocument* doc);
	virtual ~XMLDeclaration();
private:
	XMLDeclaration(const XMLDeclaration&);	// not supported
	XMLDeclaration& operator=(const XMLDeclaration&);	// not supported
};

// A node for markup that is not recognised as another node type; its value
// is the tag contents. The constructor is protected, so instances are
// created by the friend class XMLDocument.
class XMLUnknown : public XMLNode
{
	friend class XMLDocument;
public:
	/// Downcast helper: an unknown node returns itself.
	virtual XMLUnknown*	ToUnknown()					{
		return this;
	}
	virtual const XMLUnknown* ToUnknown() const		{
		return this;
	}
protected:
	XMLUnknown(XMLDocument* doc);
	virtual ~XMLUnknown();
private:
	XMLUnknown(const XMLUnknown&);	// not supported
	XMLUnknown& operator=(const XMLUnknown&);	// not supported
};

在编写XMLElement前，首先编写XMLAttribute：

// A single name/value pair of an element. Attributes form a singly linked
// list through _next. Constructor and destructor are private: only the
// friend class XMLElement creates and destroys attributes, using storage
// taken from the pool recorded in _memPool.
class XMLAttribute
{
	friend class XMLElement;
public:
	/// The name of the attribute.
	const char* Name() const;

	/// The value of the attribute.
	const char* Value() const;

	/// The next attribute in the list.
	const XMLAttribute* Next() const {
		return _next;
	}
	/// Set the attribute's value to a string.
	void SetAttribute(const char* value);
private:
	// NOTE(review): BUF_SIZE is not used by any code in this listing.
	enum { BUF_SIZE = 200 };

	// A new attribute is unlinked and not yet associated with a pool.
	XMLAttribute() : _next(0), _memPool(0) {}
	virtual ~XMLAttribute()	{}
	XMLAttribute(const XMLAttribute&);	// not supported
	void operator=(const XMLAttribute&);	// not supported
	void SetName(const char* name);
	// Mutable so that the const accessors can call the non-const
	// StrPair::GetStr().
	mutable StrPair _name;
	mutable StrPair _value;
	XMLAttribute*   _next;		// next attribute of the same element, or null
	MemPool*        _memPool;	// pool that provided this object's storage
};

所谓的Attribute其实就是个key、value对。Attribute还要实现单向链表，这样只要在Element中保存首属性，就可以通过_next来遍历所有属性。Attribute的构造函数是private的，只能由友元XMLElement(在FindOrCreateAttribute中)使用Document的内存池来创建。

// An element node. Its name is stored as the node value inherited from
// XMLNode, and its attributes are kept in a singly linked list that starts
// at _rootAttribute. The constructor is private, so elements are created by
// the friend class XMLDocument.
class XMLElement : public XMLNode
{
	friend class XMLDocument;
public:
	/// Get the name of an element (which is the Value() of the node.)
	const char* Name() const		{
		return Value();
	}
	/// Set the name of the element.
	void SetName(const char* str, bool staticMem = false)	{
		SetValue(str, staticMem);
	}

	/// Downcast helper: an element returns itself.
	virtual XMLElement* ToElement()				{
		return this;
	}
	virtual const XMLElement* ToElement() const {
		return this;
	}

	/// Returns the value of the attribute called `name`, or 0 if there is
	/// no such attribute. When `value` is non-null, the attribute's value
	/// must also equal `value`, otherwise 0 is returned.
	const char* Attribute(const char* name, const char* value = 0) const;

	/// Sets the named attribute to value.
	/// The attribute is created first if it does not exist yet.
	void SetAttribute(const char* name, const char* value)	{
		XMLAttribute* a = FindOrCreateAttribute(name);
		a->SetAttribute(value);
	}

	/**
	Delete an attribute.
	The first attribute whose name matches is unlinked and destroyed;
	nothing happens when no attribute matches.
	*/
	void DeleteAttribute(const char* name);

	/// Return the first attribute in the list.
	const XMLAttribute* FirstAttribute() const {
		return _rootAttribute;
	}
	/// Query a specific attribute in the list.
	const XMLAttribute* FindAttribute(const char* name) const;
	/// Returns the value of the first child if it is a text node, else 0.
	const char* GetText() const;
	/// Sets the text of the first child if it is a text node; otherwise a
	/// new text node is inserted as the first child.
	void SetText(const char* inText);
private:
	XMLElement(XMLDocument* doc);
	virtual ~XMLElement();
	XMLElement(const XMLElement&);	// not supported
	void operator=(const XMLElement&);	// not supported

	// Non-const lookup, implemented by forwarding to the const overload
	// and casting the constness of the result away.
	XMLAttribute* FindAttribute(const char* name) {
		return const_cast<XMLAttribute*>(const_cast<const XMLElement*>(this)->FindAttribute(name));
	}
	// Returns the attribute called `name`, appending a new one to the end
	// of the list when it does not exist.
	XMLAttribute* FindOrCreateAttribute(const char* name);

	// Runs the attribute's destructor and returns its storage to the pool
	// it came from. A null pointer is ignored.
	static void DeleteAttribute(XMLAttribute* attribute);

	// The attribute list is ordered; there is no 'lastAttribute'
	// because the list needs to be scanned for dupes before adding
	// a new attribute.
	XMLAttribute* _rootAttribute;
};

Attribute:

const char* XMLElement::Attribute( const char* name, const char* value ) const
{
    const XMLAttribute* a = FindAttribute( name );
    if ( !a ) {
        return 0;
    }
    if ( !value || XMLUtil::StringEqual( a->Value(), value )) {
        return a->Value();
    }
    return 0;
}

查找名为name的Attribute；如果输入的value非空，还需要比较该属性的值与value是否相等。找不到属性或值不匹配时返回0。

FindAttribute:

// Walks the attribute list and returns the first attribute whose name equals
// `name`, or 0 when the list holds no such attribute.
const XMLAttribute* XMLElement::FindAttribute( const char* name ) const
{
    XMLAttribute* cursor = _rootAttribute;
    while ( cursor && !XMLUtil::StringEqual( cursor->Name(), name ) ) {
        cursor = cursor->_next;
    }
    // Either the matching attribute, or null once the list is exhausted.
    return cursor;
}

遍历单向列表，也没什么说的。

GetText：代码说明了一切——Element的text要求它的第一个子节点必须为XMLText，否则返回0。

// Returns the value of the first child when that child is a text node.
// Returns 0 when the element has no children or the first child is not text.
const char* XMLElement::GetText() const
{
    if ( !FirstChild() ) {
        return 0;
    }
    if ( !FirstChild()->ToText() ) {
        return 0;
    }
    return FirstChild()->Value();
}

SetText:注意NewText，内存由Document来分配

void	XMLElement::SetText(const char* inText)
{
	if (FirstChild() && FirstChild()->ToText())
		FirstChild()->SetValue(inText);
	else {
		XMLText*	theText = GetDocument()->NewText(inText);
		InsertFirstChild(theText);
	}
}

FindOrCreateAttribute：遍历链表查找Attribute,如果找不到，则调用Document的内存池分配内存，定位new，并将该Attribute添加到链表尾。

// Returns the attribute called `name`. When the list holds no such attribute
// a new one is constructed in storage from the document's attribute pool,
// appended to the end of the list, given its name, and returned.
XMLAttribute* XMLElement::FindOrCreateAttribute( const char* name )
{
    XMLAttribute* tail = 0;	// last attribute visited; the list tail on a miss
    XMLAttribute* current = _rootAttribute;
    while ( current && !XMLUtil::StringEqual( current->Name(), name ) ) {
        tail = current;
        current = current->_next;
    }
    if ( current ) {
        return current;
    }

    // Not found: placement-new an attribute inside pool storage.
    TIXMLASSERT( sizeof( XMLAttribute ) == _document->_attributePool.ItemSize() );
    XMLAttribute* const created = new (_document->_attributePool.Alloc() ) XMLAttribute();
    created->_memPool = &_document->_attributePool;
    if ( tail ) {
        tail->_next = created;
    }
    else {
        _rootAttribute = created;
    }
    created->SetName( name );
    created->_memPool->SetTracked(); // always created and linked.
    return created;
}

DeleteAttribute:单向链表删除某个节点，首先遍历比较，如果name相等则断开链接，并在内存池中删除内存

void XMLElement::DeleteAttribute( const char* name )
{
    XMLAttribute* prev = 0;
    for( XMLAttribute* a=_rootAttribute; a; a=a->_next ) {
        if ( XMLUtil::StringEqual( name, a->Name() ) ) {
            if ( prev ) {
                prev->_next = a->_next;
            }
            else {
                _rootAttribute = a->_next;
            }
            DeleteAttribute( a );
            break;
        }
        prev = a;
    }
}

删除XMLAttribute的内存，因为使用的是定位new，所以主动调用析构函数。

// Destroys an attribute that was constructed with placement new: the
// destructor is invoked explicitly and the storage is handed back to the
// pool it came from. A null pointer is ignored.
void XMLElement::DeleteAttribute(XMLAttribute* attribute)
{
	if (attribute) {
		// Fetch the pool first; the member is unusable after destruction.
		MemPool* const owner = attribute->_memPool;
		attribute->~XMLAttribute();
		owner->Free(attribute);
	}
}

FindAttribute：前面已经分析过类似的了

// Linear search through the element's attribute list.
// Returns the first attribute whose name equals `name`, or 0 if none does.
const XMLAttribute* XMLElement::FindAttribute(const char* name) const
{
	const XMLAttribute* match = 0;
	// The loop ends at the end of the list or as soon as a match is recorded.
	for (XMLAttribute* node = _rootAttribute; node && !match; node = node->_next) {
		if (XMLUtil::StringEqual(node->Name(), name)) {
			match = node;
		}
	}
	return match;
}

至此，我们基本编写完毕各个子类的代码。现在来看，各个XMLNode的子类中，只有XMLDocument的构造函数是public，其他的子类只能通过XMLDocument来new，而XMLDocument持有各个元素(包括XMLAttribute)的内存池，这样只要XMLDocument没有析构，所有元素的指针都是有效的。

走调的鱼

关注

4
点赞
踩
3

收藏

觉得还不错? 一键收藏
2
评论
tinyxml2源码分析-2

我们实现了XMLNode，接下来我们实现XMLTextclass XMLText : public XMLNode{ friend class XMLDocument;public: virtual XMLText* ToText(){ return this; } virtual const XMLText* ToText() const{ return this;
复制链接

扫一扫

专栏目录