</pre><p>为避免对std::string的依赖,自己编写一个StrPair,如下(这不是主代码,直接上最终代码):</p><p></p><pre name="code" class="cpp">class StrPair
{
// Minimal string holder used instead of std::string. It tracks a character
// range [_start, _end) plus a set of bit flags, and is non-copyable (the copy
// constructor and assignment operator are private and unsupported).
public:
// Processing bits tested by GetStr() (0x01, 0x02, 0x04), followed by named
// combinations of those bits.
enum {
NEEDS_ENTITY_PROCESSING = 0x01,
NEEDS_NEWLINE_NORMALIZATION = 0x02,
NEEDS_WHITESPACE_COLLAPSING = 0x04,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
ATTRIBUTE_NAME = 0,
ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
COMMENT = NEEDS_NEWLINE_NORMALIZATION
};
// Starts empty: no flags, both pointers null.
StrPair() : _flags( 0 ), _start( 0 ), _end( 0 ) {}
~StrPair();
// Points this pair at [start, end) without copying the characters.
// NEEDS_FLUSH is added to the flags so that the next GetStr() terminates
// and processes the range.
void Set( char* start, char* end, int flags ) {
TIXMLASSERT( start );
TIXMLASSERT( end );
Reset();
_start = start;
_end = end;
_flags = flags | NEEDS_FLUSH;
}
// Returns the null-terminated string, processing it first if NEEDS_FLUSH is set.
const char* GetStr();
// True when the start and end pointers are equal.
bool Empty() const {
return _start == _end;
}
// Resets, then stores str without copying it. The flags stay 0, so Reset()
// will not delete[] the pointer; _end is left null.
void SetInternedStr( const char* str ) {
Reset();
_start = const_cast<char*>(str);
}
// Stores a private copy of str allocated with new[] (marked NEEDS_DELETE).
void SetStr( const char* str, int flags=0 );
// Scans `in` for endTag and records the text before it; returns the position
// after endTag, or 0 if endTag is not found.
char* ParseText( char* in, const char* endTag, int strFlags );
// Records a name starting at `in`; returns the position after the name, or 0
// if `in` does not start a name.
char* ParseName( char* in );
void TransferTo( StrPair* other );
// Frees the buffer if NEEDS_DELETE is set and returns to the empty state.
void Reset();
private:
// Rewrites the string in place with whitespace runs collapsed; invoked from GetStr().
void CollapseWhitespace();
// Internal state bits, kept apart from the public processing bits above.
enum {
// The range still has to be null-terminated and processed by GetStr().
NEEDS_FLUSH = 0x100,
// _start was allocated with new[] and must be delete[]d by Reset().
NEEDS_DELETE = 0x200
};
int _flags;
char* _start;
char* _end;
StrPair( const StrPair& other ); // not supported
void operator=( StrPair& other ); // not supported, use TransferTo()
};
代码很容易读懂:满足一个string类的基本要求(构造、set、get),但是禁止了拷贝,改用函数TransferTo来转移内容。有点费解的是_flags成员变量:StrPair持有的指针在有些情况下不需要delete,这就是枚举NEEDS_DELETE、NEEDS_FLUSH的来源。所有的标志量都按bit定义,所以操作_flags时使用位运算。
Reset:
// Returns the object to its empty state, releasing the buffer first when this
// object owns it (NEEDS_DELETE set).
void StrPair::Reset()
{
    const bool ownsBuffer = ( _flags & NEEDS_DELETE ) != 0;
    if ( ownsBuffer ) {
        delete [] _start;
    }
    _start = _end = 0;
    _flags = 0;
}
SetStr:先调用Reset,将原来的字符串释放(是NEEDS_DELETE类型),调用strlen获取长度,分配内存并拷贝,将标志置为NEEDS_DELETE。
// Replaces the current contents with a private, heap-allocated copy of `str`.
//   str   - null-terminated source string; must not be null.
//   flags - processing flags to store; NEEDS_DELETE is always added so that
//           Reset() releases the copy.
void StrPair::SetStr( const char* str, int flags )
{
    TIXMLASSERT( str );
    Reset();
    TIXMLASSERT( _start == 0 );

    const size_t bytes = strlen( str ) + 1;   // characters plus the terminator
    char* copy = new char[ bytes ];
    memcpy( copy, str, bytes );

    _start = copy;
    _end = copy + ( bytes - 1 );              // points at the terminator
    _flags = flags | NEEDS_DELETE;
}
ParseText: 所谓的ParseText,其实就是个find,找到相等字符串后,设置标志
// Scans forward from `p` for the first occurrence of `endTag`.
//   p        - null-terminated input to scan.
//   endTag   - non-empty terminator string to look for.
//   strFlags - flags handed to Set() for the captured text.
// On a match, the text before the tag is recorded via Set() and the position
// just past the tag is returned. Returns 0 if the input ends without a match.
char* StrPair::ParseText( char* p, const char* endTag, int strFlags )
{
    TIXMLASSERT( endTag && *endTag );
    char* const begin = p;
    const size_t tagLen = strlen( endTag );

    for ( ; *p; ++p ) {
        // Cheap first-character test before the full comparison.
        if ( *p != *endTag ) {
            continue;
        }
        if ( strncmp( p, endTag, tagLen ) == 0 ) {
            Set( begin, p, strFlags );
            return p + tagLen;
        }
    }
    return 0;
}
IsNameStartChar:
// Returns true if `ch` may begin a name: any byte >= 128, an alphabetic
// character (per isalpha), ':' or '_'.
inline static bool IsNameStartChar( unsigned char ch ) {
    // Bytes >= 128 are accepted wholesale: a heuristic that avoids implementing
    // a Unicode-aware isalpha().
    const bool highByte = ( ch >= 128 );
    return highByte || isalpha( ch ) != 0 || ch == '_' || ch == ':';
}
IsNameChar:
// Returns true if `ch` may appear inside a name: anything accepted by
// IsNameStartChar, plus decimal digits, '.' and '-'.
inline static bool IsNameChar( unsigned char ch ) {
    if ( IsNameStartChar( ch ) ) {
        return true;
    }
    if ( isdigit( ch ) ) {
        return true;
    }
    return ch == '.' || ch == '-';
}
ParseName:搜索名称
// Captures the name that starts at `p`.
// Returns 0 if `p` is null, empty, or does not begin with a name-start
// character. Otherwise records the name via Set() with no flags and returns
// the position of the first character after it.
char* StrPair::ParseName( char* p )
{
    const bool hasInput = p && *p;
    if ( !hasInput || !XMLUtil::IsNameStartChar( *p ) ) {
        return 0;
    }

    // The first character is already accepted; extend over the remaining name characters.
    char* nameEnd = p + 1;
    for ( ; *nameEnd && XMLUtil::IsNameChar( *nameEnd ); ++nameEnd ) {
    }

    Set( p, nameEnd, 0 );
    return nameEnd;
}
CollapseWhitespace:就是把多余的空格合并为一个...
// Rewrites the string in place: leading and trailing whitespace is dropped and
// every interior run of whitespace becomes a single ' '.
void StrPair::CollapseWhitespace()
{
    // _start is moved below, which would make a later delete[] undefined, so
    // this must never run on an owned buffer.
    TIXMLASSERT( ( _flags & NEEDS_DELETE ) == 0 );

    // Drop leading whitespace by advancing the start pointer.
    _start = XMLUtil::SkipWhiteSpace( _start );
    if ( !*_start ) {
        return;
    }

    const char* src = _start;   // read position
    char* dst = _start;         // write position (never ahead of src)
    while ( *src ) {
        if ( XMLUtil::IsWhiteSpace( *src ) ) {
            src = XMLUtil::SkipWhiteSpace( src );
            if ( !*src ) {
                break;          // the run reached the end: trailing space is not written
            }
            *dst++ = ' ';
        }
        *dst++ = *src++;
    }
    *dst = 0;
}
GetStr:NEEDS_FLUSH,就是字符串结尾需要赋值null,并根据不同的标志,处理字符串,处理xml转义字符。
const char* StrPair::GetStr()
{
TIXMLASSERT( _start );
TIXMLASSERT( _end );
if ( _flags & NEEDS_FLUSH ) {
*_end = 0;
_flags ^= NEEDS_FLUSH;//清除标记
if ( _flags ) {
const char* p = _start; // the read pointer
char* q = _start; // the write pointer
while( p < _end ) {
if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == CR ) {
if ( *(p+1) == LF ) {
p += 2;<span style="font-family: Arial, Helvetica, sans-serif;">// \r\n 替换为\n</span>
}
else {
++p;<span style="font-family: Arial, Helvetica, sans-serif;">// </span><span style="font-family: Arial, Helvetica, sans-serif;">\r</span><span style="font-family: Arial, Helvetica, sans-serif;"> </span><span style="font-family: Arial, Helvetica, sans-serif;">替换为\n</span>
}
*q++ = LF;
}
else if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == LF ) {
if ( *(p+1) == CR ) {//\n\r替换为\n
p += 2;
}
else {
++p;
}
*q++ = LF;
}
<span> </span>//对&#dddd;类型的unicode转义
else if ( (_flags & NEEDS_ENTITY_PROCESSING) && *p == '&' ) {
if ( *(p+1) == '#' ) {
const int buflen = 10;
char buf[buflen] = { 0 };
int len = 0;
char* adjusted = const_cast<char*>( XMLUtil::GetCharacterRef( p, buf, &len ) );
if ( adjusted == 0 ) {
*q = *p;
++p;
++q;
}
else {
TIXMLASSERT( 0 <= len && len <= buflen );
TIXMLASSERT( q + len <= adjusted );
p = adjusted;
memcpy( q, buf, len );
q += len;
}
}
else {
bool entityFound = false;//循环遍历转义字符全局变量,如果字符串相等,则转义
for( int i = 0; i < NUM_ENTITIES; ++i ) {
const Entity& entity = entities[i];
if ( strncmp( p + 1, entity.pattern, entity.length ) == 0
&& *( p + entity.length + 1 ) == ';' ) {
// Found an entity - convert.
*q = entity.value;
++q;
p += entity.length + 2;
entityFound = true;
break;
}
}
if ( !entityFound ) {
// fixme: treat as error?找不到转义字符,继续
++p;
++q;
}
}
}
else {
*q = *p;
++p;
++q;
}
}
*q = 0;
}
// The loop below has plenty going on, and this
// is a less useful mode. Break it out.
if ( _flags & NEEDS_WHITESPACE_COLLAPSING ) {
CollapseWhitespace();
}
_flags = (_flags & NEEDS_DELETE);
}
TIXMLASSERT( _start );
return _start;
}
entities是转义字符定义
// One predefined entity, as consumed by StrPair::GetStr():
//   pattern - the text between '&' and ';'
//   length  - number of characters in pattern
//   value   - the character written in place of the whole "&pattern;" sequence
struct Entity {
const char* pattern;
int length;
char value;
};
// Number of rows in the entity table below.
static const int NUM_ENTITIES = 5;
// The entity table: quot, amp, apos, lt, gt.
static const Entity entities[NUM_ENTITIES] = {
{ "quot", 4, DOUBLE_QUOTE },
{ "amp", 3, '&' },
{ "apos", 4, SINGLE_QUOTE },
{ "lt", 2, '<' },
{ "gt", 2, '>' }
};
在我们实现其他子类时,先考虑一下内存分配,内存池,我们先定义内存池接口:
// Abstract memory-pool interface; every operation is pure virtual.
class MemPool
{
public:
MemPool() {}
virtual ~MemPool() {}
// Compared against sizeof(XMLAttribute) before an attribute is allocated;
// presumably the size in bytes of one pool item.
virtual int ItemSize() const = 0;
// Returns raw storage; XMLElement::FindOrCreateAttribute constructs an
// object in it with placement new.
virtual void* Alloc() = 0;
// Takes storage back; XMLElement::DeleteAttribute calls this after running
// the object's destructor explicitly.
virtual void Free(void*) = 0;
// Called once a newly allocated attribute has been linked into its element.
// NOTE(review): the bookkeeping behind this is implementation-specific - confirm against MemPoolT.
virtual void SetTracked() = 0;
// NOTE(review): presumably releases everything the pool holds - confirm against MemPoolT.
virtual void Clear() = 0;
};
接下来是内存池放在哪里,毫无悬念,只能放在XMLDocument,因为对于一个xml文件,只有一个XMLDocument。既然这样,我们先定义XMLDocument,读取文件,我们放在后续编写,先保留接口。先实现向下转型,定义RootElement,然后就是各个节点的New函数。抛开MemPoolT是对MemPool的实现,后面会分析其代码,
// The document node. It is the only node type here with a public constructor,
// it holds the memory pools for elements, attributes, text and comments, and
// it declares the New*() functions for the other node types.
class XMLDocument : public XMLNode
{
friend class XMLElement;
public:
/// constructor
XMLDocument(bool processEntities = true, Whitespace = PRESERVE_WHITESPACE);
~XMLDocument();
// Downcast helpers: a document is its own owning document, hence the assert.
virtual XMLDocument* ToDocument() {
TIXMLASSERT(this == _document);
return this;
}
virtual const XMLDocument* ToDocument() const {
TIXMLASSERT(this == _document);
return this;
}
// File I/O entry points; only declared here.
XMLError LoadFile(const char* filename);
XMLError SaveFile(const char* filename, bool compact = false);
// The root element is the first child element of the document.
XMLElement* RootElement() {
return FirstChildElement();
}
const XMLElement* RootElement() const {
return FirstChildElement();
}
// Factory functions for the node types whose constructors are not public.
// NOTE(review): presumably each allocates from one of the pools below - confirm in the definitions.
XMLElement* NewElement(const char* name);
XMLComment* NewComment(const char* comment);
XMLText* NewText(const char* text);
XMLDeclaration* NewDeclaration(const char* text = 0);
XMLUnknown* NewUnknown(const char* text);
void DeleteNode(XMLNode* node);
/// Clear the document, resetting it to the initial state.
void Clear();
private:
XMLDocument(const XMLDocument&); // not supported
void operator=(const XMLDocument&); // not supported
XMLError _errorID;
// One pool per object type, each sized for exactly that type.
MemPoolT< sizeof(XMLElement) > _elementPool;
MemPoolT< sizeof(XMLAttribute) > _attributePool;
MemPoolT< sizeof(XMLText) > _textPool;
MemPoolT< sizeof(XMLComment) > _commentPool;
};
接下来我们实现XMLText
// A text node. A flag records whether it is a CDATA section or ordinary text.
// The constructor and destructor are protected; XMLDocument is a friend.
class XMLText : public XMLNode
{
friend class XMLDocument;
public:
// Downcast helpers: a text node returns itself.
virtual XMLText* ToText(){
return this;
}
virtual const XMLText* ToText() const{
return this;
}
/// Declare whether this should be CDATA or standard text.
void SetCData(bool isCData) {
_isCData = isCData;
}
/// Returns true if this is a CDATA text element.
bool CData() const{
return _isCData;
}
protected:
// New text nodes start as ordinary (non-CDATA) text.
XMLText(XMLDocument* doc) : XMLNode(doc), _isCData(false){
}
virtual ~XMLText(){
}
private:
bool _isCData;
XMLText(const XMLText&); // not supported
XMLText& operator=(const XMLText&); // not supported
};
首先实现向下转型函数,XMLText有两种类型,一种是CData类型,一种是普通类型,这里需要一个标志来区分_isCData,所以有SetCData和CData接口。这里构造函数仍为protected,是因为该类型需要在XMLDocument中new出来。好像少了Text内容,这时候我们是不是应该增加一个成员变量来实现呢,比如std::string _strValue;考虑到所有子类都有自己的内容,所以把内容放在基类,
给XMLNode增加protected成员变量,和相关接口函数。Value的意义对不同的元素是不同的,具体如下
Document:空(返回NULL,而不是空字符串)
Element: Element的名字
Comment: 注释文字
Unknown: the tag contents
Text: 文本
// Per-node payload whose meaning depends on the node type (element name,
// comment text, text content, ...).
// NOTE(review): presumably `mutable` so that Value() const can call the
// non-const StrPair::GetStr() - confirm in the definition.
mutable StrPair _value;
// Returns the node's value string.
const char* Value() const;
// Sets the node's value.
// NOTE(review): staticMem presumably means `val` outlives the node and need
// not be copied (compare StrPair::SetInternedStr) - confirm in the definition.
void SetValue( const char* val, bool staticMem=false );
接下来的似乎不言而喻的
// A comment node. The constructor and destructor are protected, with
// XMLDocument declared as a friend; copying is not supported.
class XMLComment : public XMLNode
{
friend class XMLDocument;
public:
// Downcast helpers: a comment node returns itself.
virtual XMLComment* ToComment() {
return this;
}
virtual const XMLComment* ToComment() const {
return this;
}
protected:
XMLComment(XMLDocument* doc);
virtual ~XMLComment();
private:
XMLComment(const XMLComment&); // not supported
XMLComment& operator=(const XMLComment&); // not supported
};
// A declaration node. The constructor and destructor are protected, with
// XMLDocument declared as a friend; copying is not supported.
class XMLDeclaration : public XMLNode
{
friend class XMLDocument;
public:
// Downcast helpers: a declaration node returns itself.
virtual XMLDeclaration* ToDeclaration() {
return this;
}
virtual const XMLDeclaration* ToDeclaration() const {
return this;
}
protected:
XMLDeclaration(XMLDocument* doc);
virtual ~XMLDeclaration();
private:
XMLDeclaration(const XMLDeclaration&); // not supported
XMLDeclaration& operator=(const XMLDeclaration&); // not supported
};
// A node of unknown kind. The constructor and destructor are protected, with
// XMLDocument declared as a friend; copying is not supported.
class XMLUnknown : public XMLNode
{
friend class XMLDocument;
public:
// Downcast helpers: an unknown node returns itself.
virtual XMLUnknown* ToUnknown() {
return this;
}
virtual const XMLUnknown* ToUnknown() const {
return this;
}
protected:
XMLUnknown(XMLDocument* doc);
virtual ~XMLUnknown();
private:
XMLUnknown(const XMLUnknown&); // not supported
XMLUnknown& operator=(const XMLUnknown&); // not supported
};
在编写XMLElement前,首先编写XMLAttribute:
// A name/value pair that is also a node of a singly linked list (via _next).
// Construction, destruction and naming are private; XMLElement is a friend
// and manages the list.
class XMLAttribute
{
friend class XMLElement;
public:
/// The name of the attribute.
const char* Name() const;
/// The value of the attribute.
const char* Value() const;
/// The next attribute in the list.
const XMLAttribute* Next() const {
return _next;
}
// Sets the attribute's value.
void SetAttribute(const char* value);
private:
// NOTE(review): BUF_SIZE is not used by any code visible here - purpose unconfirmed.
enum { BUF_SIZE = 200 };
// Starts unlinked and without a pool; XMLElement::FindOrCreateAttribute
// assigns _memPool right after construction.
XMLAttribute() : _next(0), _memPool(0) {}
virtual ~XMLAttribute() {}
XMLAttribute(const XMLAttribute&); // not supported
void operator=(const XMLAttribute&); // not supported
void SetName(const char* name);
mutable StrPair _name;
mutable StrPair _value;
// Next attribute of the same element, or null at the end of the list.
XMLAttribute* _next;
// Pool the storage came from; XMLElement::DeleteAttribute returns it there.
MemPool* _memPool;
};
所谓的Attribute其实就是个key/value对。Attribute同时也是单向链表的节点,这样只要Element持有首属性(_rootAttribute),就可以通过_next遍历所有属性。Attribute的构造函数是private的,只能由友元类XMLElement创建(见后面的FindOrCreateAttribute),其内存来自Document的内存池。
// An element node: its name is the node's Value(), and it owns a singly
// linked list of attributes headed by _rootAttribute. The constructor and
// destructor are private; XMLDocument is a friend.
class XMLElement : public XMLNode
{
friend class XMLDocument;
public:
/// Get the name of an element (which is the Value() of the node.)
const char* Name() const {
return Value();
}
/// Set the name of the element.
void SetName(const char* str, bool staticMem = false) {
SetValue(str, staticMem);
}
// Downcast helpers: an element returns itself.
virtual XMLElement* ToElement() {
return this;
}
virtual const XMLElement* ToElement() const {
return this;
}
// Returns the value of attribute `name`, or 0 if there is no such attribute.
// When `value` is non-null, also returns 0 unless the attribute's value equals it.
const char* Attribute(const char* name, const char* value = 0) const;
/// Sets the named attribute to value.
void SetAttribute(const char* name, const char* value) {
XMLAttribute* a = FindOrCreateAttribute(name);
a->SetAttribute(value);
}
/**
Delete an attribute.
*/
void DeleteAttribute(const char* name);
/// Return the first attribute in the list.
const XMLAttribute* FirstAttribute() const {
return _rootAttribute;
}
/// Query a specific attribute in the list.
const XMLAttribute* FindAttribute(const char* name) const;
// Returns the Value() of the first child if it is a text node, otherwise 0.
const char* GetText() const;
// Sets the first child's value if it is a text node; otherwise inserts a
// new text node as the first child.
void SetText(const char* inText);
private:
XMLElement(XMLDocument* doc);
virtual ~XMLElement();
XMLElement(const XMLElement&); // not supported
void operator=(const XMLElement&); // not supported
// Non-const lookup implemented on top of the const overload.
XMLAttribute* FindAttribute(const char* name) {
return const_cast<XMLAttribute*>(const_cast<const XMLElement*>(this)->FindAttribute(name));
}
// Returns the attribute called `name`, appending a new one to the list if needed.
XMLAttribute* FindOrCreateAttribute(const char* name);
// Destroys an already unlinked attribute and returns its storage to its pool.
static void DeleteAttribute(XMLAttribute* attribute);
// The attribute list is ordered; there is no 'lastAttribute'
// because the list needs to be scanned for dupes before adding
// a new attribute.
XMLAttribute* _rootAttribute;
};
Attribute:
const char* XMLElement::Attribute( const char* name, const char* value ) const
{
const XMLAttribute* a = FindAttribute( name );
if ( !a ) {
return 0;
}
if ( !value || XMLUtil::StringEqual( a->Value(), value )) {
return a->Value();
}
return 0;
}
查找名为name的Attribute;如果输入的value非空,还需要比较该属性的值与value是否相等。找不到该属性或者值不匹配时返回0。
FindAttribute:
// Walks the attribute list from _rootAttribute and returns the first attribute
// whose name equals `name`, or 0 if the list holds no such attribute.
const XMLAttribute* XMLElement::FindAttribute( const char* name ) const
{
    const XMLAttribute* cur = _rootAttribute;
    while ( cur && !XMLUtil::StringEqual( cur->Name(), name ) ) {
        cur = cur->_next;
    }
    return cur;   // null once the end of the list is reached
}
遍历单向列表,也没什么说的。
GetText:从代码说明一切,Element的text属性,要求它第一个子节点必须为XMLText。
// Returns the text of this element: the Value() of the first child when that
// child is a text node. Returns 0 when there is no child or the first child
// is not a text node.
const char* XMLElement::GetText() const
{
    if ( !FirstChild() ) {
        return 0;
    }
    if ( !FirstChild()->ToText() ) {
        return 0;
    }
    return FirstChild()->Value();
}
SetText:注意NewText,内存由Document来分配
// Sets the text of this element. If the first child is already a text node,
// its value is replaced; otherwise a new text node obtained from the document
// is inserted as the first child.
void XMLElement::SetText(const char* inText)
{
    const bool firstIsText = FirstChild() && FirstChild()->ToText();
    if (!firstIsText) {
        InsertFirstChild(GetDocument()->NewText(inText));
        return;
    }
    FirstChild()->SetValue(inText);
}
FindOrCreateAttribute:遍历链表查找Attribute,如果找不到,则调用Document的内存池分配内存,定位new,并将该Attribute添加到链表尾。
// Returns the attribute called `name`. If the list has no such attribute, a new
// one is constructed in storage from the document's attribute pool, appended to
// the end of the list, and given that name.
XMLAttribute* XMLElement::FindOrCreateAttribute( const char* name )
{
    // Search the list, remembering the last node so a new one can be appended.
    XMLAttribute* tail = 0;
    XMLAttribute* cur = _rootAttribute;
    while ( cur ) {
        if ( XMLUtil::StringEqual( cur->Name(), name ) ) {
            return cur;
        }
        tail = cur;
        cur = cur->_next;
    }

    // Not found: construct a new attribute in pool storage.
    TIXMLASSERT( sizeof( XMLAttribute ) == _document->_attributePool.ItemSize() );
    XMLAttribute* created = new (_document->_attributePool.Alloc() ) XMLAttribute();
    created->_memPool = &_document->_attributePool;

    if ( tail == 0 ) {
        _rootAttribute = created;   // the list was empty
    }
    else {
        tail->_next = created;
    }
    created->SetName( name );
    created->_memPool->SetTracked(); // always created and linked.
    return created;
}
DeleteAttribute:单向链表删除某个节点,首先遍历比较,如果name相等则断开链接,并在内存池中删除内存
void XMLElement::DeleteAttribute( const char* name )
{
XMLAttribute* prev = 0;
for( XMLAttribute* a=_rootAttribute; a; a=a->_next ) {
if ( XMLUtil::StringEqual( name, a->Name() ) ) {
if ( prev ) {
prev->_next = a->_next;
}
else {
_rootAttribute = a->_next;
}
DeleteAttribute( a );
break;
}
prev = a;
}
}
删除XMLAttribute的内存,因为使用的是定位new,所以主动调用析构函数。
// Destroys an attribute and hands its storage back to the pool recorded in it.
// The destructor is invoked explicitly rather than through delete.
// A null pointer is ignored.
void XMLElement::DeleteAttribute(XMLAttribute* attribute)
{
    if (attribute) {
        // Read the pool pointer first: it is a member of the object being destroyed.
        MemPool* const owner = attribute->_memPool;
        attribute->~XMLAttribute();
        owner->Free(attribute);
    }
}
FindAttribute:前面已经分析过类似的了
// Linear search of the attribute list: returns the first attribute whose name
// equals `name`, or 0 when none matches.
const XMLAttribute* XMLElement::FindAttribute(const char* name) const
{
    const XMLAttribute* attr = _rootAttribute;
    for (;;) {
        if (attr == 0) {
            return 0;
        }
        if (XMLUtil::StringEqual(attr->Name(), name)) {
            return attr;
        }
        attr = attr->_next;
    }
}
至此,我们基本编写完毕各个子类的代码。现在来看,各个XMLNode的子类中,只有XMLDocument的构造函数是public,其他的子类只能通过XMLDocument来new,而XMLDocument持有各个元素的内存池,包括XMLAttribute,这样只要XMLDocument没有析构,所有元素的指针都是有效的。