C++ tinyXML2封装,解析XML文件


一.前言

要实现对XML文件的解析,首先需要使用github上面的开源组件tinyxml2,将tinyxml2中的 tinyxml2.cpp 和 tinyxml2.h 两个文件同时复制到项目目录下即可。
tinyxml2 github链接


二.程序实现

1.tinyxml2组件使用

整个的使用方法与log4cpp的使用方法比较相似,通过程序创建好的XML文件如下:
data.xml:

<?xml version="1.0" encoding="UTF-8"?>
<School SchoolName="SWJTU">
    <Student StudentName="Worthy">
        <Age>23</Age>
        <Grade>99.5</Grade>
    </Student>
    <Student StudentName="John">
        <Age>25</Age>
        <Grade>100</Grade>
    </Student>
</School>

终端的输出如下:

wwx@linux:~/week3/parseXML$ g++ main.cpp tinyxml2.cpp -o main
wwx@linux:~/week3/parseXML$ ./main
School SchoolName=SWJTU
Student StudentName=Worthy
                Age:23
                Grade:99.5
Student StudentName=John
                Age:25
                Grade:100

接下来就是完整的程序,可以仔细研究:


#include "tinyxml2.h"
#include <iostream>

using namespace tinyxml2;
using namespace std;


bool createXml(const char* path) {
	XMLDocument *doc = new XMLDocument();
	XMLDeclaration *declaration = doc->NewDeclaration("xml version=\"1.0\" encoding=\"UTF-8\"");
	doc->LinkEndChild(declaration);

	//School
	XMLElement *School = doc->NewElement("School");
	School->SetAttribute("SchoolName", "SWJTU");
	doc->LinkEndChild(School);

	//School->Student1
	XMLElement *Student1 = doc->NewElement("Student");
	Student1->SetAttribute("StudentName", "Worthy");
	School->LinkEndChild(Student1);

	//Age, Grade
	XMLElement *Age1 = doc->NewElement("Age");
	XMLText *ageText1 = doc->NewText("23");
	Age1->LinkEndChild(ageText1);

	XMLElement *Grade1 = doc->NewElement("Grade");
	XMLText *gradeText1 = doc->NewText("99.5");
	Grade1->LinkEndChild(gradeText1);
	Student1->LinkEndChild(Age1);
	Student1->LinkEndChild(Grade1);

	//School->Student2
	XMLElement *Student2 = doc->NewElement("Student");
	Student2->SetAttribute("StudentName", "John");
	School->LinkEndChild(Student2);

	//Age, Grade
	XMLElement *Age2 = doc->NewElement("Age");
	XMLText *ageText2 = doc->NewText("25");
	Age2->LinkEndChild(ageText2);

	XMLElement *Grade2 = doc->NewElement("Grade");
	XMLText *gradeText2 = doc->NewText("100");
	Grade2->LinkEndChild(gradeText2);
	Student2->LinkEndChild(Age2);
	Student2->LinkEndChild(Grade2);




	if (XML_SUCCESS != doc->SaveFile(path)) {
		cout << "SaveFile ERROR!" << endl;
		delete doc;
		return false;
	}
	delete doc;
	return true;
}

// Loads the XML file at `path` and prints its two-level structure: the root
// element with its SchoolName attribute, each child element with its
// StudentName attribute, and each grandchild as "name:text".
//
// Returns false (after printing a message) when the file cannot be loaded or
// contains no root element; true otherwise.
bool readXml(const char* path) {
	XMLDocument doc;
	if (doc.LoadFile(path) != XML_SUCCESS) {
		cout << "LoadFile ERROR!" << endl;
		return false;
	}

	// A document can load successfully and still have no element at the top
	// level, in which case RootElement() is null.
	XMLElement *Root = doc.RootElement();
	if (!Root) {
		cout << "No root element!" << endl;
		return false;
	}

	// Attribute() and GetText() return null when the attribute / text is
	// absent. Streaming a null const char* is undefined behaviour, so an empty
	// string is printed in its place.
	const char *schoolName = Root->Attribute("SchoolName");
	cout << Root->Name() << " SchoolName=" << (schoolName ? schoolName : "") << endl;

	// Tree structure: first layer (students), then second layer (their fields).
	for (XMLElement *First = Root->FirstChildElement(); First; First = First->NextSiblingElement()) {
		const char *studentName = First->Attribute("StudentName");
		cout << First->Name() << "\tStudentName=" << (studentName ? studentName : "") << endl;

		for (XMLElement *Second = First->FirstChildElement(); Second; Second = Second->NextSiblingElement()) {
			const char *text = Second->GetText();
			cout << "\t\t" << Second->Name() << ":" << (text ? text : "") << endl;
		}
	}

	return true;
}


// Writes the sample document to data.xml and reads it back.
// Exits with status 1 when either step reports failure, so that a broken run
// is visible to the shell instead of always returning 0.
int main()
{
	const char * path = "data.xml";
	if (!createXml(path)) {
		return 1;
	}
	if (!readXml(path)) {
		return 1;
	}
	return 0;
}

2.对XML文件进行DFS(深度优先遍历),遍历所有节点

XML文件的内部组成和树的结构相似,从根节点开始不断往下延伸,要想遍历XML文件中的所有信息,就需要用到遍历树节点的DFS方法。


#include "tinyxml2.h"
#include <iostream>
#include <iomanip>
using namespace tinyxml2;
using namespace std;


// Prints `tmpnode`, each of its following sibling elements, and all of their
// descendants in depth-first order. `presize` is the setw() field width used
// for the opening/closing tag markers; it grows by 2 per nesting level.
// A null `tmpnode` prints nothing.
void show_all_node(XMLElement *tmpnode, int presize)
{
	for (XMLElement *node = tmpnode; node; node = node->NextSiblingElement())
	{
		// Opening tag: element name followed by every attribute as name=value.
		cout << setw(presize) << "<" << node->Name() << " ";
		for (const XMLAttribute *attr = node->FirstAttribute(); attr; attr = attr->Next())
		{
			cout << attr->Name() << "=" << attr->Value() << " ";
		}
		cout << ">" << endl;

		// Element text. The null check is required: GetText() returns null
		// when the element has no text, and a null char* must not be streamed.
		const char *text = node->GetText();
		if (text)
			cout << setw(presize + 3) << text << " " << endl;

		// Recurse into the children, printing their attributes and text too.
		show_all_node(node->FirstChildElement(), presize + 2);

		// Closing tag; the loop header then moves on to the next sibling.
		cout << setw(presize) << "</" << node->Name() << ">" << endl;
	}
}

bool readXml(const char* path) {
	XMLDocument doc;
	if (doc.LoadFile(path) != XML_SUCCESS) {
		cout << "LoadFile ERROR!" << endl;
		return false;
	}
	//get Root Element
	XMLElement *Root = doc.RootElement();
	
	//Tree Structure , from First Layer to Second Layer
	show_all_node(Root, 0);
	
	return true;

}


// Prints every node of coolshell.xml.
// Exits with status 1 when the file cannot be read instead of always
// reporting success.
int main()
{
	const char * path = "coolshell.xml";
	return readXml(path) ? 0 : 1;
}

三.tinyXML2封装

RssReader.h:

#ifndef _RSSREADER_H_
#define _RSSREADER_H_

#include <string.h>

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include "tinyxml2.h"
using std::cout;
using std::endl;
using std::ofstream;
using std::regex;
using std::regex_replace;
using std::string;
using std::stringstream;
using std::vector;

using namespace tinyxml2;

// One RSS <item>, as collected by RssReader::DFS. Each field holds the text of
// the corresponding child element with "<...>" runs replaced by a space; a
// field stays empty when that child has no text.
struct RssItem
{
    string title;       // from <title>
    string link;        // from <link>
    string description; // from <description>
    string content;     // from <content:encoded>
};

// Collects the <item> entries of an RSS/XML file and writes them out as a
// sequence of <doc> records.
class RssReader
{
public:
    // Depth-first walk over the given element, its following siblings and all
    // of their descendants; appends one RssItem per element named "item".
    void DFS(XMLElement *);
    bool parseRss(const char *filename); // parse: load the file and collect its items; false if loading fails
    void dump(const string &filename);   // output: write every collected item to `filename`
private:
    vector<RssItem> _rss; // items collected so far, in the order DFS visited them
};

void RssReader::DFS(XMLElement *Root)
{
    while (Root)
    {
        if (!strcmp(Root->Name(), "item"))
        {
            RssItem it;
            XMLElement *Title = Root->FirstChildElement("title");
            XMLElement *Link = Root->FirstChildElement("link");
            XMLElement *Description = Root->FirstChildElement("description");
            XMLElement *Content = Root->FirstChildElement("content:encoded");
            if (Title->GetText())
            {
                stringstream ss;
                string s = Title->GetText();
                regex e("(<)[^>]*(>)");               //<XXX> 这样的形式全部去掉
                ss << regex_replace(s, e, " "); //换为 " "
                it.title = ss.str();            
            }
            if (Link->GetText())
            {
                stringstream ss;
                string s = Link->GetText();
                regex e("(<)[^>]*(>)");               //<XXX> 这样的形式全部去掉
                ss << regex_replace(s, e, " "); //换为 " "
                it.link = ss.str();          
            }
            if (Description->GetText())
            {
                stringstream ss;
                string s = Description->GetText();
                regex e("(<)[^>]*(>)");               //<XXX> 这样的形式全部去掉
                ss << regex_replace(s, e, " "); //换为 " "
                it.description = ss.str();          
            }
            if (Content->GetText())
            {
                stringstream ss;
                string s = Content->GetText();
                regex e("(<)[^>]*(>)");               //<XXX> 这样的形式全部去掉
                ss << regex_replace(s, e, " "); //换为 " "
                it.content = ss.str();          
            }
            _rss.push_back(it);
        }

        DFS(Root->FirstChildElement());
        Root = Root->NextSiblingElement();
    }
}

// Loads the XML file `filename` and collects every <item> element found
// anywhere below the root into _rss (items are appended, so repeated calls
// accumulate).
//
// Returns false, after printing "LoadFile ERROR!", when tinyxml2 cannot load
// the file; true otherwise.
//
// Declared inline because the definition lives in a header; without it,
// including the header from two translation units breaks the one-definition
// rule.
inline bool RssReader::parseRss(const char *filename)
{
    XMLDocument doc;
    if (doc.LoadFile(filename) != XML_SUCCESS)
    {
        cout << "LoadFile ERROR!" << endl;
        return false;
    }

    // DFS accepts a null pointer, so a document without a root element simply
    // yields no items.
    DFS(doc.RootElement());
    return true;
}

void RssReader::dump(const string &filename)
{
    ofstream ofs(filename, ofstream::out);
    if (!ofs)
    {
        cout << "open fileERROR!" << endl;
        exit(-1);
    }
    for (int i = 0; i < _rss.size(); i++)
    {
        ofs << "<doc>" << endl;
        ofs << "\t <docid>" << i << "</docid>" << endl;
        ofs << "\t <title>" << _rss[i].title << "</title>" << endl;
        ofs << "\t <link>" << _rss[i].link << "</link>" << endl;
        ofs << "\t <description>" << _rss[i].description << "</description>" << endl;
        ofs << "\t <content>" << _rss[i].content << "</content>" << endl;
        ofs << "</doc>" << endl;
        ofs << endl
            << endl
            << endl;
    }

    ofs.close();
}

#endif

main.cpp:

#include "RssReader.h"



// Parses coolshell.xml and writes the extracted items to pagelib.txt.
// Exits with status 1, without creating an empty output file, when the feed
// cannot be parsed.
int main(){
    RssReader reader;
    // The XML file to parse.
    if (!reader.parseRss("coolshell.xml")) {
        return 1;
    }
    reader.dump("pagelib.txt"); // write the results to a file
    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值