参考博客:
tinyXml2 – https://github.com/leethomason/tinyxml2
rss – http://www.runoob.com/rss/rss-tutorial.htm
xml格式 – http://blog.csdn.net/zhubinqiang/article/details/7185322
在c++中使用tinyxml2 解析XML格式文件例子 – http://blog.csdn.net/educast/article/details/12908455
tinyxml2 使用教程 – http://blog.csdn.net/K346K346/article/details/48750417
xmldocument 类 – http://www.cnblogs.com/kissdodog/archive/2013/02/24/2924236.html
参考接口:
struct RssItem{
string title;
string link;
string description;
string content;
};
class RssReader{
public:
RssReader();
void parseRss();//解析
void dump();//输出
private:
vector<RssItem> _rss;
};
要求:最后生成一个 pagelib.dat, 其格式:
<doc>
<docid>1</docid>
<title> ... </title>
<link> ... </link>
<content> ... </content>
</doc>
<doc>
...
</doc>
<doc>
...
</doc>
rss 格式
<?xml version="1.0" encoding="utf-8" ?> //声明该文档是xml文档
<rss version="2.0"> //声明当前文件为rss格式文件
<channel>
<title></title> //对网站和当前rss进行描述
<description>国内焦点新闻列表</description> //对当前rss进行描述
<link></link>
<item>//一条信息
<title>最高检:严惩公务员利用审批等权力索贿受贿</title>
<link></link>//新闻链接
<description></description>//新闻描述
</item>
</channel>
</rss>
以下是可能用到的一些函数的准备
FirstChildElement(const char* value=0): 获取第一个值为value的子节点,value默认值为空,则返回第一个子节点。
RootElement(): 获取根节点,相当于FirstChildElement的空参数版本;
const XMLAttribute* FirstAttribute() const: 获取第一个属性值;
Attribute("Password") //获取名为Password的属性的值;
XMLHandle NextSiblingElement( const char* _value=0 ) :获得下一个节点。
NextSiblingElement//获取下一个相邻的节点
//
/**
 * One record collected while walking an RSS feed.
 *
 * example1() fills title, link, description, content, dccreator, pubdate and
 * guid from the child elements of a feed entry; lagnguage and copyright are
 * declared but never assigned by the code in this file.
 */
class information{
public:
    std::string title;
    std::string link;
    std::string description;
    std::string content;
    std::string lagnguage;   // spelling kept as-is: the member name is part of the public interface
    std::string copyright;
    std::string pubdate;
    std::string guid;
    std::string dccreator;

    // Debug trace: prints one line on stdout every time an instance is destroyed,
    // including temporaries and the copies a vector makes.
    ~information(){
        std::cout<<"~information"<<std::endl;
    }
};
void traverse(vector <information> * _vecotr){
// string filename = "pagelib.dat";
std::ofstream _ostream("pagelib.dat",std::ios::app);
vector<information>::iterator _begin= _vecotr->begin();
vector<information>::iterator _end= _vecotr->end();
++_begin;
while(_begin!=_end){
_ostream<<"<doc>"<<endl;
_ostream<<"<title>"<<_begin->title<<"</title>"<<endl;
_ostream<<"<link>"<<_begin->link<<"</link>"<<endl;
_ostream<<"<description>"<<_begin->description<<"</description>"<<endl;
// _ostream<<"<content>"<<_begin->content<<"</content>"<<endl;
_ostream<<"</doc>"<<endl;
++_begin;
}
}
void example1(){
vector<information> information_vector;
XMLDocument doc;
doc.LoadFile("douban.book.review.xml");
cout<<doc.Error()<<endl;
if(0!=doc.Error()){
cout<<"load file error"<<endl;
return;
}
// doc.Print();
XMLElement* root = doc.RootElement();
XMLElement* channel = root->FirstChildElement("channel");
if(NULL==channel){
cout<<"get first chile element error "<<endl;
return ;
}
XMLElement * title = channel->FirstChildElement("title");
cout<<title->GetText()<<endl;
cout<<title->Name()<<endl;
XMLElement * description = title->NextSiblingElement();
cout<<description->GetText()<<endl;
cout<<description->Name()<<endl;
XMLElement * language = description->NextSiblingElement();
cout<<language->GetText()<<endl;
cout<<language->Name()<<endl;
XMLElement * copyright = language->NextSiblingElement();
cout<<copyright->GetText()<<endl;
cout<<copyright->Name()<<endl;
XMLElement * pubDate = copyright->NextSiblingElement();
cout<<pubDate->GetText()<<endl;
cout<<pubDate->Name()<<endl;
XMLElement * item=pubDate->NextSiblingElement();
while(item){
information tmp;
cout<<"sucess in item"<<endl;
XMLElement *item_child= item->FirstChildElement();
while(item_child){
cout<<item_child->GetText()<<endl;
if(strcmp(item_child->Name(),"title")==0){
// tmp.title.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.title.clear();
tmp.title.append(item_child->GetText());
cout<<"title-------------------------------------------"<<endl;
}else if(strcmp(item_child->Name(),"link")==0){
// tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.link.clear();
tmp.link.append(item_child->GetText());
cout<<"link-------------------------------------------"<<endl;
}else if(strcmp(item_child->Name(),"description")==0){
// tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.description.clear();
tmp.description.append(item_child->GetText());
cout<<"description------------------------------------"<<endl;
}else if(strncmp(item_child->Name(),"content",7)==0){
// tmp.content.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.content.clear();
tmp.content.append(item_child->GetText());
cout<<"content------------------------------------"<<endl;
}else if(strcmp(item_child->Name(),"dc:creator")==0){
// tmp.dccreator.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.dccreator.clear();
tmp.dccreator.append(item_child->GetText());
cout<<"dc:creator ------------------------------------"<<endl;
}else if(strcmp(item_child->Name(),"pubdate")==0){
// tmp.pubdate.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.pubdate.clear();
tmp.pubdate.append(item_child->GetText());
cout<<"pubdate ------------------------------------"<<endl;
}else if(strcmp(item_child->Name(),"guid")==0){
// tmp.guid.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
tmp.guid.clear();
tmp.guid.append(item_child->GetText());
cout<<"guid------------------------------------"<<endl;
}
item_child = item_child->NextSiblingElement();
}
information_vector.push_back(tmp);
// delete tmp;
item = item->NextSiblingElement();
}
traverse(&information_vector);
}
// Program entry point: runs the RSS parsing example once and exits with status 0.
int main(){
    example1();
    return 0;
}