tinyxml2解析RSS文件 并将其转换为.dat格式

参考博客:
tinyXml2 – https://github.com/leethomason/tinyxml2
rss – http://www.runoob.com/rss/rss-tutorial.htm
xml格式 – http://blog.csdn.net/zhubinqiang/article/details/7185322
在c++中使用tinyxml2 解析XML格式文件例子 – http://blog.csdn.net/educast/article/details/12908455
tinyxml2 使用教程 – http://blog.csdn.net/K346K346/article/details/48750417
xmldocument 类 – http://www.cnblogs.com/kissdodog/archive/2013/02/24/2924236.html
参考接口:
struct RssItem{
string title;
string link;
string description;
string content;
};
class RssReader{
public:
RssReader();
void parseRss();//解析
void dump();//输出
private:
vector<RssItem> _rss;
};
要求:最后生成一个 pagelib.dat, 其格式:

    <doc>
      <docid>1</docid>
      <title> ... </title>
      <link> ...  </link>
      <content> ... </content>
    </doc>
    <doc>
      ...
    </doc>
    <doc>
      ...
    </doc>

rss 格式

<?xml version="1.0" encoding="utf-8" ?>   //声明该文档是xml文档
<rss version="2.0"> //声明当前文件为rss格式文件
<channel> 
<title></title> //对网站和当前rss进行描述
 <description>国内焦点新闻列表</description>   //对当前rss进行描述
<link></link>
<item>//一条信息
<title>最高检:严惩公务员利用审批等权力索贿受贿</title>
<link></link>//新闻链接
<description></description>//新闻描述
</item>
</channel>
</rss>

以下是可能用到的一些函数的准备
FirstChildElement(const char* value=0): 获取第一个值为value的子节点,value默认值为空,则返回第一个子节点。
RootElement(): 获取根节点,相当于FirstChildElement的空参数版本;
const XMLAttribute* FirstAttribute() const: 获取第一个属性值;
Attribute("Password"): 获取Password属性;
XMLHandle NextSiblingElement( const char* _value=0 ) :获得下一个节点。
NextSiblingElement//获取下一个相邻的节点

//
// One record extracted from an RSS feed. Every field is a plain string that
// the parser fills in from the child element of the same name; fields that
// never get filled stay empty.
class information{
    public:
        std::string title;        // text of <title>
        std::string link;         // text of <link>
        std::string description;  // text of <description>
        std::string content;      // text of an element whose name starts with "content"
        std::string lagnguage;    // (sic) spelling kept because the member is public
        std::string copyright;
        std::string pubdate;
        std::string guid;         // text of <guid>
        std::string dccreator;    // text of <dc:creator>

        // Trace output so that every destruction of a record shows up on stdout.
        ~information(){
            std::cout<<"~information"<<std::endl;
        }
};
void traverse(vector <information> * _vecotr){
//  string filename = "pagelib.dat";
    std::ofstream _ostream("pagelib.dat",std::ios::app);

    vector<information>::iterator _begin= _vecotr->begin();
    vector<information>::iterator _end= _vecotr->end();

    ++_begin;
    while(_begin!=_end){
        _ostream<<"<doc>"<<endl;
        _ostream<<"<title>"<<_begin->title<<"</title>"<<endl;
        _ostream<<"<link>"<<_begin->link<<"</link>"<<endl;
        _ostream<<"<description>"<<_begin->description<<"</description>"<<endl;
//      _ostream<<"<content>"<<_begin->content<<"</content>"<<endl;
        _ostream<<"</doc>"<<endl;
        ++_begin;
    }
}
void example1(){
    vector<information> information_vector;
    XMLDocument doc;
    doc.LoadFile("douban.book.review.xml");

    cout<<doc.Error()<<endl;
    if(0!=doc.Error()){
        cout<<"load file error"<<endl;
        return;
    }
//  doc.Print();
    XMLElement*  root = doc.RootElement();
    XMLElement* channel = root->FirstChildElement("channel");
    if(NULL==channel){
        cout<<"get first chile element error "<<endl;
        return ;
    }
    XMLElement * title = channel->FirstChildElement("title");
    cout<<title->GetText()<<endl;
    cout<<title->Name()<<endl;

    XMLElement * description = title->NextSiblingElement();
    cout<<description->GetText()<<endl;
    cout<<description->Name()<<endl;
    XMLElement * language = description->NextSiblingElement();
    cout<<language->GetText()<<endl;
    cout<<language->Name()<<endl;
    XMLElement * copyright = language->NextSiblingElement();
    cout<<copyright->GetText()<<endl;
    cout<<copyright->Name()<<endl;
    XMLElement * pubDate = copyright->NextSiblingElement();
    cout<<pubDate->GetText()<<endl;
    cout<<pubDate->Name()<<endl;
    XMLElement * item=pubDate->NextSiblingElement();

    while(item){
        information  tmp;
        cout<<"sucess in item"<<endl;
        XMLElement *item_child= item->FirstChildElement();
        while(item_child){
            cout<<item_child->GetText()<<endl;
            if(strcmp(item_child->Name(),"title")==0){
//              tmp.title.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.title.clear();
                tmp.title.append(item_child->GetText());
                cout<<"title-------------------------------------------"<<endl;
            }else if(strcmp(item_child->Name(),"link")==0){
//              tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.link.clear();
                tmp.link.append(item_child->GetText());
                cout<<"link-------------------------------------------"<<endl;
            }else if(strcmp(item_child->Name(),"description")==0){
//              tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.description.clear();
                tmp.description.append(item_child->GetText());
                cout<<"description------------------------------------"<<endl;
            }else if(strncmp(item_child->Name(),"content",7)==0){
//              tmp.content.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.content.clear();
                tmp.content.append(item_child->GetText());
                cout<<"content------------------------------------"<<endl;
            }else if(strcmp(item_child->Name(),"dc:creator")==0){
//              tmp.dccreator.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.dccreator.clear();
                tmp.dccreator.append(item_child->GetText());
                cout<<"dc:creator ------------------------------------"<<endl;
            }else if(strcmp(item_child->Name(),"pubdate")==0){
//              tmp.pubdate.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.pubdate.clear();
                tmp.pubdate.append(item_child->GetText());
                cout<<"pubdate ------------------------------------"<<endl;
            }else if(strcmp(item_child->Name(),"guid")==0){
//              tmp.guid.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());
                tmp.guid.clear();
                tmp.guid.append(item_child->GetText());
                cout<<"guid------------------------------------"<<endl;
            }
            item_child = item_child->NextSiblingElement();
        }
        information_vector.push_back(tmp);
//      delete tmp;
        item = item->NextSiblingElement();
    }
    traverse(&information_vector);
}

// Program entry point: runs the RSS-to-pagelib conversion once and reports
// success to the caller regardless of what example1() printed.
int main(){
    example1();
    return 0;
}
  • 3
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值