XML读取小例

最新推荐文章于 2024-09-10 10:40:40 发布

ArvinStudy

最新推荐文章于 2024-09-10 10:40:40 发布

阅读量2.1k

点赞数

分类专栏： XML 文章标签： xml string regex user image encoding

本文链接：https://blog.csdn.net/ArvinStudy/article/details/7698173

版权

XML 专栏收录该内容

7 篇文章 0 订阅

订阅专栏

<?xml version="1.0" encoding="UTF-8"?>
<statuses>
  <status>
    <created_at>Wed Jun 27 18:47:10 +0800 2012</created_at>
    <id>3461603414815109</id>
    <text>#节能电视选海尔#激情七月天，奥运会、暑期档连番上阵，宅男腐女们要和电视机频频约会！电视要节能？那是必须滴~~ Hai TV独有的SCM智能护眼技术，可根据智能光感器感应收看环境的光亮度，将电视调整到最适合的观看亮度，从而实现由“被动费电”转变为“主动节能”！这个夏天与Hai TV一起high一夏~</text>
    <source>
      <a href=""/>
    </source>
    <favorited>false</favorited>
    <truncated>false</truncated>
    <geo/>
    <in_reply_to_status_id/>
    <in_reply_to_user_id/>
    <in_reply_to_screen_name/>
    <thumbnail_pic>http://ww4.sinaimg.cn/thumbnail/84f351afjw1ducwrs7zxlj.jpg</thumbnail_pic>
    <bmiddle_pic>http://ww4.sinaimg.cn/bmiddle/84f351afjw1ducwrs7zxlj.jpg</bmiddle_pic>
    <original_pic>http://ww4.sinaimg.cn/large/84f351afjw1ducwrs7zxlj.jpg</original_pic>
    <mid>3461603414815109</mid>
    <user>
      <id>2230538671</id>
      <screen_name>海尔济南</screen_name>
      <name>海尔济南</name>
      <province>37</province>
      <city>1</city>
      <location>山东 济南</location>
      <description>全球品质 乐享生活 创新家电 带您进入环保乐享新生活<br />
海尔官网http://www.haier.com 海尔商城http://www.ehaier.com</description>
      <url/>
      <profile_image_url>http://tp4.sinaimg.cn/2230538671/50/5610160791/1</profile_image_url>
      <domain/>
      <gender>m</gender>
      <followers_count>25199</followers_count>
      <friends_count>151</friends_count>
      <statuses_count>943</statuses_count>
      <favourites_count>2</favourites_count>
      <created_at>Wed Jul 06 11:57:18 +0800 2011</created_at>
      <following>false</following>
      <verified>true</verified>
      <allow_all_act_msg>false</allow_all_act_msg>
      <geo_enabled>true</geo_enabled>
    </user>
  </status>
  <status>

代码（解析上面这种结构的微博 XML，上面的 XML 示例仅截取了开头部分；每个 status 节点生成一条 CrawlerResult）：

        /// <summary>
        /// Parses a Sina Weibo "statuses" XML document and converts every
        /// <c>status</c> element it contains into a <see cref="CrawlerResult"/>.
        /// </summary>
        /// <param name="xmlTopic">Raw XML text. A null or empty string yields an empty list.</param>
        /// <param name="task_ID">Not read by this implementation.</param>
        /// <returns>One result per <c>status</c> element, in document order.</returns>
        /// <exception cref="XmlException">
        /// Propagated from <see cref="XmlDocument.LoadXml(string)"/> when
        /// <paramref name="xmlTopic"/> is not well-formed XML.
        /// </exception>
        protected override List<CrawlerResult> GetArticleByHtml(string xmlTopic, int task_ID)
        {
            List<CrawlerResult> arrayList = new List<CrawlerResult>();
            if (string.IsNullOrEmpty(xmlTopic))
            {
                return arrayList;
            }

            // Detects a "#topic#" tag at the very start of the text or right after a ':'.
            // When the status is such a topic repost, the search keyword is prepended to the title.
            Regex isTopic = new Regex("(:#[^#]*#)|(^#[^#]*#)");

            XmlDocument doc = new XmlDocument();
            doc.LoadXml(xmlTopic);

            foreach (XmlNode item in doc.GetElementsByTagName("status"))
            {
                CrawlerResult topic = new CrawlerResult();

                // Constant for every status, so set once instead of once per child node.
                topic.SiteName = "新浪微博";
                topic.FilterType = FilterType.FilterNo;

                string pageid = "";
                string userId = null;
                int parsed;

                foreach (XmlNode node in item.ChildNodes)
                {
                    switch (node.Name)
                    {
                        case "created_at":
                            topic.CreateTime = ConverDateTime(node.InnerText);
                            break;

                        case "id":
                            pageid = node.InnerText;
                            break;

                        case "comments_count":
                            // TryParse: the feed can contain empty elements; a bad count
                            // must not abort the whole batch.
                            if (int.TryParse(node.InnerText, out parsed))
                            {
                                topic.ReplyCount = parsed;
                            }
                            break;

                        case "reposts_count":
                            // NOTE(review): ViewCount is also written from the user's
                            // followers_count below, so whichever element comes last in the
                            // document wins. Kept as-is; confirm which source is intended.
                            if (int.TryParse(node.InnerText, out parsed))
                            {
                                topic.ViewCount = parsed;
                            }
                            break;

                        case "user":
                            foreach (XmlNode usernode in node.ChildNodes)
                            {
                                switch (usernode.Name)
                                {
                                    case "id":
                                        // Only remembered here; the URL is built after the
                                        // status has been read completely.
                                        userId = usernode.InnerText;
                                        break;

                                    case "screen_name":
                                        topic.Author = usernode.InnerText;
                                        break;

                                    case "followers_count":
                                        if (int.TryParse(usernode.InnerText, out parsed))
                                        {
                                            topic.ViewCount = parsed;
                                        }
                                        break;
                                }
                            }
                            break;

                        case "text":
                            if (isTopic.Match(node.InnerText).Success)
                            {
                                topic.Title = Keyword  +"：" + node.InnerText;
                            }
                            else
                            {
                                topic.Title = node.InnerText;
                            }
                            break;
                    }
                }

                // Built after the loop so the result does not depend on the status <id>
                // element appearing before <user> in the document.
                if (userId != null)
                {
                    string sinaurl = "http://api.t.sina.com.cn/" + userId + "/statuses/" + pageid;

                    DownWebFile downwebfile = new DownWebFile();
                    topic.Url = downwebfile.GetLatURl(sinaurl);
                }

                arrayList.Add(topic);
            }

            return arrayList;
        }