<?xml version="1.0" encoding="UTF-8"?>
<statuses>
<!-- Sample status record: the post's own fields come first, followed by a nested user element.
     This sample contains no comments_count or reposts_count element. -->
<status>
<created_at>Wed Jun 27 18:47:10 +0800 2012</created_at>
<id>3461603414815109</id>
<text>#节能电视选海尔#激情七月天,奥运会、暑期档连番上阵,宅男腐女们要和电视机频频约会!电视要节能?那是必须滴~~ Hai TV独有的SCM智能护眼技术,可根据智能光感器感应收看环境的光亮度,将电视调整到最适合的观看亮度,从而实现由“被动费电”转变为“主动节能”!这个夏天与Hai TV一起high一夏~</text>
<source>
<a href=""/>
</source>
<favorited>false</favorited>
<truncated>false</truncated>
<geo/>
<in_reply_to_status_id/>
<in_reply_to_user_id/>
<in_reply_to_screen_name/>
<!-- The three *_pic URLs share one file name and differ only in the path segment (thumbnail / bmiddle / large). -->
<thumbnail_pic>http://ww4.sinaimg.cn/thumbnail/84f351afjw1ducwrs7zxlj.jpg</thumbnail_pic>
<bmiddle_pic>http://ww4.sinaimg.cn/bmiddle/84f351afjw1ducwrs7zxlj.jpg</bmiddle_pic>
<original_pic>http://ww4.sinaimg.cn/large/84f351afjw1ducwrs7zxlj.jpg</original_pic>
<!-- In this sample mid carries the same value as id. -->
<mid>3461603414815109</mid>
<!-- Nested user profile; note that it has its own id and created_at, distinct from the status-level ones above. -->
<user>
<id>2230538671</id>
<screen_name>海尔济南</screen_name>
<name>海尔济南</name>
<province>37</province>
<city>1</city>
<location>山东 济南</location>
<description>全球品质 乐享生活 创新家电 带您进入环保乐享新生活<br />
海尔官网http://www.haier.com 海尔商城http://www.ehaier.com</description>
<url/>
<profile_image_url>http://tp4.sinaimg.cn/2230538671/50/5610160791/1</profile_image_url>
<domain/>
<gender>m</gender>
<followers_count>25199</followers_count>
<friends_count>151</friends_count>
<statuses_count>943</statuses_count>
<favourites_count>2</favourites_count>
<created_at>Wed Jul 06 11:57:18 +0800 2011</created_at>
<following>false</following>
<verified>true</verified>
<allow_all_act_msg>false</allow_all_act_msg>
<geo_enabled>true</geo_enabled>
</user>
</status>
<status>
代码:
/// <summary>
/// Parses a Weibo "statuses" XML payload and builds one <see cref="CrawlerResult"/>
/// for every <c>status</c> element found in it.
/// </summary>
/// <param name="xmlTopic">Raw XML text. A null or empty string yields an empty list.</param>
/// <param name="task_ID">Not used by this method.</param>
/// <returns>One result per <c>status</c> element, in document order; never null.</returns>
/// <exception cref="XmlException">Propagated from <c>LoadXml</c> when <paramref name="xmlTopic"/> is not well-formed XML.</exception>
protected override List<CrawlerResult> GetArticleByHtml(string xmlTopic, int task_ID)
{
    List<CrawlerResult> arrayList = new List<CrawlerResult>();
    if (string.IsNullOrEmpty(xmlTopic))
    {
        return arrayList;
    }

    // Detects a "#topic#" tag at the very start of the text or right after a ':'.
    // When the status is such a topic post, the search keyword is prefixed to the title.
    Regex isTopic = new Regex("(:#[^#]*#)|(^#[^#]*#)");

    XmlDocument doc = new XmlDocument();
    doc.LoadXml(xmlTopic);

    foreach (XmlNode item in doc.GetElementsByTagName("status"))
    {
        CrawlerResult topic = new CrawlerResult();
        // Constant for every result, so set once per status instead of once per child node.
        topic.SiteName = "新浪微博";
        topic.FilterType = FilterType.FilterNo;

        string pageid = "";
        string userId = null;
        int count;

        foreach (XmlNode node in item.ChildNodes)
        {
            switch (node.Name)
            {
                case "created_at":
                    topic.CreateTime = ConverDateTime(node.InnerText);
                    break;

                case "id":
                    pageid = node.InnerText;
                    break;

                case "comments_count":
                    // TryParse: an empty or non-numeric element must not abort the whole batch.
                    if (int.TryParse(node.InnerText, out count))
                    {
                        topic.ReplyCount = count;
                    }
                    break;

                case "reposts_count":
                    // NOTE(review): ViewCount is also written from the user's followers_count
                    // below, so whichever element appears later in the document wins.
                    // Confirm which of the two is actually intended.
                    if (int.TryParse(node.InnerText, out count))
                    {
                        topic.ViewCount = count;
                    }
                    break;

                case "user":
                    foreach (XmlNode usernode in node.ChildNodes)
                    {
                        switch (usernode.Name)
                        {
                            case "id":
                                // Only remember the id here; the URL is built after the loop
                                // so that it does not depend on <id> preceding <user>.
                                userId = usernode.InnerText;
                                break;

                            case "screen_name":
                                topic.Author = usernode.InnerText;
                                break;

                            case "followers_count":
                                if (int.TryParse(usernode.InnerText, out count))
                                {
                                    topic.ViewCount = count;
                                }
                                break;
                        }
                    }
                    break;

                case "text":
                    if (isTopic.IsMatch(node.InnerText))
                    {
                        topic.Title = Keyword +":" + node.InnerText;
                    }
                    else
                    {
                        topic.Title = node.InnerText;
                    }
                    break;
            }
        }

        // Build the status URL once every child has been read, so both ids are available
        // regardless of the order in which the elements appeared.
        if (userId != null)
        {
            string sinaurl = "http://api.t.sina.com.cn/" + userId + "/statuses/" + pageid;
            DownWebFile downwebfile = new DownWebFile();
            topic.Url = downwebfile.GetLatURl(sinaurl);
        }

        arrayList.Add(topic);
    }

    return arrayList;
}