今天,有位群友问我如何获取新浪新闻列表的相关问题。我想,用正则表达式从网页中提取显然既复杂又不一定准确,而现在许多大型网站都提供RSS聚合,所以我就跟他说用RSS应该好办一些。
一年前我写过一个RSS阅读器,不过,打开新浪相关的XML文件看了一下,发现RSS 2.0和一年前的不大一样了,但具体怎么处理,几句话也很难讲得清楚,所以,我干脆写了一个类库给他,直接调用。
类库不是很复杂,主要两个功能:
一、通过新浪的根频道XML把所有频道的信息读出来,并使用递归连同子节点也一并读取出来。
二、指定频道URL的XML文件来获取新闻信息。
首先,我们写两个类,一个用于保存新闻信息,另一个用于保存频道信息。
/// <summary>
/// Entity holding the data of a single news record.
/// Every property is a plain read/write auto-property; the class carries no behavior.
/// </summary>
[Serializable]
public class NewsItem
{
/// <summary>
/// Headline of the news item.
/// </summary>
public string Title { get; set; }
/// <summary>
/// Link to the news item.
/// </summary>
public string Link { get; set; }
/// <summary>
/// Author of the news item.
/// </summary>
public string Author { get; set; }
/// <summary>
/// Category the news item belongs to.
/// </summary>
public string Category { get; set; }
/// <summary>
/// Publication time of the news item.
/// </summary>
public DateTime PubDate { get; set; }
/// <summary>
/// Descriptive text of the news item.
/// </summary>
public string Description { get; set; }
/// <summary>
/// Additional remarks attached to the news item.
/// </summary>
public string Comments { get; set; }
}
/// <summary>
/// A news channel entry. Channels form a tree: each instance owns a list of
/// child channels exposed through <see cref="ChildrenOutline"/>.
/// </summary>
[Serializable]
public class OutLine
{
/// <summary>
/// Channel title.
/// </summary>
public string Title { get; set; }
/// <summary>
/// Channel text.
/// </summary>
public string Text { get; set; }
/// <summary>
/// Channel type (RSS).
/// </summary>
public string Type { get; set; }
/// <summary>
/// Address of the channel's XML document.
/// </summary>
public string xmlUrl { get; set; }
/// <summary>
/// Address of the channel's HTML page.
/// </summary>
public string htmlUrl { get; set; }
// Backing list for ChildrenOutline; created together with the instance so it is never null.
private List<OutLine> _olChildren = new List<OutLine>();
/// <summary>
/// Child channels. The property is read-only; callers add children to the returned list.
/// </summary>
public List<OutLine> ChildrenOutline
{
get { return _olChildren; }
}
}
好,接下来编写对应的两个管理类,分别用于获取频道列表和新闻列表。
/// <summary>
/// Reads news items out of an RSS channel document.
/// </summary>
public class NewsManager
{
    /// <summary>
    /// Loads the XML document at <paramref name="xmlUrl"/> and converts every
    /// <c>item</c> element found under the root's <c>channel</c> element into a
    /// <see cref="NewsItem"/>.
    /// </summary>
    /// <param name="xmlUrl">Address of the news channel's XML document.</param>
    /// <returns>
    /// The news items in document order. The list is empty when the document has
    /// no <c>channel</c> element or the channel has no items.
    /// </returns>
    /// <remarks>
    /// Child elements missing from an item leave the matching string property
    /// <c>null</c>. A missing or unparsable <c>pubDate</c> yields
    /// <see cref="DateTime.MinValue"/> instead of aborting the whole load.
    /// Exceptions raised by <c>XElement.Load</c> itself are not caught here and
    /// propagate to the caller.
    /// </remarks>
    public List<NewsItem> GetNewsItemList(string xmlUrl)
    {
        List<NewsItem> news = new List<NewsItem>();
        XElement root = XElement.Load(xmlUrl);

        // A document without <channel> has nothing to offer; report "no news"
        // rather than failing with a NullReferenceException.
        XElement channel = root.Element("channel");
        if (channel == null)
        {
            return news;
        }

        foreach (XElement item in channel.Elements("item"))
        {
            news.Add(new NewsItem
            {
                Title = (string)item.Element("title"),
                Link = (string)item.Element("link"),
                Author = (string)item.Element("author"),
                Category = (string)item.Element("category"),
                PubDate = ParsePubDate(item.Element("pubDate")),
                Comments = (string)item.Element("comments"),
                Description = (string)item.Element("description")
            });
        }

        return news;
    }

    /// <summary>
    /// Converts a <c>pubDate</c> element to a <see cref="DateTime"/> without throwing.
    /// </summary>
    /// <param name="element">The <c>pubDate</c> element, or <c>null</c> when the item has none.</param>
    /// <returns>The parsed date, or <see cref="DateTime.MinValue"/> when absent or unparsable.</returns>
    private static DateTime ParsePubDate(XElement element)
    {
        if (element == null)
        {
            return DateTime.MinValue;
        }

        // Feed dates are machine-readable text, so parse them culture-invariantly
        // regardless of the user's regional settings.
        DateTime parsed;
        bool ok = DateTime.TryParse(
            element.Value,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.RoundtripKind,
            out parsed);

        return ok ? parsed : DateTime.MinValue;
    }
}
/// <summary>
/// Builds the channel tree from a root channel document made of nested
/// <c>outline</c> elements.
/// </summary>
public class OutlineManager
{
    /// <summary>
    /// Loads the document at <paramref name="xmlUrl"/> and returns the
    /// <c>outline</c> elements found under the root's <c>body</c> element,
    /// each with its nested outlines attached recursively as children.
    /// </summary>
    /// <param name="xmlUrl">Address of the root channel document.</param>
    /// <returns>
    /// The top-level channels in document order; an empty list when the document
    /// has no <c>body</c> element. Exceptions raised by <c>XElement.Load</c>
    /// propagate to the caller.
    /// </returns>
    public List<OutLine> GetCannels(string xmlUrl)
    {
        List<OutLine> channels = new List<OutLine>();
        XElement root = XElement.Load(xmlUrl);

        // No <body> means no channels; avoid a NullReferenceException.
        XElement body = root.Element("body");
        if (body == null)
        {
            return channels;
        }

        foreach (XElement node in body.Elements("outline"))
        {
            OutLine channel = CreateOutline(node);
            AddChildElements(node, channel);
            channels.Add(channel);
        }

        return channels;
    }

    /// <summary>
    /// Recursively appends the <c>outline</c> children of <paramref name="xNode"/>
    /// to <paramref name="ol"/>.
    /// </summary>
    /// <param name="xNode">XML element whose direct <c>outline</c> children are read; ignored when <c>null</c>.</param>
    /// <param name="ol">Channel that receives the children.</param>
    private void AddChildElements(XElement xNode, OutLine ol)
    {
        if (xNode == null)
        {
            return;
        }

        foreach (XElement child in xNode.Elements("outline"))
        {
            OutLine outline = CreateOutline(child);
            ol.ChildrenOutline.Add(outline);
            AddChildElements(child, outline);
        }
    }

    /// <summary>
    /// Maps the attributes of one <c>outline</c> element onto a new <see cref="OutLine"/>.
    /// </summary>
    /// <param name="node">The <c>outline</c> element to read.</param>
    /// <returns>A channel whose string properties are never <c>null</c>.</returns>
    private static OutLine CreateOutline(XElement node)
    {
        // Casting an absent attribute to string yields null instead of throwing,
        // so category-only nodes without xmlUrl/htmlUrl become empty strings at
        // every nesting level, not just the first.
        return new OutLine
        {
            Title = (string)node.Attribute("title") ?? "",
            Text = (string)node.Attribute("text") ?? "",
            Type = (string)node.Attribute("type") ?? "",
            xmlUrl = (string)node.Attribute("xmlUrl") ?? "",
            htmlUrl = (string)node.Attribute("htmlUrl") ?? ""
        };
    }
}
OK,简单的类库写好了,程序集名称为SinaRssAPIs_CS,然后,我们建一个程序来测试一下。
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using SinaRssAPIs_CS;
namespace NewsApiTest
{
    /// <summary>
    /// Sample window for the RSS class library: a tree of channels, a grid with
    /// the news of the selected channel, and a browser showing the selected item.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Sets up the window, builds the grid columns by hand and wires the
        /// tree and grid selection events.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            this.WindowState = FormWindowState.Maximized;
            this.Text = "新浪RSS类库示例程序";
            this.treeView1.AfterSelect += new TreeViewEventHandler(treeView1_AfterSelect);

            // Columns are defined explicitly below, so switch off auto-generation.
            this.dataGridView1.AutoGenerateColumns = false;

            // Visible columns, each bound to a NewsItem property by name.
            DataGridViewTextBoxColumn colTitle = new DataGridViewTextBoxColumn();
            colTitle.HeaderText = "新闻标题";
            colTitle.DataPropertyName = "Title";
            this.dataGridView1.Columns.Add(colTitle);

            DataGridViewTextBoxColumn colDesc = new DataGridViewTextBoxColumn();
            colDesc.HeaderText = "描述";
            colDesc.DataPropertyName = "Description";
            colDesc.Width = 280;
            this.dataGridView1.Columns.Add(colDesc);

            DataGridViewTextBoxColumn colDate = new DataGridViewTextBoxColumn();
            colDate.DefaultCellStyle.Format = "yyyy-MM-dd";
            colDate.HeaderText = "发布日期";
            colDate.DataPropertyName = "PubDate";
            this.dataGridView1.Columns.Add(colDate);

            DataGridViewTextBoxColumn colAuthor = new DataGridViewTextBoxColumn();
            colAuthor.HeaderText = "发布者";
            colAuthor.DataPropertyName = "Author";
            this.dataGridView1.Columns.Add(colAuthor);

            // Hidden column that carries the item's link; looked up by the name
            // "link" in dataGridView1_SelectionChanged.
            DataGridViewTextBoxColumn colLink = new DataGridViewTextBoxColumn();
            colLink.DataPropertyName = "Link";
            colLink.Name = "link";
            colLink.Visible = false;
            this.dataGridView1.Columns.Add(colLink);

            this.dataGridView1.SelectionChanged += new EventHandler(dataGridView1_SelectionChanged);
        }

        /// <summary>
        /// Opens the link of the current grid row in the embedded browser.
        /// </summary>
        void dataGridView1_SelectionChanged(object sender, EventArgs e)
        {
            DataGridViewRow row = this.dataGridView1.CurrentRow;
            if (row == null)
            {
                return;
            }

            // The cell value is null when the news item has no link; there is
            // nothing to navigate to in that case.
            object linkValue = row.Cells["link"].Value;
            if (linkValue == null)
            {
                return;
            }

            this.webBrowser1.Navigate(linkValue.ToString());
        }

        /// <summary>
        /// Loads the news of the selected channel into the grid. The channel's
        /// XML address is taken from the tree node's Tag.
        /// </summary>
        void treeView1_AfterSelect(object sender, TreeViewEventArgs e)
        {
            if (e.Node.Tag == null)
            {
                return;
            }

            // Nodes that only group other channels carry an empty address;
            // there is no feed to load for them.
            string xml = e.Node.Tag.ToString();
            if (string.IsNullOrEmpty(xml))
            {
                return;
            }

            NewsManager mg = new NewsManager();
            List<NewsItem> items = mg.GetNewsItemList(xml);
            this.dataGridView1.DataSource = items;
        }

        /// <summary>
        /// Downloads the channel list and fills the tree with it.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            OutlineManager omg = new OutlineManager();
            List<OutLine> cnList = omg.GetCannels(@"http://rss.sina.com.cn/sina_all_opml.xml");

            this.treeView1.BeginUpdate();
            try
            {
                this.treeView1.Nodes.Clear();

                // Root nodes: only the part of the title before the first '-' is shown.
                foreach (OutLine root in cnList)
                {
                    TreeNode tnRoot = new TreeNode();
                    tnRoot.Text = root.Title.Split('-')[0];
                    AddNodes(root, tnRoot);
                    this.treeView1.Nodes.Add(tnRoot);
                }
            }
            finally
            {
                // Always re-enable painting, even if building the tree fails.
                this.treeView1.EndUpdate();
            }
        }

        /// <summary>
        /// Recursively mirrors the child channels of <paramref name="ol"/> as
        /// child nodes of <paramref name="nd"/>, storing each channel's XML
        /// address in the node's Tag.
        /// </summary>
        /// <param name="ol">Channel whose children are added.</param>
        /// <param name="nd">Tree node that receives the child nodes.</param>
        private void AddNodes(OutLine ol, TreeNode nd)
        {
            foreach (OutLine oits in ol.ChildrenOutline)
            {
                TreeNode tn = new TreeNode();
                tn.Text = oits.Title;
                tn.Tag = oits.xmlUrl;
                AddNodes(oits, tn);
                nd.Nodes.Add(tn);
            }
        }
    }
}
大致的运行效果如下:
现在,我说一下技术要点,不多,就一个,对,就是LINQ to XML。