【原创】RSS开发心得小结

几经面试和简历更新,发现自己做了这么久的开发,却很少做总结;一个个项目过去了,知识的积累和沉淀却很少……

借着这次机会,把以前的skill整理一下,浓缩的才是精华。为自己也为其他初学的朋友做个参考。

RSS(全称Really Simple Syndication) 目前广泛用于网上新闻频道,blog和wiki,主要的版本有0.91, 1.0, 2.0。

另外还有由IETF标准化(RFC 4287)的Atom格式,以及用于保存Feed订阅列表的OPML文件。

最常见的Feed格式是Rss1.0,2.0和ATOM,解析时通过不同的命名空间来处理不同的版本,下面是解析的主要代码:

 

代码
 
   
/// <summary>
/// Parses raw feed XML into a <c>Feed</c>, detecting the format (OPML, Atom, RSS 1.0 or
/// RSS 2.0) from the name of the document's root element.
/// </summary>
/// <param name="url">Source address of the feed; stored on the result and handed to the XML loader.</param>
/// <param name="xmlContent">The feed's XML text.</param>
/// <returns>
/// The parsed feed. Parsing is best-effort: an exception raised while reading the document
/// is logged and whatever was populated up to that point is returned.
/// </returns>
public static Feed AnalyseFeedContent(string url, string xmlContent)
{
    Feed feed = new Feed();
    feed.Url = url;
    feed.ChannelInfo = new FeedChannel();

    XmlDocument doc = ReadGlobals.LoadXml(new XmlDocument(), xmlContent, url);

    // Nothing to analyse without a document and a root element.
    if (doc == null || doc.DocumentElement == null)
    {
        return feed;
    }

    // Register the namespaces the XPath queries below rely on.
    // Note that the "rdf" prefix is bound to the RSS 1.0 element namespace.
    XmlNamespaceManager mgr = new XmlNamespaceManager(doc.NameTable);
    mgr.AddNamespace("rdf", "http://purl.org/rss/1.0/");
    mgr.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");
    mgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

    try
    {
        XmlNode nodeRoot = doc.DocumentElement;
        string rootName = nodeRoot.Name;

        // Enclosure module: prefer the URI declared on the root element.
        XmlAttribute encDeclaration = nodeRoot.Attributes["xmlns:enc"];
        mgr.AddNamespace(
            "enc",
            encDeclaration != null ? encDeclaration.Value : "http://crocodile.org/ns/rss/2.0/enclosures");

        // Trackback module: prefer the URI declared on the root element.
        XmlAttribute trackbackDeclaration = nodeRoot.Attributes["xmlns:trackback"];
        mgr.AddNamespace(
            "trackback",
            trackbackDeclaration != null
                ? trackbackDeclaration.Value
                : "http://madskills.com/public/xml/rss/module/trackback/");

        if (string.Equals(rootName, "opml", StringComparison.OrdinalIgnoreCase))
        {
            // OPML: only the type is recorded here.
            feed.Type = FeedType.OPML;
        }
        else if (string.Equals(rootName, "feed", StringComparison.OrdinalIgnoreCase))
        {
            // Atom document.
            feed.Type = FeedType.ATOM_0_3;

            // The namespace declared on the root element wins over the Atom 1.0 default.
            string atomNamespace = "http://www.w3.org/2005/Atom";
            XmlAttribute defaultDeclaration = nodeRoot.Attributes["xmlns"];
            if (defaultDeclaration != null)
            {
                atomNamespace = defaultDeclaration.Value;
            }
            mgr.AddNamespace("atom", atomNamespace);

            feed.ChannelInfo = GetChannel(doc, mgr, "atom");
            feed.Items = GetItems(doc.SelectNodes("//atom:entry", mgr), feed.Type, mgr);
        }
        else if (string.Equals(rootName, "rdf:rdf", StringComparison.OrdinalIgnoreCase))
        {
            // RSS 1.0 document.
            feed.Type = FeedType.RSS_1_0;
            feed.ChannelInfo = GetChannel(doc, mgr, "rdf");
            feed.Items = GetItems(doc.SelectNodes("//rdf:item", mgr), feed.Type, mgr);
        }
        else if (string.Equals(rootName, "rss", StringComparison.OrdinalIgnoreCase))
        {
            // RSS 2.0 document.
            feed.Type = FeedType.RSS_2_0;

            // Query relative to the root: the root name was matched case-insensitively,
            // whereas an absolute "rss/channel" XPath would be case-sensitive.
            XmlNode nodeChannel = nodeRoot.SelectSingleNode("channel");

            // NOTE(review): as before, GetChannelForRss20 is also called when the channel
            // element is missing; its behaviour for null is not visible here.
            feed.ChannelInfo = GetChannelForRss20(nodeChannel);

            if (nodeChannel != null)
            {
                feed.Items = GetItems(nodeChannel.SelectNodes("item", mgr), feed.Type, mgr);
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort parsing: keep whatever was read and record the failure.
        Log.Write(ex);
    }

    return feed;
}

 Feed主要分为两部分,Channel和ItemList部分,分别用如下方法解析:

 

FeedChannel解析
 
   
/// <summary>
/// Reads channel-level metadata (logo, title, link, description, language) from a
/// namespaced feed document.
/// </summary>
/// <param name="xdtDoc">The loaded feed document; when null an empty channel is returned.</param>
/// <param name="mgr">Namespace manager in which <paramref name="prefix"/> is registered.</param>
/// <param name="prefix">Namespace prefix used to qualify element names in the XPath queries.</param>
/// <returns>A new <c>FeedChannel</c> carrying whichever values were found.</returns>
private static FeedChannel GetChannel(XmlDocument xdtDoc, XmlNamespaceManager mgr, string prefix)
{
    FeedChannel channel = new FeedChannel();
    if (xdtDoc == null)
    {
        return channel;
    }

    // Every query looks for the first matching element anywhere in the document.
    string qualified = "//" + prefix + ":";

    XmlNode logoNode = xdtDoc.SelectSingleNode(qualified + "logo", mgr);
    if (logoNode != null)
    {
        channel.Logo.Src = logoNode.InnerText;
    }

    XmlNode titleNode = xdtDoc.SelectSingleNode(qualified + "title", mgr);
    if (titleNode != null)
    {
        channel.Title = titleNode.InnerText;
    }

    // Prefer the href of an explicit "alternate" link; otherwise use the text of the first link element.
    XmlNode linkNode = xdtDoc.SelectSingleNode(qualified + "link[@rel='alternate']/@href", mgr);
    if (linkNode == null)
    {
        linkNode = xdtDoc.SelectSingleNode(qualified + "link", mgr);
    }
    channel.Link = linkNode != null ? linkNode.InnerText : string.Empty;

    // Atom 0.3 calls the description "tagline", Atom 1.0 "subtitle", RSS 1.0 "description".
    XmlNode descriptionNode = xdtDoc.SelectSingleNode(qualified + "tagline", mgr);
    if (descriptionNode == null)
    {
        descriptionNode = xdtDoc.SelectSingleNode(qualified + "subtitle", mgr);
    }
    if (descriptionNode == null)
    {
        descriptionNode = xdtDoc.SelectSingleNode(qualified + "description", mgr);
    }
    if (descriptionNode != null)
    {
        channel.Description = descriptionNode.InnerText;
    }

    XmlNode languageNode = xdtDoc.SelectSingleNode(qualified + "feed/@xml:lang", mgr);
    if (languageNode != null)
    {
        try
        {
            CultureInfo feedCulture = new CultureInfo(languageNode.InnerText);

            // NOTE(review): switching the thread's UI culture while parsing looks unintended;
            // kept so existing callers observe the same side effect.
            Thread.CurrentThread.CurrentUICulture = feedCulture;

            // Resolve the LCID from the feed's own language tag.
            channel.Language = CultureInfo.CreateSpecificCulture(feedCulture.Name).LCID;
        }
        catch (Exception)
        {
            // Unknown or malformed language tag: fall back to 0.
            channel.Language = 0;
        }
    }

    return channel;
}

 

FeedItem解析
 
   
/// <summary>
/// Converts the item/entry nodes of a feed into <c>FeedItem</c> objects.
/// </summary>
/// <param name="xnlItems">The item (RSS) or entry (Atom) nodes; when null an empty list is returned.</param>
/// <param name="type">Feed format, which selects the element names used for parsing.</param>
/// <param name="mgr">Namespace manager with the rdf, atom, content, dc and trackback prefixes the queries use.</param>
/// <returns>
/// One <c>FeedItem</c> per node, in document order. Returns null when
/// <paramref name="xnlItems"/> is non-null but <paramref name="type"/> is not RSS 1.0,
/// RSS 2.0 or Atom (kept for compatibility with existing callers).
/// </returns>
public static List<FeedItem> GetItems(XmlNodeList xnlItems, FeedType type, XmlNamespaceManager mgr)
{
    List<FeedItem> lstItems = new List<FeedItem>();
    if (xnlItems == null)
    {
        return lstItems;
    }

    switch (type)
    {
        case FeedType.RSS_1_0:
            foreach (XmlNode xne in xnlItems)
            {
                lstItems.Add(ParseRssItem(xne, mgr, "rdf:", false));
            }
            break;
        case FeedType.RSS_2_0:
            foreach (XmlNode xne in xnlItems)
            {
                lstItems.Add(ParseRssItem(xne, mgr, string.Empty, true));
            }
            break;
        case FeedType.ATOM_0_3:
            foreach (XmlNode xne in xnlItems)
            {
                lstItems.Add(ParseAtomItem(xne, mgr));
            }
            break;
        default:
            return null;
    }

    return lstItems;
}

/// <summary>
/// Parses one RSS item. RSS 1.0 and 2.0 share the same layout; 1.0 qualifies its core
/// elements with <paramref name="prefix"/>, and only 2.0 carries guid and category.
/// </summary>
private static FeedItem ParseRssItem(XmlNode item, XmlNamespaceManager mgr, string prefix, bool isRss20)
{
    FeedItem fim = new FeedItem();
    fim.Title = ReadNodeText(item, prefix + "title", mgr);
    fim.Link = ReadNodeText(item, prefix + "link", mgr);

    // Full content (content:encoded) wins over the short description.
    string content = ReadNodeText(item, "content:encoded", mgr);
    string description = ReadNodeText(item, prefix + "description", mgr);
    fim.Description = StripCDataMarkers(content == string.Empty ? description : content);

    // Fall back to dc:creator only when the item has no author of its own.
    string authorName = ReadNodeText(item, prefix + "author", mgr);
    if (authorName == string.Empty)
    {
        authorName = ReadNodeText(item, "dc:creator", mgr);
    }
    if (authorName != string.Empty)
    {
        fim.Author = new FeedPerson();
        fim.Author.Name = authorName;
    }

    fim.PubDate = ReadNodeDate(item, prefix + "pubDate", mgr, fim.Link);
    if (fim.PubDate == DateTime.MinValue)
    {
        fim.PubDate = ReadNodeDate(item, "dc:date", mgr, fim.Link);
    }

    if (isRss20)
    {
        fim.Guid = ReadNodeText(item, "guid", mgr);
    }

    AppendSubjects(fim, item, mgr);

    if (isRss20)
    {
        XmlNodeList categories = item.SelectNodes("category", mgr);
        if (categories != null)
        {
            foreach (XmlNode category in categories)
            {
                fim.Category += category.InnerText + ",";
            }
        }
    }

    // Only the first enclosure element is read; Enclosures is left untouched when there is none.
    XmlNode enclosureNode = item.SelectSingleNode("enclosure", mgr);
    if (enclosureNode != null)
    {
        fim.Enclosures = new List<FeedEnclosure>();
        fim.Enclosures.Add(ReadEnclosure(enclosureNode, "url"));
    }

    fim.Description += GetHtmlByByEnclosure(fim.Enclosures);
    fim.TrackbackPing = ReadNodeText(item, "trackback:ping", mgr);
    return fim;
}

/// <summary>
/// Parses one Atom entry, accepting both the Atom 1.0 element names and the older
/// Atom 0.3 ones (modified, issued, created) as fallbacks.
/// </summary>
private static FeedItem ParseAtomItem(XmlNode entry, XmlNamespaceManager mgr)
{
    FeedItem fim = new FeedItem();
    fim.Title = ReadNodeText(entry, "atom:title", mgr);

    // A link without a rel attribute is to be treated as rel="alternate".
    XmlNode linkNode = entry.SelectSingleNode("atom:link[@rel='alternate']", mgr);
    if (linkNode == null)
    {
        linkNode = entry.SelectSingleNode("atom:link[not(@rel)]", mgr);
    }
    fim.Link = linkNode != null ? ReadNodeAttribute(linkNode, "href") : string.Empty;

    fim.Summary = ReadNodeText(entry, "atom:summary", mgr);
    fim.Description = StripCDataMarkers(ReadNodeText(entry, "atom:content", mgr));
    fim.Guid = ReadNodeText(entry, "atom:id", mgr);
    fim.Contributor = ReadNodeText(entry, "atom:contributor", mgr);

    XmlNode authorNode = entry.SelectSingleNode("atom:author", mgr);
    if (authorNode != null)
    {
        fim.Author = new FeedPerson();
        fim.Author.Name = ReadNodeText(authorNode, "atom:name", mgr);
        fim.Author.Url = ReadNodeText(authorNode, "atom:uri", mgr);
        fim.Author.Email = ReadNodeText(authorNode, "atom:email", mgr);
    }

    fim.UpdateDate = ReadNodeDate(entry, "atom:updated", mgr, fim.Link);
    if (fim.UpdateDate == DateTime.MinValue)
    {
        fim.UpdateDate = ReadNodeDate(entry, "atom:modified", mgr, fim.Link);
    }

    fim.PubDate = ReadNodeDate(entry, "atom:published", mgr, fim.Link);
    if (fim.PubDate == DateTime.MinValue)
    {
        fim.PubDate = ReadNodeDate(entry, "atom:issued", mgr, fim.Link);
    }
    if (fim.PubDate == DateTime.MinValue)
    {
        fim.PubDate = ReadNodeDate(entry, "atom:created", mgr, fim.Link);
    }

    AppendSubjects(fim, entry, mgr);

    XmlNodeList categories = entry.SelectNodes("atom:category", mgr);
    if (categories != null)
    {
        foreach (XmlNode category in categories)
        {
            fim.Category += ReadNodeAttribute(category, "term") + ",";
        }
    }

    // Atom expresses enclosures as link elements with rel="enclosure".
    XmlNodeList enclosureLinks = entry.SelectNodes("atom:link[@rel='enclosure']", mgr);
    if (enclosureLinks != null)
    {
        fim.Enclosures = new List<FeedEnclosure>();
        foreach (XmlNode enclosureLink in enclosureLinks)
        {
            FeedEnclosure enc = ReadEnclosure(enclosureLink, "href");
            enc.Title = ReadNodeAttribute(enclosureLink, "title");
            fim.Enclosures.Add(enc);
        }
    }

    fim.Description += GetHtmlByByEnclosure(fim.Enclosures);
    fim.TrackbackPing = ReadNodeText(entry, "trackback:ping", mgr);
    fim.Rights = ReadNodeText(entry, "atom:rights", mgr);
    return fim;
}

/// <summary>Returns the inner text of the first node matching <paramref name="xpath"/>, or an empty string.</summary>
private static string ReadNodeText(XmlNode parent, string xpath, XmlNamespaceManager mgr)
{
    XmlNode node = parent.SelectSingleNode(xpath, mgr);
    return node != null ? node.InnerText : string.Empty;
}

/// <summary>Returns the value of the named attribute, or an empty string when it is absent.</summary>
private static string ReadNodeAttribute(XmlNode node, string attributeName)
{
    XmlAttribute attribute = node.Attributes[attributeName];
    return attribute != null ? attribute.Value : string.Empty;
}

/// <summary>
/// Converts the text of the first node matching <paramref name="xpath"/> via
/// GetDateTimeByUrl, or returns DateTime.MinValue when the node is absent.
/// </summary>
private static DateTime ReadNodeDate(XmlNode parent, string xpath, XmlNamespaceManager mgr, string link)
{
    XmlNode node = parent.SelectSingleNode(xpath, mgr);
    return node != null ? GetDateTimeByUrl(link, node.InnerText) : DateTime.MinValue;
}

/// <summary>Appends every dc:subject value, each followed by a comma, to the item's Subject.</summary>
private static void AppendSubjects(FeedItem fim, XmlNode item, XmlNamespaceManager mgr)
{
    XmlNodeList subjects = item.SelectNodes("dc:subject", mgr);
    if (subjects == null)
    {
        return;
    }

    foreach (XmlNode subject in subjects)
    {
        fim.Subject += subject.InnerText + ",";
    }
}

/// <summary>Builds an enclosure from a node's type, length and URL attributes.</summary>
private static FeedEnclosure ReadEnclosure(XmlNode node, string urlAttributeName)
{
    FeedEnclosure enc = new FeedEnclosure();
    enc.Type = ReadNodeAttribute(node, "type");

    // A missing or non-numeric length leaves Length at its default.
    int length;
    if (int.TryParse(ReadNodeAttribute(node, "length"), out length))
    {
        enc.Length = length;
    }

    enc.Url = ReadNodeAttribute(node, urlAttributeName);
    return enc;
}

/// <summary>Removes literal CDATA delimiters that survived inside the text.</summary>
private static string StripCDataMarkers(string text)
{
    return text.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);
}

 

 

 

转载于:https://www.cnblogs.com/litsword/archive/2010/10/14/1851827.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值