// .Net/C#: 利用反射编写通用的 rss 2.0 的 reader

/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)

遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在!
如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
 你可以为其添加新属性,原则是:
 成员变量 (Field) 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
 如: <dc:language>zh-CHS</dc:language>
 将其映射为:
  private string _dclanguage
  public string DcLanguage
  {
   get
   {
    return this._dclanguage;
   }
  }

2. Class RssItem 用于表示 Rss 的 Item
 添加新属性的原则同 RssHeader!

3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
 根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
 请仔细参阅 class SimpleRssReader 的 Travel 方法!

4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1,代码中对应的命名空间为 Microshaoft.Data)
 表:
 Channels (主表)
 ChannelsDetails (细表)
 字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
 存储过程:
 SP_AddChannel
 SP_AddChannelsDetails
 参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!


 命令行编译:
csc SimpleRssReader.cs /r:C:/WINDOWS/Microsoft.NET/Framework/v1.1.4322/System.Data.OracleClient.dll


全部代码 SimpleRssReader.cs 在此下载
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar

*/
namespace Microshaoft
{
 using System;
 using System.Xml;
 using System.Text;
 using System.Reflection;
 using System.Collections;
 using System.Text.RegularExpressions;

 public class RssHeader
 {
  //feed URL
  public RssHeader(string URL)
  {
   this._URL = URL;
  }

  public string Title
  {
   get
   {
    return this._title;
   }
  }

  public string Description
  {
   get
   {
    return this._description;
   }
  }

  public string Link
  {
   get
   {
    return this._link;
   }
  }

  public string Language
  {
   get
   {
    return this._language;
   }
  }

  public string Generator
  {
   get
   {
    return this._generator;
   }
  }

  public string Ttl
  {
   get
   {
    return this._ttl;
   }
  }

  public string Copyright
  {
   get
   {
    return this._copyright;
   }
  }

  public DateTime PubDate
  {
   get
   {
    return Util.ParseDateTime(this._pubDate);
   }
  }

  public string Category
  {
   get
   {
    return this._category;
   }
  }

  public DateTime LastBuildDate
  {
   get
   {
    return Util.ParseDateTime(this._lastBuildDate);
   }
  }
  public string ManagingEditor
  {
   get
   {
    return this._managingEditor;
   }
  }

  public string URL
  {
   get
   {
    return this._URL;
   }
  }

  public string DcLanguage
  {
   get
   {
    return this._dclanguage;
   }
  }

  //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
  private string _dclanguage; //dc:language
  private string _URL;
  private string _managingEditor;
  private string _lastBuildDate;
  private string _title;
  private string _description;
  private string _link;
  private string _language;
  private string _generator;
  private string _ttl;
  private string _copyright;
  private string _pubDate;
  private string _category;
  

 }
 public class RssItem
 {
  private RssHeader _Header;

  public RssHeader Header
  {
   get
   {
    return this._Header;
   }
  }

  //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
  private string _title;
  private string _link;
  private string _description;
  private string _category;
  private string _author;
  private string _pubDate;
  private string _comments;
  private string _guid;
  private string _slashcomments;
  private string _wfwcomment;
  private string _wfwcommentRss;
  private string _trackbackping;

  public string TrackbackPing
  {
   get
   {
    return this._trackbackping;
   }
  }

  public string WfwCommentRss
  {
   get
   {
    return this._wfwcommentRss;
   }
  }

  public string WfwComment
  {
   get
   {
    return this._wfwcomment;
   }
  }
  

  public string SlashComments
  {
   get
   {
    return this._slashcomments;
   }
  }
  public string Title
  {
   get
   {
    return this._title;
   }
  }

  public string Link
  {
   get
   {
    return this._link;
   }
  }

  public string Description
  {
   get
   {
    return this._description;
   }
  }

  public string Category
  {
   get
   {
    return this._category;
   }
  }

  public string Author
  {
   get
   {
    return this._author;
   }
  }

  public DateTime PubDate
  {
   get
   {
    return Util.ParseDateTime(this._pubDate);
   }
  }

  public string Comments
  {
   get
   {
    return this._comments;
   }
  }

  public string Guid
  {
   get
   {
    return this._guid;
   }
  }
 }
 public class SimpleRssReader
 {
  //RssHeader header 解析处理完毕事件
  public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);
  public event RssHeaderReceiveEventHandler RssHeaderReceive;

  //某一个 RssItem 解析处理完毕事件
  public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);
  public event RssItemReceiveEventHandler RssItemReceive;

  private Type _TRS; //typeof(RssHeader)
  private Type _tri; //typeof(RssItem)

  private ArrayList _RssItemsAL;

  private RssHeader _rs;
  public RssHeader RssHeader
  {
   get
   {
    return this._rs;
   }
  }

  //用于存储所有的 RssItem
  private RssItem[] _RssItems;

  public RssItem[] RssItems
  {
   get
   {
    return this._RssItems;
   }
  }

  public void Rss(string URL)
  {
   XmlDocument xd = new XmlDocument();
   //如果效率不高可采用 WebRequest 替代
   xd.Load(URL);
   XmlNodeList xnl = xd.SelectNodes("/rss/channel");

   this._rs = new RssHeader(URL);

   this._TRS = typeof(RssHeader);
   this._tri = typeof(RssItem);

   this._RssItemsAL = new ArrayList();

   foreach (XmlNode xn in xnl)
   {
    //递归遍历
    this.Travel(xn, 0);
   }

   if (this._RssItemsAL.Count > 0)
   {
    this._RssItems = new RssItem[this._RssItemsAL.Count];
    int i = 0;
    foreach (object o in this._RssItemsAL)
    {
     this._RssItems[i++] = (RssItem) o;
    }
   }
  }

  /// <Header>
  /// 递归遍历
  /// </Header>
  /// <param name="xn">节点</param>
  /// <param name="i">项目数</param>
  private void Travel(XmlNode xn, int i)
  {
   if (xn.HasChildNodes)
   {
    foreach (XmlNode x in xn.ChildNodes)
    {
     if (x.ParentNode != null)
     {
      if (x.ParentNode.Name == "channel")
      {
       if (x.Name == "item")
       {
        i ++;
        if (i >= 1)
        {
         XmlNode node = null;
         bool b = false; //是否是 Rss Item
         RssItem ri = null;
         if (i == 1) //Header
         {
          node = xn;
          b = false;
         }
         else if (i > 1) //Item
         {
          node = x;
          b = true;
          ri = new RssItem();
         }

         foreach (XmlNode n in node.ChildNodes)
         {
          if (n.Name != "item")
          {
           if (!b) //Rss Header Header
           {
            //根据 XML 实际存在的属性,利用反射为 RssHeader 实例的私有成员赋值
            FieldInfo fi = this._TRS.GetField("_" + n.Name.Replace(":","") ,BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
            if (fi != null)
            {
             fi.SetValue(this._rs,n.InnerText);
            }
           }
           else //Rss Item
           {
            //根据 XML 实际存在的属性,利用反射为 RssItem 实例的私有成员赋值
            FieldInfo fi = this._tri.GetField("_" + n.Name.Replace(":",""),BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
            if (fi != null)
            {
             fi.SetValue(ri,n.InnerText);
            }
           }

          }
         }
         if (!b)
         {
          //触发 RssHeaderReceive 事件
          if (this.RssHeaderReceive != null)
          {
           this.RssHeaderReceive(this,this._rs);
          }
         }
         else
         {
          //制定 RssItem 实例的 Header/Header
          FieldInfo fi = this._tri.GetField("_Header",BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
          if (fi != null)
          {
           fi.SetValue(ri,this._rs);
          }

          //触发 RssItemReceive 事件
          if (this.RssItemReceive != null)
          {
           this.RssItemReceive(this,ri);
          }
          this._RssItemsAL.Add(ri);
         }
        }
       }
      }
     }
     if (!x.HasChildNodes)
     {
      this.Travel(x, i);
     }
    }
   }
  }
 }

 public class Util
 {
  public static DateTime ParseDateTime(string s)
  {
   DateTime dt;
   if (s == null || s.ToString().Length <= 0)
   {
    dt = DateTime.Now;
   }
   else
   {
    try
    {
     dt = DateTime.Parse(s);
    }
    catch
    {
     dt = DateTime.Now;
    }
   }
   return dt;
  }
  /// <Header>
  /// 去除 HTML tag
  /// </Header>
  /// <param name="HTML">源</param>
  /// <returns>结果</returns>
  public static string StripHTML(string HTML) //google "StripHTML" 得到
  {
   string[] Regexs =
        {
         @"<script[^>]*?>.*?</script>",
         @"<(///s*)?!?((/w+:)?/w+)(/w+(/s*=?/s*(([""'])(//[""'tbnr]|[^/7])*?/7|/w+)|.{0})|/s)*?(///s*)?>",
         @"([/r/n])[/s]+",
         @"&(quot|#34);",
         @"&(amp|#38);",
         @"&(lt|#60);",
         @"&(gt|#62);",
         @"&(nbsp|#160);",
         @"&(iexcl|#161);",
         @"&(cent|#162);",
         @"&(pound|#163);",
         @"&(copy|#169);",
         @"&#(/d+);",
         @"-->",
         @"<!--.*/n"
        };

   string[] Replaces =
        {
         "",
         "",
         "",
         "/"",
         "&",
         "<",
         ">",
         " ",
         "/xa1", //chr(161),
         "/xa2", //chr(162),
         "/xa3", //chr(163),
         "/xa9", //chr(169),
         "",
         "/r/n",
         ""
        };

   string s = HTML;
   for (int i = 0; i < Regexs.Length; i++)
   {
    s = new Regex(Regexs[i], RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(s, Replaces[i]);
   }
   s.Replace("<", "");
   s.Replace(">", "");
   s.Replace("/r/n", "");
   return s;
  }
 }
}

//测试程序
namespace Test
{
 using System;
 using System.Data;
 using System.Reflection;
 using System.Data.SqlClient;

 using Microshaoft;
 using Microshaoft.Data;

 class ConsoleApplication
 {
  private SqlConnection _Connection;
  public string _Channel;

  public SqlConnection Connection
  {
   set
   {
    this._Connection = value;
   }
   get
   {
    return this._Connection;
   }
  }

  static void Main()
  {
   
   string s = "http://www.ccw.com.cn/rss/news2/1.xml";
   s = "http://dzh.mop.com/topic/rss.jsp?type=28";
   s = "http://www.ccw.com.cn/rss/news2/15.xml";
   s = "http://www.cnblogs.com/rss.aspx?id=-1";
   s = "http://localhost/rss.xml";
   //s = "http://weblog.siliconvalley.com/column/dangillmor/index.xml";
   //s= "http://www.skyone.com.cn/sub/rss/list_jjsc.xml";

   ConsoleApplication a = new ConsoleApplication();

   a.Connection = new SqlConnection("server=SERVER//PSQLKE;user id=sa;password=;database=rss");
   a.Connection.Open();

   SimpleRssReader srr = new SimpleRssReader();

   srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
   srr.RssItemReceive +=new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

   System.Console.WriteLine("waiting ....");
   srr.Rss(s); //以后改成多线程或异步

   System.Console.WriteLine("print all rss Header and items ....");
   System.Console.ReadLine();
   System.Console.WriteLine("Header: "+ srr.RssHeader.Title);
   foreach (RssItem ri in srr.RssItems)
   {
    System.Console.WriteLine("item: " + ri.Title);
   }
   System.Console.ReadLine();

  }

  private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
  {
   System.Console.WriteLine("Header:" + Header.Link);
   System.Console.WriteLine("Header:" + Header.Title);

   this.SaveToDataBase("SP_AddChannel",typeof(RssHeader),Header);

  }

  private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
  {
   System.Console.WriteLine("Item: " + Item.Title);
   System.Console.WriteLine("Item: " + Item.Link);
   System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description));

   this.SaveToDataBase("SP_AddChannelsDetails",typeof(RssItem),Item);

  }
  private void SaveToDataBase(string sp, Type t,object instance)
  {
   //获取 sp 所有参数
   SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
   System.Collections.Hashtable ht = new System.Collections.Hashtable();
   
   for (int i = 0; i < spa.Length; i++)
   {
    //保存 参数名称与其位置(次序) 的关系
    ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);

    //相当于为存储过程的所有参数赋初值
    spa[i].Value = null;
   }

   //得到所有的属性
   PropertyInfo[] pi = t.GetProperties();
   foreach (PropertyInfo x in pi)
   {
    if (ht.ContainsKey( x.Name.ToLower()))
    {
     //根据参数(属性)名称得到参数的次序!
     int i = (int) ht[x.Name.ToLower()];
     if (spa[i].Direction == System.Data.ParameterDirection.Input || spa[i].Direction == System.Data.ParameterDirection.InputOutput)
     {
      object o;
      if (x.PropertyType.Name == "String")
      {
       o = x.GetValue(instance,null);
       if (o != null)
       {
        string s = Util.StripHTML((string) o);
        o = s;
       }
      }
      else
      {
       o = x.GetValue(instance,null);
      }
      
      spa[i].Value = o;
     }
    }
    
   }

   if (t == typeof(RssItem))
   {
    spa[0].Value = ((RssItem) instance).Header.URL;
   }

   SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);
   if (spa[spa.Length - 1].Value != System.DBNull.Value)
   {
    System.Console.WriteLine("Save to ID: {0} successful!", spa[spa.Length - 1].Value);
   }
   else
   {
    System.Console.WriteLine("save failed! may be duplicate!");
   }
  }
 }
}

//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

-- Master table: one row per feed, written by SP_AddChannel.
-- No key or unique constraint is declared here; duplicate URLs are prevented only by
-- the NOT EXISTS check inside SP_AddChannel.
-- Column names follow the RSS element names with ":" removed (dc:language -> dclanguage).
CREATE TABLE [dbo].[Channels] (
 [ID] [int] IDENTITY (1, 1) NOT NULL ,
 [URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
 [Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
 [link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
 [language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
 [generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [pubDate] [datetime] NULL ,
 [category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

-- Detail table: one row per feed item, written by SP_AddChannelsDetails.
-- ChannelID receives Channels.ID, but no foreign key is declared.
-- [author] exists here but SP_AddChannelsDetails does not fill it.
CREATE TABLE [dbo].[ChannelsDetails] (
 [ID] [int] IDENTITY (1, 1) NOT NULL ,
 [ChannelID] [int] NULL ,
 [title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [pubDate] [datetime] NULL ,
 [comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO


-- SP_AddChannel: inserts a feed into Channels unless a row with the same URL exists.
-- Parameter names mirror the RssHeader property names, because the C# caller fills them
-- by case-insensitive name match.
-- @Image, @Docs and @ManagingEditor are accepted but not stored.
-- Output: the last parameter (named just "@") receives SCOPE_IDENTITY() after the insert.
-- The C# caller treats a NULL output as "save failed, may be duplicate".
CREATE   proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
-- Initial value only; it is overwritten by the final SELECT below.
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
-- Skip the insert when this URL is already registered.
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

-- SP_AddChannelsDetails: inserts one feed item into ChannelsDetails for the channel
-- whose URL equals @URL.
-- The item key is @guid, or @link when @guid is NULL. The duplicate check looks at
-- that key across the whole table, not per channel.
-- No row is inserted when the key already exists or no Channels row has the given URL.
-- Output: the last parameter (named just "@") receives SCOPE_IDENTITY() after the insert.
-- The C# caller treats a NULL output as "save failed, may be duplicate".
CREATE     proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
-- Initial value only; it is overwritten by the final SELECT below.
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
-- "id" is Channels.ID of the row selected by URL below.
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/

相关推荐
SystemVerilog听课学习笔记,包括讲义截取、知识点记录、注意事项等细节标注。 目录如下: 第一章 SV环境构建常识 1 1.1 数据类型 1 四、二值逻辑 4 定宽数组 9 foreach 13 动态数组 16 队列 19 关联数组 21 枚举类型 23 字符串 25 1.2 过程块和方法 27 initial和always 30 function逻辑电路 33 task时序电路 35 动态 静态变量 39 1.3 设计例化和连接 45 第二章 验证方法 393 动态仿真 395 静态检查 397 虚拟模型 403 硬件加速 405 效能验证 408 性能验证 410 第三章 SV组件实现 99 3.1 接口 100 什么是interface 101 接口优势 108 3.2 采样和数据驱动 112 竞争问题 113 接口中时序块clocking 123 利于clocking驱动 133 3.3 测试开始和结束 136 仿真开始 139 program隐式结束 143 program显式结束 145 软件域program 147 3.4 调试方法 150 第四章 验证计划 166 4.1 计划概述 166 4.2 计划内容 173 4.3 计划实现 185 4.4 计划进程评估 194 第五章 验证管理 277 6.1 验证周期检查 277 6.2 管理三要素 291 6.3 验证收敛 303 6.4 问题追踪 314 6.5 团队建设 321 6.6 验证专业化 330 第六章 验证平台结构 48 2.1 测试平台 49 2.2 硬件设计描述 55 MCDF接口描述 58 MCDF接口时序 62 MCDF寄存器描述 65 2.3 激励发生器 67 channel initiator 72 register initiator 73 2.4 监测器 74 2.5 比较器 81 2.6 验证结构 95 第七章 激励发生封装:类 209 5.1 概述 209 5.2 类成员 233 5.3 类继承 245 三种类型权限 protected/local/public 247 this super 253 成员覆盖 257 5.4 句柄使用 263 5.5 包使用 269 第八章 激励发生随机化 340 7.1 随机约束和分布 340 权重分布 353 条件约束 355 7.2 约束块控制 358 7.3 随机函数 366 7.4 数组约束 373 7.5 随机控制 388 第九章 线程与通信 432 9.1 线程使用 432 9.2 线程控制 441 三个fork...join 443 等待衍生线程 451 停止线程disable 451 9.3 线程通信 458 第十章 进程评估:覆盖率 495 10.1 覆盖率类型 495 10.2 功能覆盖策略 510 10.3 覆盖组 516 10.4 数据采样 524 10.5 覆盖选项 544 10.6 数据分析 550 第十一章 SV语言核心进阶 552 11.1 类型转换 552 11.2 虚方法 564 11.3 对象拷贝 575 11.4 回调函数 584 11.5 参数化类 590 第十二章 UVM简介 392 8.2 UVM简介 414 8.3 UVM组件 420 8.4 UVM环境 425
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页