网络神采关键词过滤 .NET 插件

此处代码仅供参考,完整代码请下载附件阅读。

不说废话,直接贴代码:

插件接口实现:

using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Text;
using System.Windows.Forms;
using System.IO;
using Bget.Plugin;

namespace HX_Plug
{
    /// <summary>
    /// Sample implementation of the <c>IBget</c> plugin interface. Most hooks only write a
    /// log line and return a placeholder value; the real work happens in
    /// <see cref="ProcessResultRow"/>, which passes column 1 of each extracted row through
    /// <see cref="KeywordFilter"/>.
    /// </summary>
    public class Plug : IBget
    {
        // Keyword filter reused for every row. Building a KeywordFilter queries the
        // database, so it is created lazily once and dropped again in Create/Dispose
        // so that the next run reloads the rules.
        private KeywordFilter keywordFilter;

        /// <summary>
        /// Create-plugin hook. Discards any cached keyword filter and logs a message.
        /// </summary>
        /// <remarks>
        /// <c>Action</c> is fully qualified because both <c>System</c> and
        /// <c>Bget.Plugin</c> are imported and each declares a type named <c>Action</c>.
        /// </remarks>
        public void Create(string taskPath, string pluginPath, BgetInformation bgetInfo, Bget.Plugin.Action action, bool firstCall)
        {
            this.keywordFilter = null;
            this.WriteLog("创建插件...");
        }

        /// <summary>
        /// Destroy-plugin hook. Discards the cached keyword filter and logs a message.
        /// </summary>
        public void Dispose(Bget.Plugin.Action action)
        {
            this.keywordFilter = null;
            this.WriteLog("销毁插件...");
        }

        /// <summary>
        /// Content-file download hook. This sample only logs a message.
        /// </summary>
        public void DownloadContentFile(string url, string path, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载内容文件...");
        }

        /// <summary>
        /// Standalone-file download hook. Logs a message and returns
        /// <paramref name="fileNamePrefix"/> followed by the file name part of
        /// <paramref name="path"/>.
        /// </summary>
        public string DownloadSingleFile(string url, string path, string fileNamePrefix, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载独立文件...");

            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// Result-extraction hook. Logs a message and returns an empty string.
        /// </summary>
        public string ExtractResult(string extractionRule, string dataColumn, string htmlText, string url)
        {
            this.WriteLog("提取结果...");
            return "";
        }

        /// <summary>
        /// Result-filtering hook. Logs a message and always returns <c>true</c>.
        /// </summary>
        public bool Filter(string result, string extractionRule, string dataColumn, System.Data.DataRow extractingResultRow)
        {
            this.WriteLog("正在进行采集结果筛选...");
            return true;
        }

        /// <summary>
        /// Required-options hook. Logs a message and returns <c>RequiredOptions.None</c>.
        /// </summary>
        public RequiredOptions GetRequiredOptions()
        {
            this.WriteLog("所需选项...");

            return RequiredOptions.None;
        }

        /// <summary>
        /// Returns a new instance of the plugin's settings form (<c>hx_Plug</c>).
        /// </summary>
        public Form GetSettingForm(string taskPath, string pluginPath, Bget.Plugin.BgetInformation bgetInfo)
        {
            return new hx_Plug();
        }

        /// <summary>
        /// Proxy hook. Logs a message and returns <c>null</c>.
        /// </summary>
        public BgetWebProxy GetWebProxy(string requestingUrl, int retryTimes)
        {
            this.WriteLog("获取代理...");
            return null;
        }

        /// <summary>
        /// Starting-URL hook. Logs a message and returns a fixed sample URL;
        /// <paramref name="position"/> is left untouched.
        /// </summary>
        public string LoadStartingUrl(string template, ref int position, string cookie)
        {
            this.WriteLog("从数据库载入起始地址...");

            return "http://www.sensite.cn";
        }

        /// <summary>
        /// Login hook. Logs a message and returns an empty string.
        /// </summary>
        public string Login(string url)
        {
            this.WriteLog("正在登录...");

            return "";
        }

        /// <summary>
        /// Next-layer URL selection hook. Always returns <c>null</c>.
        /// </summary>
        public StringCollection PickNextLayerUrls(string htmlText, string layer, string url, string cookie)
        {
            return null;
        }

        /// <summary>
        /// Next-page URL selection hook. Always returns an empty string.
        /// </summary>
        public string PickNextPageUrl(string htmlText, string layer, string url, string cookie)
        {
            return "";
        }

        /// <summary>
        /// Hook for processing a downloaded content file. This sample only logs a message.
        /// </summary>
        public void ProcessContentFile(string path, bool skipped)
        {
            this.WriteLog("正在处理下载后的内容文件...");
        }

        /// <summary>
        /// Result-row hook: logs column 0 of the row and replaces column 1 with the output
        /// of <see cref="KeywordFilter.On_Filter"/>.
        /// </summary>
        /// <param name="extractedResultRow">Row to filter in place; needs at least two columns.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResultRow(System.Data.DataRow extractedResultRow)
        {
            this.WriteLog("《红星关键字过滤插件 V1.0》");

            // Leave rows that cannot be filtered untouched instead of throwing.
            if (extractedResultRow == null || extractedResultRow.Table.Columns.Count < 2)
            {
                this.WriteLog("结果行为空或少于两列,跳过过滤");
                return true;
            }

            this.WriteLog(string.Format("过滤:{0}", extractedResultRow[0].ToString()));

            // Create the filter once and reuse it; constructing it reads the rule table
            // from the database, which is too expensive to repeat for every row.
            KeywordFilter filter = this.keywordFilter;
            if (filter == null)
            {
                filter = new KeywordFilter();
                this.keywordFilter = filter;
            }

            extractedResultRow[1] = filter.On_Filter(extractedResultRow[1].ToString());

            return true;
        }

        /// <summary>
        /// Hook for processing a downloaded standalone file. Logs a message and returns
        /// <paramref name="fileNamePrefix"/> followed by the file name part of
        /// <paramref name="path"/>.
        /// </summary>
        public string ProcessSingleFile(string path, string fileNamePrefix, bool skipped)
        {
            this.WriteLog("正在处理下载后的独立文件...");

            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// URL request hook. Logs the URL and returns a fixed placeholder HTML document.
        /// </summary>
        public string Visit(string url, byte[] postData, string layer, string cookie, string referer)
        {
            this.WriteLog("正在请求URL: " + url);

            return "<html>test</html>";
        }

        /// <summary>
        /// Raised for every log message written by this plugin.
        /// </summary>
        public event LogEventHanlder Log;

        /// <summary>
        /// Raises <see cref="Log"/> with the given message, if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message)
        {
            // Copy the delegate first so an unsubscribe between the null check and the
            // invocation cannot cause a NullReferenceException.
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message));
            }
        }

        /// <summary>
        /// Raises <see cref="Log"/> with the given message and indent, if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message, int indent)
        {
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message, indent));
            }
        }
    }
}

具体实现功能代码:

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Article content filter. Loads keyword replacement rules from the <c>Filter</c>
    /// table when constructed. <see cref="On_Filter"/> then replaces keywords, trims the
    /// article to a maximum length, swaps some sentences and wraps the result in
    /// <c>&lt;h3&gt;</c>/<c>&lt;p&gt;</c> markup.
    /// </summary>
    public class KeywordFilter
    {
        /// <summary>Articles longer than this are truncated.</summary>
        private const int MaxContentLength = 2000;

        /// <summary>Articles shorter than this (after truncation check) are discarded.</summary>
        private const int MinContentLength = 300;

        /// <summary>Number of characters taken from a sentence to form a heading.</summary>
        private const int HeadingLength = 8;

        /// <summary>A new heading and paragraph are started every this many sentences.</summary>
        private const int SentencesPerParagraph = 5;

        /// <summary>One random swap is performed per this many sentences.</summary>
        private const int SentencesPerSwap = 20;

        /// <summary>Text with this many pieces or fewer is returned without restructuring.</summary>
        private const int MinSentenceCount = 5;

        /// <summary>Character used to split the article into sentences.</summary>
        private const char SentenceSeparator = '。';

        // Characters stripped from a sentence before it is used as a heading. The
        // original listed the ASCII comma twice; the full-width comma is presumably what
        // the second entry was meant to be, so both are removed here.
        private static readonly string[] HeadingNoise = new string[] { ",", "，", "\"", "“", "”", " " };

        // One shared generator: a fresh time-seeded Random per call can repeat the same
        // sequence when calls happen close together. Random is not thread-safe, hence the lock.
        private static readonly Random SharedRandom = new Random();
        private static readonly object RandomLock = new object();

        private readonly List<FilterStruct> _filter = new List<FilterStruct>();

        /// <summary>
        /// Loads the replacement rules (<c>oldValue</c>, <c>newValue</c>) from the
        /// <c>Filter</c> table. When the query fails the rule list stays empty.
        /// </summary>
        public KeywordFilter()
        {
            DataTable dt;
            using (DBase db = new DBase())
            {
                dt = db.GetDataTable("select oldValue,newValue from Filter");
            }

            if (dt == null)
            {
                return;
            }

            foreach (DataRow row in dt.Rows)
            {
                string oldValue = row[0].ToString();
                if (oldValue.Length == 0)
                {
                    // string.Replace throws ArgumentException for an empty search
                    // string, so a blank rule would break every later call.
                    continue;
                }

                FilterStruct fil = new FilterStruct();
                fil.OldValue = oldValue;
                fil.NewValue = row[1].ToString();
                _filter.Add(fil);
            }
        }

        /// <summary>
        /// Runs the full filter pipeline on an article.
        /// </summary>
        /// <param name="Content">Article text; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The filtered text followed by the filter signature, or an empty string when
        /// the article is empty or shorter than the minimum length.
        /// </returns>
        public string On_Filter(string Content)
        {
            if (string.IsNullOrEmpty(Content))
            {
                return string.Empty;
            }

            Content = ReplaceKeyword(Content);                  // keyword replacement
            Content = SubContent(Content, MaxContentLength);    // cut to the maximum length
            Content = SpltParagraph(Content);                   // swap sentences, add markup

            if (Content != string.Empty)
            {
                Content += "《红星关键字过滤系统V1.0》";
            }
            return Content;
        }

        /// <summary>
        /// Applies every loaded replacement rule to the text, in load order.
        /// </summary>
        /// <param name="Content">Text to process.</param>
        /// <returns>The text with all keywords replaced.</returns>
        private string ReplaceKeyword(string Content)
        {
            foreach (FilterStruct rule in _filter)
            {
                Content = Content.Replace(rule.OldValue, rule.NewValue);
            }

            // The original also appended the number of rules to the text, which leaked
            // a stray number into every article; that has been removed.
            return Content;
        }

        /// <summary>
        /// Splits the text at '。', randomly swaps some sentences and rebuilds the text
        /// with a heading and a paragraph for every group of sentences.
        /// </summary>
        /// <param name="Content">Original text.</param>
        /// <returns>
        /// The restructured text, or <paramref name="Content"/> unchanged when it splits
        /// into too few pieces.
        /// </returns>
        private string SpltParagraph(string Content)
        {
            string[] sentences = Content.Split(SentenceSeparator);
            if (sentences.Length <= MinSentenceCount)
            {
                return Content;
            }

            // A text ending in '。' yields a trailing empty piece. Drop it before the
            // shuffle so it cannot move into the middle, and restore the final '。' later.
            bool endsWithSeparator = Content[Content.Length - 1] == SentenceSeparator;
            if (endsWithSeparator)
            {
                Array.Resize(ref sentences, sentences.Length - 1);
            }

            // Randomly swap a portion of the sentences.
            int swapCount = sentences.Length / SentencesPerSwap;
            lock (RandomLock)
            {
                for (int i = 0; i < swapCount; i++)
                {
                    int first = SharedRandom.Next(sentences.Length);
                    int second = SharedRandom.Next(sentences.Length);
                    sentences = RandomParagraph(first, second, sentences);
                }
            }

            // Reassemble the article.
            StringBuilder html = new StringBuilder(Content.Length + 64);
            for (int i = 0; i < sentences.Length; i++)
            {
                if (i % SentencesPerParagraph == 0)
                {
                    if (i > 0)
                    {
                        html.Append("</p>");
                    }
                    html.Append(BuildHeading(sentences[i]));
                    html.Append("<p>");
                }

                html.Append(sentences[i]);

                // Put back the separator that Split removed.
                if (i < sentences.Length - 1 || endsWithSeparator)
                {
                    html.Append(SentenceSeparator);
                }
            }

            html.Append("</p>");    // close the last paragraph
            return html.ToString();
        }

        /// <summary>
        /// Builds an <c>&lt;h3&gt;</c> heading from the first characters of a sentence.
        /// </summary>
        /// <param name="sentence">Sentence the heading is derived from.</param>
        /// <returns>
        /// The heading markup, or an empty string when the sentence is too short to
        /// supply a heading.
        /// </returns>
        private static string BuildHeading(string sentence)
        {
            if (sentence.Length <= HeadingLength)
            {
                return string.Empty;
            }

            string text = sentence;
            foreach (string noise in HeadingNoise)
            {
                text = text.Replace(noise, string.Empty);
            }

            // Re-check after stripping: the cleaned text can be shorter than the heading
            // length, in which case Substring would throw.
            if (text.Length < HeadingLength)
            {
                return string.Empty;
            }

            return "<h3>" + text.Substring(0, HeadingLength) + "</h3>";
        }

        /// <summary>
        /// Swaps two entries of the sentence array in place.
        /// </summary>
        /// <param name="start">Index of the first entry.</param>
        /// <param name="end">Index of the second entry.</param>
        /// <param name="Paragraph">Sentence array.</param>
        /// <returns>
        /// The same array instance; it is unchanged when the indices are equal or out of range.
        /// </returns>
        private string[] RandomParagraph(int start, int end, string[] Paragraph)
        {
            bool inRange = start >= 0 && end >= 0
                && start < Paragraph.Length && end < Paragraph.Length;

            if (inRange && start != end)
            {
                string swap = Paragraph[start];
                Paragraph[start] = Paragraph[end];
                Paragraph[end] = swap;
            }

            return Paragraph;
        }

        /// <summary>
        /// Cuts the article to at most <paramref name="length"/> characters and discards
        /// articles that are too short.
        /// </summary>
        /// <param name="Content">Article text.</param>
        /// <param name="length">Maximum number of characters to keep.</param>
        /// <returns>
        /// The truncated text when it exceeds <paramref name="length"/>; an empty string
        /// when it is shorter than the minimum length; otherwise the text unchanged.
        /// </returns>
        private string SubContent(string Content, int length)
        {
            if (Content.Length > length)
            {
                // Do not cut a surrogate pair in half.
                if (length > 0 && char.IsHighSurrogate(Content[length - 1]))
                {
                    length--;
                }
                return Content.Substring(0, length);
            }

            if (Content.Length < MinContentLength)
            {
                return string.Empty;
            }

            return Content;
        }
    }

    /// <summary>
    /// One keyword replacement rule: a search text and the text that replaces it.
    /// </summary>
    public struct FilterStruct
    {
        /// <summary>
        /// Text to be replaced.
        /// </summary>
        public string OldValue;

        /// <summary>
        /// Text inserted in place of <see cref="OldValue"/>.
        /// </summary>
        public string NewValue;
    }
}

数据库底层连接类:(我把这个类写成了个通用的DLL,N久都没换过了)

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Basic data-access helper for an Access database via OLE DB. The connection is
    /// opened in the constructor and closed by <see cref="Dispose()"/>.
    /// </summary>
    /// <remarks>
    /// Usage: 1. construct the object; 2. check <see cref="Is_OpenState"/>; 3. run
    /// queries; 4. dispose it, preferably with a <c>using</c> statement.
    /// </remarks>
    public class DBase : IDisposable
    {
        /// <summary>
        /// Connection string used by the parameterless constructor. The data source is a
        /// relative path, so it is resolved by the provider at run time.
        /// </summary>
        private const string DefaultConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data source=User.mdb;";

        /// <summary>
        /// Whether opening the database succeeded: <c>true</c> on success, <c>false</c> on failure.
        /// </summary>
        public bool Is_OpenState = false;

        /// <summary>
        /// Access database connection string.
        /// </summary>
        private string strOleConn;

        /// <summary>
        /// Access database connection object; <c>null</c> once this instance is disposed.
        /// </summary>
        private OleDbConnection oleConn;

        /// <summary>
        /// Set once <see cref="Dispose(bool)"/> has run, so repeated calls do nothing.
        /// </summary>
        private bool disposed;

        /// <summary>
        /// Creates the helper with the default connection string and tries to open the
        /// connection; the outcome is stored in <see cref="Is_OpenState"/>.
        /// </summary>
        public DBase()
            : this(DefaultConnectionString)
        {
        }

        /// <summary>
        /// Creates the helper with an explicit connection string and tries to open the
        /// connection; the outcome is stored in <see cref="Is_OpenState"/>.
        /// </summary>
        /// <param name="connectionString">OLE DB connection string.</param>
        public DBase(string connectionString)
        {
            strOleConn = connectionString;
            oleConn = new OleDbConnection(strOleConn);
            Is_OpenState = Open();
        }

        /// <summary>
        /// Opens the connection if it is currently closed.
        /// </summary>
        /// <returns><c>true</c> when no exception occurred; otherwise <c>false</c>.</returns>
        private bool Open()
        {
            try
            {
                if (oleConn.State == ConnectionState.Closed)
                {
                    oleConn.Open();
                }
                return true;
            }
            catch (Exception)
            {
                // Deliberately best-effort: failure is reported through the return
                // value, which callers read via Is_OpenState.
                return false;
            }
        }

        /// <summary>
        /// Closes the connection if it is currently open.
        /// </summary>
        /// <returns><c>true</c> when no exception occurred; otherwise <c>false</c>.</returns>
        private bool Close()
        {
            try
            {
                if (oleConn != null && oleConn.State == ConnectionState.Open)
                {
                    oleConn.Close();
                }
                return true;
            }
            catch (Exception)
            {
                return false;
            }
        }

        /// <summary>
        /// Closes and disposes the connection. Safe to call more than once.
        /// </summary>
        /// <remarks>
        /// This is the only <c>IDisposable.Dispose</c> implementation. The original class
        /// also declared an empty explicit <c>IDisposable.Dispose()</c>; because
        /// <c>using</c> calls the interface method, a <c>using</c> block never closed the
        /// connection.
        /// </remarks>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases the connection.
        /// </summary>
        /// <param name="disposing">
        /// <c>true</c> when called from <see cref="Dispose()"/>. Managed objects such as
        /// the connection are only touched in that case.
        /// </param>
        /// <remarks>
        /// The class has no finalizer: it owns no unmanaged resources directly, and a
        /// connection must not be closed or disposed from a finalizer.
        /// </remarks>
        protected virtual void Dispose(bool disposing)
        {
            if (disposed)
            {
                return;
            }

            if (disposing && oleConn != null)
            {
                Close();
                oleConn.Dispose();
                oleConn = null;
            }

            disposed = true;
        }

        /// <summary>
        /// Executes a query and returns the first result table.
        /// </summary>
        /// <param name="sqlCommand">
        /// SQL text to execute. It is sent as-is, so callers must not build it from
        /// untrusted input.
        /// </param>
        /// <returns>
        /// The first table of the result, or <c>null</c> when the query fails, returns no
        /// table, or this instance has been disposed.
        /// </returns>
        public DataTable GetDataTable(string sqlCommand)
        {
            if (oleConn == null)
            {
                return null;
            }

            try
            {
                DataSet ds = new DataSet();
                using (OleDbDataAdapter da = new OleDbDataAdapter(sqlCommand, oleConn))
                {
                    da.Fill(ds);
                }

                return ds.Tables.Count > 0 ? ds.Tables[0] : null;
            }
            catch (Exception)
            {
                // Documented contract: any failure is reported as null.
                return null;
            }
        }
    }
}

悲哀,没有找到上传附件的功能。需要的话给我留个消息吧,我给你发过去。

附修改:

由于上网时间比较少,急需源代码的童鞋可以直接发送邮件To:549015917@qq.com;注明标题和内容,这样可以得到最快的处理!


评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值