C# 获得搜索引擎 关键字

using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Xml.Linq;


namespace Counter.ui
{
 
   /// <summary>
   /// Web Forms page that extracts the search keyword from a search-engine
   /// result URL and recognises well-known crawler names in a user-agent string.
   /// </summary>
   public partial class WebForm1 : System.Web.UI.Page
    {
        /// <summary>
        /// Value returned by <see cref="isCrawler"/> when no crawler is recognised.
        /// It is the literal text "null" (not a null reference); kept for existing callers.
        /// </summary>
        private const string NoCrawler = "null";

        // Search-engine signatures. Each row is:
        //   [0] name fragment searched for in the URL,
        //   [1] encoding of the percent-escaped keyword ("utf8" or "gb2312"),
        //   [2] name of the query parameter that carries the keyword.
        // The table is never modified, so it is shared by all instances.
        private static readonly string[][] _Enginers = new string[][]
        {
            new string[] { "google", "utf8", "q" },
            new string[] { "baidu", "gb2312", "wd" },
            new string[] { "yahoo", "utf8", "p" },
            new string[] { "yisou", "utf8", "search" },
            new string[] { "live", "utf8", "q" },
            new string[] { "tom", "gb2312", "word" },
            new string[] { "163", "gb2312", "q" },
            new string[] { "iask", "gb2312", "k" },
            new string[] { "soso", "gb2312", "w" },
            new string[] { "sogou", "gb2312", "query" },
            new string[] { "zhongsou", "gb2312", "w" },
            new string[] { "3721", "gb2312", "p" },
            new string[] { "openfind", "utf8", "q" },
            new string[] { "alltheweb", "utf8", "q" },
            new string[] { "lycos", "utf8", "query" },
            new string[] { "onseek", "utf8", "q" }
        };

        // Crawler names looked for (case-insensitively) by isCrawler, in priority order.
        private static readonly string[] _BotList = new string[]
        {
            "Google", "Baidu", "MSN", "Yahoo", "TMCrawler", "iask", "Sogou"
        };

        // Name of the engine recognised by the last EngineRegEx call ("" when none).
        private string _EngineName = "";

        /// <summary>Name of the search engine recognised by the last lookup, or "" when none matched.</summary>
        public string EngineName
        {
            get { return _EngineName; }
        }

        // Keyword encoding of the recognised engine.
        private string _Coding = "utf8";

        /// <summary>Keyword encoding ("utf8" or "gb2312") of the engine recognised by the last lookup.</summary>
        public string Coding
        {
            get { return _Coding; }
        }

        // Query parameter that carries the keyword for the recognised engine.
        private string _RegexWord = "";

        /// <summary>Name of the query parameter that carries the keyword for the recognised engine.</summary>
        public string RegexWord
        {
            get { return _RegexWord; }
        }

        // Pattern built by EngineRegEx for the recognised engine ("" when none).
        private string _Regex = "";

        /// <summary>
        /// Demo: writes the keyword extracted from a sample Google URL
        /// (expected output: "熊春迪 博客").
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            string keyword = SearchKey("http://www.google.cn/search?hl=zh-CN&newwindow=1&q=%E7%86%8A%E6%98%A5%E8%BF%AA+%E5%8D%9A%E5%AE%A2&meta=&aq=f&oq=");

            // The keyword originates from a URL, so encode it before it reaches the page.
            Response.Write(HttpUtility.HtmlEncode(keyword));
        }

        /// <summary>
        /// Identifies the search engine a URL belongs to and prepares the regular
        /// expression used to pull the keyword out of it. The outcome is exposed via
        /// <see cref="EngineName"/>, <see cref="Coding"/> and <see cref="RegexWord"/>.
        /// </summary>
        /// <param name="myString">URL to inspect; matching is case-insensitive.</param>
        public void EngineRegEx(string myString)
        {
            // Start from a clean slate on every call: the pattern must not accumulate
            // across calls, and a URL without a known engine must not inherit the
            // engine detected by a previous call.
            _EngineName = "";
            _Coding = "utf8";
            _RegexWord = "";
            _Regex = "";

            if (string.IsNullOrEmpty(myString))
            {
                return;
            }

            // NOTE(review): the engine name is searched in the whole URL, so short names
            // such as "tom" or "live" can also match unrelated URLs.
            foreach (string[] engine in _Enginers)
            {
                // Ordinal comparison avoids culture-specific casing surprises
                // (e.g. the Turkish dotless i in "BAIDU").
                if (myString.IndexOf(engine[0], StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                _EngineName = engine[0];
                _Coding = engine[1];
                _RegexWord = engine[2];

                // <engine> ... <separator><param><= or :>  followed by the keyword, which
                // ends at the next '&' or at the URL fragment.
                _Regex = "(" + Regex.Escape(_EngineName) + @".+[?/&]" + Regex.Escape(_RegexWord)
                       + @"[=:])(?<key>[^&#]*)";
                return;
            }
        }

        /// <summary>
        /// Extracts and decodes the search keyword from a search-engine URL.
        /// </summary>
        /// <param name="myString">The URL, e.g. the HTTP referrer.</param>
        /// <returns>
        /// The decoded keyword; "" when the engine is recognised but its keyword parameter
        /// is absent; the input unchanged when no known engine is recognised or the input
        /// is null or empty.
        /// </returns>
        public string SearchKey(string myString)
        {
            if (string.IsNullOrEmpty(myString))
            {
                return myString;
            }

            EngineRegEx(myString);
            if (_EngineName.Length == 0)
            {
                return myString;
            }

            Match match = Regex.Match(myString, _Regex, RegexOptions.IgnoreCase);

            // In a query string '+' stands for a space. Replace it before unescaping so
            // that an escaped plus sign (%2B) survives as a literal '+'.
            string rawKeyword = match.Groups["key"].Value.Replace("+", " ");

            if (_Coding == "gb2312")
            {
                return GetUTF8String(rawKeyword);
            }

            return Uri.UnescapeDataString(rawKeyword);
        }

        /// <summary>
        /// Decodes a percent-escaped string whose escaped bytes are GB2312 encoded.
        /// Single-byte escapes (such as %21) and double-byte characters are both decoded.
        /// </summary>
        /// <param name="myString">Percent-escaped text.</param>
        /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
        public string GetUTF8String(string myString)
        {
            if (string.IsNullOrEmpty(myString))
            {
                return myString;
            }

            // UrlDecode turns '+' into a space, but this method has always left '+'
            // untouched (SearchKey handles it beforehand), so protect it first.
            string protectedPlus = myString.Replace("+", "%2B");
            return HttpUtility.UrlDecode(protectedPlus, Encoding.GetEncoding("GB2312"));
        }

        /// <summary>
        /// Decodes one percent-escaped GB2312 character (for example "%D6%D0") into
        /// the character it represents.
        /// </summary>
        /// <param name="myString">Percent-escaped GB2312 bytes.</param>
        /// <returns>The decoded text.</returns>
        public string GB2312ToUTF8(string myString)
        {
            // Same decoding rules as whole-sentence decoding; sharing the implementation
            // keeps the two entry points consistent.
            return GetUTF8String(myString);
        }

        /// <summary>
        /// Checks whether a user-agent style string mentions a known search-engine crawler.
        /// </summary>
        /// <param name="SystemInfo">Text to inspect, typically the User-Agent header.</param>
        /// <returns>
        /// The matching crawler name from the built-in list, or the literal string "null"
        /// when nothing matches (including null or empty input).
        /// </returns>
        public string isCrawler(string SystemInfo)
        {
            if (string.IsNullOrEmpty(SystemInfo))
            {
                return NoCrawler;
            }

            foreach (string bot in _BotList)
            {
                if (SystemInfo.IndexOf(bot, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return bot;
                }
            }

            return NoCrawler;
        }
    }
}
 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值