聚合搜索(四)

1.4  各搜索引擎专用类
搜索引擎专用类用来完成具体的搜索任务,例如,Baidu类用来执行Baidu搜索,Google类用来执行Google搜索。它们都继承自ISearch类,主要是执行一些正则表达式操作,把搜索结果匹配出来。搜索结果作为数据,保存在了XML文件中。最后,这个XML文件按照格式化文件result.xsl的格式把搜索结果显示出来。
Search()方法的大致思路是:首先调用GetPageString()方法把搜索的关键字、页码等信息发送到特定搜索引擎,并接收搜索引擎返回的信息。然后对这个信息进行解析,分析出搜索结果的每个记录,并记录在XML文件中。然后再对搜索结果的分页导航进行分析,转换成本系统的形式,并采用Base64编码,把结果记录在XML文件中。在Search()方法执行的最后,XML文件被格式化输出到客户端浏览器显示出来。下面是6大搜索引擎专用类的具体实现代码:
//Google搜索类 google.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Google-specific search class. Fetches the Google result page through the
/// base class and converts it into the XML layout shared by all engines.
/// </summary>
public class Google : ISearch
{
    /// <summary>
    /// Parses the page returned by <c>GetPageString()</c> into an XML document
    /// rooted at <c>search</c>: a <c>head</c> node and a <c>body</c> node holding
    /// <c>key</c>, <c>count</c>, one <c>item</c> per result and a
    /// <c>pageSite</c> node with the rewritten paging links.
    /// </summary>
    /// <returns>The XML document describing the search results.</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();              // raw text of the result page
        // XmlDocument instead of the obsolete XmlDataDocument; the declared
        // return type is XmlDocument and the other engines already use it.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // head: the page's <head> section with script/meta/title handled by
        // Tools.delTagArray, then the head tag itself removed.
        string style = Tools.Match(xmlstr, "<head>[\\s\\S]*?</head>").Value;
        style = Tools.delTagArray(style, "script,meta,title", true);
        XmlNode xn = Tools.CreateNode(document, document.DocumentElement, "head");
        xn.InnerText = Tools.delTagArray(style, "head", false);

        // body
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // search keyword and total result count
        XmlNode txt = Tools.CreateNode(document, body, "key");
        XmlNode count = Tools.CreateNode(document, body, "count");
        string sou = Tools.Match(xmlstr, "(?<=符合<b>)[\\s\\S]*?(?=</b>的查询)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=约有<b>)[\\s\\S]*?(?=</b>)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        MatchCollection mtc = Tools.MatchCollection(xmlstr, "<div class[\\s\\S]*?</div>");

        // walk every result block and add it to the XML document
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");   // one search record
            XmlNode link = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Content of the first <td> and of the <h2>; each is used twice
            // below, so match once and reuse the result.
            string cell = Tools.Match(mt.Value, "(?<=<td[^>]*?>)[\\s\\S]*?(?=</td>)").Value;
            string heading = Tools.Match(mt.Value, "(?<=<h2[^>]*?>)[\\s\\S]*?(?=</h2>)").Value;

            MatchCollection itemc = Tools.MatchCollection(cell, "[\\s\\S]*?<br[^>]*?>");

            string ul = Tools.Match(heading, "<a[\\s\\S]*?</a>").Value;
            string u_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            link.InnerText = Tools.Replace(u_li, "^\"", "");
            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);                      // strip html markup

            if (itemc.Count > 0)
            {
                desc.InnerText = Tools.delHtml(itemc[0].Value);
            }

            // page fragments for this result
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = heading;

            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            sour2.InnerText = Tools.delTagArray(cell, "td", false);

            // markup that opens a single result
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = Tools.Match(mt.Value, "(?=<div[^>]*?>)[\\s\\S]*?(?=<h2)").Value;

            // markup that closes a single result
            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = Tools.Match(mt.Value, "(?<=</table>)[\\s\\S]*?(?<=</div>)").Value;
        }

        // paging navigation: rewrite every page link into this system's form
        string page = Tools.Match(xmlstr, "(?=<div id=navbar class=n>)[\\s\\S]*?(?=<center>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            // query parameters that follow "/search?"
            string s2 = Tools.Match(mt.Value, @"(?<=href=/search\?)[^\s>]*").Value;
            if (s2.Length == 0)
            {
                // Not a "/search?" link. Replacing the bare prefix would rewrite
                // every remaining "/search?" in the navigation, so skip it.
                continue;
            }
            page = page.Replace("/search?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // paging navigation node
        page = Tools.delTagArray(page, "img", false);
        pageNv.InnerText = page;
        return document;
    }
}
//百度搜索类 baidu.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Text.RegularExpressions;
     public class Baidu:ISearch
     {
          public override System.Xml.XmlDocument Search()
          {
               string xmlstr = GetPageString();     //获取搜索页
               XmlDocument document = new XmlDocument(); //返回的页面
               document.LoadXml("<search/>");
               string style = Tools.Match(xmlstr, "<head>[//s//S]*?</head>").Value;
                    //css
               style = Tools.delTagArray(style, "script", true); //删除脚本
               //创建头部
               XmlNode xn = Tools.CreateNode(document, document.DocumentElement,
                    "head");
               //删除head标记
               xn.InnerText = Tools.delTagArray(style, "head", false);
               //创建body
               XmlNode body = Tools.CreateNode(document, document.DocumentElement,
                    "body");
               //搜索记录数的描述
               XmlNode txt = Tools.CreateNode(document, body, "key");
               XmlNode count = Tools.CreateNode(document, body, "count");
                                                            //记录总数
               string sou = Tools.Match(xmlstr, "(?<=<input name=wd size=/"35/" class=
                    /"i/" value=/")[//s//S]*?(?=/" maxlength=/"100/")").Value;
               string count2 = Tools.Match(xmlstr, "(?<=找到相关网页[^//d])[//s//S]*?
                    (?=篇)").Value;
               count.InnerText = count2;
               txt.InnerText = sou                              ;//记录总数
               //搜索结果的记录集
               MatchCollection mtc = Tools.MatchCollection(xmlstr, "<table border=/"0/"
                    cellpadding=/"0/" cellspacing=/"0/">[//s//S]*?</table>");
               StringBuilder sb = new StringBuilder(1000);
               //遍历每个结果,把搜索结果插入xml文档中
               foreach (Match mt in mtc)
               {
                     XmlNode item = Tools.CreateNode(document, body, "item");
                                                    //在xml中插入一条搜索记录
                     XmlNode link = Tools.CreateNode(document, item, "url");                                                              //单条超链接
                     XmlNode desc = Tools.CreateNode(document, item, "desc");                                                             //单条搜索结果的描述
                     XmlNode title = Tools.CreateNode(document, item, "title");                                                           //单条搜索结果的标题
                     sb.Remove(0, sb.Length);         //清空
                     sb.Append(Tools.Match(mt.Value, "(?<=<td[^>]*?>)[//s//S]*?(?=
                          </td>)").Value);
                     MatchCollection itemc = Tools.MatchCollection(sb.ToString(),
                          "[//s//S]*?<br[^>]*?>");
                     if(itemc.Count>=3)
                     {
                            string u1 = Tools.Match(itemc[0].Value, "<a[//s//S]*?</
                                 a>").Value;
                            string u_li=Tools.Match(u1,"(?<=href=[/"]?).*?(?=[/"]?
                                 [//s>])").Value;
                            link.InnerText = Tools.Replace(u_li, "^/"", "");
                            string u_t = Tools.delTagArray(u1, "a", false);
                            title.InnerText = Tools.delHtml(u_t);   //删除html标记
                            u1 = Tools.delHtml(itemc[1].Value);
                            desc.InnerText = u1;
                            foreach (Match mt1 in itemc)
                            {
                                 //搜索的网页信息
                                 XmlNode sour1 = Tools.CreateNode(document, item,
                                      "sour");
                                 string da = Tools.delTagArray(mt1.Value, "br", false);                                                              //删除br
                                 sour1.InnerText=da;
                            }
                     }
                     XmlNode end = Tools.CreateNode(document, item, "end");                                                                   //搜索结果结束
                     end.InnerText ="</font><br/>";
               }
               string page = Tools.Match(xmlstr, "<div class=/"p/">[//s//S]*?</div>").
                    Value;  //分页
               MatchCollection mcpage = Tools.MatchCollection(page,"<a[^>]*?>[//s
                    //S]*?</a>");
               //遍历每个页码,替换为本系统的形式
               foreach (Match mt in mcpage)
               {
                     string s2 = mt.Value;
                     s2 = Tools.Match(s2, @"(?<=href=s/?)[^/s>]*").Value;
                     page = page.Replace("s?" + s2,"?nav_go_post="+Tools.ToBase64(s2)
                          +"&itemtype=" + ItemType);   //替换超链接
               }
               XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");                                                                       //插入分页导航
               pageNv.InnerText = page;                         //赋予导航内容
               return document;
          }
     }
//搜狗类 sogou.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Web;
/// <summary>
/// Sogou-specific search class. Fetches the Sogou result page through the
/// base class and converts it into the XML layout shared by all engines.
/// </summary>
public class Sogou : ISearch
{
    /// <summary>
    /// Parses the page returned by <c>GetPageString()</c> into an XML document
    /// rooted at <c>search</c>: a <c>head</c> node (the page's style block) and a
    /// <c>body</c> node holding <c>count</c>, <c>key</c>, one <c>item</c> per
    /// result and a <c>pageSite</c> node with the rewritten paging links.
    /// </summary>
    /// <returns>The XML document describing the search results.</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                       // raw text of the result page
        xmlstr = Tools.delTagArray(xmlstr, "script", true);    // remove scripts
        // XmlDocument instead of the obsolete XmlDataDocument; the declared
        // return type is XmlDocument and the other engines already use it.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // head: css of the result page
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?)[\\s\\S]*?(?<=</style>)").Value;
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        head.InnerText = style;
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // total result count and search keyword
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr,
            "(?<=<input name=\"query\" type=\"text\" class=\"query\" size=\"35\" tabindex=\"1\" value=\")[\\s\\S]*?(?=\"/>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=个网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // only the part between the content div and the page bar holds results
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<div id=\"content\">)[\\s\\S]*?(?<=<div id=\"pagebar\">)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "(<div>)[\\s\\S]*?(</div>)");

        // walk every result and add it to the XML document
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            XmlNode end = Tools.CreateNode(document, item, "end");

            // content of the <h2>: used for url/title and again for the first "sour" node
            string ul = Tools.Match(mt.Value, "(?<=<h2>)[\\s\\S]*?(?=</h2>)").Value;
            string ul_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "[\"']", "");
            string u_t = Tools.delTagArray(ul, "a", false);    // remove the hyperlink tag
            title.InnerText = Tools.delHtml(u_t);

            MatchCollection itemc = Tools.MatchCollection(mt.Value, "(?<=<p[^>]*?>)[\\s\\S]*?(?=</p>)");
            if (itemc.Count > 0)
            {
                string de = Tools.delHtml(itemc[0].Value);
                desc.InnerText = Tools.Replace(de, "[\"']", "");
            }

            // page fragments for this result
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            // NOTE(review): "(<br^[>]*?)*" can only match the empty string (the
            // '^' anchor sits after "<br"), so this replace looks like a no-op;
            // "<br[^>]*?>" was probably intended. Kept unchanged because the
            // output downstream has only ever seen the unmodified text.
            string sout1str = Tools.Replace(ul, "(<br^[>]*?)*", "");
            // drop inline onclick handlers (Latin "onclick"; the look-alike
            // character in the source listing could never match real markup)
            sour1.InnerText = Tools.Replace(sout1str, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string content = Tools.Match(mt.Value, "(?<=</h2>)[\\s\\S]*?(?=</div>)").Value;
            content = Tools.Replace(content, "(<br^[>]*?)*", "");   // see NOTE(review) above
            Tools.CreateCData(document, sour2, content);

            begin.InnerText = "<div>";                         // opens a single record
            end.InnerText = "</div>";                          // closes a single record
        }

        // paging navigation: rewrite every page link into this system's form
        string page = Tools.Match(xmlstr,
            "(?=<!-- begin of page up/down -->)[\\s\\S]*?(?=<!-- end of page up/down -->)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;
            if (s2.Length == 0)
            {
                // No query string captured. Replacing a bare "?" would rewrite
                // every '?' in the navigation markup, so skip it.
                continue;
            }
            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // paging navigation node
        pageNv.InnerText = page;
        return document;
    }
}
//爱问搜索类 iask.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Iask-specific search class. Fetches the Iask result page through the
/// base class and converts it into the XML layout shared by all engines.
/// </summary>
public class Iask : ISearch
{
    /// <summary>
    /// Parses the page returned by <c>GetPageString()</c> into an XML document
    /// rooted at <c>search</c>: a <c>head</c> node (the page's style block) and a
    /// <c>body</c> node holding <c>count</c>, <c>key</c>, one <c>item</c> per
    /// result table and a <c>pageSite</c> node with the rewritten paging links.
    /// </summary>
    /// <returns>The XML document describing the search results.</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                       // raw text of the result page
        // XmlDocument instead of the obsolete XmlDataDocument; the declared
        // return type is XmlDocument and the other engines already use it.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // total result count and search keyword
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>)[\\s\\S]*?(?= - 爱问搜索)").Value;
        string count2 = Tools.Match(xmlstr,
            "(?<=找到 <span class=\"ar\">)[\\s\\S]*?(?=</span> 篇网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // head: first <style> block of the page. The listing also matched the
        // <head> section first but immediately overwrote that result, so the
        // dead assignment is dropped; the style is still searched in the whole page.
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        // results sit between the two marker comments, one table per result
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<!-- 网页搜索结果 begin -->)[\\s\\S]*?(?=<!-- 网页搜索结果 end -->)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "<table[^>]*?>[\\s\\S]*?</table>");

        // walk every result and add it to the XML document
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            MatchCollection itemc = Tools.MatchCollection(mt.Value, "[\\s\\S]*?(?<=<br[^>]*?>)");

            // A table without any <br> yields no segments; indexing itemc[0]
            // or itemc[1] unguarded would throw, so fall back to empty text.
            string first = itemc.Count > 0 ? itemc[0].Value : "";
            string ul = Tools.Match(first, "<a[\\s\\S]*?</a>").Value;
            string u_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(u_li, "^\"", "");
            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);
            }

            // page fragments for this result: the link, then every remaining segment
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = ul;
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            StringBuilder rest = new StringBuilder();
            for (int j = 1; j < itemc.Count; j++)
            {
                rest.Append(itemc[j].Value);
            }
            sour2.InnerText = rest.ToString();
        }

        // paging navigation: rewrite every page link into this system's form
        string page = Tools.Match(xmlstr,
            "(?<=<!-- 左侧搜索结果 end -->)[\\s\\S]*?(?<=</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=/s\?)[^\s>]*").Value;
            if (s2.Length == 0)
            {
                // Not a "/s?" link. Replacing the bare prefix would rewrite
                // every remaining "/s?" in the navigation, so skip it.
                continue;
            }
            page = page.Replace("/s?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // paging navigation node
        pageNv.InnerText = page;
        return document;
    }
}
//雅虎类 yahoo.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Text.RegularExpressions;
/// <summary>
/// Yahoo-specific search class. Fetches the Yahoo result page through the
/// base class and converts it into the XML layout shared by all engines.
/// </summary>
public class Yahoo : ISearch
{
    /// <summary>
    /// Parses the page returned by <c>GetPageString()</c> into an XML document
    /// rooted at <c>search</c>: a <c>head</c> node (the page's style block) and a
    /// <c>body</c> node holding <c>count</c>, <c>key</c>, one <c>item</c> per
    /// result and a <c>pageSite</c> node with the rewritten paging links.
    /// </summary>
    /// <returns>The XML document describing the search results.</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                       // raw text of the result page
        // cut the <div class="pm r"> section (up to the next </table>) out of the page
        xmlstr = Tools.Replace(xmlstr, "(?=<div class=\"pm r\">)[\\s\\S]*?(?=</table>)", "");
        XmlDocument document = new XmlDocument();              // document returned to the caller
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // total result count and search keyword
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>雅虎搜索_)[\\s\\S]*?(?=</title>)").Value;
        count.InnerText = Tools.Match(xmlstr, "(?<=共返回[^\\d])[\\s\\S]*?(?=项)").Value;
        txt.InnerText = sou;

        // head: css of the result page
        string style = Tools.Match(xmlstr, "(?<=<head>*?)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.delTagArray(style, "script,title", true);   // remove script and title
        style = Tools.delTagArray(style, "meta", false);           // remove meta
        style = Tools.Match(style, "(?=<style>)[\\s\\S]*?(?<=</style>)").Value;
        head.InnerText = style;

        MatchCollection mtc = Tools.MatchCollection(xmlstr, "(<div class=\"i\">)[\\s\\S]*?(</table>)");

        // walk every result and add it to the XML document
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");
            string link = Tools.Match(mt.Value,
                "(?<=<div class=\"i\">)[\\s\\S]*?(?=<table cellspacing=\"0\">)").Value;
            string ul_li = Tools.Match(link, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            if (ul_li == "")
            {
                // no hyperlink found: the item node stays empty, as before
                continue;
            }

            // information nodes
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");
            url.InnerText = Tools.Replace(ul_li, "^\"", "");
            string u_t = Tools.delTagArray(link, "a", false);
            title.InnerText = Tools.delHtml(u_t);
            string de = Tools.Match(mt.Value,
                "(?=<td class=\"d\">)[\\s\\S]*?(?<=<div class=\"rel\">)").Value;
            desc.InnerText = Tools.delHtml(de);

            XmlNode begin = Tools.CreateNode(document, item, "begin");  // opens a single result
            begin.InnerText = "<div class=\"i\">";
            XmlNode end = Tools.CreateNode(document, item, "end");      // closes a single result
            end.InnerText = "</div>";

            // content of the result; inline onclick handlers are removed
            // (Latin "onclick"; the look-alike character in the source
            // listing could never match real markup)
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = Tools.Replace(link, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string sourstr2 = Tools.Match(mt.Value,
                "(?=<table cellspacing=\"0\">)[\\s\\S]*?(?<=</table>)").Value;
            sourstr2 = Tools.Replace(sourstr2, "(<a[^>]*?)[\\s\\S]*?(?<=-&nbsp;)", "");
            Tools.CreateCData(document, sour2,
                Tools.Replace(sourstr2, "(?=onclick=)[\\s\\S]*?(?<=;\")", ""));
        }

        // paging navigation: rewrite every page link into this system's form
        string page = Tools.Match(xmlstr, "(<div id=\"pg\">)[\\s\\S]*?(?<=</div>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;
            if (s2.Length == 0)
            {
                // No query string captured. Replacing a bare "?" would rewrite
                // every '?' in the navigation markup, so skip it.
                continue;
            }
            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // paging navigation node
        pageNv.InnerText = page;
        return document;
    }
}
//中搜类 zhongsou.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Zhongsou-specific search class. Fetches the Zhongsou result page through
/// the base class and converts it into the XML layout shared by all engines.
/// </summary>
public class Zhongsou : ISearch
{
    /// <summary>
    /// Parses the page returned by <c>GetPageString()</c> into an XML document
    /// rooted at <c>search</c>: a <c>head</c> node (the page's style block) and a
    /// <c>body</c> node holding <c>count</c>, <c>key</c>, one <c>item</c> per
    /// result table and a <c>pageSite</c> node with the rewritten paging links.
    /// </summary>
    /// <returns>The XML document describing the search results.</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                       // raw text of the result page
        XmlDocument document = new XmlDocument();              // xml returned to the caller
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // total result count and search keyword
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt2 = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>中搜网页_)[\\s\\S]*?(?=</title>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=条结果)").Value;
        count.InnerText = count2;
        txt2.InnerText = sou;

        // head: the <style> block inside the page's <head>, scripts removed
        string style = Tools.Match(xmlstr, "(?=<head>)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.Match(style, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        MatchCollection mtc = Tools.MatchCollection(xmlstr,
            "(?=<table cellspacing=\"0\" cellpadding=\"0\">)[\\s\\S]*?(?<=</table>)");

        // walk every result and add it to the XML document
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");    // one search record
            XmlNode url = Tools.CreateNode(document, item, "url");      // hyperlink of the record
            XmlNode desc = Tools.CreateNode(document, item, "desc");    // description of the record
            XmlNode title = Tools.CreateNode(document, item, "title");  // title of the record

            MatchCollection itemc = Tools.MatchCollection(mt.Value, "[\\s\\S]*?<br[^>]*?>");

            // A table without any <br> yields no segments; indexing itemc[0]
            // unguarded would throw, so fall back to empty text.
            string first = itemc.Count > 0 ? itemc[0].Value : "";
            string ul = Tools.Match(first, "<a[\\s\\S]*?</a>").Value;
            string ul_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "^\"", "");
            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);                       // strip html
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);         // description text
            }

            // display fragments; inline onmousedown handlers are removed
            // (Latin "onmousedown"; the look-alike characters in the source
            // listing could never match real markup)
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = Tools.Replace(ul, "(?=onmousedown=)[\\s\\S]*?(?<=\\)\")", "");

            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string txt = Tools.Match(mt.Value, "(?=<td[^>]*?)[\\s\\S]*?(?<=</td>)").Value;
            txt = Tools.Replace(txt, "<a[\\s\\S]*?</a>", "");           // remove hyperlinks
            txt = Tools.delTagArray(txt, "td", false);
            sour2.InnerText = txt;                                      // content of the record

            XmlNode begin = Tools.CreateNode(document, item, "begin");  // opens a single record
            begin.InnerText = "<table cellspacing=\"0\" cellpadding=\"0\"><tr><td class=\"f\">";
            XmlNode end = Tools.CreateNode(document, item, "end");      // closes a single record
            end.InnerText = "</td></tr></table>";
        }

        // paging navigation: rewrite every page link into this system's form
        string page = Tools.Match(xmlstr, "(<table ><tr><td class=db>)[\\s\\S]*?(</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=p\?)[^\s>]*").Value;
            if (s2.Length == 0)
            {
                // Not a "p?" link. Replacing the bare prefix would rewrite
                // every "p?" occurrence in the navigation, so skip it.
                continue;
            }
            page = page.Replace("p?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // paging navigation node
        pageNv.InnerText = page;
        return document;
    }
}
 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值