1.4 各搜索引擎专用类
搜索引擎专用类用来完成具体的搜索任务,例如,Baidu类用来执行Baidu搜索,Google类用来执行Google搜索。它们都继承自ISearch类,主要是执行一些正则表达式操作,把搜索结果匹配出来。搜索结果作为数据,保存在了XML文件中。最后,这个XML文件按照格式化文件result.xsl的格式把搜索结果显示出来。
Search()方法的大致思路是:首先调用GetPageString()方法把搜索的关键字、页码等信息发送到特定搜索引擎,并接收搜索引擎返回的信息。然后对这个信息进行解析,分析出搜索结果的每个记录,并记录在XML文件中。然后再对搜索结果的分页导航进行分析,转换成本系统的形式,并采用Base64编码,把结果记录在XML文件中。在Search()方法执行的最后,XML文件被格式化输出到客户端浏览器显示出来。下面是6大搜索引擎专用类的具体实现代码:
//Google搜索类 google.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Search-engine adapter for Google result pages. The page text is obtained
/// from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Google : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's head
    /// content, and a "body" node holding "key", "count", one "item" per
    /// matched result block, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page

        // Plain XmlDocument (as the Baidu/Yahoo/Zhongsou classes use); the
        // obsolete XmlDataDocument adds DataSet synchronisation that is never used here.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // Head: take the <head> section, strip script/meta/title, then strip the head tag itself.
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        string style = Tools.Match(xmlstr, "<head>[\\s\\S]*?</head>").Value;
        style = Tools.delTagArray(style, "script,meta,title", true);
        XmlNode xn = Tools.CreateNode(document, document.DocumentElement, "head");
        xn.InnerText = Tools.delTagArray(style, "head", false);

        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // Summary line: text between "符合<b>" and "</b>的查询" goes to "key",
        // text between "约有<b>" and "</b>" goes to "count".
        XmlNode txt = Tools.CreateNode(document, body, "key");
        XmlNode count = Tools.CreateNode(document, body, "count");
        string sou = Tools.Match(xmlstr, "(?<=符合<b>)[\\s\\S]*?(?=</b>的查询)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=约有<b>)[\\s\\S]*?(?=</b>)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Every "<div class ...>...</div>" span is treated as one search result.
        MatchCollection mtc = Tools.MatchCollection(xmlstr, "<div class[\\s\\S]*?</div>");
        foreach (Match mt in mtc)
        {
            // Child order (url, desc, title, sour, sour, begin, end) is kept as in the original listing.
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode link = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Content of the first table cell, and its pieces up to each <br>.
            string cell = Tools.Match(mt.Value, "(?<=<td[^>]*?>)[\\s\\S]*?(?=</td>)").Value;
            MatchCollection itemc = Tools.MatchCollection(cell, "[\\s\\S]*?<br[^>]*?>");

            // Content of the <h2> heading and the first anchor inside it.
            string heading = Tools.Match(mt.Value, "(?<=<h2[^>]*?>)[\\s\\S]*?(?=</h2>)").Value;
            string anchor = Tools.Match(heading, "<a[\\s\\S]*?</a>").Value;

            // href value, with a leading double quote removed.
            string u_li = Tools.Match(anchor, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            link.InnerText = Tools.Replace(u_li, "^\"", "");

            string u_t = Tools.delTagArray(anchor, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            // The first <br>-terminated piece of the cell is used as the description.
            if (itemc.Count > 0)
            {
                desc.InnerText = Tools.delHtml(itemc[0].Value);
            }

            // First "sour" node: the heading markup.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = heading;

            // Second "sour" node: the cell content with td tags removed.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            sour2.InnerText = Tools.delTagArray(cell, "td", false);

            // Markup from the opening <div ...> up to the <h2.
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = Tools.Match(mt.Value, "(?=<div[^>]*?>)[\\s\\S]*?(?=<h2)").Value;

            // Markup after </table> through </div>.
            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = Tools.Match(mt.Value, "(?<=</table>)[\\s\\S]*?(?<=</div>)").Value;
        }

        // Paging bar: rewrite each "/search?<query>" link so it points back at
        // this system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr, "(?=<div id=navbar class=n>)[\\s\\S]*?(?=<center>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Match(mt.Value, @"(?<=href=/search\?)[^\s>]*").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "/search?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("/search?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        page = Tools.delTagArray(page, "img", false); // drop img tags from the bar
        pageNv.InnerText = page;
        return document;
    }
}
//百度搜索类 baidu.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Text.RegularExpressions;
/// <summary>
/// Search-engine adapter for Baidu result pages. The page text is obtained
/// from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Baidu : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's head
    /// content, and a "body" node holding "key", "count", one "item" per
    /// matched result table, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // Head: take the <head> section, strip scripts, then strip the head tag itself.
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        string style = Tools.Match(xmlstr, "<head>[\\s\\S]*?</head>").Value;
        style = Tools.delTagArray(style, "script", true);
        XmlNode xn = Tools.CreateNode(document, document.DocumentElement, "head");
        xn.InnerText = Tools.delTagArray(style, "head", false);

        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // "key" is the value attribute of the wd input box; "count" is the text
        // between "找到相关网页" (plus one non-digit character) and "篇".
        XmlNode txt = Tools.CreateNode(document, body, "key");
        XmlNode count = Tools.CreateNode(document, body, "count");
        string sou = Tools.Match(xmlstr,
            "(?<=<input name=wd size=\"35\" class=\"i\" value=\")[\\s\\S]*?(?=\" maxlength=\"100\")").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到相关网页[^\\d])[\\s\\S]*?(?=篇)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Every table with border/cellpadding/cellspacing of 0 is treated as one result.
        MatchCollection mtc = Tools.MatchCollection(xmlstr,
            "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">[\\s\\S]*?</table>");
        foreach (Match mt in mtc)
        {
            // Child order (url, desc, title, sour..., end) is kept as in the original listing.
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode link = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Content of the first table cell, and its pieces up to each <br>.
            string cell = Tools.Match(mt.Value, "(?<=<td[^>]*?>)[\\s\\S]*?(?=</td>)").Value;
            MatchCollection itemc = Tools.MatchCollection(cell, "[\\s\\S]*?<br[^>]*?>");

            // Only cells with at least three <br>-terminated pieces are filled in.
            if (itemc.Count >= 3)
            {
                // First piece: the anchor giving the URL and title.
                string u1 = Tools.Match(itemc[0].Value, "<a[\\s\\S]*?</a>").Value;
                string u_li = Tools.Match(u1, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
                link.InnerText = Tools.Replace(u_li, "^\"", ""); // drop a leading double quote

                string u_t = Tools.delTagArray(u1, "a", false);
                title.InnerText = Tools.delHtml(u_t);

                // Second piece: the description.
                desc.InnerText = Tools.delHtml(itemc[1].Value);

                // One "sour" node per piece, with br tags removed.
                foreach (Match mt1 in itemc)
                {
                    XmlNode sour1 = Tools.CreateNode(document, item, "sour");
                    sour1.InnerText = Tools.delTagArray(mt1.Value, "br", false);
                }
            }

            // Fixed closing markup for a single result.
            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = "</font><br/>";
        }

        // Paging bar: rewrite each "s?<query>" link so it points back at this
        // system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr, "<div class=\"p\">[\\s\\S]*?</div>").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Match(mt.Value, @"(?<=href=s\?)[^\s>]*").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "s?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("s?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        pageNv.InnerText = page;
        return document;
    }
}
//搜狗类 sogou.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Web;
/// <summary>
/// Search-engine adapter for Sogou result pages. The page text is obtained
/// from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Sogou : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's style
    /// block, and a "body" node holding "count", "key", one "item" per matched
    /// result div, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        xmlstr = Tools.delTagArray(xmlstr, "script", true);

        // Plain XmlDocument (as the Baidu/Yahoo/Zhongsou classes use); the
        // obsolete XmlDataDocument adds DataSet synchronisation that is never used here.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // Head: the first <style ...>...</style> block of the page.
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?)[\\s\\S]*?(?<=</style>)").Value;
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        head.InnerText = style;

        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // "key" is the value attribute of the query input box; "count" is the
        // text between "找到" and "个网页".
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr,
            "(?<=<input name=\"query\" type=\"text\" class=\"query\" size=\"35\" tabindex=\"1\" value=\")[\\s\\S]*?(?=\"/>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=个网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Results live between the content div and the pagebar div; each
        // "<div>...</div>" inside that region is treated as one result.
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<div id=\"content\">)[\\s\\S]*?(?<=<div id=\"pagebar\">)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "(<div>)[\\s\\S]*?(</div>)");
        foreach (Match mt in mtc)
        {
            // Child order (url, desc, title, begin, end, sour, sour) is kept as in the original listing.
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            XmlNode end = Tools.CreateNode(document, item, "end");

            // Content of the <h2> heading: source of both URL and title.
            string heading = Tools.Match(mt.Value, "(?<=<h2>)[\\s\\S]*?(?=</h2>)").Value;
            string ul_li = Tools.Match(heading, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "[\"']", ""); // strip all quotes

            string u_t = Tools.delTagArray(heading, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            // The first <p> paragraph is used as the description, quotes stripped.
            MatchCollection itemc = Tools.MatchCollection(mt.Value, "(?<=<p[^>]*?>)[\\s\\S]*?(?=</p>)");
            if (itemc.Count > 0)
            {
                string de = Tools.delHtml(itemc[0].Value);
                desc.InnerText = Tools.Replace(de, "[\"']", "");
            }

            // First "sour" node: the heading with its onclick handler removed.
            // NOTE(review): "(<br^[>]*?)*" is reproduced exactly as printed; with the
            // mid-pattern '^' it can only match empty strings, so it looks like a typo
            // for "(<br[^>]*?>)*" — confirm the intent before changing it.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            string sout1str = Tools.Replace(heading, "(<br^[>]*?)*", "");
            sour1.InnerText = Tools.Replace(sout1str, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

            // Second "sour" node: everything after </h2> up to </div>, stored as CDATA.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string content = Tools.Match(mt.Value, "(?<=</h2>)[\\s\\S]*?(?=</div>)").Value;
            content = Tools.Replace(content, "(<br^[>]*?)*", "");
            Tools.CreateCData(document, sour2, content);

            // Fixed opening/closing markup for a single result.
            begin.InnerText = "<div>";
            end.InnerText = "</div>";
        }

        // Paging bar: rewrite each "?<query>" link so it points back at this
        // system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr,
            "(?=<!-- begin of page up/down -->)[\\s\\S]*?(?=<!-- end of page up/down -->)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", ""); // drop double quotes first
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        pageNv.InnerText = page;
        return document;
    }
}
//爱问搜索类 iask.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Search-engine adapter for Iask (爱问) result pages. The page text is
/// obtained from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Iask : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's style
    /// block, and a "body" node holding "count", "key", one "item" per matched
    /// result table, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page

        // Plain XmlDocument (as the Baidu/Yahoo/Zhongsou classes use); the
        // obsolete XmlDataDocument adds DataSet synchronisation that is never used here.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // "key" is the page title up to " - 爱问搜索"; "count" is the content of
        // the span between "找到 " and " 篇网页".
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>)[\\s\\S]*?(?= - 爱问搜索)").Value;
        string count2 = Tools.Match(xmlstr,
            "(?<=找到 <span class=\"ar\">)[\\s\\S]*?(?=</span> 篇网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Head: the first <style ...>...</style> block of the whole page, scripts removed.
        // (The original also matched the <head> section first but immediately
        // overwrote that result; the unused match is dropped.)
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        // Results live between the two "网页搜索结果" marker comments; each table
        // in that region is treated as one result.
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<!-- 网页搜索结果 begin -->)[\\s\\S]*?(?=<!-- 网页搜索结果 end -->)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "<table[^>]*?>[\\s\\S]*?</table>");
        foreach (Match mt in mtc)
        {
            // Child order (url, desc, title, sour, sour) is kept as in the original listing.
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Pieces of the table, each ending right after a <br ...> tag.
            MatchCollection itemc = Tools.MatchCollection(mt.Value, "[\\s\\S]*?(?<=<br[^>]*?>)");

            // A table without any <br> yields no pieces; fall back to an empty
            // string instead of indexing past the end of the collection.
            string firstPiece = itemc.Count > 0 ? itemc[0].Value : string.Empty;

            // First piece: the anchor giving the URL and title.
            string anchor = Tools.Match(firstPiece, "<a[\\s\\S]*?</a>").Value;
            string u_li = Tools.Match(anchor, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(u_li, "^\"", ""); // drop a leading double quote

            string u_t = Tools.delTagArray(anchor, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            // Second piece (when present): the description.
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);
            }

            // First "sour" node: the anchor markup.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = anchor;

            // Second "sour" node: all pieces after the first, concatenated.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            StringBuilder rest = new StringBuilder();
            for (int j = 1; j < itemc.Count; j++)
            {
                rest.Append(itemc[j].Value);
            }
            sour2.InnerText = rest.ToString();
        }

        // Paging bar: rewrite each "/s?<query>" link so it points back at this
        // system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr,
            "(?<=<!-- 左侧搜索结果 end -->)[\\s\\S]*?(?<=</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", ""); // drop double quotes first
            s2 = Tools.Match(s2, @"(?<=href=/s\?)[^\s>]*").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "/s?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("/s?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        pageNv.InnerText = page;
        return document;
    }
}
//雅虎类 yahoo.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Text.RegularExpressions;
/// <summary>
/// Search-engine adapter for Yahoo (雅虎) result pages. The page text is
/// obtained from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Yahoo : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's style
    /// block, and a "body" node holding "count", "key", one "item" per matched
    /// result block, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page

        // Cut the region from <div class="pm r"> up to the next </table> out of the page.
        xmlstr = Tools.Replace(xmlstr, "(?=<div class=\"pm r\">)[\\s\\S]*?(?=</table>)", "");

        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // "key" is the page title after "雅虎搜索_"; "count" is the text between
        // "共返回" (plus one non-digit character) and "项".
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>雅虎搜索_)[\\s\\S]*?(?=</title>)").Value;
        count.InnerText = Tools.Match(xmlstr, "(?<=共返回[^\\d])[\\s\\S]*?(?=项)").Value;
        txt.InnerText = sou;

        // Head: take the head section, strip script/title/meta, keep only the <style> block.
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        string style = Tools.Match(xmlstr, "(?<=<head>*?)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.delTagArray(style, "script,title", true);
        style = Tools.delTagArray(style, "meta", false);
        style = Tools.Match(style, "(?=<style>)[\\s\\S]*?(?<=</style>)").Value;
        head.InnerText = style;

        // Every span from <div class="i"> to the next </table> is treated as one result.
        MatchCollection mtc = Tools.MatchCollection(xmlstr, "(<div class=\"i\">)[\\s\\S]*?(</table>)");
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");

            // Markup between the opening div and the inner table: holds the title link.
            string link = Tools.Match(mt.Value,
                "(?<=<div class=\"i\">)[\\s\\S]*?(?=<table cellspacing=\"0\">)").Value;
            string ul_li = Tools.Match(link, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;

            // Blocks without a link leave the item node empty.
            if (ul_li != "")
            {
                // Child order (url, desc, title, begin, end, sour, sour) is kept as in the original listing.
                XmlNode url = Tools.CreateNode(document, item, "url");
                XmlNode desc = Tools.CreateNode(document, item, "desc");
                XmlNode title = Tools.CreateNode(document, item, "title");
                url.InnerText = Tools.Replace(ul_li, "^\"", ""); // drop a leading double quote

                string u_t = Tools.delTagArray(link, "a", false);
                title.InnerText = Tools.delHtml(u_t);

                // Description: from <td class="d"> through <div class="rel">, HTML removed.
                string de = Tools.Match(mt.Value,
                    "(?=<td class=\"d\">)[\\s\\S]*?(?<=<div class=\"rel\">)").Value;
                desc.InnerText = Tools.delHtml(de);

                // Fixed opening/closing markup for a single result.
                XmlNode begin = Tools.CreateNode(document, item, "begin");
                begin.InnerText = "<div class=\"i\">";
                XmlNode end = Tools.CreateNode(document, item, "end");
                end.InnerText = "</div>";

                // First "sour" node: the link markup with its onclick handler removed.
                XmlNode sour1 = Tools.CreateNode(document, item, "sour");
                sour1.InnerText = Tools.Replace(link, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

                // Second "sour" node: the inner table, minus the span from the first
                // "<a" through "- " and minus onclick handlers, stored as CDATA.
                XmlNode sour2 = Tools.CreateNode(document, item, "sour");
                string sourstr2 = Tools.Match(mt.Value,
                    "(?=<table cellspacing=\"0\">)[\\s\\S]*?(?<=</table>)").Value;
                sourstr2 = Tools.Replace(sourstr2, "(<a[^>]*?)[\\s\\S]*?(?<=- )", "");
                Tools.CreateCData(document, sour2,
                    Tools.Replace(sourstr2, "(?=onclick=)[\\s\\S]*?(?<=;\")", ""));
            }
        }

        // Paging bar: rewrite each "?<query>" link so it points back at this
        // system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr, "(<div id=\"pg\">)[\\s\\S]*?(?<=</div>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", ""); // drop double quotes first
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        pageNv.InnerText = page;
        return document;
    }
}
//中搜类 zhongsou.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
/// <summary>
/// Search-engine adapter for Zhongsou (中搜) result pages. The page text is
/// obtained from the inherited GetPageString() and picked apart with regular
/// expressions; the pieces are stored in an XML document whose root element
/// is "search".
/// </summary>
public class Zhongsou : ISearch
{
    /// <summary>
    /// Builds the result document: a "head" node holding the page's style
    /// block, and a "body" node holding "count", "key", one "item" per matched
    /// result table, and a "pageSite" node with the paging links.
    /// </summary>
    /// <returns>The populated XML document (root element "search").</returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString(); // text of the search result page
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // "key" is the page title after "中搜网页_"; "count" is the text between
        // "找到" and "条结果".
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt2 = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>中搜网页_)[\\s\\S]*?(?=</title>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=条结果)").Value;
        count.InnerText = count2;
        txt2.InnerText = sou;

        // Head: the <style> block found inside the head section, scripts removed.
        // NOTE(review): the bool flag of delTagArray presumably selects "remove content too" — confirm in Tools.
        string style = Tools.Match(xmlstr, "(?=<head>)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.Match(style, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        // Every table with cellspacing/cellpadding of 0 is treated as one result.
        MatchCollection mtc = Tools.MatchCollection(xmlstr,
            "(?=<table cellspacing=\"0\" cellpadding=\"0\">)[\\s\\S]*?(?<=</table>)");
        foreach (Match mt in mtc)
        {
            // Child order (url, desc, title, sour, sour, begin, end) is kept as in the original listing.
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Pieces of the table up to each <br>.
            MatchCollection itemc = Tools.MatchCollection(mt.Value, "[\\s\\S]*?<br[^>]*?>");

            // A table without any <br> yields no pieces; fall back to an empty
            // string instead of indexing past the end of the collection.
            string firstPiece = itemc.Count > 0 ? itemc[0].Value : string.Empty;

            // First piece: the anchor giving the URL and title.
            string anchor = Tools.Match(firstPiece, "<a[\\s\\S]*?</a>").Value;
            string ul_li = Tools.Match(anchor, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "^\"", ""); // drop a leading double quote

            string u_t = Tools.delTagArray(anchor, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            // Second piece (when present): the description.
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);
            }

            // First "sour" node: the anchor with its onmousedown handler removed.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = Tools.Replace(anchor, "(?=onmousedown=)[\\s\\S]*?(?<=\\)\")", "");

            // Second "sour" node: the first cell without anchors and td tags.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string txt = Tools.Match(mt.Value, "(?=<td[^>]*?)[\\s\\S]*?(?<=</td>)").Value;
            txt = Tools.Replace(txt, "<a[\\s\\S]*?</a>", "");
            txt = Tools.delTagArray(txt, "td", false);
            sour2.InnerText = txt;

            // Fixed opening/closing markup for a single result.
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = "<table cellspacing=\"0\" cellpadding=\"0\"><tr><td class=\"f\">";
            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = "</td></tr></table>";
        }

        // Paging bar: rewrite each "p?<query>" link so it points back at this
        // system, carrying the original query Base64-encoded.
        string page = Tools.Match(xmlstr, "(<table ><tr><td class=db>)[\\s\\S]*?(</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", ""); // drop double quotes first
            s2 = Tools.Match(s2, @"(?<=href=p\?)[^\s>]*").Value;
            if (string.IsNullOrEmpty(s2))
            {
                // Without a query string the Replace below would rewrite every
                // bare "p?" in the bar and corrupt the remaining links.
                continue;
            }
            page = page.Replace("p?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");
        pageNv.InnerText = page;
        return document;
    }
}