1.首先要建立一个简易过滤器。
代码如下:
至此,过滤器定义完毕了,接下来还需要把这个过滤器装配到HttpResponse 对象中。
为了能够截获整站的aspx 页面输出的内容,我们可以定义一个HttpModule 来完成。
代码如下:
HttpModule 准备完毕,也装配上了过滤器,接下来还需要在配置文件中配置 httpModules 配置节,把自定义的 HttpModule 加入到 HTTP 处理管道中。
在Web.config 中增加配置节如下:
测试成功,能准确的获得服务器向客户端输出的HTML内容。
代码如下:
using
System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;
/// <summary>
/// Event data that carries the captured page source to subscribers once
/// the complete output has been intercepted.
/// </summary>
public class RawDataEventArgs : EventArgs
{
    private string sourceCode;

    /// <summary>
    /// Creates the event data for a captured page.
    /// </summary>
    /// <param name="SourceCode">The captured page source.</param>
    public RawDataEventArgs(string SourceCode)
    {
        sourceCode = SourceCode;
    }

    /// <summary>
    /// Gets or sets the captured page source.
    /// </summary>
    public string SourceCode
    {
        get { return sourceCode; }
        set { sourceCode = value; }
    }
}
/// <summary>
/// Custom response filter. Every byte written to it is forwarded unchanged
/// to the wrapped stream, while a decoded copy of the text is accumulated so
/// that subscribers can be handed the page source once the closing
/// <c>&lt;/html&gt;</c> tag has been written.
/// </summary>
public class RawFilter : Stream
{
    // Marker whose appearance is treated as "the page has been fully written".
    private const string EndOfPageMarker = "</html>";

    Stream responseStream;
    long position;
    StringBuilder responseHtml;

    // Stateful decoder: keeps the trailing bytes of a multi-byte character
    // that is split across two Write calls instead of corrupting it.
    Decoder decoder;

    // True once the event has been raised; capturing stops afterwards.
    bool recorded;

    /// <summary>
    /// Raised once the raw page data has been captured.
    /// </summary>
    public event EventHandler<RawDataEventArgs> OnRawDataRecordedEvent;

    /// <summary>
    /// Wraps the stream that the captured output must still be forwarded to.
    /// </summary>
    /// <param name="inputStream">The stream to forward written data to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
    public RawFilter(Stream inputStream)
    {
        if (inputStream == null)
        {
            throw new ArgumentNullException("inputStream");
        }

        responseStream = inputStream;
        responseHtml = new StringBuilder();
        decoder = Encoding.UTF8.GetDecoder();
    }

    // Stream members that delegate to the wrapped stream or return fixed values.
    #region Filter Overrides
    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    public override void Close()
    {
        responseStream.Close();
    }

    public override void Flush()
    {
        responseStream.Flush();
    }

    public override long Length
    {
        get { return 0; }
    }

    public override long Position
    {
        get { return position; }
        set { position = value; }
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        return responseStream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        return responseStream.Seek(offset, origin);
    }

    public override void SetLength(long length)
    {
        responseStream.SetLength(length);
    }
    #endregion

    /// <summary>
    /// Interception point: forwards the chunk to the wrapped stream and
    /// records a decoded copy of it until the end-of-page marker is seen.
    /// </summary>
    /// <param name="buffer">Buffer holding the bytes being written.</param>
    /// <param name="offset">Index of the first byte to write.</param>
    /// <param name="count">Number of bytes to write.</param>
    public override void Write(byte[] buffer, int offset, int count)
    {
        // Forward the original bytes immediately and untouched. Holding them
        // back until "</html>" shows up would drop the whole response when the
        // marker is missing or split across chunks, and re-encoding the
        // decoded text would corrupt pages that are not UTF-8.
        responseStream.Write(buffer, offset, count);

        if (recorded)
        {
            return;
        }

        int previousLength = responseHtml.Length;
        char[] chars = new char[decoder.GetCharCount(buffer, offset, count)];
        int charCount = decoder.GetChars(buffer, offset, count, chars, 0);
        responseHtml.Append(chars, 0, charCount);

        // Search only the new text plus enough of the previous text to catch
        // a marker that straddles the chunk boundary.
        int searchStart = Math.Max(0, previousLength - (EndOfPageMarker.Length - 1));
        string tail = responseHtml.ToString(searchStart, responseHtml.Length - searchStart);
        if (tail.IndexOf(EndOfPageMarker, StringComparison.OrdinalIgnoreCase) < 0)
        {
            // Page output is not complete yet; keep accumulating.
            return;
        }

        recorded = true;

        // Copy the delegate first so a filter without subscribers does not throw.
        EventHandler<RawDataEventArgs> handler = OnRawDataRecordedEvent;
        if (handler != null)
        {
            handler(this, new RawDataEventArgs(responseHtml.ToString()));
        }
    }
}
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;
/// <summary>
/// Event data that carries the captured page source to subscribers once
/// the complete output has been intercepted.
/// </summary>
public class RawDataEventArgs : EventArgs
{
    private string sourceCode;

    /// <summary>
    /// Creates the event data for a captured page.
    /// </summary>
    /// <param name="SourceCode">The captured page source.</param>
    public RawDataEventArgs(string SourceCode)
    {
        sourceCode = SourceCode;
    }

    /// <summary>
    /// Gets or sets the captured page source.
    /// </summary>
    public string SourceCode
    {
        get { return sourceCode; }
        set { sourceCode = value; }
    }
}
/// <summary>
/// Custom response filter. Every byte written to it is forwarded unchanged
/// to the wrapped stream, while a decoded copy of the text is accumulated so
/// that subscribers can be handed the page source once the closing
/// <c>&lt;/html&gt;</c> tag has been written.
/// </summary>
public class RawFilter : Stream
{
    // Marker whose appearance is treated as "the page has been fully written".
    private const string EndOfPageMarker = "</html>";

    Stream responseStream;
    long position;
    StringBuilder responseHtml;

    // Stateful decoder: keeps the trailing bytes of a multi-byte character
    // that is split across two Write calls instead of corrupting it.
    Decoder decoder;

    // True once the event has been raised; capturing stops afterwards.
    bool recorded;

    /// <summary>
    /// Raised once the raw page data has been captured.
    /// </summary>
    public event EventHandler<RawDataEventArgs> OnRawDataRecordedEvent;

    /// <summary>
    /// Wraps the stream that the captured output must still be forwarded to.
    /// </summary>
    /// <param name="inputStream">The stream to forward written data to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
    public RawFilter(Stream inputStream)
    {
        if (inputStream == null)
        {
            throw new ArgumentNullException("inputStream");
        }

        responseStream = inputStream;
        responseHtml = new StringBuilder();
        decoder = Encoding.UTF8.GetDecoder();
    }

    // Stream members that delegate to the wrapped stream or return fixed values.
    #region Filter Overrides
    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    public override void Close()
    {
        responseStream.Close();
    }

    public override void Flush()
    {
        responseStream.Flush();
    }

    public override long Length
    {
        get { return 0; }
    }

    public override long Position
    {
        get { return position; }
        set { position = value; }
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        return responseStream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        return responseStream.Seek(offset, origin);
    }

    public override void SetLength(long length)
    {
        responseStream.SetLength(length);
    }
    #endregion

    /// <summary>
    /// Interception point: forwards the chunk to the wrapped stream and
    /// records a decoded copy of it until the end-of-page marker is seen.
    /// </summary>
    /// <param name="buffer">Buffer holding the bytes being written.</param>
    /// <param name="offset">Index of the first byte to write.</param>
    /// <param name="count">Number of bytes to write.</param>
    public override void Write(byte[] buffer, int offset, int count)
    {
        // Forward the original bytes immediately and untouched. Holding them
        // back until "</html>" shows up would drop the whole response when the
        // marker is missing or split across chunks, and re-encoding the
        // decoded text would corrupt pages that are not UTF-8.
        responseStream.Write(buffer, offset, count);

        if (recorded)
        {
            return;
        }

        int previousLength = responseHtml.Length;
        char[] chars = new char[decoder.GetCharCount(buffer, offset, count)];
        int charCount = decoder.GetChars(buffer, offset, count, chars, 0);
        responseHtml.Append(chars, 0, charCount);

        // Search only the new text plus enough of the previous text to catch
        // a marker that straddles the chunk boundary.
        int searchStart = Math.Max(0, previousLength - (EndOfPageMarker.Length - 1));
        string tail = responseHtml.ToString(searchStart, responseHtml.Length - searchStart);
        if (tail.IndexOf(EndOfPageMarker, StringComparison.OrdinalIgnoreCase) < 0)
        {
            // Page output is not complete yet; keep accumulating.
            return;
        }

        recorded = true;

        // Copy the delegate first so a filter without subscribers does not throw.
        EventHandler<RawDataEventArgs> handler = OnRawDataRecordedEvent;
        if (handler != null)
        {
            handler(this, new RawDataEventArgs(responseHtml.ToString()));
        }
    }
}
为了能够截获整站的aspx 页面输出的内容,我们可以定义一个HttpModule 来完成。
代码如下:
using
System;
using System.Web;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Diagnostics;
/// <summary>
/// HTTP module that installs a <see cref="RawFilter"/> on the response of
/// every .aspx request and logs the page source the filter captures.
/// </summary>
public class HttpRawDataModule : IHttpModule
{
    #region IHttpModule members
    /// <summary>
    /// Nothing to release; the module holds no resources.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Subscribes to the application's ReleaseRequestState event, which is
    /// where the response filter gets installed.
    /// </summary>
    /// <param name="context">The application whose requests are intercepted.</param>
    public void Init(HttpApplication context)
    {
        context.ReleaseRequestState += new EventHandler(context_ReleaseRequestState);
    }
    #endregion

    /// <summary>
    /// Installs the capturing filter on the current response when the
    /// executing file is an .aspx page.
    /// </summary>
    /// <param name="sender">The <see cref="HttpApplication"/> raising the event.</param>
    /// <param name="e">Unused.</param>
    void context_ReleaseRequestState(object sender, EventArgs e)
    {
        HttpApplication application = (HttpApplication)sender;

        // Only .aspx requests are intercepted. The original author observed
        // that filtering everything made images show up broken on WAP clients.
        // Ordinal comparison keeps the check independent of the server culture.
        string path = application.Request.CurrentExecutionFilePath;
        if (path == null || !path.EndsWith(".aspx", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Wrap the existing filter and subscribe before installing it, so no
        // cast back from Response.Filter is needed.
        RawFilter filter = new RawFilter(application.Response.Filter);
        filter.OnRawDataRecordedEvent += new EventHandler<RawDataEventArgs>(filter_OnRawDataRecordedEvent);
        application.Response.Filter = filter;
    }

    /// <summary>
    /// Writes the captured page source to the raw data log.
    /// </summary>
    /// <param name="sender">The filter that captured the data.</param>
    /// <param name="e">Carries the captured page source.</param>
    void filter_OnRawDataRecordedEvent(object sender, RawDataEventArgs e)
    {
        string allcode = e.SourceCode;
        WapSite.SiteDataClass wapdata = new WapSite.SiteDataClass();
        wapdata.WriteRawDataLog(allcode);
    }
}
using System.Web;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Diagnostics;
/// <summary>
/// HTTP module that installs a <see cref="RawFilter"/> on the response of
/// every .aspx request and logs the page source the filter captures.
/// </summary>
public class HttpRawDataModule : IHttpModule
{
    #region IHttpModule members
    /// <summary>
    /// Nothing to release; the module holds no resources.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Subscribes to the application's ReleaseRequestState event, which is
    /// where the response filter gets installed.
    /// </summary>
    /// <param name="context">The application whose requests are intercepted.</param>
    public void Init(HttpApplication context)
    {
        context.ReleaseRequestState += new EventHandler(context_ReleaseRequestState);
    }
    #endregion

    /// <summary>
    /// Installs the capturing filter on the current response when the
    /// executing file is an .aspx page.
    /// </summary>
    /// <param name="sender">The <see cref="HttpApplication"/> raising the event.</param>
    /// <param name="e">Unused.</param>
    void context_ReleaseRequestState(object sender, EventArgs e)
    {
        HttpApplication application = (HttpApplication)sender;

        // Only .aspx requests are intercepted. The original author observed
        // that filtering everything made images show up broken on WAP clients.
        // Ordinal comparison keeps the check independent of the server culture.
        string path = application.Request.CurrentExecutionFilePath;
        if (path == null || !path.EndsWith(".aspx", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Wrap the existing filter and subscribe before installing it, so no
        // cast back from Response.Filter is needed.
        RawFilter filter = new RawFilter(application.Response.Filter);
        filter.OnRawDataRecordedEvent += new EventHandler<RawDataEventArgs>(filter_OnRawDataRecordedEvent);
        application.Response.Filter = filter;
    }

    /// <summary>
    /// Writes the captured page source to the raw data log.
    /// </summary>
    /// <param name="sender">The filter that captured the data.</param>
    /// <param name="e">Carries the captured page source.</param>
    void filter_OnRawDataRecordedEvent(object sender, RawDataEventArgs e)
    {
        string allcode = e.SourceCode;
        WapSite.SiteDataClass wapdata = new WapSite.SiteDataClass();
        wapdata.WriteRawDataLog(allcode);
    }
}
在Web.config 中增加配置节如下:
<system.web>
  <httpModules>
    <add name="RawDataModule" type="HttpRawDataModule" />
  </httpModules>
</system.web>
<system.web>
  <httpModules>
    <add name="RawDataModule" type="HttpRawDataModule" />
  </httpModules>
</system.web>