做网站统计首先要有数据,数据从何而来?这需要网站记录客户的访问轨迹,记录用户访问每个页面的流向。给网站加过百度统计与cnzz的站长们估计都清楚,要想对某个页面进行统计,就要在该页面上加上统计代码,将用户的访问数据记录下来,进而生成统计数据。现在就来自己实现一个这样的数据采集与统计功能,具体步骤与相关代码如下:
一、设计表结构
先考虑数据的记录方式与保存位置,由于后期要用到这些数据,因此考虑将数据存储到数据库中。根据需要创建了三个表:Visitor用于记录来访用户的相关数据,VisitorRecord用于记录来访用户的访问时间、着陆页面与跳出页面,ViewRecord是访问记录表,用于记录每次页面访问的基础数据。表结构如下:
二、插入数据
表创建好了,如何向其中插入数据呢?既然ViewRecord是记录基础数据,毫无疑问用户每访问一次页面,就要向该表中插入一条数据;每个访问者则向Visitor表中插入一条数据。现在问题来了,我们如何判断该用户是否来过呢?这里我们定义一台PC端为一个用户,如果表Visitor中已存在该用户的数据,只需更新表中的访问次数visitingNum,而不需要再向Visitor表中添加数据。至于应该如何判断,这里采用一个插件fingerprint2.js来帮助识别是否为同一用户,这个插件的作用类似于指纹识别,插件请移步至下载频道下载(下载地址:指纹识别插件 fingerprint2.js)。另外表VisitorRecord用来存储用户的来访时间、跳出时间及访问时长。
至于各个表插入数据的方法这里就不再赘述了,相信看到此文的读者都不在话下,这里只阐述具体的调用与实现。功能是放到一般处理程序中实现的,读者可自由变通。废话不多说,上代码:
Stat.ashx
public class Stat : IHttpHandler
{
/// <summary>
/// Handles one statistics beacon sent by the page script.
/// A request carrying <c>InPage</c> records a page view (and, when needed, the visitor and
/// visit rows); a request carrying <c>OutPage</c> updates the exit time/page of the
/// visitor's current visit. Requests from excluded IPs and from crawlers are ignored.
/// </summary>
/// <param name="context">The current HTTP context.</param>
public void ProcessRequest(HttpContext context)
{
    string IP = Labbase.Common.Utils.GetIP();
    Labbase.BLL.TJ_LocalIP bllIP = new Labbase.BLL.TJ_LocalIP();
    Labbase.BLL.TJ_Visitor bllTJVisitor = new Labbase.BLL.TJ_Visitor();
    Labbase.BLL.MVisitorRecord bllMVisitorRecord = new Labbase.BLL.MVisitorRecord();

    // IPs found in the TJ_LocalIP lookup are not counted.
    if (bllIP.Exists("LIP= '" + EscapeSqlLiteral(IP) + "'"))
    {
        return;
    }
    if (isEngine(IP, context.Request.ServerVariables["HTTP_USER_AGENT"]))
    {
        return;
    }

    if (!string.IsNullOrEmpty(context.Request["InPage"]))
    {
        if (context.Request.Cookies["lbGUID"] == null)
        {
            // No visitor cookie yet: only issue one. Nothing is recorded for this request;
            // recording happens on the next request that carries the cookie.
            HttpCookie cooklbGUID = new HttpCookie("lbGUID");
            cooklbGUID.Value = Guid.NewGuid().ToString();
            cooklbGUID.Expires = DateTime.MaxValue;
            context.Response.Cookies.Add(cooklbGUID);
        }
        else
        {
            Guid lbGUID;
            if (!TryReadVisitorId(context, out lbGUID))
            {
                return;
            }
            RecordPageView(context, IP, lbGUID, bllTJVisitor, bllMVisitorRecord);
        }
    }

    if (!string.IsNullOrEmpty(context.Request["OutPage"]))
    {
        Guid lbGUID;
        if (!TryReadVisitorId(context, out lbGUID))
        {
            return;
        }
        if (!bllTJVisitor.Exists(lbGUID))
        {
            return;
        }
        string vrid = bllMVisitorRecord.GetVRIDByGUID(lbGUID);
        if (!string.IsNullOrEmpty(vrid))
        {
            Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord = bllMVisitorRecord.GetModel(new Guid(vrid));
            Uri exitUri = ResolvePageUri(context, "OutPage");
            mdTJVisitorRecord.outTime = DateTime.Now;
            mdTJVisitorRecord.ExitPage = exitUri != null ? exitUri.ToString() : string.Empty;
            bllMVisitorRecord.Update(mdTJVisitorRecord);
        }
    }

    context.Response.ContentType = "text/plain";
    context.Response.Write("");
}

/// <summary>
/// Records one page view for a visitor that already carries the <c>lbGUID</c> cookie:
/// creates the visitor and/or a new visit row when required, then stores the view row.
/// </summary>
private void RecordPageView(HttpContext context, string IP, Guid lbGUID,
    Labbase.BLL.TJ_Visitor bllTJVisitor, Labbase.BLL.MVisitorRecord bllMVisitorRecord)
{
    DateTime now = DateTime.Now;
    Uri pageUri = ResolvePageUri(context, "InPage");
    string pageUrl = pageUri != null ? pageUri.ToString() : string.Empty;
    string location = IPShowAddress(IP); // IP database lookup, done once per request
    Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord = new Labbase.Model.TJ_VisitorRecord();

    if (!bllTJVisitor.Exists(lbGUID))
    {
        // Unknown visitor: create the visitor row and open the first visit.
        Labbase.Model.TJ_Visitor mdTJVisitor = new Labbase.Model.TJ_Visitor();
        mdTJVisitor.visitingNum = 1;
        mdTJVisitor.VisitorID = lbGUID;
        mdTJVisitor.Bfingerprinting = context.Request["fingerprint"];
        mdTJVisitor.LastVisitingTime = now;
        mdTJVisitor.VIp = IP;
        mdTJVisitor.Loction = location;
        bllTJVisitor.Add(mdTJVisitor);
        OpenVisit(bllMVisitorRecord, mdTJVisitorRecord, lbGUID, now, pageUrl);
    }
    else
    {
        Labbase.Model.TJ_Visitor mdTJVisitor = bllTJVisitor.GetModel(lbGUID);
        // TotalMinutes, not Minutes: Minutes is only the 0-59 component of the interval,
        // so a gap of 1h02m would otherwise be treated as 2 minutes.
        // NOTE(review): visitingNum is only ever set to 1 on creation and is not
        // incremented for returning visits - confirm whether that is intended.
        if ((now - (DateTime)mdTJVisitor.LastVisitingTime).TotalMinutes > 5)
        {
            OpenVisit(bllMVisitorRecord, mdTJVisitorRecord, lbGUID, now, pageUrl);
            mdTJVisitor.LastVisitingTime = now;
            bllTJVisitor.Update(mdTJVisitor);
        }
        else
        {
            // Still inside the current visit: attach the view to the existing visit row.
            string vrid = bllMVisitorRecord.GetVRIDByGUID(lbGUID);
            if (!string.IsNullOrEmpty(vrid))
            {
                mdTJVisitor.LastVisitingTime = now;
                bllTJVisitor.Update(mdTJVisitor);
                mdTJVisitorRecord.VRID = new Guid(vrid);
            }
        }
    }

    Labbase.Model.ViewRecord mdViewRecord = new Labbase.Model.ViewRecord();
    mdViewRecord.VRID = mdTJVisitorRecord.VRID;
    mdViewRecord.ViewID = Guid.NewGuid();
    mdViewRecord.referenceUrl = System.Web.HttpUtility.UrlDecode(context.Request["referrer"]);
    if (Utils.IsNullOrEmpty(mdViewRecord.referenceUrl))
    {
        mdViewRecord.referenceUrl = "";
    }
    mdViewRecord.FullPagePath = System.Web.HttpUtility.UrlDecode(context.Request["InPage"]);
    mdViewRecord.ViewIP = IP;
    mdViewRecord.Localarea = location;
    mdViewRecord.Vtitle = System.Web.HttpUtility.UrlDecode(context.Request["title"]);
    mdViewRecord.ViewTime = now;

    // The referrer is client-supplied; a malformed value must not abort the request.
    Uri referenceUri;
    if (!Utils.IsNullOrEmpty(mdViewRecord.referenceUrl)
        && Uri.TryCreate(mdViewRecord.referenceUrl, UriKind.Absolute, out referenceUri))
    {
        mdViewRecord.SId = Enginer(referenceUri.DnsSafeHost);
    }
    else
    {
        mdViewRecord.SId = 0;
    }

    int companyID = ClassifyPage(mdViewRecord, pageUri);
    AttachSearchKeyword(mdViewRecord, IP, companyID);
    new Labbase.BLL.MViewRecord().Add(mdViewRecord);
}

/// <summary>Fills and stores a new visit row whose entrance and exit page are both <paramref name="pageUrl"/>.</summary>
private static void OpenVisit(Labbase.BLL.MVisitorRecord bllMVisitorRecord,
    Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord, Guid lbGUID, DateTime now, string pageUrl)
{
    mdTJVisitorRecord.VRID = Guid.NewGuid();
    mdTJVisitorRecord.VisitorID = lbGUID;
    mdTJVisitorRecord.InTime = now;
    mdTJVisitorRecord.outTime = now.AddSeconds(1);
    mdTJVisitorRecord.Entrance = pageUrl;
    mdTJVisitorRecord.ExitPage = pageUrl;
    bllMVisitorRecord.Add(mdTJVisitorRecord);
}

/// <summary>
/// Derives the channel name and the Querypar*/QueryPar* fields of the view record from the
/// page URL (first path segment up to the first '-' or '.').
/// </summary>
/// <returns>The company id associated with the page, or 0 when there is none.</returns>
private int ClassifyPage(Labbase.Model.ViewRecord mdViewRecord, Uri pageUri)
{
    string pathAndQuery = pageUri != null ? pageUri.PathAndQuery : string.Empty;
    string pageName = pageUri != null ? pageUri.AbsolutePath.TrimStart(new char[] { '/' }) : string.Empty;
    int cut = pageName.IndexOf('-');
    if (cut >= 0)
    {
        pageName = pageName.Substring(0, cut);
    }
    cut = pageName.IndexOf('.');
    if (cut >= 0)
    {
        pageName = pageName.Substring(0, cut);
    }

    int companyID = 0;
    mdViewRecord.Querypar1 = 0; // default; overridden below for pages tied to a company
    switch (pageName)
    {
        case "IndustryNewsDetial":
            {
                object objCompany = new Labbase.BLL.News().GetCompanyID("NewsID=" + TrailingId(pathAndQuery));
                mdViewRecord.Querypar1 = ParseIdOrZero(objCompany == null ? null : objCompany.ToString());
                mdViewRecord.channel = "技术资料";
                break;
            }
        case "SupplyDetial": // supply detail
            {
                Labbase.Model.supplyInfo mdSupply = new Labbase.BLL.supply().GetModel(TrailingId(pathAndQuery));
                if (mdSupply != null)
                {
                    DataTable dtclass = new Labbase.BLL.product_category().GetProductClassInfo(" ClassXXID=" + mdSupply.ClassXXID);
                    mdViewRecord.Querypar1 = mdSupply.CompanyID;
                    companyID = ParseIdOrZero(mdSupply.CompanyID.ToString());
                    if (dtclass.Rows.Count > 0)
                    {
                        mdViewRecord.QueryPar2 = Int32.Parse(dtclass.Rows[0]["ClassID"].ToString());
                        mdViewRecord.QueryPar3 = Int32.Parse(dtclass.Rows[0]["ClassXID"].ToString());
                    }
                    mdViewRecord.QueryPar4 = mdSupply.ClassXXID;
                }
                mdViewRecord.channel = "供求信息";
                break;
            }
        case "ProductLDetail": // product detail
            companyID = new Labbase.BLL.product().GetCompanyIDByProductID(TrailingId(pathAndQuery));
            mdViewRecord.Querypar1 = companyID;
            mdViewRecord.channel = "产品信息";
            break;
        case "CompanyIndex":    // company home page
        case "CompanyNewsList": // company technical articles
        case "CompanyContact":
        case "CompanyInfo":
            companyID = TrailingId(pathAndQuery);
            mdViewRecord.Querypar1 = companyID;
            mdViewRecord.channel = "公司主页";
            break;
        case "CompanyProduct":      // company supply list
        case "CompanyProdutDetail": // company supply detail
        case "CompanyNews":         // company technical article detail
            {
                // The company id is the second '-'-separated segment of the URL.
                string[] pars = pathAndQuery.TrimEnd(".html".ToCharArray()).Split('-');
                companyID = pars.Length > 1 ? ParseIdOrZero(pars[1]) : 0;
                mdViewRecord.Querypar1 = companyID;
                mdViewRecord.channel = "公司主页";
                break;
            }
        case "NewsLDetails":
        case "PrimeList":
            mdViewRecord.channel = "资讯";
            break;
        case "IndustryNewsList":
            mdViewRecord.channel = "技术资料";
            break;
        case "SupplyList":
        case "ProClass":
            mdViewRecord.channel = "供求信息";
            break;
        case "ProductBList":
            mdViewRecord.channel = "产品信息";
            break;
        case "BrandsList":
            mdViewRecord.channel = "品牌专区";
            break;
        case "CompanyList":
            mdViewRecord.channel = "公司库";
            break;
        case "Exhibition":
        case "Exhibition/Detail":
            mdViewRecord.channel = "展会信息";
            break;
        case "ProductSearch":
        case "SupplySearch":
        case "NewsSearch":
        case "CompanySearch":
        case "BrandSearch":
            mdViewRecord.channel = "搜索";
            break;
        case "purchase":
        case "AboutmUs":
        case "ContactmUs":
        default:
            mdViewRecord.channel = "其它";
            break;
    }
    return companyID;
}

/// <summary>
/// When the view came from a search engine, stores or updates the search keyword row and
/// links it to the view record through <c>SWId</c>.
/// </summary>
private void AttachSearchKeyword(Labbase.Model.ViewRecord mdViewRecord, string IP, int companyID)
{
    if (Utils.IsNullOrEmpty(mdViewRecord.referenceUrl))
    {
        return;
    }
    string keyws = GetKeyWord(mdViewRecord.referenceUrl);
    if (string.IsNullOrEmpty(keyws))
    {
        return;
    }

    Labbase.BLL.SearchWord bllSearchWord = new Labbase.BLL.SearchWord();
    // The keyword comes straight from the referrer URL, so quotes must be escaped.
    object obj = bllSearchWord.GetSearchWordId("Word='" + EscapeSqlLiteral(keyws) + "'");
    if (obj == null)
    {
        Labbase.Model.SearchWord mdSearchWord = new Labbase.Model.SearchWord();
        mdSearchWord.SWID = Guid.NewGuid();
        mdSearchWord.LastDate = DateTime.Now;
        mdSearchWord.SumNum = 1;
        mdSearchWord.UserIP = IP;
        mdSearchWord.Word = keyws;
        mdSearchWord.CompanyID = companyID;
        bllSearchWord.Add(mdSearchWord);
        mdViewRecord.SWId = mdSearchWord.SWID;
    }
    else
    {
        mdViewRecord.SWId = new Guid(obj.ToString());
        Labbase.Model.SearchWord mdSearchWord = bllSearchWord.GetModel(new Guid(obj.ToString()));
        mdSearchWord.SumNum = mdSearchWord.SumNum + 1;
        mdSearchWord.LastDate = DateTime.Now;
        mdSearchWord.CompanyID = companyID;
        bllSearchWord.Update(mdSearchWord);
    }
}

/// <summary>Reads the visitor id from the <c>lbGUID</c> cookie; returns false when it is missing or not a GUID.</summary>
private static bool TryReadVisitorId(HttpContext context, out Guid visitorId)
{
    visitorId = Guid.Empty;
    HttpCookie cookie = context.Request.Cookies["lbGUID"];
    if (cookie == null || string.IsNullOrEmpty(cookie.Value))
    {
        return false;
    }
    try
    {
        visitorId = new Guid(cookie.Value);
        return true;
    }
    catch (FormatException)
    {
        return false;
    }
    catch (OverflowException)
    {
        return false;
    }
}

/// <summary>
/// Returns the URL of the tracked page: the HTTP referrer of the beacon request when
/// present, otherwise the URL reported in the given form/query parameter. Null when neither
/// yields an absolute URL.
/// </summary>
private static Uri ResolvePageUri(HttpContext context, string pageParameter)
{
    Uri referrer = context.Request.UrlReferrer;
    if (referrer != null)
    {
        return referrer;
    }
    Uri reported;
    string raw = System.Web.HttpUtility.UrlDecode(context.Request[pageParameter]);
    return Uri.TryCreate(raw, UriKind.Absolute, out reported) ? reported : null;
}

/// <summary>Parses the id after the last '-' of a ".html" URL; 0 when it is not a number.</summary>
private static int TrailingId(string pathAndQuery)
{
    string[] parts = pathAndQuery.Split('-');
    return ParseIdOrZero(parts[parts.Length - 1].TrimEnd(".html".ToCharArray()));
}

/// <summary>Parses an integer, returning 0 for null, empty or non-numeric text.</summary>
private static int ParseIdOrZero(string text)
{
    int id;
    return int.TryParse(text, out id) ? id : 0;
}

/// <summary>
/// Doubles single quotes so the value can be embedded in a quoted SQL literal.
/// The BLL methods used here only accept a raw filter string; a parameterized BLL
/// overload would be preferable if one exists.
/// </summary>
private static string EscapeSqlLiteral(string value)
{
    return value == null ? string.Empty : value.Replace("'", "''");
}
/// <summary>
/// Simple holder pairing a "came from a search engine" flag with the extracted keyword.
/// </summary>
private class Engine_wrod
{
    private bool _isEngine;
    private string _keyWord;

    /// <summary>Whether the source was recognised as a search engine.</summary>
    public bool isEngine
    {
        get { return _isEngine; }
        set { _isEngine = value; }
    }

    /// <summary>The search keyword.</summary>
    public string keyWord
    {
        get { return _keyWord; }
        set { _keyWord = value; }
    }
}
/// <summary>
/// Looks up which configured search engine a host name belongs to.
/// </summary>
/// <param name="input">Host name to test; it is matched by substring against each row's SDomin value.</param>
/// <returns>The SID of the first row whose SDomin occurs in <paramref name="input"/>, or 0 when none matches.</returns>
private int Enginer(string input)
{
    DataRowCollection engines = new Labbase.BLL.SearchEngine().GetList("").Tables[0].Rows;
    for (int i = 0; i < engines.Count; i++)
    {
        DataRow engine = engines[i];
        string domain = engine["SDomin"].ToString();
        if (!input.Contains(domain))
        {
            continue;
        }
        return int.Parse(engine["SID"].ToString());
    }
    return 0;
}
/// <summary>
/// Decides whether a request should be treated as coming from a search-engine crawler.
/// </summary>
/// <param name="IP">Client IP address; checked against the SearchEngineIP lookup.</param>
/// <param name="useragent">User-Agent header; checked by substring against each SEMString marker.</param>
/// <returns>
/// True when the User-Agent is missing, contains one of the configured markers, or the IP is
/// found in the crawler IP lookup; otherwise false.
/// </returns>
private bool isEngine(string IP, string useragent)
{
    // No User-Agent at all: treat as a crawler and stop counting.
    if (string.IsNullOrEmpty(useragent))
    {
        return true;
    }

    Labbase.BLL.SerchEngineMark bllSerchEngineMark = new Labbase.BLL.SerchEngineMark();
    foreach (DataRow dr in bllSerchEngineMark.GetList("").Tables[0].Rows)
    {
        string marker = dr["SEMString"].ToString();
        // An empty marker would match every User-Agent (Contains("") is always true).
        if (marker.Length > 0 && useragent.Contains(marker))
        {
            return true;
        }
    }

    // The IP may originate from request headers, so escape quotes before embedding it in the
    // filter string. A parameterized BLL overload would be preferable if one exists.
    string safeIp = (IP ?? string.Empty).Replace("'", "''");
    Labbase.BLL.SearchEngineIP bllSearchEngineIP = new Labbase.BLL.SearchEngineIP();
    return bllSearchEngineIP.Exists("SEIP='" + safeIp + "'");
}
/// <summary>
/// Extracts the search keyword from a referrer URL of a known search engine.
/// </summary>
/// <param name="url">Referrer URL.</param>
/// <returns>The decoded keyword, or an empty string when no known engine or keyword parameter is found.</returns>
private string GetKeyWord(string url)
{
    // Engine markers and, at the same index, the '|'-separated query parameters that may
    // carry the keyword for that engine (tried left to right).
    string[] engineMarkers = { "google", "yahoo", "baidu", "soso", "bing", "sogou", "so.com" };
    string[] keywordParams = { "q", "q", "wd|word|kw|keyword", "w", "q", "query", "q" };

    // Only the first marker found in the URL is considered.
    string candidates = null;
    for (int i = 0; i < engineMarkers.Length && candidates == null; i++)
    {
        if (url.Contains(engineMarkers[i]))
        {
            candidates = keywordParams[i];
        }
    }

    string rawKeyword = "";
    if (candidates != null)
    {
        foreach (string paramName in candidates.Split('|'))
        {
            rawKeyword = GetQuerystring(paramName, url);
            if (!string.IsNullOrEmpty(rawKeyword))
            {
                break;
            }
        }
    }

    // Decode with the detected source encoding, then round-trip through UTF-8.
    Encoding sourceEncoding = Encoding.GetEncoding(GBorUTF(rawKeyword, url));
    Encoding utf8 = Encoding.GetEncoding("UTF-8");
    string decoded = HttpUtility.UrlDecode(rawKeyword, sourceEncoding);
    string reEncoded = HttpUtility.UrlEncode(decoded, utf8);
    return HttpUtility.UrlDecode(reEncoded, utf8);
}
/// <summary>
/// Guesses the character encoding of a percent-encoded search keyword.
/// </summary>
/// <param name="input">The keyword exactly as it appears in the URL (still percent-encoded).</param>
/// <param name="url">The referrer URL the keyword was taken from.</param>
/// <returns>
/// "GB2312" or "UTF-8". Detection is only attempted for URLs containing "baidu", "sogou" or
/// "so.com"; every other URL yields "UTF-8".
/// </returns>
private string GBorUTF(string input, string url)
{
    const string Utf8Name = "UTF-8";
    const string Gb2312Name = "GB2312";

    if (string.IsNullOrEmpty(url))
    {
        return Utf8Name;
    }
    bool mayUseGb = url.Contains("baidu") || url.Contains("sogou") || url.Contains("so.com");
    if (!mayUseGb)
    {
        return Utf8Name;
    }

    // An explicit "ie=" parameter wins; compare its value case-insensitively.
    if (url.Contains("ie="))
    {
        return url.IndexOf("ie=gb2312", StringComparison.OrdinalIgnoreCase) >= 0 ? Gb2312Name : Utf8Name;
    }
    if (string.IsNullOrEmpty(input))
    {
        return Utf8Name;
    }

    // Percent-decode to raw bytes and check whether they form valid UTF-8. Comparing an
    // encode/decode round-trip textually (the previous approach) misfires whenever the
    // engine escapes a character differently from UrlEncode, e.g. "%20" versus "+".
    byte[] rawBytes = HttpUtility.UrlDecodeToBytes(input, Encoding.UTF8);
    try
    {
        new UTF8Encoding(false, true).GetString(rawBytes);
        return Utf8Name;
    }
    catch (ArgumentException)
    {
        // Invalid UTF-8 byte sequence (DecoderFallbackException derives from ArgumentException).
        return Gb2312Name;
    }
}
/// <summary>
/// Returns the raw (still URL-encoded) value of a query parameter from a URL.
/// </summary>
/// <param name="queryname">Parameter name to look for, e.g. wd, word, q, query, w.</param>
/// <param name="url">URL to scan.</param>
/// <returns>
/// The value of the first parameter whose name equals <paramref name="queryname"/>
/// (case-sensitive); an empty string when the parameter is absent, has no value, or either
/// argument is null/empty.
/// </returns>
private string GetQuerystring(string queryname, string url)
{
    if (string.IsNullOrEmpty(queryname) || string.IsNullOrEmpty(url))
    {
        return string.Empty;
    }

    // Each match is one "name[=value]" pair introduced by '?' or '&'. The name must not
    // contain '&', otherwise a value-less parameter ("?flag&q=x") would swallow the next one.
    // Scanning matches directly (instead of filling a dictionary) also tolerates a parameter
    // that appears more than once; the first occurrence wins.
    foreach (Match pair in Regex.Matches(url, "[?&]([^=&#]+)(?:=([^&#]*))?"))
    {
        if (pair.Groups[1].Value == queryname)
        {
            return pair.Groups[2].Value;
        }
    }
    return string.Empty;
}
/// <summary>
/// Reads one parameter value from a query string.
/// </summary>
/// <param name="strQuery">Query string, with or without the leading '?'.</param>
/// <param name="strSplit">Parameter name (without the '=' sign).</param>
/// <returns>
/// The raw value of the first "name=value" pair whose name matches exactly, including any
/// '=' characters inside the value; an empty string when not found or when
/// <paramref name="strQuery"/> is null/empty.
/// </returns>
protected string wordFromUrlQuery(string strQuery, string strSplit)
{
    if (string.IsNullOrEmpty(strQuery))
    {
        return "";
    }

    string prefix = strSplit + "=";
    foreach (string pair in strQuery.TrimStart('?').Split('&'))
    {
        if (pair.StartsWith(prefix, StringComparison.Ordinal))
        {
            // Take everything after "name=" so values that themselves contain '='
            // (e.g. base64 padding) are not cut short.
            return pair.Substring(prefix.Length);
        }
    }
    return "";
}
/// <summary>
/// Resolves an IP address to a location string using the QQWry.Dat data file.
/// </summary>
/// <param name="strChar">The IP address to look up.</param>
/// <returns>The value returned by <c>IPScaner.IPLocation()</c> for that address.</returns>
protected string IPShowAddress(string strChar)
{
    IPScaner scanner = new IPScaner
    {
        DataPath = System.Web.HttpContext.Current.Server.MapPath(@"/js/QQWry.Dat"),
        IP = strChar
    };
    return scanner.IPLocation();
}
/// <summary>
/// Always false: this handler instance reports itself as not reusable.
/// </summary>
public bool IsReusable
{
    get { return false; }
}
}
三、数据采集
在实现了具体功能之后,我们还需要一个js脚本来调用该一般处理程序Stat.ashx,具体脚本如下:
statistics.js:
// On DOM ready, report the current page view to /tools/Stat.ashx.
$(function () {
    var urlreferrer = escape(document.referrer);
    var locationurl = escape(document.location);

    // The unload handler leaves a short-lived "freash" cookie holding the page URL; if it
    // still names this page, the page was just refreshed and is not counted again.
    var ffreashed = getCookie("freash");
    if (ffreashed == document.location) {
        return;
    }

    // Tracking must never disturb the visitor: log failures to the console instead of
    // popping up an alert().
    function reportFailure(xhr) {
        if (window.console && typeof window.console.warn === "function") {
            window.console.warn("Stat.ashx request failed with HTTP status " + xhr.status);
        }
    }

    // Sends one page-view beacon. The requests are asynchronous so page loading is not
    // blocked; ordering between the two first-visit requests is kept by chaining the second
    // one in the success callback of the first.
    function postPageView(fingerprint, onSuccess) {
        var payload = { InPage: locationurl, referrer: urlreferrer, title: escape(document.title) };
        if (fingerprint !== undefined) {
            payload.fingerprint = fingerprint;
        }
        $.ajax({
            type: "POST",
            url: "/tools/Stat.ashx",
            data: payload,
            success: onSuccess,
            error: reportFailure
        });
    }

    if (getCookie("lbGUID") == null) {
        // First visit: load the fingerprint library, then send two requests. The first one
        // carries no visitor cookie yet; the second one is sent after it succeeds and adds
        // the browser fingerprint.
        $.ajax({
            url: "/js/fingerprint2.js",
            dataType: "script",
            cache: true
        }).done(function () {
            var fp = new Fingerprint2();
            fp.get(function (result) {
                postPageView(undefined, function () {
                    postPageView(result);
                });
            });
        });
    }
    else {
        postPageView();
    }
});
// When the page unloads, report the exit page and set a short-lived cookie that marks a refresh.
$(window).unload(function () {
    var leaving = document.location;
    var exitRequest = {
        url: "/tools/Stat.ashx",
        type: "POST",
        async: false, // synchronous so the request is issued before the page goes away
        data: { OutPage: escape(leaving) }
    };
    $.ajax(exitRequest);
    setCookie("freash", leaving);
});
// Writes a cookie that expires 2.5 seconds from now; the value is stored escape()-encoded.
function setCookie(name, value) {
    var lifetimeMs = 2.5 * 1000;
    var expiry = new Date(new Date().getTime() + lifetimeMs);
    var parts = [name, "=", escape(value), ";expires=", expiry.toUTCString()];
    document.cookie = parts.join("");
}
// Returns the unescape()-decoded value of the named cookie, or null when it is not set.
function getCookie(name) {
    var pattern = new RegExp("(^| )" + name + "=([^;]*)(;|$)");
    var found = document.cookie.match(pattern);
    return found ? unescape(found[2]) : null;
}
// Expires the named cookie by rewriting it with an expiry date in the past.
// Does nothing when the cookie is not set.
function delCookie(name) {
    var current = getCookie(name);
    if (current == null) {
        return;
    }
    var past = new Date();
    past.setTime(past.getTime() - 1);
    document.cookie = [name, "=", current, ";expires=", past.toUTCString()].join("");
}
到此基本的工作已经做完,剩下的就是直接将脚本引用到页面中,用户点击相应的页面自然也就可以实现数据采集与数据统计了。为了避免重复动作,最好将脚本引用到页面共用的用户控件中,下面是经过规范化处理的引用示例:
<script type="text/javascript">
    (function () {
        // In-site statistics: inject /js/statistics.js into the document head as an async script.
        var loader = document.createElement("script");
        loader.type = "text/javascript";
        loader.async = true;
        loader.src = "/js/statistics.js";
        document.getElementsByTagName('HEAD').item(0).appendChild(loader);
    })();
</script>
总的来说,数据采集无非就是记录下用户在本网站各页面的具体浏览轨迹,从而用于对用户的需求进行分析,采集就是统计的数据来源。