因工作需要,要采集Alexa流量信息及每天的IP、PV数据,现编写自动采集工具!
功能如下:
一、指定时间段采集Alexa及IP、PV信息
二、窗体可最小化并隐藏到Windows任务栏右下角的系统托盘
工具源码请到我的资源共享里下载:工具下载地址
工具的运行效果截图如下:
Alexa流量信息自动采集工具部分代码如下
winform页面后台.cs文件代码:
public partial class alexaForm : Form
{
//300000
/// <summary>
/// Creates the form; the only work done here is the call to InitializeComponent().
/// </summary>
public alexaForm()
{
InitializeComponent();
}
bool flagExit = true;// exit flag: true = do not exit (closing the window is cancelled and the form is minimized), false = exit
// Sites to collect; filled in Form1_Load.
List<Alexa> list = new List<Alexa>();
int indexAlexa = 0;// index into list of the site whose Alexa ranks SetAlexaInfo() writes next
//int indexChinaz = 0;// Chinaz index
bool startAlexa = false;// true while the Alexa-site phase of a collection is running
//bool startChinaz = false;// flags whether collection from Chinaz has started
// Set to true by GetAlexa() and checked by timer1_Tick().
// NOTE(review): no code visible in this file sets it back to false.
bool IsRecording = false;// flags whether collection has started
// Scratch buffer used by SetAlexaInfo().
StringBuilder sb = new StringBuilder();
// Not referenced by any code visible in this file.
StringBuilder sbState = new StringBuilder();
// document: page captured by SetAlexaInfo(); aaa: step counter used by webBrowser1_DocumentCompleted (0, 1, 2).
HtmlDocument document = null; int aaa = 0;
// Working values of GetAlexaFromChinaz(): chinaz page URL, last downloaded text, data-API URL.
string domainHost, result, url;
// HTTP state used by the GetHtml() overloads: cookies of the last response and the shared cookie container.
private static CookieCollection cookies = new CookieCollection();
private static CookieContainer c = new CookieContainer();
// Most recent request / response created by GetHtml(), and the headers of the last response.
private static HttpWebRequest req;
private static HttpWebResponse res;
private static WebHeaderCollection whc;
#region 事件
/// <summary>
/// Form load handler: pauses two seconds, registers the sites to collect, enables the
/// schedule timer and points the embedded browser at a blank page.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    // Pause for two seconds once the form has loaded.
    System.Threading.Thread.Sleep(2000);

    // Sites to collect, in collection order: display name plus bare domain.
    Alexa[] sites = new Alexa[]
    {
        new Alexa { DomainName = "她时代", DomainUrl = "smartshe.com" },
        new Alexa { DomainName = "elle", DomainUrl = "ellechina.com" },
        new Alexa { DomainName = "onlylady", DomainUrl = "onlylady.com" },
        new Alexa { DomainName = "pclady", DomainUrl = "pclady.com.cn" },
        new Alexa { DomainName = "yoka", DomainUrl = "yoka.com" },
        new Alexa { DomainName = "贝太厨房", DomainUrl = "bettyskitchen.com.cn" },
        new Alexa { DomainName = "都市主妇", DomainUrl = "herschina.com" },
        new Alexa { DomainName = "嘉人", DomainUrl = "marieclairechina.com" },
        new Alexa { DomainName = "女友", DomainUrl = "ny1988.com" },
        new Alexa { DomainName = "瑞丽", DomainUrl = "rayli.com.cn" },
        new Alexa { DomainName = "时尚", DomainUrl = "trends.com.cn" },
        new Alexa { DomainName = "悦己", DomainUrl = "self.com.cn" }
    };
    list.AddRange(sites);

    timer1.Enabled = true;

    // Start on an empty page and mirror that in the address box.
    Uri blankPage = new Uri(@"about:blank");
    webBrowser1.Navigate(blankPage);
    tboCurUrl.Text = "about:blank";
}
/// <summary>
/// DocumentCompleted handler of webBrowser1. While startAlexa is set and the browser reports
/// ReadyState Interactive it advances the step counter aaa: step 0 clicks the element
/// "trafficstats_a", step 1 invokes a click on the element "tw-tabList", and any other step
/// scrapes the page with SetAlexaInfo() and loads the next site with GetAlexa(indexAlexa).
/// </summary>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)// WebBrowser document-completed event
{
//if ((webBrowser1.ReadyState == WebBrowserReadyState.Interactive) && startChinaz)
//{
// GetAlexaFromChinaz();
//}
if (startAlexa && webBrowser1.ReadyState == WebBrowserReadyState.Interactive)
{
if (aaa == 0)
{
// Step 0: stays in this step until the "trafficstats_a" element exists.
HtmlElement a = webBrowser1.Document.All["trafficstats_a"];
if (a != null)
{
aaa = 1;
a.Click += new HtmlElementEventHandler(a_Click);
a.InvokeMember("click");
}
}
else if (aaa == 1)
{
// Step 1: the counter moves on to 2 even when the tab list is not found.
aaa = 2;
// first li under the tw-tabList ul
HtmlElement a = webBrowser1.Document.All["tw-tabList"];
if (a != null)
{
// NOTE(review): the handler is attached to the first child, but the click is invoked on
// the tab list element itself — confirm which element is meant to be clicked.
a.Children[0].Click += new HtmlElementEventHandler(Form1_Click);
a.InvokeMember("click");
}
}
else
{
// store the Alexa figures of the current site
SetAlexaInfo();
// pause 2 seconds (blocks the calling thread)
System.Threading.Thread.Sleep(2000);
aaa = 0;
// SetAlexaInfo() has already advanced indexAlexa, so this loads the next site.
GetAlexa(indexAlexa);
}
}
}
/// <summary>
/// Click handler attached in webBrowser1_DocumentCompleted during step 1; sets the step counter aaa to 2.
/// </summary>
void Form1_Click(object sender, HtmlElementEventArgs e)// click event of the Traffic Rank option
{
aaa = 2;
}
/// <summary>
/// Click handler attached to the "trafficstats_a" element in webBrowser1_DocumentCompleted; sets the step counter aaa to 1.
/// </summary>
void a_Click(object sender, HtmlElementEventArgs e)// click event of the Traffic Stats option
{
aaa = 1;
}
// Start of the scheduled hour (date + hour) that last triggered an automatic collection;
// DateTime.MinValue until the first scheduled run.
private DateTime lastScheduledSlot = DateTime.MinValue;

/// <summary>
/// Timer tick handler: starts an automatic collection once for each scheduled hour
/// (09, 13 and 17 local time) of each day.
/// </summary>
/// <remarks>
/// The previous guard, <c>!IsRecording</c>, allowed only a single automatic run per process
/// because IsRecording is set by GetAlexa() and never cleared. Remembering the last slot
/// served lets every scheduled hour fire, and fire only once even though the timer ticks
/// several times within that hour. GetAlexa() disables timer1 while a collection is running,
/// so no tick arrives in the middle of a run.
/// </remarks>
private void timer1_Tick(object sender, EventArgs e)
{
    DateTime now = DateTime.Now;

    bool isScheduledHour = now.Hour == 9 || now.Hour == 13 || now.Hour == 17;
    if (!isScheduledHour)
    {
        return;
    }

    DateTime slot = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0);
    if (slot == lastScheduledSlot)
    {
        // This hour of this day has already been collected.
        return;
    }

    lastScheduledSlot = slot;
    startAlexa = true; // mark the Alexa phase as running
    GetAlexa(0);
}
/// <summary>
/// Form closing handler: unless a real exit was requested (flagExit == false), the close is
/// cancelled and the form is minimized instead.
/// </summary>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    bool reallyExiting = !flagExit;
    if (reallyExiting)
    {
        return;
    }

    e.Cancel = true;
    NormalToMinimized();
}
/// <summary>
/// Resize handler: when the window state has become Minimized, applies NormalToMinimized().
/// </summary>
private void Form1_Resize(object sender, EventArgs e)
{
    if (this.WindowState != FormWindowState.Minimized)
    {
        return;
    }

    NormalToMinimized();
}
/// <summary>
/// Click handler of the manual-collection menu item: starts a collection from the first site.
/// </summary>
/// <remarks>
/// GetAlexa(0) navigates to the first site, while SetAlexaInfo() stores results at
/// list[indexAlexa] and webBrowser1_DocumentCompleted resumes at step aaa. Both are reset here
/// so that a click made while an earlier run is still in progress does not store the ranks
/// of one site on another site's entry. After a completed run both values are already 0.
/// </remarks>
private void tsmi_GetNow_Click(object sender, EventArgs e)
{
    indexAlexa = 0;
    aaa = 0;
    startAlexa = true; // mark the Alexa phase as running
    GetAlexa(0);
}
/// <summary>
/// Click handler of the exit menu item: asks for confirmation and, on Yes, clears flagExit
/// and exits the application.
/// </summary>
private void tsmi_BtnExit_Click(object sender, EventArgs e)
{
    bool confirmed = MessageBox.Show("确定退出本软件吗?", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes;
    if (!confirmed)
    {
        return;
    }

    // With flagExit cleared, Form1_FormClosing no longer cancels the close.
    flagExit = false;
    System.Windows.Forms.Application.Exit();
}
/// <summary>
/// Click handler of the visit button; delegates to GoUrl().
/// </summary>
private void button1_Click(object sender, EventArgs e)// click event of the visit button
{
GoUrl();
}
/// <summary>
/// Key handler of the address box: a key with value 13 (Enter) on a non-blank address calls GoUrl().
/// </summary>
private void tboCurUrl_PreviewKeyDown(object sender, PreviewKeyDownEventArgs e)
{
    const int enterKeyValue = 13;

    if (e.KeyValue != enterKeyValue)
    {
        return;
    }

    if (tboCurUrl.Text.Trim().Length <= 0)
    {
        return;
    }

    GoUrl();
}
/// <summary>
/// Click handler of the notify icon; restores the form through MinimizedToNormal().
/// </summary>
private void notifyIcon1_Click(object sender, EventArgs e)// single click on the system tray icon
{
MinimizedToNormal();
}
#endregion
#region 处理方法
/// <summary>
/// Reads the rank figures from the page currently loaded in webBrowser1, stores them on
/// list[indexAlexa] and then advances indexAlexa to the next site.
/// </summary>
/// <remarks>
/// Every figure is extracted on its own: a figure whose page element is missing or whose
/// pattern does not match becomes "-" without discarding figures that were read successfully.
/// (Previously any failure reset all five figures to "-".)
/// </remarks>
private void SetAlexaInfo()
{
    Alexa site = list[indexAlexa];
    document = webBrowser1.Document;

    // "rank" element: yesterday / 7 day / 1 month ranks; commas are stripped from the captured value.
    string rankText = GetElementText(document, "rank");
    site.TodayRank = ExtractRank(rankText, @"Yesterday([\d,]+)");
    site.WeekRank = ExtractRank(rankText, @"7 day([\d,]+)");
    site.MonthRank = ExtractRank(rankText, @"1 month([\d,]+)");

    // "siteStats" element: overall rank; line breaks, spaces and commas are removed before matching.
    string statsText = GetElementText(document, "siteStats").Replace("\r\n", "").Replace(" ", "").Replace(",", "");
    site.Rank = ExtractRank(statsText, @"AlexaTrafficRankReputation([\d]+)");

    // "traffic-rank-by-country" element: rank listed after "China"; spaces are removed before matching.
    string countryText = GetElementText(document, "traffic-rank-by-country").Replace(" ", "");
    site.ChinaRank = ExtractRank(countryText, @"China([\d,]+)");

    indexAlexa += 1;
}

/// <summary>
/// Returns the InnerText of the element with the given id, or an empty string when the page,
/// the element or its text is missing or cannot be read.
/// </summary>
private static string GetElementText(HtmlDocument page, string elementId)
{
    if (page == null)
    {
        return string.Empty;
    }

    try
    {
        HtmlElement element = page.GetElementById(elementId);
        if (element == null)
        {
            return string.Empty;
        }

        return element.InnerText ?? string.Empty;
    }
    catch (Exception ex)
    {
        // Best effort, as before, but the failure is no longer invisible.
        System.Diagnostics.Trace.TraceWarning("Reading element '" + elementId + "' failed: " + ex.Message);
        return string.Empty;
    }
}

/// <summary>
/// Returns capture group 1 of the case-insensitive <paramref name="pattern"/> with commas
/// removed, or "-" when the pattern does not match.
/// </summary>
private static string ExtractRank(string text, string pattern)
{
    Match match = Regex.Match(text, pattern, RegexOptions.IgnoreCase);
    return match.Success ? match.Groups[1].Value.Replace(",", "") : "-";
}
/// <summary>
/// Passes every entry of list, in order, to DBHelp.ExecuteNonQuery.
/// </summary>
private void ExportToSql()
{
    for (int i = 0; i < list.Count; i++)
    {
        DBHelp.ExecuteNonQuery(list[i]);
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
/// </summary>
/// <param name="url">Absolute HTTP URL to fetch.</param>
/// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="url"/> is not an HTTP(S) URL.</exception>
/// <remarks>
/// The request uses the shared cookie container, and the headers and cookies of the response
/// are kept in the static fields whc and cookies. The response, its stream and the reader are
/// now disposed before returning; they were previously left open. The static field res
/// therefore refers to an already closed response once this method has returned.
/// </remarks>
private string GetHtml(string url)
{
    req = WebRequest.Create(url) as HttpWebRequest;
    if (req == null)
    {
        throw new ArgumentException("Not an HTTP URL: " + url, "url");
    }

    req.Method = "GET";
    req.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    req.CookieContainer = c;

    res = null;
    using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
    using (Stream body = response.GetResponseStream())
    // NOTE(review): the body is decoded with the machine's default code page, not the charset
    // announced by the server — confirm this matches the pages being fetched.
    using (StreamReader reader = new StreamReader(body, System.Text.Encoding.Default))
    {
        res = response;
        whc = response.Headers;
        cookies = response.Cookies;
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET that carries <paramref name="urlRef"/>
/// as its Referer header, and returns the response body as text.
/// </summary>
/// <param name="url">Absolute HTTP URL to fetch.</param>
/// <param name="urlRef">Value sent in the Referer header.</param>
/// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="url"/> is not an HTTP(S) URL.</exception>
/// <remarks>
/// The request uses the shared cookie container, accepts gzip-compressed responses and keeps
/// the connection alive. The response, its stream and the reader are now disposed before
/// returning; they were previously left open. The static field res therefore refers to an
/// already closed response once this method has returned.
/// </remarks>
private string GetHtml(string url, string urlRef)
{
    req = WebRequest.Create(url) as HttpWebRequest;
    if (req == null)
    {
        throw new ArgumentException("Not an HTTP URL: " + url, "url");
    }

    req.Method = "GET";
    req.Referer = urlRef;
    req.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    req.CookieContainer = c;
    req.Accept = "application/javascript, */*;q=0.8";
    req.AutomaticDecompression = DecompressionMethods.GZip;
    req.KeepAlive = true;

    res = null;
    using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
    using (Stream body = response.GetResponseStream())
    // NOTE(review): the body is decoded with the machine's default code page, not the charset
    // announced by the server — confirm this matches the pages being fetched.
    using (StreamReader reader = new StreamReader(body, System.Text.Encoding.Default))
    {
        res = response;
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Navigates webBrowser1 to the address typed into tboCurUrl.
/// </summary>
/// <remarks>
/// An address without a scheme gets "http://" prepended, as before. Addresses that already
/// carry a scheme ("http://", "https://", ...) or start with "about:" are used unchanged;
/// previously anything not starting with "http://" was prefixed, which broke "https://..."
/// and "about:blank". A blank address is ignored and an address that still is not a valid
/// absolute URI is reported to the user; both previously raised an unhandled UriFormatException.
/// </remarks>
private void GoUrl()
{
    string address = tboCurUrl.Text.Trim();
    if (address.Length == 0)
    {
        return;
    }

    bool hasScheme = Regex.IsMatch(address, @"^[a-z][a-z0-9+.\-]*://", RegexOptions.IgnoreCase)
        || address.StartsWith("about:", StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        address = "http://" + address;
    }

    Uri target;
    if (!Uri.TryCreate(address, UriKind.Absolute, out target))
    {
        MessageBox.Show("无效的网址:" + address);
        return;
    }

    webBrowser1.Navigate(target);
}
/// <summary>
/// Hides the form, sets its window state to Minimized and makes the notify icon visible.
/// </summary>
private void NormalToMinimized()// minimize
{
// NOTE(review): the order of these assignments may matter to WinForms events such as Resize — keep it unless verified.
this.Visible = false;
this.WindowState = FormWindowState.Minimized;
notifyIcon1.Visible = true;
}
/// <summary>
/// Shows the form again, sets its window state to Normal and hides the notify icon.
/// </summary>
private void MinimizedToNormal()// restore to normal
{
// NOTE(review): the order of these assignments may matter to WinForms events such as Resize — keep it unless verified.
this.Visible = true;
this.WindowState = FormWindowState.Normal;
notifyIcon1.Visible = false;
}
#endregion
/// <summary>
/// Loads the Alexa site-info page of list[num] into webBrowser1. Once num has passed the last
/// site, the Alexa phase is ended and the IP/PV lookup on chinaz is started instead.
/// </summary>
/// <param name="num">Index into list of the site to load.</param>
private void GetAlexa(int num)
{
    if (num >= list.Count)
    {
        // All sites visited: leave the Alexa phase and continue with the IP/PV lookup.
        startAlexa = false;
        GetAlexaFromChinaz();
        return;
    }

    // The schedule timer stays off while a collection is running.
    timer1.Enabled = false;
    IsRecording = true;

    string pageUrl = "http://www.alexa.com/siteinfo/" + list[num].DomainUrl;
    webBrowser1.Navigate(pageUrl);
    tboCurUrl.Text = pageUrl;
}
/// <summary>
/// Looks up the IP and PV figures of every site in list on alexa.chinaz.com, writes all
/// collected results to the database and resets the form state for the next collection.
/// </summary>
/// <remarks>
/// Runs synchronously on the calling thread and pauses five seconds before each site.
/// A network failure for one site now only marks that site's figures as "-", and the state
/// reset (timer re-enabled, indexes cleared, blank page) happens in a finally block, so a
/// failure can no longer leave timer1 disabled for good.
/// NOTE(review): IsRecording is not reset here, exactly as before.
/// </remarks>
private void GetAlexaFromChinaz()
{
    try
    {
        for (int num = 0; num < list.Count; num++)
        {
            System.Threading.Thread.Sleep(5000);
            try
            {
                QueryChinazTraffic(list[num]);
            }
            catch (WebException ex)
            {
                System.Diagnostics.Trace.TraceWarning("IP/PV lookup failed for " + list[num].DomainUrl + ": " + ex.Message);
                list[num].IpNum = "-";
                list[num].PvNum = "-";
            }
            catch (IOException ex)
            {
                System.Diagnostics.Trace.TraceWarning("IP/PV lookup failed for " + list[num].DomainUrl + ": " + ex.Message);
                list[num].IpNum = "-";
                list[num].PvNum = "-";
            }
        }

        // Export everything that was collected.
        ExportToSql();
    }
    finally
    {
        timer1.Enabled = true;
        startAlexa = false;
        indexAlexa = 0;
        timer1.Interval = 600000;
        webBrowser1.Navigate("about:blank");
        tboCurUrl.Text = "about:blank";
    }
}

/// <summary>
/// Fetches the chinaz page of one site, derives the data-API URL from it, calls that API and
/// stores the resulting IP and PV figures on <paramref name="site"/> ("-" when unavailable).
/// </summary>
private void QueryChinazTraffic(Alexa site)
{
    domainHost = string.Format("http://alexa.chinaz.com/?domain={0}", site.DomainUrl);
    result = GetHtml(domainHost).Replace("\r\n", "");

    Match apiMatch = Regex.Match(result, @"(http://alexa.chinaz.com/Get_Data.asp[^\""]+)");
    if (!apiMatch.Success)
    {
        site.IpNum = "-";
        site.PvNum = "-";
        return;
    }

    url = apiMatch.Groups[0].Value;
    // Parameter f: the decimal number following the two characters after "f=".
    string param_f = Regex.Match(url, @"f=.{2,2}(\d+\.\d+)", RegexOptions.IgnoreCase).Groups[1].Value;
    // Parameter g: the decimal number at the end of the URL.
    string param_g = Regex.Match(url, @"\d+\.\d+$", RegexOptions.IgnoreCase).Groups[0].Value;
    // Rebuild the query with the fixed "%A1%D6%20" prefix in front of both values.
    url = string.Format("{0}{1}{2}&g={1}{3}", url.Substring(0, url.IndexOf("f=") + 2), "%A1%D6%20", param_f, param_g);

    result = GetHtml(url, domainHost).Replace("\r\n", "").Replace(",", "");

    string ip;
    string pv;
    ParseIpPv(result, out ip, out pv);
    site.IpNum = ip;
    site.PvNum = pv;
}

/// <summary>
/// Extracts the IP and PV figures from the data-API response.
/// </summary>
/// <remarks>
/// The figures are first read from the assignments to the 'IpNum' and 'PvNum' elements,
/// keeping the integer part only. With decimal values, such as in the sample response
/// "...getElementById('IpNum').innerHTML='≈1699.8...';...getElementById('PvNum').innerHTML='≈35695.8...';",
/// the former rule "last two digit runs" picked the integer and the fractional part of PvNum.
/// That rule is kept only as a fallback and now requires at least two digit runs; with a
/// single run it used to index position -1.
/// </remarks>
private static void ParseIpPv(string response, out string ip, out string pv)
{
    ip = ExtractChinazField(response, "IpNum");
    pv = ExtractChinazField(response, "PvNum");
    if (ip != null && pv != null)
    {
        return;
    }

    MatchCollection numbers = Regex.Matches(response, @"(\d+)");
    if (numbers.Count >= 2)
    {
        ip = numbers[numbers.Count - 2].Value;
        pv = numbers[numbers.Count - 1].Value;
    }
    else
    {
        ip = "-";
        pv = "-";
    }
}

/// <summary>
/// Returns the first digit run of the value assigned to the element <paramref name="id"/>
/// in the response script, or null when there is no such assignment.
/// </summary>
private static string ExtractChinazField(string response, string id)
{
    Match match = Regex.Match(response, @"getElementById\('" + id + @"'\)\.innerHTML='[^'\d]*(\d+)");
    return match.Success ? match.Groups[1].Value : null;
}
}
#region Alexa实体
/// <summary>
/// Figures collected for one site. Every figure is kept as text and holds "-" until a value
/// has been collected.
/// </summary>
public class Alexa
{
    /// <summary>
    /// Initializes every figure to the "-" placeholder. IpNum and PvNum are included so that
    /// they are never null (they previously stayed null until the IP/PV lookup had run).
    /// </summary>
    public Alexa()
    {
        TodayRank = "-";
        Rank = "-";
        WeekRank = "-";
        MonthRank = "-";
        ChinaRank = "-";
        IpNum = "-";
        PvNum = "-";
    }

    /// <summary>
    /// Display name of the site.
    /// </summary>
    public string DomainName { get; set; }

    /// <summary>
    /// Site address without the leading "http://www." part.
    /// </summary>
    public string DomainUrl { get; set; }

    /// <summary>
    /// Today's rank.
    /// </summary>
    public string TodayRank { get; set; }

    /// <summary>
    /// Overall rank.
    /// </summary>
    public string Rank { get; set; }

    /// <summary>
    /// Weekly average rank.
    /// </summary>
    public string WeekRank { get; set; }

    /// <summary>
    /// Monthly average rank.
    /// </summary>
    public string MonthRank { get; set; }

    /// <summary>
    /// Rank within China.
    /// </summary>
    public string ChinaRank { get; set; }

    /// <summary>
    /// IP figure.
    /// </summary>
    public string IpNum { get; set; }

    /// <summary>
    /// PV figure.
    /// </summary>
    public string PvNum { get; set; }

    /// <summary>
    /// Insert time.
    /// </summary>
    public DateTime InDate { get; set; }
}
#endregion
DBHelp类(窗体代码中以 DBHelp.ExecuteNonQuery 调用):
// Connection string used by GetConnection().
// NOTE(review): the SQL Server login and password are hard-coded in source; move them to
// configuration and change the password, since this text has been published.
private static string constr = "Data Source=.;Initial Catalog=WebAnalyticsDB;uid=bauer;pwd=smartshe!@#$%^&*90;";
/// <summary>
/// Creates a new, unopened connection from the class-level connection string.
/// </summary>
/// <returns>The new connection; never null.</returns>
/// <remarks>
/// A failure of the SqlConnection constructor (for example on a malformed connection string)
/// now propagates to the caller. It used to be swallowed and turned into a null return, which
/// surfaced later as a NullReferenceException far from the cause.
/// </remarks>
private static SqlConnection GetConnection()
{
    return new SqlConnection(constr);
}
/// <summary>
/// Inserts one row with the figures of <paramref name="alexa"/> and the current local time
/// into the alexa table.
/// </summary>
/// <param name="alexa">Figures to store.</param>
/// <returns>The number of rows affected, or 0 when the insert failed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="alexa"/> is null.</exception>
/// <remarks>
/// Remains best effort: a failure is reported through <see cref="System.Diagnostics.Trace"/>
/// and 0 is returned, so one bad row does not stop the caller's loop, but the error is no
/// longer discarded silently. Null text values are sent as DBNull; a parameter whose value is
/// a plain null is treated as not supplied and makes the command fail. The command is now
/// disposed together with the connection by <c>using</c>.
/// </remarks>
public static int ExecuteNonQuery(Alexa alexa)
{
    if (alexa == null)
    {
        throw new ArgumentNullException("alexa");
    }

    // No column list: the values must stay in the table's column order (after the identity column).
    const string sql = "insert into alexa values(@DomainName,@DomainUrl,@TodayRank,@Rank,@WeekRank,@MonthRank,@ChinaRank,@IpNum,@PvNum,@Date)";

    try
    {
        using (SqlConnection conn = GetConnection())
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            cmd.CommandType = CommandType.Text;
            cmd.Parameters.AddRange(new SqlParameter[]
            {
                TextParameter("@DomainName", alexa.DomainName),
                TextParameter("@DomainUrl", alexa.DomainUrl),
                TextParameter("@TodayRank", alexa.TodayRank),
                TextParameter("@Rank", alexa.Rank),
                TextParameter("@WeekRank", alexa.WeekRank),
                TextParameter("@MonthRank", alexa.MonthRank),
                TextParameter("@ChinaRank", alexa.ChinaRank),
                TextParameter("@IpNum", alexa.IpNum),
                TextParameter("@PvNum", alexa.PvNum),
                new SqlParameter("@Date", DateTime.Now)
            });

            conn.Open();
            return cmd.ExecuteNonQuery();
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceError("Insert into alexa failed for " + alexa.DomainUrl + ": " + ex);
        return 0;
    }
}

/// <summary>
/// Builds a parameter for a text value, sending DBNull when the value is null.
/// </summary>
private static SqlParameter TextParameter(string name, string value)
{
    return new SqlParameter(name, (object)value ?? DBNull.Value);
}
SQL建表语句:
/****** Object: Table [dbo].[alexa] Script Date: 03/28/2013 22:03:35 ******/
-- Result table of the collection tool: one row per site per collection run.
-- The tool's INSERT statement lists no column names, so the order of the columns after
-- [ID] must match the order of the values it supplies.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[alexa](
[ID] [int] IDENTITY(1,1) NOT NULL, -- generated key
[DomainName] [nvarchar](50) NULL, -- display name of the site
[DomainUrl] [nvarchar](50) NULL, -- site domain
[TodayRank] [nvarchar](10) NULL, -- the rank columns are text; the tool stores '-' when no value was collected
[Rank] [nvarchar](10) NULL,
[WeekRank] [nvarchar](10) NULL,
[MonthRank] [nvarchar](10) NULL,
[ChinaRank] [nvarchar](10) NULL,
[IpNum] [nvarchar](10) NULL, -- IP figure as text; values longer than 10 characters do not fit
[PvNum] [nvarchar](10) NULL, -- PV figure as text; values longer than 10 characters do not fit
[InDate] [datetime] NULL, -- insert time supplied by the tool
CONSTRAINT [PK_alex] PRIMARY KEY CLUSTERED
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO