Alexa流量自动采集工具,采集网站IP、PV信息

因工作需要,要采集Alexa流量信息及每天的IP、PV数据,现编写自动采集工具!

功能如下:

一、指定时间段采集Alexa及IP、PV信息

二、窗体隐藏至windows任务栏右下角

工具源码下载,请至我的资源共享里下载:工具下载地址

工具的运行效果截图如下:


Alexa流量信息自动采集工具部分代码如下

winform页面后台.cs文件代码:

 public partial class alexaForm : Form
    {
        //300000
        /// <summary>
        /// Creates the form; all setup is delegated to <c>InitializeComponent()</c>.
        /// </summary>
        public alexaForm()
        {
            InitializeComponent();
        }
        // Exit marker: true = do not exit (closing the window hides it instead), false = exit.
        bool flagExit = true;
        // Sites to collect; filled in Form1_Load.
        List<Alexa> list = new List<Alexa>();
        int indexAlexa = 0;// index into list of the site whose Alexa page is being processed
        //int indexChinaz = 0;// Chinaz index
        bool startAlexa = false;// whether collection from the Alexa site is in progress
        //bool startChinaz = false;// whether collection from Chinaz has started
        bool IsRecording = false;// whether a collection run has been started (checked by timer1_Tick)
        // Scratch buffer reused by SetAlexaInfo while parsing page text.
        StringBuilder sb = new StringBuilder();
        // Not referenced by any code visible in this file section.
        StringBuilder sbState = new StringBuilder();
        // document: the page being parsed by SetAlexaInfo.
        // aaa: step counter (0, 1, 2) driven by webBrowser1_DocumentCompleted.
        HtmlDocument document = null; int aaa = 0;
        // Working strings used by GetAlexaFromChinaz (page URL, last downloaded body, data-interface URL).
        string domainHost, result, url;
        // Shared HTTP state used by both GetHtml overloads.
        private static CookieCollection cookies = new CookieCollection();
        private static CookieContainer c = new CookieContainer();
        private static HttpWebRequest req;
        private static HttpWebResponse res;
        private static WebHeaderCollection whc;
        #region 事件
        /// <summary>
        /// Form load handler: fills the list of sites to collect, enables the
        /// timer and points the embedded browser at a blank page.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // Pause for two seconds once the form has loaded.
            System.Threading.Thread.Sleep(2000);

            // Display name / domain pairs, in collection order.
            string[,] sites =
            {
                { "她时代", "smartshe.com" },
                { "elle", "ellechina.com" },
                { "onlylady", "onlylady.com" },
                { "pclady", "pclady.com.cn" },
                { "yoka", "yoka.com" },
                { "贝太厨房", "bettyskitchen.com.cn" },
                { "都市主妇", "herschina.com" },
                { "嘉人", "marieclairechina.com" },
                { "女友", "ny1988.com" },
                { "瑞丽", "rayli.com.cn" },
                { "时尚", "trends.com.cn" },
                { "悦己", "self.com.cn" }
            };
            for (int row = 0; row < sites.GetLength(0); row++)
            {
                Alexa site = new Alexa();
                site.DomainName = sites[row, 0];
                site.DomainUrl = sites[row, 1];
                list.Add(site);
            }

            timer1.Enabled = true;

            Uri blankPage = new Uri(@"about:blank");
            webBrowser1.Navigate(blankPage);
            tboCurUrl.Text = "about:blank";
        }

        /// <summary>
        /// WebBrowser document-completed handler. While an Alexa collection is in
        /// progress (startAlexa) and the browser reports ReadyState Interactive, it
        /// advances a three-step sequence tracked by the field aaa:
        /// step 0 clicks the element "trafficstats_a", step 1 clicks "tw-tabList",
        /// and the final step parses the page and navigates to the next site.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            //if ((webBrowser1.ReadyState == WebBrowserReadyState.Interactive) && startChinaz)
            //{

            //    GetAlexaFromChinaz();
            //}
            if (startAlexa && webBrowser1.ReadyState == WebBrowserReadyState.Interactive)
            {
                if (aaa == 0)
                {
                    // Step 0: look up the "trafficstats_a" element and click it.
                    HtmlElement a = webBrowser1.Document.All["trafficstats_a"];
                    if (a != null)
                    {
                        aaa = 1;
                        a.Click += new HtmlElementEventHandler(a_Click);
                        a.InvokeMember("click");
                    }
                    // If the element is missing, aaa stays 0 and nothing else happens in this call.
                }
                else if (aaa == 1)
                {
                    aaa = 2;
                    // Step 1: "tw-tabList" is a ul; the target is its first li.
                    HtmlElement a = webBrowser1.Document.All["tw-tabList"];
                    if (a != null)
                    {
                        // NOTE(review): the handler is attached to the first child, but the click is
                        // invoked on the list element itself — confirm which element was intended.
                        a.Children[0].Click += new HtmlElementEventHandler(Form1_Click);
                        a.InvokeMember("click");
                    }
                }
                else
                {
                    // Final step: store the Alexa figures for the current site
                    // (SetAlexaInfo also advances indexAlexa).
                    SetAlexaInfo();
                    // Pause for two seconds before requesting the next site.
                    System.Threading.Thread.Sleep(2000);
                    aaa = 0;
                    GetAlexa(indexAlexa);
                }
            }
        }

        /// <summary>
        /// Click handler for the "Traffic Rank" tab entry: marks the page sequence as being at step 2.
        /// </summary>
        void Form1_Click(object sender, HtmlElementEventArgs e)
        {
            aaa = 2;
        }

        /// <summary>
        /// Click handler for the "Traffic Stats" link: marks the page sequence as being at step 1.
        /// </summary>
        void a_Click(object sender, HtmlElementEventArgs e)
        {
            aaa = 1;
        }

        /// <summary>
        /// Timer tick handler: when the local hour is 9, 13 or 17 and no run is
        /// flagged as recording, starts a collection run from the first site.
        /// </summary>
        private void timer1_Tick(object sender, EventArgs e)
        {
            int hour = DateTime.Now.Hour;
            bool scheduledHour = hour == 9 || hour == 13 || hour == 17;
            if (!scheduledHour || IsRecording)
            {
                return;
            }

            // Flag the Alexa phase as active, then request the first site.
            startAlexa = true;
            GetAlexa(0);
        }

        /// <summary>
        /// Form closing handler: when the user closes the window and no real exit
        /// was requested (flagExit is still true), cancels the close and hides the
        /// form to the notification area instead.
        /// </summary>
        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            // Only intercept a close initiated by the user. Cancelling closes that come
            // from Windows shutdown/logoff or Task Manager would block them.
            if (flagExit && e.CloseReason == CloseReason.UserClosing)
            {
                e.Cancel = true;
                NormalToMinimized();
            }
        }


        /// <summary>
        /// Resize handler: once the window has been minimized, hides it to the notification area.
        /// </summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            if (WindowState != FormWindowState.Minimized)
            {
                return;
            }
            NormalToMinimized();
        }

        /// <summary>
        /// "Collect now" menu item handler: starts a collection run from the first site.
        /// </summary>
        private void tsmi_GetNow_Click(object sender, EventArgs e)
        {
            // GetAlexa(0) loads the first site's page, and SetAlexaInfo stores results in
            // list[indexAlexa]. Both the site index and the per-page step counter must
            // therefore restart from zero, or a run restarted midway would attribute the
            // results to the wrong site.
            indexAlexa = 0;
            aaa = 0;

            startAlexa = true;// flag the Alexa phase as active
            GetAlexa(0);
        }

        /// <summary>
        /// "Exit" menu item handler: asks for confirmation and, on Yes, clears
        /// flagExit so the closing handler lets the form close, then exits the application.
        /// </summary>
        private void tsmi_BtnExit_Click(object sender, EventArgs e)
        {
            DialogResult answer = MessageBox.Show("确定退出本软件吗?", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
            if (answer != DialogResult.Yes)
            {
                return;
            }

            flagExit = false;
            System.Windows.Forms.Application.Exit();
        }

        /// <summary>
        /// "Go" button handler: navigates the browser to the address in the address box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            GoUrl();
        }

        /// <summary>
        /// Address box key handler: pressing Enter with a non-blank address navigates to it.
        /// </summary>
        private void tboCurUrl_PreviewKeyDown(object sender, PreviewKeyDownEventArgs e)
        {
            if (e.KeyCode != Keys.Enter)
            {
                return;
            }
            if (tboCurUrl.Text.Trim().Length == 0)
            {
                return;
            }
            GoUrl();
        }

        /// <summary>
        /// Notification-area icon click handler: brings the form back to its normal state.
        /// </summary>
        private void notifyIcon1_Click(object sender, EventArgs e)
        {
            MinimizedToNormal();
        }
        #endregion

        #region 处理方法

        /// <summary>
        /// Reads the ranking figures from the page currently loaded in the browser
        /// and stores them on list[indexAlexa], then advances indexAlexa.
        /// Each figure that cannot be found is stored as "-"; if anything throws
        /// while parsing, all five figures are stored as "-".
        /// </summary>
        private void SetAlexaInfo()
        {
            try
            {
                document = webBrowser1.Document;

                // "rank" element: yesterday / 7 day / 1 month ranks.
                sb.Length = 0;
                sb.Append(document.GetElementById("rank").InnerText);
                list[indexAlexa].TodayRank = FirstGroupOrDash(sb.ToString(), @"Yesterday([\d,]+)");
                list[indexAlexa].WeekRank = FirstGroupOrDash(sb.ToString(), @"7 day([\d,]+)");
                list[indexAlexa].MonthRank = FirstGroupOrDash(sb.ToString(), @"1 month([\d,]+)");

                // "siteStats" element: overall rank, matched after removing line breaks, spaces and commas.
                sb.Length = 0;
                sb.Append(document.GetElementById("siteStats").InnerText);
                sb = new StringBuilder(sb.ToString().Replace("\r\n", "").Replace(" ", "").Replace(",", ""));
                list[indexAlexa].Rank = FirstGroupOrDash(sb.ToString(), @"AlexaTrafficRankReputation([\d]+)");

                // "traffic-rank-by-country" element: rank for China, matched after removing spaces.
                sb.Length = 0;
                sb.Append(document.GetElementById("traffic-rank-by-country").InnerText);
                list[indexAlexa].ChinaRank = FirstGroupOrDash(sb.ToString().Replace(" ", ""), @"China([\d,]+)");
            }
            catch (Exception)
            {
                Alexa failed = list[indexAlexa];
                failed.TodayRank = "-";
                failed.Rank = "-";
                failed.WeekRank = "-";
                failed.MonthRank = "-";
                failed.ChinaRank = "-";
            }
            indexAlexa += 1;
        }

        /// <summary>
        /// Returns the first capture group of a case-insensitive match of
        /// <paramref name="pattern"/> in <paramref name="text"/> with commas removed,
        /// or "-" when there is no match.
        /// </summary>
        private static string FirstGroupOrDash(string text, string pattern)
        {
            Match found = Regex.Match(text, pattern, RegexOptions.IgnoreCase);
            return found.Success ? found.Groups[1].Value.Replace(",", "") : "-";
        }

        /// <summary>
        /// Writes every entry of the site list to the database, one insert per entry.
        /// </summary>
        private void ExportToSql()
        {
            list.ForEach(delegate(Alexa entry) { DBHelp.ExecuteNonQuery(entry); });
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
        /// </summary>
        /// <remarks>
        /// The request uses the shared cookie container; the response headers and
        /// cookies are kept in the static fields <c>whc</c> and <c>cookies</c>.
        /// The body is decoded with <c>Encoding.Default</c>.
        /// </remarks>
        /// <param name="url">Absolute HTTP URL to fetch.</param>
        /// <returns>The whole response body.</returns>
        private string GetHtml(string url)
        {
            req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            req.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
            req.CookieContainer = c;
            res = null;

            // Dispose the response, stream and reader so the connection is released
            // even when reading fails.
            using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
            {
                res = response;
                whc = response.Headers;
                cookies = response.Cookies;
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, System.Text.Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with an HTTP GET that carries
        /// <paramref name="urlRef"/> as the Referer header, and returns the response body as text.
        /// </summary>
        /// <remarks>
        /// The request uses the shared cookie container, accepts gzip-compressed
        /// responses and keeps the connection alive. The body is decoded with
        /// <c>Encoding.Default</c>.
        /// </remarks>
        /// <param name="url">Absolute HTTP URL to fetch.</param>
        /// <param name="urlRef">Value sent as the Referer header.</param>
        /// <returns>The whole response body.</returns>
        private string GetHtml(string url, string urlRef)
        {
            req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            req.Referer = urlRef;
            req.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
            req.CookieContainer = c;
            req.Accept = "application/javascript, */*;q=0.8";
            req.AutomaticDecompression = DecompressionMethods.GZip;
            req.KeepAlive = true;
            res = null;

            // Dispose the response, stream and reader so the connection is released
            // even when reading fails.
            using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
            {
                res = response;
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, System.Text.Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Navigates the browser to the address typed in the address box.
        /// Addresses that already start with "http://", "https://" or "about:" are
        /// used as typed; anything else gets "http://" prepended. A blank address,
        /// or one that still is not a valid absolute URI, is ignored.
        /// </summary>
        private void GoUrl()
        {
            string address = tboCurUrl.Text.Trim();
            if (address.Length == 0)
            {
                return;
            }

            bool hasScheme =
                address.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                address.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
                address.StartsWith("about:", StringComparison.OrdinalIgnoreCase);
            if (!hasScheme)
            {
                address = "http://" + address;
            }

            // TryCreate instead of new Uri(...) so a malformed address does not throw
            // out of a UI event handler.
            Uri target;
            if (Uri.TryCreate(address, UriKind.Absolute, out target))
            {
                webBrowser1.Navigate(target);
            }
        }

        /// <summary>
        /// Hides the form, puts it in the minimized state and shows the notification-area icon.
        /// </summary>
        private void NormalToMinimized()
        {
            Visible = false;
            WindowState = FormWindowState.Minimized;
            notifyIcon1.Visible = true;
        }

        /// <summary>
        /// Shows the form again in its normal window state and hides the notification-area icon.
        /// </summary>
        private void MinimizedToNormal()
        {
            Visible = true;
            WindowState = FormWindowState.Normal;
            notifyIcon1.Visible = false;
        }
        #endregion

        /// <summary>
        /// Loads the Alexa site-info page for the site at position
        /// <paramref name="num"/> of the list. Once <paramref name="num"/> is past
        /// the end of the list, the Alexa phase is finished and the IP/PV
        /// collection from Chinaz is started instead.
        /// </summary>
        /// <param name="num">Index into the site list.</param>
        private void GetAlexa(int num)
        {
            if (num >= list.Count)
            {
                startAlexa = false;
                GetAlexaFromChinaz();
                return;
            }

            // Stop the schedule timer and flag the run before navigating.
            timer1.Enabled = false;
            IsRecording = true;

            string siteInfoUrl = "http://www.alexa.com/siteinfo/" + list[num].DomainUrl;
            webBrowser1.Navigate(siteInfoUrl);
            tboCurUrl.Text = siteInfoUrl;
        }

        /// <summary>
        /// Queries alexa.chinaz.com for the IP and PV figures of every site in the
        /// list, writes the whole list to the database, and then resets the form
        /// state so the next run can be scheduled.
        /// </summary>
        /// <remarks>
        /// For each site the Chinaz page is downloaded, the Get_Data.asp interface
        /// URL embedded in it is extracted and rebuilt, and that URL is fetched with
        /// the page as referer. A site whose figures cannot be obtained (no interface
        /// URL found, too few numbers in the reply, or a network error) gets "-" for
        /// both values. The method sleeps five seconds before each site.
        /// </remarks>
        private void GetAlexaFromChinaz()
        {
            try
            {
                for (int num = 0; num < list.Count; num++)
                {
                    System.Threading.Thread.Sleep(5000);

                    // Start from "unknown"; overwritten below only when both figures are found.
                    list[num].IpNum = "-";
                    list[num].PvNum = "-";

                    try
                    {
                        domainHost = string.Format("http://alexa.chinaz.com/?domain={0}", list[num].DomainUrl);
                        result = GetHtml(domainHost).Replace("\r\n", "");

                        Match match = Regex.Match(result, @"(http://alexa.chinaz.com/Get_Data.asp[^\""]+)");
                        if (!match.Success)
                        {
                            continue;
                        }
                        url = match.Groups[0].Value;

                        // Rebuild the interface URL: parameter f is the number found after
                        // "f=" plus two characters, parameter g is the trailing number, and
                        // both are re-sent with the prefix "%A1%D6%20".
                        string param_f = Regex.Match(url, @"f=.{2,2}(\d+\.\d+)", RegexOptions.IgnoreCase).Groups[1].Value;
                        string param_g = Regex.Match(url, @"\d+\.\d+$", RegexOptions.IgnoreCase).Groups[0].Value;
                        url = string.Format("{0}{1}{2}&g={1}{3}", url.Substring(0, url.IndexOf("f=") + 2), "%A1%D6%20", param_f, param_g);

                        result = GetHtml(url, domainHost).Replace("\r\n", "").Replace(",", "");

                        // The last two digit runs of the reply are taken as IP and PV.
                        // NOTE(review): the original author's sample reply ends with
                        // ...getElementById('IpNum').innerHTML='<prefix>1699.80964214712';
                        // ...getElementById('PvNum').innerHTML='<prefix>35695.833499006';
                        // With decimals like these, the last two digit runs would be the integer
                        // and fractional parts of PvNum — confirm against a live reply.
                        MatchCollection mc = Regex.Matches(result, @"(\d+)");
                        if (mc.Count >= 2)// two matches are indexed below, so one match is not enough
                        {
                            list[num].IpNum = mc[mc.Count - 2].Value;
                            list[num].PvNum = mc[mc.Count - 1].Value;
                        }
                    }
                    catch (WebException)
                    {
                        // A failed download for one site must not abort the whole run;
                        // the site keeps "-" for both figures.
                    }
                }

                // Export automatically once all sites have been processed.
                ExportToSql();
            }
            finally
            {
                // Always restore the idle state. Otherwise an unexpected failure would
                // leave the timer disabled and no further run would ever start.
                timer1.Enabled = true;
                startAlexa = false;
                // GetAlexa sets this flag; clear it so timer1_Tick can start the next scheduled run.
                IsRecording = false;
                indexAlexa = 0;
                timer1.Interval = 600000;
                webBrowser1.Navigate("about:blank");
                tboCurUrl.Text = "about:blank";
            }
        }

    }

    #region Alexa实体
    /// <summary>
    /// One site's collected figures. Every figure is kept as text and defaults
    /// to "-" (meaning "not available") until a value has been collected.
    /// </summary>
    public class Alexa
    {
        /// <summary>
        /// Creates an entry whose figures are all "-".
        /// </summary>
        public Alexa()
        {
            TodayRank = "-";
            Rank = "-";
            WeekRank = "-";
            MonthRank = "-";
            ChinaRank = "-";
            // IP and PV use the same "not available" marker as the rank figures,
            // so they are never left null.
            IpNum = "-";
            PvNum = "-";
        }

        /// <summary>
        /// Site display name.
        /// </summary>
        public string DomainName { get; set; }
        /// <summary>
        /// Site address without the leading "http://www." part.
        /// </summary>
        public string DomainUrl { get; set; }
        /// <summary>
        /// Today's rank.
        /// </summary>
        public string TodayRank { get; set; }
        /// <summary>
        /// Overall rank.
        /// </summary>
        public string Rank { get; set; }
        /// <summary>
        /// Weekly average rank.
        /// </summary>
        public string WeekRank { get; set; }
        /// <summary>
        /// Monthly average rank.
        /// </summary>
        public string MonthRank { get; set; }
        /// <summary>
        /// Rank within China.
        /// </summary>
        public string ChinaRank { get; set; }
        /// <summary>
        /// IP figure.
        /// </summary>
        public string IpNum { get; set; }
        /// <summary>
        /// PV figure.
        /// </summary>
        public string PvNum { get; set; }
        /// <summary>
        /// Insert time.
        /// </summary>
        public DateTime InDate { get; set; }
    }
    #endregion

DBHelp类(数据库访问辅助类,上文代码中通过 DBHelp.ExecuteNonQuery 调用):

// Connection string used by GetConnection: database WebAnalyticsDB on the local server (Data Source=.).
// NOTE(review): the user name and password are hard-coded here — consider moving them out of source.
private static string constr = "Data Source=.;Initial Catalog=WebAnalyticsDB;uid=bauer;pwd=smartshe!@#$%^&*90;";
        /// <summary>
        /// Creates a new, unopened connection from the class connection string.
        /// </summary>
        /// <returns>The connection; the caller opens and disposes it.</returns>
        /// <exception cref="ArgumentException">The connection string is malformed.</exception>
        private static SqlConnection GetConnection()
        {
            // Deliberately not wrapped in a catch that returns null: a null connection
            // only moves the failure into the caller as a NullReferenceException.
            return new SqlConnection(constr);
        }

        /// <summary>
        /// Inserts one collected entry into the alexa table, stamped with the current time.
        /// </summary>
        /// <param name="alexa">Entry to store; null property values are written as database NULL.</param>
        /// <returns>Number of rows inserted, or 0 when the insert failed.</returns>
        public static int ExecuteNonQuery(Alexa alexa)
        {
            int result = 0;
            string sql = "insert into alexa values(@DomainName,@DomainUrl,@TodayRank,@Rank,@WeekRank,@MonthRank,@ChinaRank,@IpNum,@PvNum,@Date)";

            // A parameter whose value is a null reference is not sent to the server at all,
            // which makes the statement fail; DBNull.Value is what stores a NULL.
            SqlParameter[] paras = {
                                   new SqlParameter("@DomainName", (object)alexa.DomainName ?? DBNull.Value),
                                   new SqlParameter("@DomainUrl", (object)alexa.DomainUrl ?? DBNull.Value),
                                   new SqlParameter("@TodayRank", (object)alexa.TodayRank ?? DBNull.Value),
                                   new SqlParameter("@Rank", (object)alexa.Rank ?? DBNull.Value),
                                   new SqlParameter("@WeekRank", (object)alexa.WeekRank ?? DBNull.Value),
                                   new SqlParameter("@MonthRank", (object)alexa.MonthRank ?? DBNull.Value),
                                   new SqlParameter("@ChinaRank", (object)alexa.ChinaRank ?? DBNull.Value),
                                   new SqlParameter("@IpNum", (object)alexa.IpNum ?? DBNull.Value),
                                   new SqlParameter("@PvNum", (object)alexa.PvNum ?? DBNull.Value),
                                   new SqlParameter("@Date", DateTime.Now)
                                   };

            // using blocks close the connection and dispose the command on every path.
            using (SqlConnection conn = GetConnection())
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            {
                cmd.CommandType = CommandType.Text;
                cmd.Parameters.AddRange(paras);
                try
                {
                    conn.Open();
                    result = cmd.ExecuteNonQuery();
                }
                catch (SqlException ex)
                {
                    // Best effort: one failed insert must not stop the export, but leave a trace.
                    System.Diagnostics.Trace.TraceError("Insert into alexa failed for {0}: {1}", alexa.DomainUrl, ex.Message);
                }
                catch (InvalidOperationException ex)
                {
                    // Raised e.g. when no connection can be obtained from the pool.
                    System.Diagnostics.Trace.TraceError("Insert into alexa failed for {0}: {1}", alexa.DomainUrl, ex.Message);
                }
            }
            return result;
        }

SQL建表语句:

/****** Object:  Table [dbo].[alexa]    Script Date: 03/28/2013 22:03:35 ******/
-- Creates the table that receives one row per site per collection run.
-- All collected figures are stored as short text (the application writes "-" when a value is unavailable).
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE TABLE [dbo].[alexa](
	-- Auto-incrementing row id (primary key).
	[ID] [int] IDENTITY(1,1) NOT NULL,
	-- Site display name and site address.
	[DomainName] [nvarchar](50) NULL,
	[DomainUrl] [nvarchar](50) NULL,
	-- Today's rank, overall rank, weekly average rank, monthly average rank, rank within China.
	[TodayRank] [nvarchar](10) NULL,
	[Rank] [nvarchar](10) NULL,
	[WeekRank] [nvarchar](10) NULL,
	[MonthRank] [nvarchar](10) NULL,
	[ChinaRank] [nvarchar](10) NULL,
	-- IP and PV figures.
	[IpNum] [nvarchar](10) NULL,
	[PvNum] [nvarchar](10) NULL,
	-- Time the row was inserted.
	[InDate] [datetime] NULL,
 CONSTRAINT [PK_alex] PRIMARY KEY CLUSTERED 
(
	[ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]

GO



评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值