C# 网页信息采集(Form.cs)

using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.Text;

using System.Windows.Forms;

 

using System.Web;

using System.Net;

using System.IO;

 

using System.Data.SqlClient;

//using Microsoft.Office.Interop.Excel;

using System.Threading;

using WebBee;

using System.Text.RegularExpressions;

 

 

namespace WebBee

{

    public partial class Form1 : Form

    {

        /// <summary>
        /// Creates the form and runs <c>InitializeComponent</c> (not defined in this file;
        /// presumably the designer-generated part of this partial class).
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler for button1 (Kijiji site). Currently a no-op.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The former implementation was disabled (it depended on Microsoft.Office.Interop.Excel,
            // whose using directive is also commented out at the top of the file).
            //
            // Summary of what the disabled code did:
            //   1. Opened the workbook "D://datadb.xls" through Excel interop and took its first sheet.
            //   2. For every j from fromID.Text to toId.Text (inclusive) built the address
            //      fixTextBox.Text + j, added it to listBox1 and downloaded it as UTF-8 with
            //      GetWebContent; empty responses were skipped.
            //   3. Kept the <title> element plus the markup from the "main_view" div up to </body>,
            //      skipping pages without that div, and wrote the fragment into a blank
            //      WebBrowser document.
            //   4. Read the document title and the inner text of the element "view_content".
            //      The title was split on '|' and pages whose title did not have exactly three
            //      parts were skipped; the second and third parts were each split again on ','.
            //   5. Wrote one sheet row per page (row = j - fromID + 1): column 1 = first title part,
            //      columns 2-4 = up to three area values, columns 5-7 = up to three category
            //      values, column 8 = the content text. Missing values were written as "".
            //   6. Saved the sheet as "d://dd.xls", closed the workbooks and quit Excel.
            //      Exceptions were caught and ignored.
        }

 

 

 

        //根据Url地址得到网页的html源码

        private string GetWebContent(string Url, Encoding encoding)

        {

            string strResult = "";

            try

            {

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

                //声明一个HttpWebRequest请求

                request.Timeout = 30000;

                //设置连接超时时间

                request.Headers.Set("Pragma", "no-cache");

                // request.Headers.Set("KeepAlive", "true");

                request.CookieContainer = new CookieContainer();

                request.Credentials = CredentialCache.DefaultCredentials;

                request.Referer = Url;

 

                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

 

                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                Stream streamReceive = response.GetResponseStream();

 

                StreamReader streamReader = new StreamReader(streamReceive, encoding);

                strResult = streamReader.ReadToEnd();

                streamReceive.Close();

                streamReader.Close();

                streamReceive = null;

                streamReader = null;

            }

            catch

            {

                return "";

            }

            return strResult;

        }

 

 

 

 

 

 

 

        /// <summary>

        /// 点评网

        /// </summary>

        /// <param ></param>

        /// <param ></param>

        private void button2_Click(object sender, EventArgs e)

        {

 

            int fromID = Convert.ToInt32(this.fromID.Text);

            int toId = Convert.ToInt32(this.toId.Text);

            for (int i = fromID; i < toId; i++)

            {

 

                string Url = this.fixTextBox.Text + i;

 

                //得到指定Url的源码

                Encoding encoding = Encoding.GetEncoding("utf-8");

                string strWebContent = GetWebContent(Url, encoding);

                if (strWebContent.IndexOf("该商户不存在</title>") != -1) continue;

                string needstr = "";

                int infostart = strWebContent.IndexOf("<div Main/">", 0);

                int infoend = strWebContent.IndexOf("<div Votes/">", 0);

 

 

                needstr = strWebContent.Substring(infostart, infoend - infostart);

 

 

                //

 

                try

                {

                    //生成HtmlDocument

                    WebBrowser webb = new WebBrowser();

                    webb.Navigate("about:blank");

 

                    HtmlDocument htmldoc = webb.Document.OpenNew(false);

                    htmldoc.Write(needstr);

                    ShopInfo shopInfo = GetShopInfo(htmldoc);

                    shopInfo.Shopurl = Url;

 

 

                    this.insertData(shopInfo);

 

 

                }

                catch (Exception ex)

                {

 

                }

            }

        }

 

        /// <summary>

        /// 取出列表页面的店铺URL

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

 

        private string[] getNeedpages(HtmlDocument htmldoc)

        {

            string[] s = new string[20];

            for (int i = 0; i < 20; i++)

            {

                string idname = "_ctl0__ctl" + i + "_HShopName";

                string url = htmldoc.GetElementById(idname).GetAttribute("href");

                url = url.Substring(11, url.Length - 11);

                url = "http://www.dianping.com/" + url;

                s[i] = url;

            }

 

            return s;

        }

 

        /// <summary>

        /// 取出shopInfo对象

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        private ShopInfo GetShopInfo(HtmlDocument htmldoc)

        {

            string bodyHtml = htmldoc.Body.InnerHtml;

 

            ShopInfo Si = new ShopInfo();

 

            HtmlElementCollection areas_hc = htmldoc.GetElementsByTagName("div");

 

            if (areas_hc != null)

            {

 

                Si.Areas = areas_hc[2].InnerText;

                Si.Areas = Si.Areas.Replace(">", "|");

 

            }

 

 

            HtmlElementCollection h1_hc = htmldoc.GetElementsByTagName("h1");

 

 

            if (h1_hc.Count != 0)

            {

                Si.Name = h1_hc[0].InnerText;

                Si.Name = Si.Name.Replace("&nbsp;", " ").Trim(); ;

 

            }

            else

            {

                Si.Name = "";

            }

 

            HtmlElementCollection span_hc = htmldoc.GetElementsByTagName("span");

            if (span_hc.Count != 0)

            {

                Si.ChildName = span_hc[0].InnerText;

                Si.ChildName = Si.ChildName.Replace("&nbsp;", " ");

            }

            else

            {

                Si.ChildName = "";

            }

            int startAdress = bodyHtml.IndexOf("<B>地址:</B>", 0);

            int endAddress = 0;

 

            if (startAdress != -1)

            {

                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 200);

                Si.Address = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);

                Si.Address = Si.Address.Replace("&nbsp;", " ").Trim();

                if (Si.Address.IndexOf("<A class") != -1)

                {

                    Si.Address = Si.Address.Substring(0, Si.Address.IndexOf("<A class"));

 

                }

            }

            else

            {

                Si.Address = "";

            }

            //------------------------------

            startAdress = bodyHtml.IndexOf("<B>电话:</B>", 0);

 

            if (startAdress != -1)

            {

                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);

                Si.Phone = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);

                Si.Phone = Si.Phone.Replace("&nbsp;", " ").Trim();

            }

            else

            {

                Si.Phone = "";

            }

 

            //------------------------------

            startAdress = bodyHtml.IndexOf("<B>别名:</B>", 0);

 

 

            if (startAdress != -1)

            {

                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);

                Si.Bm = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);

                Si.Bm = Si.Bm.Replace("&nbsp;", " ").Trim();

            }

            else

            {

                Si.Bm = "";

            }

 

            //-----------------------------

            if (htmldoc.GetElementById("ShopTag") != null)

            {

 

 

                Si.Tag = htmldoc.GetElementById("ShopTag").InnerText;

                Si.Tag = parseTag(Si.Tag);

            }

            else

            {

 

                Si.Tag = "";

 

            }

            //-----------------------------ShopDishs

            if (htmldoc.GetElementById("ShopComment") != null)

            {

 

 

                Si.Content = htmldoc.GetElementById("ShopComment").InnerText;

            }

            else

            {

 

                Si.Content = "";

 

            }

            //-----------------------------

            if (htmldoc.GetElementById("ShopDish") != null)

            {

 

 

                Si.Dish = htmldoc.GetElementById("ShopDish").InnerText;

                Si.Dish = parseTag(Si.Dish);

            }

            else

            {

 

                Si.Dish = "";

 

            }

            if (htmldoc.GetElementById("ShopDish") != null)

            {

                Si.Imagesurl = htmldoc.GetElementById("ShopPhoto").GetElementsByTagName("IMG")[0].GetAttribute("src");

 

 

            }

 

            htmldoc = null;

            bodyHtml = null;

            areas_hc = null;

            span_hc = null;

            return Si;

        }

 

        /// <summary>

        /// 解析字符串

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

 

        private string parseTag(string tagStr)

        {

            string[] temp = tagStr.Split(' ');

            string returnStr = "";

            for (int i = 0; i < temp.Length; i++)

            {

                if (temp[i] != "")

                {

                    returnStr = returnStr + temp[i].Substring(0, temp[i].IndexOf("(")) + "|";

                }

 

            }

            if (returnStr.EndsWith("|")) returnStr = returnStr.Substring(0, returnStr.Length - 1);

 

            return returnStr;

 

 

 

        }

 

 

        private void insertData(ShopInfo si)

        {

 

 

            StringBuilder strSql = new StringBuilder();

            strSql.Append("insert into getTempData(");

            strSql.Append("name,bm,childName,phone,address,tag,dish,content,areas,imagesurl,url");

            strSql.Append(") values (");

            strSql.Append("@name,@bm,@childName,@phone,@address,@tag,@dish,@content,@areas,@imagesurl,@url)");

 

            SqlParameter[] parameters = {

     new SqlParameter("@name", SqlDbType.VarChar,128),

     new SqlParameter("@bm",SqlDbType.VarChar,50),

     new SqlParameter("@childName", SqlDbType.VarChar,50),

     new SqlParameter("@phone", SqlDbType.VarChar,100),

     new SqlParameter("@address", SqlDbType.VarChar,200),

     new SqlParameter("@tag", SqlDbType.VarChar,256),

     new SqlParameter("@dish", SqlDbType.VarChar,256),

     new SqlParameter("@content", SqlDbType.VarChar,4096),

     new SqlParameter("@areas", SqlDbType.VarChar,256),

     new SqlParameter("@imagesurl", SqlDbType.VarChar,128),

                    new SqlParameter("@url", SqlDbType.VarChar,64)

                                };

            parameters[0].Value = si.Name;

            parameters[1].Value = si.Bm;

            parameters[2].Value = si.ChildName;

            parameters[3].Value = si.Phone;

            parameters[4].Value = si.Address;

            parameters[5].Value = si.Tag;

            parameters[6].Value = si.Dish;

            parameters[7].Value = si.Content;

            parameters[8].Value = si.Areas;

            parameters[9].Value = si.Imagesurl;

            parameters[10].Value = si.Shopurl;

 

            DBHelper.ExecuteSql(strSql.ToString(), parameters);

        }

        private bool mIsRunCtrip = false;

 

        public bool IsRunCtrip

        {

            get { return mIsRunCtrip = false; }

            set { mIsRunCtrip = value; }

        }

        // Signature of the Ctrip progress callback (one int argument).
        delegate void dFrist(int pIndext);

        // Signature of the 8j progress callback (one int argument).
        delegate void d8JMain(int i);

        // Assigned to showmsg in button3_Click; StartCtrip passes it to BeginInvoke with the
        // current ID.
        dFrist dfrist = null;

        // Assigned to showmsg8j in btn8j_Click; the only call site (in Start8j) is commented out.
        d8JMain d8jmain = null;

        private void button3_Click(object sender, EventArgs e)

        {

            progressBar1.Maximum = Convert.ToInt32(this.toId.Text) - Convert.ToInt32(this.fromID.Text);

            this.Text = "正在采集携程网,请等待。。。";

            dfrist = new dFrist(showmsg);

            Thread tCtrip = new Thread(StartCtrip);

            tCtrip.IsBackground = true;

            tCtrip.Start();

 

 

            // this.fixTextBox.Text = "已完成携程网采集,请根目录查看日志";

         

        }

        void showmsg(int i)

        {

 

            progressBar1.Value++;

            label4.Text = "现在ID:" + i.ToString() + ",已完成:" + (progressBar1.Value / (float)progressBar1.Maximum).ToString("p");

            if (progressBar1.Value == progressBar1.Maximum)

                MessageBox.Show("finish");

        }

        /// <summary>
        /// Worker for the Ctrip collection; button3_Click runs it on a background thread.
        /// For every ID from fromID.Text to toId.Text (inclusive) it posts a progress callback,
        /// loads the hotel through <c>CtripInfo.GetCtripInfoByHotelID</c> and stores its price
        /// entries through <c>AddHotelPrice</c>. Failures are appended to CtripErrorlog.log
        /// and the loop continues with the next ID.
        /// </summary>
        void StartCtrip()
        {
            CtripInfo ci = null;
            // NOTE(review): these text boxes are read from the worker thread, not the UI thread.
            int fromID = Convert.ToInt32(this.fromID.Text);
            int toId = Convert.ToInt32(this.toId.Text);
            for (int i = fromID; i <= toId; i++)
            {
                // Queue a progress update (dfrist is set to showmsg by button3_Click).
                this.BeginInvoke(dfrist, new object[] { i });
                try
                {
                    ci = new CtripInfo().GetCtripInfoByHotelID(i);
                    if (ci != null)
                    {
                        // NOTE(review): this line is corrupted in this copy of the file and does
                        // not compile. It presumably declared the "id" used below
                        // ("int id = ...;") followed by a DoEvents() call; the initializer is
                        // lost - restore it from the original source.
                        int System.Windows.Forms.Application.DoEvents();
                        if (null != ci.HotelPrice)
                        {
                            // NOTE(review): starts at index 1, so HotelPrice[0] is never stored -
                            // confirm this is intended.
                            for (int ii = 1; ii < ci.HotelPrice.Count; ii++)
                            {
                                ci.AddHotelPrice(ci.HotelPrice[ii], id);
                            }
                        }
                        else
                        {
                            continue;
                        }
                    }
                    else
                    {
                        continue;
                    }
                    ci = null;
                }
                catch (Exception ex)
                {
                    // Append the failure to the log file under the application's start-up path.
                    // NOTE(review): "/r/n" is written literally; it looks like a corrupted
                    // "\r\n" (and "//" like a corrupted "\\") - confirm.
                    TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "//CtripErrorlog.log", true);
                    tw.Flush();
                    tw.WriteLine("错误ID: " + i.ToString() + "/r/n 错误原因:" + ex.Message);
                    tw.WriteLine("-------------------------------------------");
                    tw.Flush();
                    tw.Close();
                    tw = null;
                    ci = null;
                    continue;
                }
            }
        }

        /// <summary>
        /// Stub: only resets <c>IsRunCtrip</c> to false. Both parameters are currently unused.
        /// </summary>
        /// <param name="pStart">Unused.</param>
        /// <param name="pEnd">Unused.</param>
        void GetCtripHotel(int pStart, int pEnd)
        {
            IsRunCtrip = false;
        }

 

        private void button4_Click(object sender, EventArgs e)

        {

            //System.Data.DataSet dt = DBHelper.Query("select * from hotel price");

            //string sql = "update hotelprice2 set RoomNewPrice='{0}',BroadBand='{1}',BedType='{2}' where ";

            //if (dt != null)

            //{

            //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)

            //    {

            //        string id"].ToString();

            //        string Oldprice = dt.Tables[0].Rows[i]["RoomNewPrice"].ToString();

            //        string Oldband = dt.Tables[0].Rows[i]["BroadBand"].ToString();

            //        string OldBedType = dt.Tables[0].Rows[i]["BroadBand"].ToString();

            //        if (Oldprice.IndexOf("&nbsp") > 1)

            //        {

            //            int start = Oldprice.IndexOf("&nbsp");

            //            string newprice = Oldprice.Substring(0, start);

            //            string newband = Oldprice.Substring(0, start + 5);

 

            //            sql = string.Format(sql, newprice, newband, Oldband);

            //            if (DBHelper.ExecuteSql(sql) > 0)

            //            {

            //                newband = newprice = null;

            //                continue;

            //            }

            //        }

            //        else

            //        {

            //            continue;

            //        }

 

            //    }

 

 

            //}

 

            //System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");

            //string sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;

            //if (dt != null)

            //{

            //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)

            //    {

            //        sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;

            //        string id"].ToString();

            //        string OldName = dt.Tables[0].Rows[i]["Name"].ToString();

            //        if (OldName.IndexOf("(") > 1)

            //        {

            //            int start = OldName.IndexOf("(");

            //            string newName = OldName.Substring(0, start);

            //            string subshop = OldName.Substring(start + 1, OldName.Length - start - 2);

            //            sql = string.Format(sql, subshop, newName, id);

 

            //            if (DBHelper.ExecuteSql(sql) > 0)

            //            {

            //                = newName = subshop = null;

            //                continue;

            //            }

            //        }

            //        else

            //        {

            //            continue;

            //        }

 

 

 

            //    }

 

 

            //}

            //          北京    >                          朝阳区    >                          燕莎/酒仙桥/丽都         

            System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");

            string sql = "update hotelinfo set position='{0}' where ;

            if (dt != null)

            {

                for (int i = 37; i < dt.Tables[0].Rows.Count; i++)

                {

                    sql = "update hotelinfo set position='{0}' where ;

                    string id"].ToString();

                    string OldName = dt.Tables[0].Rows[i]["area"].ToString();

                    if (OldName.IndexOf(">") > 1)

                    {

                        OldName = OldName.Replace(" ", "");

                        int start = OldName.IndexOf(">");

                        start++;

                        int end = OldName.IndexOf(">", start);

                        if (end > -1)

                            OldName = OldName.Substring(start, end - start);

                        else

                            OldName = OldName.Substring(start);

                        sql = string.Format(sql, OldName, id);

 

                        if (DBHelper.ExecuteSql(sql) > 0)

                        {

                            = null;

                            continue;

                        }

                    }

                    else

                    {

                        continue;

                    }

                }

            }

            MessageBox.Show("i am ok");

 

        }

        /// <summary>

        /// 删除HTML标识

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        public string DropHTMLTag(string htmlString)

        {

            htmlString = Regex.Replace(htmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

            htmlString = Regex.Replace(htmlString, @"([/r/n])[/s]+", "", RegexOptions.IgnoreCase);

            htmlString = Regex.Replace(htmlString, @"-->", "", RegexOptions.IgnoreCase);

            htmlString = Regex.Replace(htmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);

            htmlString = Regex.Replace(htmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);

            htmlString = Regex.Replace(htmlString, @"<!--.*", "", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(quot|#34);", "/"", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(iexcl|#161);", "/xa1", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(cent|#162);", "/xa2", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(pound|#163);", "/xa3", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&(copy|#169);", "/xa9", RegexOptions.IgnoreCase);

            //htmlString = Regex.Replace(htmlString, @"&#(/d+);", "", RegexOptions.IgnoreCase);

 

            //htmlString.Replace("<", "");

            //htmlString.Replace(">", "");

            //htmlString.Replace("/r/n", "");

 

 

            return htmlString;

        }

 

        #region 悟能啊悟能

 

 

        private void btn8j_Click(object sender, EventArgs e)

        {

            progressBar1.Maximum = 18;

            this.Text = "正在采集悟能,请等待。。。";

            d8jmain = new d8JMain(showmsg8j);

            Thread tCtrip = new Thread(Start8j);

            tCtrip.IsBackground = true;

            tCtrip.Start();

            Start8j();

 

        }

        void showmsg8j(int i)

        {

 

            progressBar1.Value++;

            label4.Text = string.Format("现是ID:{0},已完成:{1}" + i, (progressBar1.Value / (float)progressBar1.Maximum).ToString("p"));

            if (progressBar1.Value == progressBar1.Maximum)

                MessageBox.Show("finish");

        }

        void Start8j()

        {

 

 

            string url = "http://bj.8j.com/biz/restaurants/BJS0{0}";

            List<string> HotelLink = new List<string>();

            for (int i = 1; i <= 18; i++)

            {

 

                url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}", i);

                url = GetWebContent(url, Encoding.UTF8);

                if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)

                {

                    continue;

                }

                int count = GetAreaHotelCount(url);

                url = GetShortHTMLContent(url);

 

                HotelLink.AddRange(GetHtml(url));

                for (int ss = 0; ss < HotelLink.Count; ss++)

                {

                    insert8jAllHotelLink(HotelLink[ss], i);

                }

                HotelLink.Clear();

                for (int j = 2; j <= count; j++)

                {

                    try

                    {

 

                        // Thread.Sleep(10);

 

                        url = null;

                        url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}/{1}", i, j);

                        url = GetWebContent(url, Encoding.UTF8);

                        if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)

                        {

                            continue;

                        }

                        url = GetShortHTMLContent(url);

                        HotelLink.AddRange(GetHtml(url));

                        for (int ss = 0; ss < HotelLink.Count; ss++)

                        {

                            insert8jAllHotelLink(HotelLink[ss], i);

                        }

                        HotelLink.Clear();

                        // this.BeginInvoke(d8jmain, new object[] { i, j, count });

                    }

                    catch (Exception ex)

                    {

 

                        TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "//CtripErrorlog.log", true);

                        tw.Flush();

                        tw.WriteLine("错误ID: " + j.ToString() + "/r/n 错误原因:" + ex.Message);

                        tw.WriteLine("-------------------------------------------");

                        tw.Flush();

                        tw.Close();

                        tw = null;

                        continue;

                    }

                }

 

 

            }

            url = null;

 

        }

        /// <summary>

        /// 把所有抓到的HTMlLink存取

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        bool insert8jAllHotelLink(string kk, int id)

        {

            string sql = "INSERT INTO [a]([ssss],area) VALUES('{0}','{1}')";

            sql = string.Format(sql, kk, GetAreaInfo(id));

            return DBHelper.ExecuteSql(sql) > 0;

        }

        string GetAreaInfo(int id)

        {

            switch (id)

            {

                case 1:

                    return "朝阳区";

 

                case 2:

                    return "海淀区";

 

                case 3:

 

                    return "东城区";

 

                case 4:

                    return "西城区";

                case 5:

                    return "宣武区";

                case 6:

 

                    return "崇文区";

                case 7:

                    return "丰台区";

                case 8:

                    return "石景山区";

                case 9:

                    return "房山区";

                case 10:

                    return "通州区";

                case 11:

                    return "昌平区";

                case 12:

                    return "顺义区";

                case 13:

                    return "大兴区";

                case 14:

                    return "怀柔区";

                case 15:

                    return "门头沟区";

                case 16:

                    return "平谷区";

                case 17:

                    return "延庆县";

                case 18:

                    return "密云县";

                default:

                    return "其它地区";

 

            }

 

 

        }

        /// <summary>

        /// 去掉干扰字符

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        string GetShortHTMLContent(string line)

        {

            int start = 0, end = 0;

 

            start = line.IndexOf("<!-- bizlist 列表1开始  -->");

            end = line.IndexOf("<!-- bizlist 列表1结束  -->", start);

            line = line.Substring(start, end - start);

            line = line.Replace("/r", "");

            line = line.Replace("/n", "");

            line = line.Replace("/t", "");

            line = line.Replace("<a", "ウ");

            line = line.Replace("/a>", "ウ");

            return DropHTMLTag(line);

 

 

        }

        /// <summary>

        /// 页的所有酒店

        /// </summary>

        /// <param ></param>

        /// <returns>string[]</returns>

        string[] GetHtml(string line)

        {

            try

            {

 

                string tmp = null;

                int start = 0, end = 0;

                List<string> sss = new List<string>();

                while (line.IndexOf("href=/"", start) > 0)

                {

                    line = line.Replace(" ", "");

                    start = line.IndexOf("href=/"", start);

                    if (start < 1)

                        continue;

 

                    start = start + 6;

                    end = line.IndexOf("/"target", start);

                    if (end < 1)

                        continue;

                    tmp = line.Substring(start, end - start);

                    if (sss.Count == 0)

                    {

 

                        sss.Add(tmp);

                    }

                    else

                    {

                        if (sss[sss.Count - 1] == tmp)

                        {

                            continue;

                        }

                        else

                        {

                            sss.Add(tmp);

                        }

                    }

 

                    start = end;

                }

                return sss.ToArray();

            }

            catch (Exception ex)

            {

                return null;

 

            }

 

        }

        /// <summary>

        /// 获得酒店总页数

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        int GetAreaHotelCount(string line)

        {

            int start = 0, end = 0;

            start = line.IndexOf("<!-- pgdn start  -->");

            if (start < 1) return 0;

 

            end = line.IndexOf("<!-- pgdn end  -->", start);

            if (end < 1) return 0;

            line = line.Substring(start, end - start);

            line = DropHTMLTag(line);

            start = line.IndexOf("共");

            if (start < 1) return 0;

            start++;

            end = line.IndexOf("页", start);

            if (end < 1) return 0;

            return int.Parse(line.Substring(start, end - start));

 

        }

        #endregion

 

 

        #region 悟能详细页

 

 

        private void button5_Click(object sender, EventArgs e)

        {

            progressBar1.Maximum = int.Parse(DBHelper.GetSingle("select count(*) from a").ToString());

            this.Text = "正在采集悟能,请等待。。。";

            dfrist = new dFrist(showmsg);

            Thread tCtrip = new Thread(beginGetHotel);

            tCtrip.IsBackground = true;

            tCtrip.Start();

            //  beginGetHotel();

        }

        void beginGetHotel()

        {

            System.Data.DataSet dt = DBHelper.Query("select * from a");

            string url = null, tmp = null;

            string tel = null, jAdd = null, onlyfoot = null, Address = null, postcode = null, area = null, tag = null, siteurl = null, remark = null;

            for (int i = 0; i < dt.Tables[0].Rows.Count; i++)

            {

 

                try

                {

                    this.BeginInvoke(dfrist, new object[] { i });

                    jAdd = url = dt.Tables[0].Rows[i][1].ToString();

                    url = GetWebContent(url, Encoding.UTF8);

                    url = url.Replace("/r", "");

                    url = url.Replace("/n", "");

                    url = url.Replace("/t", "");

                    if (null != url && string.Empty != url)

                    {

                        int start = 0, end = 0;

                        start = url.IndexOf("<!-- qyxx  开始  -->");

                        if (start < 1)

                        {

                            continue;

                        }

                        end = url.IndexOf("<!-- info nav end -->", start);

                        tmp = url.Substring(start, end - start);

                        start = tmp.IndexOf("<strong>");

                        if (start < 1)

                        {

                            ;

                        }

                        else

                        {

                            start += 8;

                            end = tmp.IndexOf("</strong>", start);

                            end - start);

                            }

 

                        start = tmp.IndexOf("<strong>", end);

                        start += 8;

                        end = tmp.IndexOf("</strong>", start);

                        tel = tmp.Substring(start, end - start);

                        start = tmp.IndexOf("<p>", end);

                        start += 3;

                        end = tmp.IndexOf("<br/>", start);

                        Address = tmp.Substring(start, end - start);

                        Address = ReplaceSingleQuotes(Address);

                        start = end;

                        start += 5;

                        end = tmp.IndexOf("</p>", end);

                        postcode = tmp.Substring(start, end - start);

                        //area=dt.Tables[0].Rows[i][2].ToString();

                        start = tmp.IndexOf("区域:", end);

                        if (start < 1)

                        {

                            area = "未知";

                        }

                        else

                        {

                            start += 3;

                            end = tmp.IndexOf("</p>", start);

                            area = DropHTMLTag(tmp.Substring(start, end - start));

                            area = ReplaceSingleQuotes(area);

                        }

 

 

                        start = tmp.IndexOf("标签:", end);

                        if (start < 1)

                        {

                            tag = "未知";

                        }

                        else

                        {

                            start += 3;

                            end = tmp.IndexOf("</p>", start);

                            tag = DropHTMLTag(tmp.Substring(start, end - start));

                            tag = ReplaceSingleQuotes(tag);

                        }

 

 

                        start = tmp.IndexOf("网址:", end);

                        if (start < 1)

                        {

                            siteurl = "未知";

                        }

                        else

                        {

                            start += 3;

                            end = tmp.IndexOf("</p>", start);

                            siteurl = DropHTMLTag(tmp.Substring(start, end - start));

                        }

                        start = url.IndexOf("特色推荐:");

                        if (start < 1)

                        {

                            onlyfoot = "未知";

                        }

                        else

                        {

                            start += 5;

                            end = url.IndexOf("</p>", start);

                            onlyfoot = url.Substring(start, end - start);

                            onlyfoot = ReplaceSingleQuotes(onlyfoot);

                        }

 

 

 

                        start = url.IndexOf("<!-- jj开始  -->");

                        if (start < 1)

                        {

                            remark = "未知";

                        }

                        else

                        {

 

                            end = url.IndexOf("<!-- jj结束  -->", start);

                            tmp = url.Substring(start, end - start);

                            if (tmp.IndexOf("简介") < 1)

                            {

                                remark = "未知";

 

                            }

                            else

                            {

                                start = tmp.IndexOf("简介");

                                end = tmp.IndexOf("</h4>", start);

                                remark = DropHTMLTag(tmp.Substring(start, end - start));

                                remark = ReplaceSingleQuotes(remark);

                            }

 

                        }

 

                        inser8jHotlInfo(name, tel, onlyfoot, Address, postcode, area, tag, siteurl, remark, jAdd);

                        = onlyfoot = Address = postcode = area = tag = siteurl = remark = null;

                    }

                }

                catch (Exception ex)

                {

 

                    TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "//CtripErrorlog.log", true);

                    tw.Flush();

                    tw.WriteLine("错误ID: " + i.ToString() + "/r/n 错误原因:" + ex.Message);

                    tw.WriteLine("-------------------------------------------");

                    tw.Flush();

                    tw.Close();

                    tw = null;

                    continue;

                }

            }

 

        }

        /// <summary>

        /// 替换单引号成中文的单引号

        /// </summary>

        /// <param ></param>

        /// <returns></returns>

        public string ReplaceSingleQuotes(string hTML)

        {

            return hTML.Replace("/'", "`");

        }

        void inser8jHotlInfo(string name, string tel, string onlyfoot, string Address, string postcode, string area, string tag, string siteurl, string remark, string jAdd)

        {

            string sql = "INSERT INTO [HotelInfo]([name], [Address], [Tel], [Postcode], [area], [tag], [siteurl], [onlyfoot], [remark],[8jAddress]) VALUES('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}','{8}','{9}')";

            sql = string.Format(sql, name, Address, tel, postcode, area, tag, siteurl, onlyfoot, remark, jAdd);

            DBHelper.ExecuteSql(sql);

        }

 

        #endregion

 

        private void Form1_Load(object sender, EventArgs e)

        {

 

        }

 

        private void button6_Click(object sender, EventArgs e)

        {

            System.Data.DataSet dt1 = DBHelper.Query("select star5,city from [00]");

            System.Data.DataSet dt2 = DBHelper.Query("select id,city from ctriptwo");

            string sql = null;

            for (int i = 0; i < dt2.Tables[0].Rows.Count; i++)

            {

                for (int ii = 0; ii < dt1.Tables[0].Rows.Count; ii++)

                {

                    if (dt2.Tables[0].Rows[i]["city"].ToString() == dt1.Tables[0].Rows[ii]["city"].ToString())

                    {

                        sql = "UPDATE ctriptwo set startLevelcount0={0} where ;

                        sql = string.Format(sql, dt1.Tables[0].Rows[ii]["star5"], dt2.Tables[0].Rows[i]["id"]);

                        DBHelper.ExecuteSql(sql);

                    }

                    else

                        continue;

 

                }

 

                /*

         INSERT INTO [LocalTest].[dbo].[ctripTwo]

           ([startLevelcount0]

           ,[startLevel5count4]

           ,[startLevel5count5]

           ,[startLevel5count3]

           ,[startLevel5count2]

           ,[startLevel5count1]

           ,[City])

     VALUES

                 */

               

              

            }

            MessageBox.Show("ok");

        }

 

 

       

 

        private void button7_Click(object sender, EventArgs e)

        {

           

            this.Text = "正在采集点评,请等待...";

            label5.Text=System.DateTime.Now.ToString("hh时mm分ss秒");          

            Thread tCtrip = new Thread(Shop);

            tCtrip.IsBackground = true;

            tCtrip.Start();                      

                                 

        }

        public void Shop()

        {

            int shopid;

            int log = 0; int iDivStart = 0;

            string StrSql="";

            string ShopTag = "";

            string DianpingInfo="";

           

            string ShopCity = ""; string ShopArea = ""; string ShopID = ""; string ShopName = ""; string ShopSort = ""; string ShopAddress = ""; string ShopPhone = "";

            try

            {

                for (shopid = 1612436; shopid < 2700000; shopid++)

                {

                    string Url = "http://www.dianping.com/shop/" + shopid;

                    string strResult = GetHtmlCode(Url);

                    if (ReturnMsg(strResult))

                    {

                        try

                        {

                            string divStart = @"<div ShopGuide"">";

                            string divEnd = @"<div Reviews"">";

                            iDivStart = strResult.IndexOf(divStart);

                            int iDivEnd = strResult.IndexOf(divEnd);

                            DianpingInfo = strResult.Substring(iDivStart, iDivEnd - iDivStart);

                        }

                        catch (Exception ex)

                        {

 

                            TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                            tw.Flush();

                            tw.WriteLine("错误原因:" + ex.Message + Url);

                            tw.WriteLine("-------------------------------------------");

                            tw.Flush();

                            tw.Close();

                            tw = null;

 

                        }

                        //提取导航条中信息

                        try

                        {

                            string NavigationInfo = @"<div Shop"">";

                            int NavigationEnd = strResult.IndexOf(NavigationInfo);

                            string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);

                            string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");

                            string ml = "ml";

                            string[] resultString = Regex.Split(str, ml, RegexOptions.IgnoreCase);

                            string str1 = (resultString.Length).ToString();

                            string str2 = "5";

                            string str3 = "7";

                            string str4 = "6";

 

                            if (str1 == str2)

                            {

                                try

                                {

                                    //提取地址ShopAddress

                                    int AddressEnd = DianpingInfo.IndexOf("地图");

                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");

                                    if (AddressEnd > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else

                                    {

                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

                                //商店ID名ShopID

                                ShopID = shopid.ToString();

                                //提取所在城市ShopCity

                                ShopCity = resultString[0].ToString();

                                //商店名ShopName

                                ShopName = resultString[4].ToString().Replace("'", ".");

                                //商店所在区

                                ShopArea = resultString[2].ToString();

                                //商店属于购物类别

                                ShopSort = resultString[3].ToString();

 

                                try

                                {   //商店联系电话

                                    int PhoneStart = strResult.IndexOf("电话:");

                                    //有电话走这边,没电话不添加

                                    if (PhoneStart.ToString() != "-1")

                                    {

                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");

                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

 

                                    }

                                    else

                                    {

                                        ShopPhone = "无联系电话";

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

                                try

                                {

                                    //分类标签ShopTag

                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");

                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

 

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");

                                }

 

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

                            }

                            else if (str1 == str3)

                            {

 

                                try

                                {

                                    //提取地址ShopAddress

                                    int AddressEnd = DianpingInfo.IndexOf("地图");

                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");

                                    if (AddressEnd > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else

                                    {

                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

                                //商店ID名ShopID

                                ShopID = shopid.ToString();

                                //提取所在城市ShopCity

                                ShopCity = resultString[0].ToString();

                                //商店名ShopName

                                ShopName = resultString[5].ToString().Replace("'", ".");

                                //商店所在区

                                ShopArea = resultString[2].ToString();

                                //商店属于购物类别

                                ShopSort = resultString[4].ToString();

 

                                try

                                {   //商店联系电话

                                    int PhoneStart = strResult.IndexOf("电话:");

                                    //有电话走这边,没电话不添加

                                    if (PhoneStart.ToString() != "-1")

                                    {

                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");

                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

 

                                    }

                                    else

                                    {

                                        ShopPhone = "无联系电话";

                                    }

 

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

                                try

                                {

                                    //分类标签ShopTag

                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");

                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

 

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");

                                }

 

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

 

                            }

                            else if (str1 == str4)

                            {

 

                                try

                                {

                                    //提取地址ShopAddress

                                    int AddressEnd = DianpingInfo.IndexOf("地图");

                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");

                                    if (AddressEnd > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else

                                    {

                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

                                //商店ID名ShopID

                                ShopID = shopid.ToString();

                                //提取所在城市ShopCity

                                ShopCity = resultString[0].ToString();

                                //商店名ShopName

                                ShopName = resultString[5].ToString().Replace("'", ".");

                                //商店所在区

                                ShopArea = resultString[3].ToString();

                                //商店属于购物类别

                                ShopSort = resultString[4].ToString();

 

                                try

                                {   //商店联系电话

                                    int PhoneStart = strResult.IndexOf("电话:");

                                    //有电话走这边,没电话不添加

                                    if (PhoneStart.ToString() != "-1")

                                    {

                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");

                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

 

                                    }

                                    else

                                    {

                                        ShopPhone = "无联系电话";

                                    }

 

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

                                try

                                {

                                    //分类标签ShopTag

                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");

                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

 

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");

                                }

 

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

 

                            }

                            else

                            {

                                try

                                {

                                    //提取地址ShopAddress

                                    int AddressEnd = DianpingInfo.IndexOf("地图");

                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");

                                    if (AddressEnd > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)

                                    {

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                    else

                                    {

                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");

                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

                                //商店ID名ShopID

                                ShopID = shopid.ToString();

                                //提取所在城市ShopCity

                                ShopCity = resultString[0].ToString();

                                //商店名ShopName

                                ShopName = resultString[3].ToString().Replace("'", ".");

                                //商店没有所在区

                                ShopArea = "无";

                                //商店属于购物类别

                                ShopSort = resultString[2].ToString();

 

                                try

                                {   //商店联系电话

                                    int PhoneStart = strResult.IndexOf("电话:");

                                    //有电话走这边,没电话不添加

                                    if (PhoneStart.ToString() != "-1")

                                    {

                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");

                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

 

                                    }

                                    else

                                    {

                                        ShopPhone = "无联系电话";

                                    }

                                }

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

                                try

                                {

                                    //分类标签ShopTag

                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");

                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

 

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");

                                }

 

                                catch (Exception ex)

                                {

 

                                    TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                                    tw.Flush();

                                    tw.WriteLine("错误原因:" + ex.Message + Url);

                                    tw.WriteLine("-------------------------------------------");

                                    tw.Flush();

                                    tw.Close();

                                    tw = null;

 

                                }

 

 

                            }

 

 

                        }

                        catch (Exception ex)

                        {

 

                            TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                            tw.Flush();

                            tw.WriteLine("错误原因:" + ex.Message + Url);

                            tw.WriteLine("-------------------------------------------");

                            tw.Flush();

                            tw.Close();

                            tw = null;

 

                        }

 

                        //插入数据库

                        StrSql = "insert into ShopInfo values(" + ShopID + ",'" + ShopName + "','" + ShopAddress + "','" + ShopPhone + "','" + ShopCity + "','" + ShopArea + "','" + ShopSort + "','" + ShopTag + "')";

                        log = DBHelper.ExecuteSql(StrSql) + log;

                       

 

                    }

                    else

                    {

 

                    }

                    continue;

                }

            }

            catch (Exception ex)

            {

 

                TextWriter tw = new StreamWriter("D://WebBee//ShoppingErrorlog.log", true);

                tw.Flush();

                tw.WriteLine("错误原因:" + ex.Message);

                tw.WriteLine("-------------------------------------------");

                tw.Flush();

                tw.Close();

                tw = null;

               

            }       

            finally

            {

 

                MessageBox.Show("完成" + System.DateTime.Now.ToString("hh时mm分ss秒"));

            }

           

          

           

           

        }

        //获取网源码方法

        public string GetHtmlCode(string Url)

        {

            Encoding encoding = Encoding.GetEncoding("utf-8");

            string strResult = "";

            try

            {

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

                request.Timeout = 30000;

                request.Headers.Set("Pragma", "no-cache");

                request.CookieContainer = new CookieContainer();

                request.Credentials = CredentialCache.DefaultCredentials;

                request.Referer = Url;

                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                Stream streamReceive = response.GetResponseStream();

                StreamReader streamReader = new StreamReader(streamReceive, encoding);

                strResult = streamReader.ReadToEnd();

                streamReceive.Close();

                streamReader.Close();

                streamReceive = null;

                streamReader = null;

            }

            catch (Exception ex)

            {

               

            }

            return strResult;

        }

        //提供BooL判断是否继续

        public bool ReturnMsg(string strResult)

        {   //获取ID

            int iTitleStart = strResult.IndexOf("<title>");

            int iTitleEnd = strResult.IndexOf("</title>");

            string StrWeb = strResult.Substring(iTitleStart, iTitleEnd - iTitleStart);

            string StrTitle = DropHTMLTag(StrWeb);

            //获取时候是购物类型网StrSort

            string StrSort="";

           

            try

            {

                string divStart = @"<div ShopGuide"">";

                string divEnd = @"<div Reviews"">";

                int iDivStart = strResult.IndexOf(divStart);

                int iDivEnd = strResult.IndexOf(divEnd);

                string NavigationInfo = @"<div Shop"">";

                int NavigationEnd = strResult.IndexOf(NavigationInfo);

                string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);

                string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");

                string ml = "ml";

                string[] resultString= Regex.Split(str, ml, RegexOptions.IgnoreCase);

                StrSort = resultString[1].ToString();

            }

            catch

            {

                StrSort = "出错";

 

            }

            finally

            {

               

            }

            if (StrSort!="购物"||StrTitle == "该商户不存在或已被删除" )

            {

                return false;

            }

            else

            {

                return true;

            }

 

        }

     

    }     

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值