C#(ASP.NET) 下载数据 C#实现通过程序自动抓取远程Web网页信息

 

以下面的URL为例(百度MP3"歌曲TOP500"榜单页面),下面的WinForms代码会下载该页面、解析出歌曲排行并写入数据库:http://list.mp3.baidu.com/topso/mp3topsong.html?id=1#top2

 

/// <summary>
/// Downloads the page at <paramref name="Url"/> and returns its body decoded as GB2312 text.
/// </summary>
/// <param name="Url">Address of the page to fetch.</param>
/// <returns>
/// The decoded response body, or an empty string when anything goes wrong
/// (in that case a message box with the text "出错" is shown).
/// </returns>
private string GetWebContent(string Url)
        {
            string strResult = "";
            try
            {
                // Build the HTTP request for the target page.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                // Give up if no response arrives within 30 seconds (value is in milliseconds).
                request.Timeout = 30000;
                request.Headers.Set("Pragma", "no-cache");

                // Dispose the response, its stream and the reader even when reading fails,
                // so the underlying connection is always released.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream streamReceive = response.GetResponseStream())
                using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.GetEncoding("GB2312")))
                {
                    strResult = streamReader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Best-effort download: report the failure and fall through to return "".
                MessageBox.Show("出错");
            }
            return strResult;
        }

        private void button1_Click_1(object sender, EventArgs e)
        {
            //要抓取的URL地址
            string Url = this.textBox1.Text;            //得到指定Url的源码
            string strResult = GetWebContent(Url);
            this.richTextBox1.Text = strResult;
            //取出和数据有关的那段源码
            int iBodyStart = strResult.IndexOf("<body", 0);
            int iStart = strResult.IndexOf("歌曲TOP500", iBodyStart);
            int iTableStart = strResult.IndexOf("<table", iStart);
            int iTableEnd = strResult.IndexOf("</table>", iTableStart);
            string strWeb = strResult.Substring(iTableStart, iTableEnd - iTableStart + 8);

            //
            DataTable dt = new DataTable();
            dt.Columns.Add("Id");
            dt.Columns.Add("MusicName");
            dt.Columns.Add("Singer");

            //生成HtmlDocument
            WebBrowser webb = new WebBrowser();
            webb.Navigate("about:blank");
            HtmlDocument htmldoc = webb.Document.OpenNew(true);
            htmldoc.Write(strWeb);
            HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("TR");
            foreach (HtmlElement tr in htmlTR)
            {
                string strID = tr.GetElementsByTagName("TD")[0].InnerText;
                string strName = SplitName(tr.GetElementsByTagName("TD")[1].InnerText, "MusicName");
                string strSinger = SplitName(tr.GetElementsByTagName("TD")[1].InnerText, "Singer");
                strID = strID.Replace(".", "");
                //插入DataTable
                AddLine(strID, strName, strSinger, dt);

                string strID1 = tr.GetElementsByTagName("TD")[2].InnerText;
                string strName1 = SplitName(tr.GetElementsByTagName("TD")[3].InnerText, "MusicName");
                string strSinger1 = SplitName(tr.GetElementsByTagName("TD")[3].InnerText, "Singer");
                //插入DataTable
                strID1 = strID1.Replace(".", "");
                AddLine(strID1, strName1, strSinger1, dt);
                string strID2 = tr.GetElementsByTagName("TD")[4].InnerText;
                string strName2 = SplitName(tr.GetElementsByTagName("TD")[5].InnerText, "MusicName");
                string strSinger2 = SplitName(tr.GetElementsByTagName("TD")[5].InnerText, "Singer");
                //插入DataTable
                strID2 = strID2.Replace(".", "");
                AddLine(strID2, strName2, strSinger2, dt);
                if (strID2.Equals("498"))
                    break;

            }

            dataGridView1.DataSource = dt.DefaultView;
            dataGridView1.Columns[1].Width = 165;
            dataGridView1.Columns[2].Width = 165;
        }

        public string SplitName(string longStrName, string name)
        {
            string result = "";
            if (name.Equals("MusicName"))
            {
                if (longStrName.IndexOf("(") == -1)
                    result = longStrName;
                else result = longStrName.Substring(0, longStrName.IndexOf("(") - 1);
            }
            else if (name.Equals("Singer"))
            {
                if (longStrName.IndexOf("(") == -1)
                    result = "";
                else result = longStrName.Substring(longStrName.IndexOf("(") + 1).TrimEnd(new char[] { ')' });
            }
            return result;
        }


        public void AddLine(string id, string music, string sing, DataTable dt)
        {
            //DataTable dt = (DataTable)this.ViewState["dt"];
            DataRow dr = dt.NewRow();
            dr["Id"] = id;
            dr["MusicName"] = music;
            dr["Singer"] = sing;
            dt.Rows.Add(dr);
            dt.AcceptChanges();
            string ConnString = @"server=./newdb;database=Downloaddata;uid=sa;pwd=sa";
            SqlConnection conn = new SqlConnection(ConnString);
            SqlCommand comm = new SqlCommand();
            comm.Connection = conn;
            conn.Open();
            string sql = "insert into Music(MusicName,Singer) values('" + music + "','" + sing + "')";
            comm.CommandText = sql;
            comm.ExecuteNonQuery();
            conn.Close();
        }

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值