网页采集(C#)

        private string GetWebContent(string Url)

        {

            string strResult = "";

            try

            {

                HttpWebRequest request =

                    (HttpWebRequest)WebRequest.Create(Url);

                //声明一个HttpWebRequest请求

                request.Timeout = 30000;

                //设置连接超时时间

                request.Headers.Set("Pragma","no-cache");

                HttpWebResponse response =

                    (HttpWebResponse)request.GetResponse();

                Stream streamReceive = response.GetResponseStream();

                Encoding encoding =Encoding.GetEncoding("GB2312");

                StreamReader streamReader =

                    new StreamReader(streamReceive, encoding);

                strResult = streamReader.ReadToEnd();

            }

            catch

            {

                MessageBox.Show("出错");

            }

 

            return strResult;

        }  

为了使用HttpWebRequest和HttpWebResponse,需要添加命名空间引用(Stream、StreamReader 还需要 using System.IO;,Encoding 还需要 using System.Text;):

  using System.Net;

以下是程序具体实现过程:

        private void button1_Click(object sender, EventArgs e)   

        {   

            //要抓取的URL地址   

            string Url =

                "<a href='http://list.mp3.baidu.com/topso/" +

                "mp3topsong.html?id=1#top2' target='_blank'>" +

                "http://list.mp3.baidu.com/...</a>";   

  

            //得到指定Url的源码   

         string strWebContent = GetWebContent(Url);   

  

            richTextBox1.Text = strWebContent;   

         //取出和数据有关的那段源码   

            int iBodyStart = strWebContent.IndexOf("<body", 0);   

            int iStart = strWebContent.IndexOf("歌曲TOP500", iBodyStart);   

            int iTableStart = strWebContent.IndexOf("<table", iStart);   

            int iTableEnd = strWebContent.IndexOf("</table>", iTableStart);   

  

            string strWeb =

                strWebContent.Substring(iTableStart, iTableEnd - iTableStart + 8);   

            //生成HtmlDocument   

         WebBrowser webb = new WebBrowser();   

            webb.Navigate("about:blank");   

            HtmlDocument htmldoc = webb.Document.OpenNew(true);   

            htmldoc.Write(strWeb);   

            HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("TR");   

            foreach (HtmlElement trin htmlTR)   

            {   

                string strID = tr.GetElementsByTagName("TD")[0].InnerText;   

                string strName =

                    SplitName(tr.GetElementsByTagName("TD")[1].InnerText,"MusicName");   

                string strSinger =

                    SplitName(tr.GetElementsByTagName("TD")[1].InnerText,"Singer");   

  

                strID = strID.Replace(".","");   

                //插入DataTable   

                AddLine(strID, strName, strSinger,"0");   

                string strID1 = tr.GetElementsByTagName("TD")[2].InnerText;   

                string strName1 =

                    SplitName(tr.GetElementsByTagName("TD")[3].InnerText,"MusicName");   

                string strSinger1 =

                    SplitName(tr.GetElementsByTagName("TD")[3].InnerText,"Singer");   

                //插入DataTable   

                strID1 = strID1.Replace(".","");   

                AddLine(strID1, strName1, strSinger1,"0");   

                string strID2 = tr.GetElementsByTagName("TD")[4].InnerText;   

                string strName2 =

                    SplitName(tr.GetElementsByTagName("TD")[5].InnerText,"MusicName");   

                string strSinger2 =

                    SplitName(tr.GetElementsByTagName("TD")[5].InnerText,"Singer");   

                //插入DataTable   

                strID2 = strID2.Replace(".","");   

                AddLine(strID2, strName2, strSinger2,"0");   

            }   

            //插入数据库   

            InsertData(dt);   

      

            dataGridView1.DataSource = dt.DefaultView;   

        }

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值