.NET2.0抓取网页全部链接

效果图

 

后台代码

 

 

以下为引用的内容:
using System;

using System.Data;

using System.Configuration;

using System.Web;

using System.Web.Security;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Web.UI.WebControls.WebParts;

using System.Web.UI.HtmlControls;

using System.Text.RegularExpressions;

using System.Net;

using System.IO;

using System.Collections;

/// <summary>
/// Code-behind for Default.aspx. Downloads the page whose address is typed into
/// TextBox1 and lists every distinct absolute http/https link found in its source
/// in TextBox2, one link per line.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    // Absolute-URL matcher: scheme, dotted host name, optional path/query.
    // The published listing had every backslash turned into a forward slash
    // ("/w", "/."), which made the pattern unable to match a real URL; the escapes
    // are restored here. Hyphens inside the character classes are escaped so they
    // can never be read as a range operator. "https" is accepted in addition to
    // the original "http".
    private static readonly Regex LinkPattern = new Regex(
        @"https?://([\w\-]+\.)+[\w\-]+(/[\w\- ./?%&=]*)?",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Page load handler. Intentionally empty: the page needs no initialisation.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for Button1: clears TextBox2, fetches the URL entered in
    /// TextBox1 and writes the distinct links found in the response to TextBox2.
    /// </summary>
    /// <param name="sender">Event source.</param>
    /// <param name="e">Event data.</param>
    /// <remarks>
    /// When the input is empty or is not an absolute http/https URL, the handler
    /// returns with TextBox2 left empty instead of throwing. Network failures
    /// raised by the request still propagate to the caller, as before.
    /// </remarks>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        Uri target;
        if (!TryParseHttpUrl(this.TextBox1.Text, out target))
        {
            return;
        }

        // NOTE(review): the server requests whatever address the user supplies.
        // If this page is reachable by untrusted users, restrict the allowed
        // hosts to avoid server-side request forgery.
        string pageSource = DownloadPage(target);

        TextBox2.Text = ExtractDistinctLinks(pageSource);
    }

    /// <summary>
    /// Parses user input into an absolute http or https URI.
    /// </summary>
    /// <param name="text">Raw text from the address box; may be null or blank.</param>
    /// <param name="url">The parsed URI on success; null otherwise.</param>
    /// <returns>True when <paramref name="text"/> is an absolute http/https URL.</returns>
    private static bool TryParseHttpUrl(string text, out Uri url)
    {
        url = null;

        if (text == null)
        {
            return false;
        }

        string trimmed = text.Trim();
        if (trimmed.Length == 0)
        {
            return false;
        }

        Uri parsed;
        if (!Uri.TryCreate(trimmed, UriKind.Absolute, out parsed))
        {
            return false;
        }

        // Only http/https produce an HttpWebRequest; any other scheme would make
        // the cast in DownloadPage fail.
        if (parsed.Scheme != Uri.UriSchemeHttp && parsed.Scheme != Uri.UriSchemeHttps)
        {
            return false;
        }

        url = parsed;
        return true;
    }

    /// <summary>
    /// Downloads the response body of <paramref name="url"/> as text.
    /// </summary>
    /// <param name="url">Absolute http/https address to request.</param>
    /// <returns>The full response body read with StreamReader's default encoding.</returns>
    private static string DownloadPage(Uri url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // The using blocks release the connection, stream and reader even when
        // reading throws; the original closed only the reader and only on success.
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Collects every distinct link matched by <see cref="LinkPattern"/>.
    /// </summary>
    /// <param name="html">Page source to scan.</param>
    /// <returns>
    /// The links in order of first appearance, each followed by a newline.
    /// Comparison is case-sensitive, matching the original string equality test.
    /// </returns>
    private static string ExtractDistinctLinks(string html)
    {
        // Hashtable gives constant-time membership checks. The original scanned an
        // ArrayList that was never added to, so duplicates were never filtered.
        Hashtable seen = new Hashtable();
        System.Text.StringBuilder output = new System.Text.StringBuilder();

        foreach (Match match in LinkPattern.Matches(html))
        {
            string link = match.Value;

            if (seen.ContainsKey(link))
            {
                continue;
            }

            seen.Add(link, null);

            // A real newline; the listing had the literal two characters "/n".
            output.Append(link).Append("\n");
        }

        return output.ToString();
    }
}

 

前台

 

 

以下为引用的内容:
<%@ Page Language="C#" AutoEventWireup="true"  CodeFile="Default.aspx.cs" Inherits="_Default" %>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">

    <title>抓取网页所有链接</title>
   

</head>

<body >

    <form id="form1" runat="server">

    <div>

        <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>

        <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
        <br />

        <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox></div>

    </form>

</body>

</html>

转载于:https://www.cnblogs.com/ymyglhb/archive/2008/08/08/1263512.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值