采用了写正则,具体可以看这里 不过我用的不是这个。呵呵
代码还有点粗糙,比如还没有实现把结果统一写到 XML 中再显示出来。
还有些东西还要过滤,一点一点来吧。先记录一下,免得以后忘记。
default.aspx
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" ValidateRequest="false" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>Untitled Page</title>
</head>
<body>
<form id="aspBuffer" method=post runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<br />
<div>
<asp:TextBox ID="UrlText" runat="server" Style="z-index: 100; left: 9px; position: absolute;
top: 47px" Width="400px"></asp:TextBox>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" style="z-index: 101; left: 444px; position: absolute; top: 45px" OnClick="WebRequestButton_Click"></asp:Button>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine" style="z-index: 102; left: 3px; position: absolute; top: 92px">
</asp:TextBox>
<asp:Button ID="getUrl" runat="server" OnClick="getUrl_Click" Style="z-index: 104;
left: 675px; position: absolute; top: 45px" Text="得到网页链接" />
</div>
</form>
</body>
</html>
default.aspx.cs
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
/// <summary>
/// Code-behind for Default.aspx. Downloads the source of a user-supplied URL
/// into the <c>ContentHtml</c> text box and, on request, replaces that text
/// with the sorted list of distinct hyperlinks found in it.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Matches absolute http/https URLs: the scheme, then a run of
    /// non-whitespace characters containing at least one dot.
    /// The match is a greedy run of non-whitespace, so characters that directly
    /// follow a URL without a space (quotes, angle brackets, entities) are
    /// included in the match; further filtering is still to be done.
    /// </summary>
    private static readonly Regex HyperLinkRegex =
        new Regex(@"https?://\S+\.\S+", RegexOptions.IgnoreCase);

    /// <summary>URL most recently requested through the WebRequest button.</summary>
    public string urlPage = "";

    /// <summary>Page load handler; intentionally empty.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Fetches the resource at the URL typed into <c>UrlText</c> and shows its
    /// HTML-encoded body in <c>ContentHtml</c>.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void WebRequestButton_Click(object sender, EventArgs e)
    {
        urlPage = UrlText.Text.Trim();
        if (urlPage.Length == 0)
        {
            // Nothing to fetch; WebRequest.Create would throw on an empty URI.
            return;
        }

        // NOTE(review): the URL comes straight from user input and
        // WebRequest.Create also accepts non-HTTP schemes (e.g. file://).
        // Consider restricting the scheme before exposing this page publicly.
        WebRequest request = WebRequest.Create(urlPage);

        // 'using' guarantees the response, stream and reader are released
        // even when reading the body throws.
        // NOTE(review): Encoding.Default is the machine's ANSI code page and
        // ignores the charset declared by the response - confirm this is intended.
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
        {
            ContentHtml.Text = Server.HtmlEncode(sr.ReadToEnd());
        }
    }

    /// <summary>
    /// Replaces the content of <c>ContentHtml</c> with the distinct hyperlinks
    /// found in it, one per line.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void getUrl_Click(object sender, EventArgs e)
    {
        ArrayList allLinks = GetHyperLinks(ContentHtml.Text);

        StringBuilder joined = new StringBuilder();
        // Iterate over every entry (the previous bound of Count - 1 dropped the
        // last link) and separate entries so they do not fuse into one string.
        for (int j = 0; j < allLinks.Count; j++)
        {
            if (j > 0)
            {
                joined.Append(Environment.NewLine);
            }
            joined.Append(allLinks[j]);
        }

        ContentHtml.Text = joined.ToString();
    }

    /// <summary>
    /// Extracts the distinct URLs matched by <see cref="HyperLinkRegex"/> from
    /// the given text.
    /// </summary>
    /// <param name="htmlCode">Text to scan; may be null or empty.</param>
    /// <returns>
    /// A sorted <see cref="ArrayList"/> of unique URL strings; empty when the
    /// input is null, empty, or contains no match.
    /// </returns>
    private static ArrayList GetHyperLinks(string htmlCode)
    {
        ArrayList links = new ArrayList();
        if (string.IsNullOrEmpty(htmlCode))
        {
            return links;
        }

        foreach (Match match in HyperLinkRegex.Matches(htmlCode))
        {
            string url = match.Value;

            // Filter out duplicate URLs.
            if (!links.Contains(url))
            {
                links.Add(url);
            }
        }

        links.Sort();
        return links;
    }
}