/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The downloaded text, or the literal string "null" (not a null reference)
/// when the download or the read fails.
/// </returns>
private string FetchPage(String url)
{
    // Failure sentinel: stays in place unless the whole page is read successfully.
    String page = "null";
    try
    {
        // WebClient, the response stream and the reader are all IDisposable;
        // stacking the using statements releases every one of them, including
        // on the exception path, and makes an explicit Close() unnecessary.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream))
        {
            page = reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: any failure (bad address, network error,
        // read error) is reported to the caller through the "null" sentinel.
    }
    return page;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and collects every absolute
/// http:// link found in its text.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// The matched links in the order they were found, each one followed by ';',
/// or the literal string "null" when nothing matched.
/// </returns>
public string GetLink(String url)
{
    String content = this.FetchPage(url);

    // Regex escapes must be backslashes. The previous pattern had '/' in their
    // place, which turned "[/w-./?%&=]" into a class containing the reversed
    // range "w-." and made the Regex constructor throw ArgumentException on
    // every call. The '-' is placed last in the class so it is always literal.
    Regex link = new Regex(@"http://([\w-]+\.)+[\w-]+(/[\w./?%&=-]*)?", RegexOptions.IgnoreCase);

    StringBuilder sb = new StringBuilder();
    foreach (Match match in link.Matches(content))
    {
        sb.Append(match.Value);
        sb.Append(";");
    }

    // Keep the "null" sentinel used elsewhere in this file for "no result".
    if (sb.Length == 0)
    {
        sb.Append("null");
    }
    return sb.ToString();
}
// NOTE(review): the method signature that should precede this body is missing
// from the visible source. The body reads a `url` variable and returns a string,
// so it presumably belongs to a method taking a URL - restore the header.
{
// Download the page text through FetchPage.
String content = this.FetchPage(url);
//Regex linkRegex=new Regex("href//s*=//s*(?:/"(?<1>[^/"]*)/"|(?<1>//S+)", RegexOptions.IgnoreCase);
//@"^/w+((-/w+)|(/./w+))*/@/w+((/.|-)/w+)*/./w+$";
// NOTE(review): this pattern writes '/' where regex escapes normally use '\'
// (e.g. "/w", "/."), and "[com|cn|...]" is a character class rather than an
// alternation. Presumably an e-mail matcher was intended - confirm and correct.
Regex r=new Regex(@"/w+((-/w+)|(/./w))*/@/w+((/.|-)/w+)*/./w+[com|cn|com.cn|net|org|cc|uk]{1,6}",RegexOptions.IgnoreCase);
StringBuilder sb = new StringBuilder();
MatchCollection emailmatchs = r.Matches(content);
// Append every match, each one followed by ';'.
foreach(Match n in emailmatchs)
{
sb.Append(n.ToString());
sb.Append(";");
}
// Nothing matched: return the literal text "null" instead of an empty string.
if( sb.Length == 0 )
sb.Append("null");
return sb.ToString();
}