/// <summary>
/// Extracts the <c>href</c> target of every <c>&lt;a&gt;</c> tag found in the given HTML text
/// and returns the targets as absolute links.
/// </summary>
/// <param name="sbs">Buffer holding the HTML text to scan. A null buffer yields an empty list.</param>
/// <param name="site">
/// Prefix prepended to every link that does not start with "http". A "/" separator is
/// inserted when the link does not already begin with one.
/// </param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, in document order, containing only the links
/// for which <c>ValidPage</c> returned true.
/// </returns>
public static ArrayList GetAbsoluteLink(System.Text.StringBuilder sbs, string site)
{
    ArrayList urls = new ArrayList();
    if (sbs == null)
    {
        return urls;
    }

    string content = sbs.ToString();

    // Accept the three ways an href value can be written: 'single-quoted',
    // "double-quoted", or unquoted (terminated by whitespace or '>').
    // All three alternatives capture into the same named group.
    Regex re = new Regex(
        @"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    foreach (Match match in re.Matches(content))
    {
        string item = match.Groups["href"].Value.Trim();

        // The unquoted alternative can still pick up stray quote characters; drop them.
        item = item.Replace("\"", "");
        item = item.Replace("'", "");

        // Nothing left after stripping (e.g. href=""): skip instead of indexing into
        // an empty string.
        if (item.Length == 0)
        {
            continue;
        }

        // Only a link that *starts* with "http" is already absolute. A relative link
        // that merely contains "http" (such as "/go?url=http://x") still needs the prefix.
        if (!item.StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
        {
            if (item[0] != '/')
            {
                item = site + "/" + item;
            }
            else
            {
                item = site + item;
            }
        }

        if (ValidPage(item))
        {
            urls.Add(item);
        }
    }

    return urls;
}
2007-03-02 |提取HTML文本中的所有超级链接! 20100321
最新推荐文章于 2021-07-11 07:27:52 发布