叮当猫咪
试试这个:

public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    private void Form1_Load(object sender, EventArgs e)
    {
        var res = Find(html);
    }

    public static List<LinkItem> Find(string file)
    {
        List<LinkItem> list = new List<LinkItem>();

        // 1.
        // Find all matches in file.
        MatchCollection m1 = Regex.Matches(file, @"(<a.*?>.*?</a>)", RegexOptions.Singleline);

        // 2.
        // Loop over each match.
        foreach (Match m in m1)
        {
            string value = m.Groups[1].Value;
            LinkItem i = new LinkItem();

            // 3.
            // Get href attribute.
            Match m2 = Regex.Match(value, @"href=\""(.*?)\""", RegexOptions.Singleline);
            if (m2.Success)
            {
                i.Href = m2.Groups[1].Value;
            }

            // 4.
            // Remove inner tags from text.
            string t = Regex.Replace(value, @"\s*<.+?>\s*", "", RegexOptions.Singleline);
            i.Text = t;

            list.Add(i);
        }
        return list;
    }

    public struct LinkItem
    {
        public string Href;
        public string Text;

        public override string ToString()
        {
            return Href + "\n\t" + Text;
        }
    }
}

输入:

string html = "<a href=\"www.aaa.xx/xx.zz?id=xxxx&name=xxxx\"></a> 2. <a href=\"http://www.aaa.xx/xx.zz?id=xxxx&name=xxxx\"></a>";

结果:

[0] = {www.aaa.xx/xx.zz?id=xxxx&name=xxxx}
[1] = {http://www.aaa.xx/xx.zz?id=xxxx&name=xxxx}

C# 抓取 HTML 链接:抓取 HTML 可以提取重要的页面元素。它对网站管理员和 ASP.NET 开发人员有许多合法用途。使用 Regex 类型和 WebClient,我们实现了对 HTML 的屏幕抓取。

已编辑

另一种简单的方法:您可以使用 WebBrowser 控件从 a 标签中获取 href,例如(请参阅我的示例):

public Form1()
{
    InitializeComponent();
    webBrowser1.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(webBrowser1_DocumentCompleted);
}

private void Form1_Load(object sender, EventArgs e)
{
    webBrowser1.DocumentText = "<a href=\"www.aaa.xx/xx.zz?id=xxxx&name=xxxx\"></a><a href=\"http://www.aaa.xx/xx.zz?id=xxxx&name=xxxx\"></a>";
}

void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    List<string> href = new List<string>();
    foreach (HtmlElement el in webBrowser1.Document.GetElementsByTagName("a"))
    {
        href.Add(el.GetAttribute("href"));
    }
}