获取页面内容-CSDN博客

本文链接：https://blog.csdn.net/mowuyan/article/details/4379066

using System.Net;
using System.Text;                      // Encoding
using System.Text.RegularExpressions;   // regular expressions
using Microsoft.ApplicationBlocks.Data; // SqlHelper class

// Purpose: download a web page, pull one fragment out of it with a regular
// expression, then collect every link-like match and turn each one into an
// absolute URI.
//
// Placeholders left exactly as in the original snippet; replace before use:
//   目标uri        - the address of the page to download
//   @"正则表达式"  - pattern selecting the fragment stored in a_href
//   @"表达式"      - pattern selecting each relative link

// Fetch the raw bytes of the target page. WebClient is IDisposable, so it is
// released as soon as the download completes.
byte[] pageData1;
using (WebClient hq1 = new WebClient())
{
    pageData1 = hq1.DownloadData(目标uri);
}

// Decode with the system default encoding. The original comment says the page
// encoding should be determined here; if the page is served in another charset
// (e.g. UTF-8 or GB2312), swap in the matching Encoding instance.
string pageHtml1 = Encoding.Default.GetString(pageData1);

// Extract the wanted page content into a_href (first match only; Value is the
// empty string when nothing matches).
string str_a = @"正则表达式";
Regex htmlRegex1 = new Regex(str_a, RegexOptions.IgnoreCase | RegexOptions.Compiled);
Match mc1 = htmlRegex1.Match(pageHtml1);
string a_href = mc1.Value;

// Declared by the original snippet but not used in the visible code; kept in
// case later code relies on it.
string htmls = "";

// Collect every match of the link pattern.
// NOTE(review): the original searched an undeclared variable `pageHtml`; the
// downloaded page text is used here. If the links were meant to be taken from
// the fragment extracted above, search a_href instead - confirm intent.
Regex HtmlRegex = new Regex(@"表达式", RegexOptions.IgnoreCase | RegexOptions.Compiled);
MatchCollection mc = HtmlRegex.Matches(pageHtml1);

// One absolute URI per match.
string[] u_uir = new string[mc.Count];
for (int i = 0; i < mc.Count; i++)
{
    string uri = mc[i].Value;

    // Drop the first eight characters and the final character of the match.
    // These offsets presumably strip the markup surrounding the path; they
    // depend on the pattern used above, and a match shorter than nine
    // characters throws ArgumentOutOfRangeException here.
    uri = uri.Substring(8);
    uri = uri.Remove(uri.Length - 1, 1);

    // Prefix the site root to form the absolute URI.
    u_uir[i] = "http://www.xxxxxxx.com" + uri;
}