// Demo: list every link on an index page whose path has the same shape as a sample article URL.
// The sample URL's path is turned into a regex by replacing each run of digits with \d+ and
// escaping the dots. This variant deliberately builds the pattern in double-escaped form and
// then uses Regex.Unescape to turn it into a usable pattern.
Collection collection = new Collection();
collection.Coding = "GB2312";
// NOTE(review): presumably fetches the page text using the encoding set in Coding; confirm against Collection.
string HtmlCode = collection.ReaderHTMLCode("http://www.qqtu88.com/qqjq/index.html");
string pageurl = "http://www.qqtu88.com/qqjq/QQkongjianjishu/2011/0806/27479.html";
string pReg = "\\d+";
string Link_str = "";
Uri u = new Uri(pageurl);

// Strip the scheme and host so only the lower-cased path of the sample URL remains.
pageurl = pageurl.ToLower().Replace("http://", "").Replace(u.Host.ToLower(), "");
// Replace every digit run with the literal text \\d+ (two backslashes), and every dot with \\.
pageurl = Regex.Replace(pageurl, pReg, "\\\\d+");
pageurl = pageurl.Replace(".", "\\\\.");
// Shows the double-escaped form of the pattern.
Label1.Text = pageurl;

// Regex.Unescape collapses \\d+ to \d+ and \\. to \. so the text becomes a working pattern.
// (The method name was missing in the original call, which did not compile.)
MatchCollection m = Regex.Matches(HtmlCode.ToLower(), Regex.Unescape(pageurl));
for (int c = 0; c <= m.Count - 1; c++)
{
    string strNew = m[c].ToString();
    // Skip URLs that have already been collected.
    if (Link_str.IndexOf(strNew) == -1)
    {
        Link_str += "http://" + u.Host + strNew + ",";
    }
}
// Comma-separated list of absolute URLs (with a trailing comma).
TextBox1.Text = Link_str;
最主要的是 Regex.Unescape 这个函数，它可以对变量中的转义字符进行反转义。
下面的方法也可以：
// Alternative demo: same link extraction, but the pattern is written single-escaped and
// compiled into a Regex instance, so no unescaping step is needed.
Collection fetcher = new Collection();
fetcher.Coding = "GB2312";
// NOTE(review): presumably fetches the page text using the encoding set in Coding; confirm against Collection.
string indexHtml = fetcher.ReaderHTMLCode("http://www.qqtu88.com/qqjq/index.html");
string sampleUrl = "http://www.qqtu88.com/qqjq/QQkongjianjishu/2011/0806/27479.html";
string digitRun = "\\d+";
Uri sampleUri = new Uri(sampleUrl);

// Reduce the sample URL to its lower-cased path, then generalise it into a pattern:
// digit runs become \d+ and dots become \.
string pathPattern = sampleUrl.ToLower();
pathPattern = pathPattern.Replace("http://", "");
pathPattern = pathPattern.Replace(sampleUri.Host.ToString().ToLower(), "");
pathPattern = Regex.Replace(pathPattern, digitRun, "\\d+");
pathPattern = pathPattern.Replace(".", "\\.");
Label1.Text = pathPattern;

Regex linkRegex = new Regex(pathPattern, RegexOptions.IgnoreCase);
string links = "";
foreach (Match hit in linkRegex.Matches(indexHtml.ToLower()))
{
    string path = hit.Value;
    // Only append paths that are not already present in the collected list.
    if (links.IndexOf(path) < 0)
    {
        links = links + "http://" + sampleUri.Host + path + ",";
    }
}
// Comma-separated list of absolute URLs (with a trailing comma).
TextBox1.Text = links;