/// 将Html标签转化为空格
/// </summary>
/// <param name="strHtml">待转化的字符串</param>
/// <returns>经过转化的字符串</returns>
private string stripHtml( string strHtml)
<img none';="" codehighlighter1_175_390_open_text.style.display="none" ;="" codehighlighter1_175_390_closed_image.style.display="inline" codehighlighter1_175_390_closed_text.style.display="inline" ;"="" src="http://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockStart.gif" align="top" twffan="done" style="border: 0px; max-width: 100%;"> {
Regex objRegExp = new Regex("<(.|\n)+?>");
string strOutput = objRegExp.Replace(strHtml, "");
strOutput = strOutput.Replace("<", "<");
strOutput = strOutput.Replace(">", ">");
return strOutput;
}
//把所有空格变为一个空格
Regex r = new Regex(@"\s+");
wordsOnly = r.Replace(strResponse, " ");
wordsOnly.Trim();
// 解析页面,查找链接
// 此处尚需扩展,还有某些形式的链接不被识别
string strRef = @"(href|HREF|src|SRC|action|ACTION|Action)[ ]*=[ ]*[""'][^""'#>]+[""']";
MatchCollection matches = new Regex(strRef).Matches(strResponse);
strStatus += "找到: "+matches.Count+" 个链接\r\n";
//获取标题
Match TitleMatch = Regex.Match(strResponse, "<title>([^<]*)</title>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
title = TitleMatch.Groups[1].Value;
//获取描述信息
Match Desc = Regex.Match(strResponse, "<Meta name=\"DESCRIPTION\" content=\"([^<]*)\">", RegexOptions.IgnoreCase | RegexOptions.Multiline);
strdesc = Desc.Groups[1].Value;
//获取网页的大小
size = strResponse.Length;
<img none';="" codehighlighter1_2_129_open_text.style.display="none" ;="" codehighlighter1_2_129_closed_image.style.display="inline" codehighlighter1_2_129_closed_text.style.display="inline" ;"="" src="http://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockStart.gif" align="top" twffan="done" style="line-height: 22px; border: 0px; max-width: 100%; color: rgb(51, 51, 51); font-family: 'Hiragino Sans GB W3', 'Hiragino Sans GB', Arial, Helvetica, simsun, u5b8bu4f53; font-size: 13px;"> /// <summary>
/// 将Html标签转化为空格
/// </summary>
/// <param name="strHtml">待转化的字符串</param>
/// <returns>经过转化的字符串</returns>
private string stripHtml(string strHtml)
<img none';="" codehighlighter1_175_390_open_text.style.display="none" ;="" codehighlighter1_175_390_closed_image.style.display="inline" codehighlighter1_175_390_closed_text.style.display="inline" ;"="" src="http://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockStart.gif" align="top" twffan="done" style="border: 0px; max-width: 100%;"> {
Regex objRegExp = new Regex("<(.|\n)+?>");
string strOutput = objRegExp.Replace(strHtml, "");
strOutput = strOutput.Replace("<", "<");
strOutput = strOutput.Replace(">", ">");
return strOutput;
}