去除String字符串中的HTML元素(包含script元素)
/// <summary>
/// Removes HTML markup from a string and returns the remaining plain text.
/// Script blocks and comments are dropped together with their bodies, every other
/// tag is removed, a small set of character entities is decoded, and any angle
/// bracket that is still present afterwards is deleted.
/// add:zhangfl@2012-10-18
/// </summary>
/// <param name="content">The text that may contain HTML. May be null or empty.</param>
/// <returns>
/// The text with markup removed; an empty string when <paramref name="content"/>
/// is null or empty.
/// </returns>
public static string DelHTML(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // CultureInvariant keeps case-insensitive matching independent of the current
    // culture (e.g. "SCRIPT" vs "script" under Turkish casing rules).
    System.Text.RegularExpressions.RegexOptions ignoreCase =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase |
        System.Text.RegularExpressions.RegexOptions.CultureInvariant;

    // Singleline lets '.' match line breaks so that multi-line blocks are matched.
    System.Text.RegularExpressions.RegexOptions ignoreCaseDotAll =
        ignoreCase | System.Text.RegularExpressions.RegexOptions.Singleline;

    // Remove script elements including their bodies.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"<script[^>]*?>.*?</script>", "", ignoreCaseDotAll);

    // Remove terminated comments as a unit, so a '>' inside a comment does not
    // cut the comment short and leak the rest of it into the output.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"<!--.*?-->", "", System.Text.RegularExpressions.RegexOptions.Singleline);

    // Remove every remaining tag.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"<(.[^>]*)>", "");

    // Remove a line break together with the whitespace that follows it.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"([\r\n])[\s]+", "");

    // Remove stray comment terminators and unterminated comment openers
    // (the latter up to the end of the line).
    content = System.Text.RegularExpressions.Regex.Replace(content, @"-->", "");
    content = System.Text.RegularExpressions.Regex.Replace(content, @"<!--.*", "");

    // Decode the supported entities in a single pass so that the output of one
    // replacement is never decoded again (e.g. "&amp;lt;" yields "&lt;", not "<").
    // Numeric entities that are not listed below are removed.
    content = System.Text.RegularExpressions.Regex.Replace(
        content,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        entity =>
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    return string.Empty;
            }
        },
        ignoreCase);

    // string.Replace returns a new string, so the result has to be assigned.
    // Deleting the remaining angle brackets also ensures that decoded
    // "&lt;"/"&gt;" entities cannot re-introduce markup into the result.
    content = content.Replace("<", "");
    content = content.Replace(">", "");
    content = content.Replace("\r\n", "");

    return content;
}