方法1:
/// <summary>
/// Strips HTML markup from a string and decodes a small, fixed set of character entities.
/// </summary>
/// <remarks>
/// The rules are applied one after another, case-insensitively, in the order listed in the
/// method body. After the rules have run, every remaining angle bracket and every CR LF pair
/// is deleted. Earlier revisions called <c>String.Replace</c> for that final step but discarded
/// the returned string, so the step had no effect.
/// </remarks>
/// <param name="HTML">The HTML text to clean. Null or empty input yields an empty string.</param>
/// <returns>The cleaned text.</returns>
public static string ClearHTMLTags1(string HTML)
{
    if (string.IsNullOrEmpty(HTML))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }. Keeping the two side by side prevents the
    // pattern list and the replacement list from drifting out of step.
    string[][] rules =
    {
        // Script element on a single line. '.' does not match '\n' because
        // RegexOptions.Singleline is not set, so a script body spanning several lines is
        // not removed here; only its tags are removed by the generic tag rule below.
        new string[] { @"<script[^>]*?>.*?</script>", "" },
        // Generic opening/closing tag, with optional namespace prefix and attributes.
        new string[] { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
        // A line break together with the whitespace that follows it.
        new string[] { @"([\r\n])[\s]+", "" },
        new string[] { @"&(quot|#34);", "\"" },
        new string[] { @"&(amp|#38);", "&" },
        new string[] { @"&(lt|#60);", "<" },
        new string[] { @"&(gt|#62);", ">" },
        new string[] { @"&(nbsp|#160);", " " },
        new string[] { @"&(iexcl|#161);", "\xa1" },
        new string[] { @"&(cent|#162);", "\xa2" },
        new string[] { @"&(pound|#163);", "\xa3" },
        new string[] { @"&(copy|#169);", "\xa9" },
        // Any numeric character reference not handled above is dropped, not decoded.
        new string[] { @"&#(\d+);", "" },
        // The comment terminator becomes a line break so that the next rule, which deletes
        // from "<!--" through the end of the line, removes a single-line comment entirely.
        new string[] { @"-->", "\r\n" },
        new string[] { @"<!--.*\n", "" },
    };

    string s = HTML;
    foreach (string[] rule in rules)
    {
        // The static overload uses the framework's regex cache instead of building a new
        // Regex object for every rule on every call.
        s = Regex.Replace(s, rule[0], rule[1], RegexOptions.Multiline | RegexOptions.IgnoreCase);
    }

    // Strings are immutable: Replace returns a new string, so the result must be assigned.
    s = s.Replace("<", "");
    s = s.Replace(">", "");
    s = s.Replace("\r\n", "");
    return s;
}
方法2:
方法3:
方法4:
转载自：http://blog.csdn.net/hougelou/article/details/7901066
/// <summary>
/// Removes a fixed list of HTML tags, numeric character references and script-related
/// fragments from the given text.
/// </summary>
/// <param name="Content">The text to clean.</param>
/// <returns>
/// The text after every pattern in the list has been applied in order, with each match
/// replaced by an empty string.
/// </returns>
protected string ClearHtml(string Content)
{
    // Patterns are applied top to bottom; every match is replaced with "".
    // Only the tags named here are removed; any other tag passes through unchanged.
    // The tag patterns have no word boundary after the tag name, so for example
    // "</?b[^>]*>" also removes <br> and <body>, and "</?p[^>]*>" also removes <pre>.
    string[] patterns =
    {
        // Numeric character references. ';' is excluded from the run so the match stops
        // at the first semicolon; the earlier "[^>]*" was greedy and deleted all text
        // lying between one reference and a later semicolon.
        "&#[^>;]*;",
        "</?marquee[^>]*>",
        "</?object[^>]*>",
        "</?param[^>]*>",
        "</?embed[^>]*>",
        "</?table[^>]*>",
        // Non-breaking space, as an entity or as the literal U+00A0 character.
        // NOTE(review): this pattern previously appeared as a bare " ", which deleted
        // every space in the text; presumably an "&nbsp;" that was decoded when the
        // snippet was published - confirm against the original intent.
        "&nbsp;|\u00A0",
        "</?tr[^>]*>",
        "</?th[^>]*>",
        "</?p[^>]*>",
        "</?a[^>]*>",
        "</?img[^>]*>",
        "</?tbody[^>]*>",
        "</?li[^>]*>",
        "</?span[^>]*>",
        "</?div[^>]*>",
        // Second pass for "th", kept so the sequence of passes is unchanged.
        "</?th[^>]*>",
        "</?td[^>]*>",
        // Removes the script tags only; the text between them is left in place.
        "</?script[^>]*>",
        "(javascript|jscript|vbscript|vbs):",
        // NOTE(review): this also matches inside ordinary words (e.g. "monkey" contains
        // "onkey"), so plain text can be altered by this pass.
        "on(mouse|exit|error|click|key)",
        "<\\?xml[^>]*>",
        "<\\/?[a-z]+:[^>]*>",
        "</?font[^>]*>",
        "</?b[^>]*>",
        "</?u[^>]*>",
        "</?i[^>]*>",
        "</?strong[^>]*>",
    };

    foreach (string pattern in patterns)
    {
        Content = Zxj_ReplaceHtml(pattern, "", Content);
    }

    return Content;
}
/// <summary>
/// Replaces every case-insensitive match of a regular expression in the given text.
/// </summary>
/// <param name="patrn">Regular expression describing the text to replace.</param>
/// <param name="strRep">Replacement text substituted for each match.</param>
/// <param name="content">Text to search; null is treated as an empty string.</param>
/// <returns>The text with every match of <paramref name="patrn"/> replaced.</returns>
private string Zxj_ReplaceHtml(string patrn, string strRep, string content)
{
    if (content == null)
    {
        content = "";
    }

    // The static overload goes through the framework's regex cache, so repeated calls
    // with the same pattern do not build a new Regex object each time.
    return Regex.Replace(content, patrn, strRep, RegexOptions.IgnoreCase);
}