以下是参考 CSDN 上各位高手的过滤方法后,我自己修改的版本。
主要改动是把 "" 换成 string.Empty;理论上在数据量大时会稍快一些(实际差异很小)。
/// <summary>
/// Regex patterns for the named/numeric HTML entities that <see cref="NoHTML"/>
/// decodes, paired with the literal character each one stands for.
/// Rows are applied in order, case-insensitively.
/// </summary>
private static readonly string[,] NoHtmlEntityMap =
{
    { @"&(quot|#34);", "\"" },
    { @"&(amp|#38);", "&" },
    { @"&(lt|#60);", "<" },
    { @"&(gt|#62);", ">" },
    { @"&(nbsp|#160);", " " },
    { @"&(iexcl|#161);", "\u00A1" },
    { @"&(cent|#162);", "\u00A2" },
    { @"&(pound|#163);", "\u00A3" },
    { @"&(copy|#169);", "\u00A9" },
};

/// <summary>
/// Strips HTML markup from a string and returns HTML-encoded plain text.
/// </summary>
/// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The text with script blocks, tags, comment remnants, line breaks followed by
/// whitespace, and any remaining angle brackets removed; a handful of common
/// entities are decoded and other numeric entities dropped. The result is then
/// HTML-encoded and trimmed. Returns an empty string for null or empty input.
/// </returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets ".*?" cross line breaks so the
    // body of a multi-line script is removed together with its tags.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script>",
        string.Empty,
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", string.Empty, RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace run that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", string.Empty, RegexOptions.IgnoreCase);

    // Remove comment remnants that the tag pattern above did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", string.Empty, RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", string.Empty, RegexOptions.IgnoreCase);

    // Decode the known entities to their literal characters.
    for (int row = 0; row < NoHtmlEntityMap.GetLength(0); row++)
    {
        Htmlstring = Regex.Replace(
            Htmlstring,
            NoHtmlEntityMap[row, 0],
            NoHtmlEntityMap[row, 1],
            RegexOptions.IgnoreCase);
    }

    // Drop any other numeric character reference.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", string.Empty, RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned or the
    // call has no effect. This removes stray or entity-decoded angle brackets.
    Htmlstring = Htmlstring
        .Replace("<", string.Empty)
        .Replace(">", string.Empty)
        .Replace("\r\n", string.Empty);

    // The static HttpUtility encoder is used so the method also works when
    // there is no current HTTP request (HttpContext.Current would be null).
    return HttpUtility.HtmlEncode(Htmlstring).Trim();
}