C#正则清理网络字符

  /// <summary>
  /// Reduces an HTML fragment to plain text: removes script elements, tags and
  /// comment remnants, decodes a small set of HTML entities, and finally strips
  /// any angle brackets that are still present.
  /// </summary>
  /// <param name="Htmlstring">The HTML fragment to clean.</param>
  /// <returns>
  /// The cleaned text. Because the last step removes every '&lt;' and '&gt;',
  /// brackets produced by decoding &amp;lt; / &amp;gt; do not survive either.
  /// </returns>
  /// <exception cref="System.ArgumentNullException">
  /// Raised by <c>Regex.Replace</c> when <paramref name="Htmlstring"/> is null.
  /// </exception>
  public static string NoHTML(string Htmlstring)
        {
            // Remove script elements including their content. Singleline lets ".*?"
            // span line breaks so that multi-line script bodies are removed too.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Remove every remaining tag.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            // Remove a CR or LF together with the whitespace run that follows it.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            // Remove leftovers of comments whose body contained '>' and therefore
            // was not consumed by the tag pattern above.
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Named / numeric entities. Quotation-mark entities are dropped, the
            // others are replaced by the character they stand for.
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);
            // Any other numeric entity is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
            // Typographic entities are dropped rather than decoded.
            Htmlstring = Regex.Replace(Htmlstring, @"&rdquo;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&ldquo;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&mdash;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&hellip;", "", RegexOptions.IgnoreCase);

            // string.Replace returns a new string; the result has to be assigned
            // back, otherwise these three calls have no effect.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");

            return Htmlstring;
        }

        /// <summary>
        /// Cleans font-size style markup: removes font, span, smart-tag (st1) and
        /// o:p tags, and turns styled div / p opening tags into plain div tags.
        /// </summary>
        /// <param name="Htmlstring">The HTML fragment to clean.</param>
        /// <returns>The fragment after all substitutions have been applied in order.</returns>
        public static string CleanWORD(string Htmlstring)
        {
            // Each row is { pattern, replacement }. Rows are applied top to bottom,
            // case-insensitively, each one on the output of the previous row.
            string[,] substitutions =
            {
                { @"<font size[^>]*?>", "" },
                { @"<font face[^>]*?>", "" },
                { @"<font>", "" },
                { @"</font>", "" },
                { @"<span style[^>]*?>", "" },
                { @"<span lang[^>]*?>", "" },
                { @"</span>", "" },
                { @"<span>", "" },
                { @"<div style[^>]*?>", "<div>" },
                { @"<p style[^>]*?>", "<div>" },
                { @"<p class[^>]*?>", "<div>" },
                { @"<p align", "<div align" },
                { @"<st1[^>]*?>", "" },
                { @"</st1[^>]*?>", "" },
                { @"<o:p>", "" },
                { @"</o:p>", "" }
            };

            string cleaned = Htmlstring;
            for (int row = 0; row < substitutions.GetLength(0); row++)
            {
                cleaned = Regex.Replace(cleaned, substitutions[row, 0], substitutions[row, 1], RegexOptions.IgnoreCase);
            }

            return cleaned;
        }

        /// <summary>
        /// Cleans non-breaking spaces: replaces every &amp;nbsp; / &amp;#160; entity
        /// (matched case-insensitively) with an ordinary space character.
        /// </summary>
        /// <param name="Htmlstring">The HTML fragment to clean.</param>
        /// <returns>The fragment with the entities replaced by spaces.</returns>
        public static string CleanWORDA(string Htmlstring)
        {
            const string nbspEntity = @"&(nbsp|#160);";

            return Regex.Replace(Htmlstring, nbspEntity, " ", RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Cleans blank lines: drops p / div / strong elements that contain only
        /// spaces, turns "&lt;br /&gt;" into a paragraph boundary, rewrites
        /// attribute-less p tags as div tags and removes the empty divs left over.
        /// </summary>
        /// <param name="Htmlstring">The HTML fragment to clean.</param>
        /// <returns>The fragment after all substitutions have been applied in order.</returns>
        public static string CleanWORDB(string Htmlstring)
        {
            // { pattern, replacement } pairs; the order matters because later
            // rules operate on the output of earlier ones.
            string[][] steps =
            {
                new[] { @"<p>[ ]*</p>", "" },
                new[] { @"<div>[ ]*</div>", "" },
                new[] { @"<strong>[ ]*</strong>", "" },
                new[] { @"<br />", "</p><p>" },
                new[] { @"<p>", "<div>" },
                new[] { @"</p>", "</div>" },
                new[] { @"<div align[^>]*?></div>", "" },
                new[] { @"<div></div>", "" }
            };

            string result = Htmlstring;
            foreach (string[] step in steps)
            {
                result = Regex.Replace(result, step[0], step[1], RegexOptions.IgnoreCase);
            }

            return result;
        }

        /// <summary>
        /// Removes empty div elements: first those whose opening tag starts with
        /// "&lt;div align", then bare "&lt;div&gt;&lt;/div&gt;" pairs. Each pattern
        /// is applied in a single pass, case-insensitively.
        /// </summary>
        /// <param name="Htmlstring">The HTML fragment to clean.</param>
        /// <returns>The fragment with the matched empty divs removed.</returns>
        public static string CleanCSS(string Htmlstring)
        {
            string withoutAlignedDivs = Regex.Replace(Htmlstring, @"<div align[^>]*?></div>", "", RegexOptions.IgnoreCase);
            string withoutEmptyDivs = Regex.Replace(withoutAlignedDivs, @"<div></div>", "", RegexOptions.IgnoreCase);

            return withoutEmptyDivs;
        }

        /// <summary>
        /// Cleans hyperlinks: removes the opening and closing tags of anchor
        /// (&lt;a&gt;) elements while keeping the text between them.
        /// </summary>
        /// <param name="Htmlstring">The HTML fragment to clean.</param>
        /// <returns>The fragment with anchor tags removed.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// Raised by <c>Regex.Replace</c> when <paramref name="Htmlstring"/> is null.
        /// </exception>
        public static string CleanHTML(string Htmlstring)
        {
            // The negative lookahead requires the tag name to be exactly "a", so
            // that opening tags of elements such as <abbr>, <article> or <address>
            // are not stripped (which would leave their closing tags orphaned).
            Htmlstring = Regex.Replace(Htmlstring, @"<a(?![\w:-])[^>]*>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"</a>", "", RegexOptions.IgnoreCase);

            return Htmlstring;
        }

        /// <summary>
        /// Cleans keywords: replaces every case-insensitive match of a
        /// caller-supplied regular expression with the given replacement.
        /// </summary>
        /// <param name="Htmlstring">The text to process.</param>
        /// <param name="rule">Regular-expression pattern to search for.</param>
        /// <param name="newstr">Replacement pattern inserted for each match.</param>
        /// <returns>The text with all matches replaced.</returns>
        public static string CleanX(string Htmlstring, string rule, string newstr)
        {
            string replaced = Regex.Replace(Htmlstring, rule, newstr, RegexOptions.IgnoreCase);
            return replaced;
        }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值