使用C#代码清除多余HTML

数据库中所有的公司简介都是通过富文本编辑器存入的,因此样式(HTML 标签)也一并存进了数据库。读取时我只想获取纯文本内容,该怎么办呢?可以用下面的方法,通过正则表达式把 HTML 标签过滤掉。

 

代码:
  #region  过滤HTML
        
///   <summary>
        
///  过滤html标签
        
///   </summary>
        
///   <param name="strHtml"> html的内容 </param>
        
///   <returns></returns>
         public   static   string  StripHTML( string  strHtml)
        {
            
string [] aryReg  = {
                                  
@" <script[^>]*?>.*?</script> " ,

                                  
@" <(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?> " ,
                                  
@" ([\r\n])[\s]+ " ,
                                  
@" &(quot|#34); " ,
                                  
@" &(amp|#38); " ,
                                  
@" &(lt|#60); " ,
                                  
@" &(gt|#62); "
                                  
@" &(nbsp|#160); "
                                  
@" &(iexcl|#161); " ,
                                  
@" &(cent|#162); " ,
                                  
@" &(pound|#163); " ,
                                  
@" &(copy|#169); " ,
                                  
@" &#(\d+); " ,
                                  
@" --> " ,
                                  
@" <!--.*\n "
                              };

            
string [] aryRep  =  {
                                   
"" ,
                                   
"" ,
                                   
"" ,
                                   
" \ "" ,
                                    " & " ,
                                   
" < " ,
                                   
" > " ,
                                   
"   " ,
                                   
" \xa1 " , // chr(161),
                                    " \xa2 " , // chr(162),
                                    " \xa3 " , // chr(163),
                                    " \xa9 " , // chr(169),
                                    "" ,
                                   
" \r\n " ,
                                   
""
                               };

            
string  newReg  =  aryReg[ 0 ];
            
string  strOutput  =  strHtml;
            
for  ( int  i  =   0 ; i  <  aryReg.Length; i ++ )
            {
                System.Text.RegularExpressions.Regex regex 
=   new  System.Text.RegularExpressions.Regex(aryReg[i], System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                strOutput 
=  regex.Replace(strOutput, aryRep[i]);
            }
            strOutput.Replace(
" < " "" );
            strOutput.Replace(
" > " "" );
            strOutput.Replace(
" \r\n " "" );
            
return  strOutput;
        }
        
#endregion

 

 

转载于:https://www.cnblogs.com/Simcoder/archive/2010/09/11/1823869.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值