HTML 字符串处理

最近我正在做一个博客系统,其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见,请告诉我。

 

重载的3个方法列表如下:

public static string GetContentSummary(string content, int length, bool StripHTML){}

public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML){}

public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML){}

 

C# 代码如下:
  /// <summary>
  /// Builds a summary of <paramref name="content"/> limited to <paramref name="length"/>
  /// characters of text, either as plain text or with most HTML markup preserved.
  /// </summary>
  /// <remarks>
  /// The limit is counted in UTF-16 characters (<c>string.Length</c>), not bytes.
  /// When HTML is kept, tags are recognised by a case-insensitive prefix test on the
  /// (up to) three characters following '&lt;', so e.g. any tag whose name starts with
  /// "li" is handled like <c>&lt;li&gt;</c>.
  /// </remarks>
  /// <param name="content">The string to summarise; may contain HTML markup.</param>
  /// <param name="length">Maximum number of text characters to keep.</param>
  /// <param name="StripHTML">
  /// <c>true</c> to remove all tags and return plain text; <c>false</c> to keep markup
  /// while truncating only the text between tags.
  /// </param>
  /// <returns>
  /// The summary, with "..." appended where text was cut off. An empty string when
  /// <paramref name="content"/> is null/empty or <paramref name="length"/> is not positive.
  /// </returns>
  public static string GetContentSummary(string content, int length, bool StripHTML)
  {
      // A non-positive length can never yield any text (and a negative one used to
      // make Substring throw in the plain-text branch).
      if (string.IsNullOrEmpty(content) || length <= 0)
      {
          return "";
      }

      if (StripHTML)
      {
          string text = System.Text.RegularExpressions.Regex.Replace(content, "<[^>]*>", "");
          // NOTE(review): the arguments of the three Replace calls were lost in the
          // published listing. Removing the &nbsp; entity, the no-break space and the
          // ideographic space is a reconstruction -- confirm against the real source.
          text = text.Replace("&nbsp;", "").Replace("\u00A0", "").Replace("\u3000", "");
          return text.Length <= length ? text : text.Substring(0, length) + "...";
      }

      if (content.Length <= length)
      {
          return content;
      }

      int pos = 0;
      int size = 0;                 // text characters (plus image weight) emitted so far
      bool ellipsisAdded = false;   // "..." is written only once
      bool rowClosed = false;       // a closing </tr> has been emitted after the limit
      bool itemClosed = false;      // a closing </li> has been emitted after the limit
      System.Text.StringBuilder sb = new System.Text.StringBuilder();

      while (pos < content.Length)
      {
          char cur = content[pos];
          if (cur != '<')
          {
              if (size < length)
              {
                  sb.Append(cur);
                  size++;
              }
              else if (!ellipsisAdded)
              {
                  sb.Append("...");
                  ellipsisAdded = true;
              }
              pos++;
              continue;
          }

          int close = content.IndexOf('>', pos);
          if (close < 0)
          {
              // Unterminated tag: drop the stray '<' and keep scanning the rest as text
              // instead of calling Substring with a negative length.
              pos++;
              continue;
          }

          // Peek at most three characters after '<'; near the end of the string fewer
          // are available. Invariant lower-casing keeps "LI"/"IMG" matching regardless
          // of the current culture.
          int peek = System.Math.Min(3, content.Length - pos - 1);
          string next = content.Substring(pos + 1, peek).ToLowerInvariant();
          string tag = content.Substring(pos, close + 1 - pos);
          bool underLimit = size < length;

          if (next.StartsWith("p", System.StringComparison.Ordinal)
              && !next.StartsWith("pre", System.StringComparison.Ordinal))
          {
              // Opening paragraph-like tag: dropped; its closing tag becomes a line break.
          }
          else if (next.StartsWith("/p", System.StringComparison.Ordinal)
              && !next.StartsWith("/pr", System.StringComparison.Ordinal))
          {
              if (underLimit)
              {
                  sb.Append("<br />");
              }
          }
          else if (next.StartsWith("br", System.StringComparison.Ordinal))
          {
              if (underLimit)
              {
                  sb.Append("<br />");
              }
          }
          else if (next.StartsWith("img", System.StringComparison.Ordinal))
          {
              if (underLimit)
              {
                  sb.Append(tag);
                  // An image counts against the limit by the length of its tag plus one.
                  size += tag.Length + 1;
              }
          }
          else if (next.StartsWith("li", System.StringComparison.Ordinal)
              || next.StartsWith("/li", System.StringComparison.Ordinal))
          {
              if (underLimit)
              {
                  sb.Append(tag);
              }
              else if (!itemClosed && next.StartsWith("/li", System.StringComparison.Ordinal))
              {
                  // Close the list item that was open when the limit was reached.
                  sb.Append(tag);
                  itemClosed = true;
              }
          }
          else if (next.StartsWith("tr", System.StringComparison.Ordinal)
              || next.StartsWith("/tr", System.StringComparison.Ordinal))
          {
              if (underLimit)
              {
                  sb.Append(tag);
              }
              else if (!rowClosed && next.StartsWith("/tr", System.StringComparison.Ordinal))
              {
                  // Close the table row that was open when the limit was reached.
                  sb.Append(tag);
                  rowClosed = true;
              }
          }
          else if (next.StartsWith("td", System.StringComparison.Ordinal)
              || next.StartsWith("/td", System.StringComparison.Ordinal))
          {
              // Cells keep being emitted until the current row has been closed.
              if (underLimit || !rowClosed)
              {
                  sb.Append(tag);
              }
          }
          else
          {
              // Every other tag is copied through so the surrounding markup stays balanced.
              sb.Append(tag);
          }

          pos = close + 1;
      }

      return sb.ToString();
  }
  141.   
  /// <summary>
  /// Replaces, in place, the value of one column in every row of a table of the given
  /// <see cref="DataSet"/> with its summary.
  /// </summary>
  /// <remarks>
  /// Each cell is converted with <c>ToString()</c> and passed to the string overload of
  /// <c>GetContentSummary</c>; the result is written back to the same cell. Nothing is
  /// returned because the supplied <see cref="DataSet"/> itself is modified.
  /// </remarks>
  /// <param name="ds">The data set containing the table to update.</param>
  /// <param name="TableName">Name of the table inside <paramref name="ds"/>.</param>
  /// <param name="column">Name of the column whose values are summarised.</param>
  /// <param name="length">Maximum number of text characters to keep per value.</param>
  /// <param name="StripHTML">
  /// <c>true</c> to remove HTML tags from the summaries; <c>false</c> to keep markup.
  /// </param>
  /// <exception cref="ArgumentException">
  /// <paramref name="ds"/> has no table named <paramref name="TableName"/>.
  /// </exception>
  public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML)
  {
      DataTable dt = ds.Tables[TableName];
      if (dt == null)
      {
          // The Tables indexer yields null for an unknown name; fail with a clear message
          // instead of a NullReferenceException on the next line.
          throw new System.ArgumentException("Table '" + TableName + "' was not found in the DataSet.", "TableName");
      }

      int rowCount = dt.Rows.Count;
      for (int i = 0; i < rowCount; i++)
      {
          string content = dt.Rows[i][column].ToString();
          // Forward the caller's StripHTML choice (it was previously hard-coded to true).
          dt.Rows[i][column] = HTML.FormatString.GetContentSummary(content, length, StripHTML);
      }
  }
  163.   
  /// <summary>
  /// Replaces, in place, the value of one column in every row of a
  /// <see cref="DataTable"/> with its summary.
  /// </summary>
  /// <remarks>
  /// Each cell is converted with <c>ToString()</c> and passed to the string overload of
  /// <c>GetContentSummary</c>; the result is written back to the same cell. Nothing is
  /// returned because the supplied <see cref="DataTable"/> itself is modified.
  /// </remarks>
  /// <param name="dt">The table to update.</param>
  /// <param name="column">Name of the column whose values are summarised.</param>
  /// <param name="length">Maximum number of text characters to keep per value.</param>
  /// <param name="StripHTML">
  /// <c>true</c> to remove HTML tags from the summaries; <c>false</c> to keep markup.
  /// </param>
  public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML)
  {
      int rowCount = dt.Rows.Count;
      for (int i = 0; i < rowCount; i++)
      {
          string content = dt.Rows[i][column].ToString();
          // Forward the caller's StripHTML choice (it was previously hard-coded to true).
          dt.Rows[i][column] = HTML.FormatString.GetContentSummary(content, length, StripHTML);
      }
  }

原文章:http://blog.csdn.net/byygyy/article/details/5531921?reload

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值