html字符串处理

最新推荐文章于 2024-05-20 15:31:49 发布

安妮家_de小宠

最新推荐文章于 2024-05-20 15:31:49 发布

阅读量880

点赞数

分类专栏： c#

c# 专栏收录该内容

16 篇文章 0 订阅

订阅专栏

最近我正在做一个博客系统，其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见，请告诉我。

重载的3个方法列表如下：

public static string GetContentSummary(string content, int length, bool StripHTML){}

public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML){}

public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML){}

[c-sharp]view plaincopy 
   
 /// <summary>
         /// Builds a short summary of <paramref name="content"/>, limited to
         /// <paramref name="length"/> characters of text. HTML input is supported.
         /// </summary>
         /// <param name="content">The text or HTML fragment to summarize.</param>
         /// <param name="length">
         /// Maximum number of text characters (UTF-16 code units, not bytes) to keep.
         /// Zero or a negative value yields an empty string.
         /// </param>
         /// <param name="StripHTML">
         /// When true, all HTML tags and spaces are removed and a plain-text summary is
         /// returned. When false, markup is kept: paragraph ends become
         /// <c>&lt;br /&gt;</c>, list/table tags past the limit are dropped (except the
         /// first closing tag needed to balance the markup), and every other tag is
         /// copied through so the result stays well-formed.
         /// </param>
         /// <returns>
         /// The summary, followed by "..." when text was cut off; an empty string when
         /// <paramref name="content"/> is null/empty or <paramref name="length"/> is not positive.
         /// </returns>
         public static string GetContentSummary(string content, int length, bool StripHTML)
         {
             // A non-positive budget can never hold any text; returning early also keeps
             // Substring below from being called with a negative length.
             if (string.IsNullOrEmpty(content) || length <= 0)
                 return "";

             if (StripHTML)
             {
                 // The static Regex.Replace uses the framework's regex cache, so the
                 // pattern is not re-parsed on every call.
                 string text = System.Text.RegularExpressions.Regex.Replace(content, "<[^>]*>", "");

                 // Remove ideographic spaces, non-breaking spaces (entity and raw form)
                 // and ordinary spaces.
                 // NOTE(review): the published listing had two visually identical
                 // Replace(" ", "") calls, most likely a decoded "&nbsp;" - confirm.
                 text = text.Replace("\u3000", "")
                            .Replace("&nbsp;", "")
                            .Replace("\u00A0", "")
                            .Replace(" ", "");

                 if (text.Length <= length)
                     return text;
                 return text.Substring(0, length) + "...";
             }

             // Short input (markup included) is returned untouched.
             if (content.Length <= length)
                 return content;

             int pos = 0;
             int size = 0;                  // characters charged against the budget so far
             bool ellipsisAdded = false;    // "..." is emitted only once
             bool rowClosed = false;        // the balancing </tr> after the limit was emitted
             bool itemClosed = false;       // the balancing </li> after the limit was emitted
             System.Text.StringBuilder sb = new System.Text.StringBuilder();

             while (pos < content.Length)
             {
                 char cur = content[pos];

                 // A '<' only starts a tag when a closing '>' exists; otherwise it is
                 // handled as ordinary text instead of producing a negative Substring length.
                 int close = cur == '<' ? content.IndexOf('>', pos) : -1;
                 if (close < 0)
                 {
                     if (size < length)
                     {
                         sb.Append(cur);
                         size++;
                     }
                     else if (!ellipsisAdded)
                     {
                         sb.Append("...");
                         ellipsisAdded = true;
                     }
                     pos++;
                     continue;
                 }

                 int tagLength = close - pos + 1;
                 string tag = content.Substring(pos, tagLength);

                 // Parse the real tag name (bounded by the closing '>') so that, e.g.,
                 // <link> is not mistaken for <li> and a '<' near the end of the input
                 // cannot read past the string.
                 int nameStart = pos + 1;
                 bool closing = nameStart < close && content[nameStart] == '/';
                 if (closing)
                     nameStart++;
                 int nameEnd = nameStart;
                 while (nameEnd < close && char.IsLetterOrDigit(content[nameEnd]))
                     nameEnd++;
                 // Invariant casing: tag names are not linguistic text.
                 string name = content.Substring(nameStart, nameEnd - nameStart).ToLowerInvariant();

                 bool withinLimit = size < length;

                 if (name == "p")
                 {
                     // Opening <p> is dropped; </p> becomes a line break while within the limit.
                     if (closing && withinLimit)
                         sb.Append("<br />");
                 }
                 else if (name == "br" && !closing)
                 {
                     if (withinLimit)
                         sb.Append("<br />");
                 }
                 else if (name == "img" && !closing)
                 {
                     if (withinLimit)
                     {
                         sb.Append(tag);
                         // NOTE(review): the image's markup length (+1) is charged against
                         // the budget, exactly as in the original implementation - confirm
                         // this is the intended weighting for images.
                         size += tagLength + 1;
                     }
                 }
                 else if (name == "li")
                 {
                     if (withinLimit)
                     {
                         sb.Append(tag);
                     }
                     else if (closing && !itemClosed)
                     {
                         // Close the list item that was open when the limit was reached.
                         sb.Append(tag);
                         itemClosed = true;
                     }
                 }
                 else if (name == "tr")
                 {
                     if (withinLimit)
                     {
                         sb.Append(tag);
                     }
                     else if (closing && !rowClosed)
                     {
                         // Close the table row that was open when the limit was reached.
                         sb.Append(tag);
                         rowClosed = true;
                     }
                 }
                 else if (name == "td")
                 {
                     // Cells are kept until the current row has been closed.
                     if (withinLimit || !rowClosed)
                         sb.Append(tag);
                 }
                 else
                 {
                     // All other tags are always copied so the markup stays balanced.
                     sb.Append(tag);
                 }

                 pos = close + 1;
             }

             return sb.ToString();
         }
   
         /// <summary>  
         /// 按字节长度截取DataSet对象中的字符串(支持截取带HTML标记的字符串)  
         /// </summary>  
         /// <param name="ds">DataSet对象</param>  
         /// <param name="TableName">字符串所在的数据表的名称</param>  
         /// <param name="column">字符串所在的数据列的名称</param>  
         /// <param name="length">截取的字节长度</param>  
         /// <param name="StripHTML">截取的结果是否为html代码。如果为true,则去掉Html标记；否则保留html标记。</param>  
         /// <return>因为需要更改的DataSet对象已经通过参数传递过来了，所以不需要返回值。</return>  
         public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML)  
         {  
             string content = "";  
             DataTable dt = ds.Tables[TableName];  
             int ColumnCount = dt.Rows.Count;  
   
             for (int i = 0; i < ColumnCount; i++)  
             {  
                 content = dt.Rows[i][column].ToString();  
                 dt.Rows[i][column] = HTML.FormatString.GetContentSummary(content, length, true);  
             }  
         }  
   
         /// <summary>  
         /// 按字节长度截取DataTable对象中的字符串(支持截取带HTML标记的字符串)  
         /// </summary>  
         /// <param name="dt">DataTable对象</param>  
         /// <param name="column">字符串所在的列的名称</param>  
         /// <param name="length">截取的字节长度</param>  
         /// <param name="StripHTML">截取的结果是否为html代码。如果为true,则去掉Html标记；否则保留html标记。</param>  
         /// <return>因为需要更改的DataTable对象已经通过参数传递过来了，所以不需要返回值。</return>  
         public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML)  
         {  
             string content = "";  
             int ColumnCount = dt.Rows.Count;  
   
             for (int i = 0; i < ColumnCount; i++)  
             {  
                 content = dt.Rows[i][column].ToString();  
                 dt.Rows[i][column] = HTML.FormatString.GetContentSummary(content, length, true);  
             }  
         }  

原文章：http://blog.csdn.net/byygyy/article/details/5531921?reload

安妮家_de小宠

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
html字符串处理

最近我正在做一个博客系统，其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见，请告诉我。重载的3个方法列表如下：public static string GetContentSummary(string content, int length, bool StripHTML){}public sta
复制链接

扫一扫