C#中，截取Html字符串的函数（参数可以是string、DataSet或者DataTable）

最新推荐文章于 2021-06-17 04:50:33 发布

weixin_30952535

最新推荐文章于 2021-06-17 04:50:33 发布

阅读量115

点赞数

文章标签： c#

原文链接：http://www.cnblogs.com/lihuanhuan/archive/2010/04/26/10612238.html

版权

最近我正在做一个博客系统，其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见，请告诉我。

重载的3个方法列表如下：

public static string GetContentSummary(string content, int length, bool StripHTML){}

public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML){}

public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML){}

/// <summary>
/// Matches a single HTML tag (everything from '&lt;' up to the next '&gt;').
/// Built once and reused instead of being re-created on every call.
/// </summary>
private static readonly System.Text.RegularExpressions.Regex HtmlTagRegex =
    new System.Text.RegularExpressions.Regex("<[^>]*>");

/// <summary>
/// Truncates a string that may contain HTML markup to a summary of at most
/// <paramref name="length"/> text characters.
/// </summary>
/// <param name="content">The string to summarize.</param>
/// <param name="length">
/// Maximum number of characters to keep (UTF-16 characters, not bytes).
/// Zero or a negative value yields an empty string.
/// </param>
/// <param name="StripHTML">
/// When <c>true</c>, all tags are removed and the result is plain text.
/// When <c>false</c>, markup is kept and only the text between tags is counted.
/// </param>
/// <returns>
/// The summary; "..." is appended when text was cut off. An empty string is
/// returned for null/empty input or a non-positive length.
/// </returns>
public static string GetContentSummary(string content, int length, bool StripHTML)
{
    if (string.IsNullOrEmpty(content) || length <= 0)
    {
        return "";
    }

    if (StripHTML)
    {
        return SummarizeAsPlainText(content, length);
    }

    return SummarizeKeepingHtml(content, length);
}

/// <summary>
/// Removes every tag and the listed whitespace characters, then cuts the
/// remaining text to <paramref name="length"/> characters.
/// </summary>
private static string SummarizeAsPlainText(string content, int length)
{
    string text = HtmlTagRegex.Replace(content, "");

    // NOTE(review): the second and third replacements look identical in this copy of
    // the source; one of them may originally have targeted a different whitespace
    // character (e.g. a non-breaking space) - confirm against the original file.
    text = text.Replace("　", "").Replace(" ", "").Replace(" ", "");

    if (text.Length <= length)
    {
        return text;
    }

    return text.Substring(0, length) + "...";
}

/// <summary>
/// Walks the markup once, copying text until <paramref name="length"/> characters
/// have been emitted and deciding tag by tag what to keep.
/// </summary>
private static string SummarizeKeepingHtml(string content, int length)
{
    if (content.Length <= length)
    {
        return content;
    }

    int pos = 0;
    int size = 0;                     // characters charged against the limit so far
    bool ellipsisAdded = false;
    bool closingRowKept = false;      // the first </tr> after the limit was already copied
    bool closingItemKept = false;     // the first </li> after the limit was already copied
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    while (pos < content.Length)
    {
        char cur = content[pos];

        if (cur != '<')
        {
            if (size < length)
            {
                sb.Append(cur);
                size++;
            }
            else if (!ellipsisAdded)
            {
                sb.Append("...");
                ellipsisAdded = true;
            }

            pos++;
            continue;
        }

        int close = content.IndexOf('>', pos);
        if (close < 0)
        {
            // Unterminated tag: skip the stray '<' and keep scanning the rest as text,
            // instead of calling Substring with a negative length.
            pos++;
            continue;
        }

        int tagEnd = close + 1;
        string tag = content.Substring(pos, tagEnd - pos);

        // Up to three characters after '<' identify the tag. Clamp the length so a tag
        // near the end of the input cannot read past it, and lower-case with the
        // invariant culture so "IMG"/"LI" match under every culture.
        int prefixLength = System.Math.Min(3, content.Length - pos - 1);
        string name = content.Substring(pos + 1, prefixLength).ToLowerInvariant();
        bool withinLimit = size < length;

        if (HasPrefix(name, "p") && !HasPrefix(name, "pre"))
        {
            // Opening tags starting with "p" (other than <pre>) are dropped.
        }
        else if (HasPrefix(name, "/p") && !HasPrefix(name, "/pr"))
        {
            // A closing paragraph becomes a line break while still within the limit.
            if (withinLimit)
            {
                sb.Append("<br />");
            }
        }
        else if (HasPrefix(name, "br"))
        {
            if (withinLimit)
            {
                sb.Append("<br />");
            }
        }
        else if (HasPrefix(name, "img"))
        {
            if (withinLimit)
            {
                sb.Append(tag);
                // The whole tag length plus one is charged against the limit.
                size += tag.Length + 1;
            }
        }
        else if (HasPrefix(name, "li") || HasPrefix(name, "/li"))
        {
            if (withinLimit)
            {
                sb.Append(tag);
            }
            else if (!closingItemKept && HasPrefix(name, "/li"))
            {
                // Past the limit only the first </li> is copied.
                sb.Append(tag);
                closingItemKept = true;
            }
        }
        else if (HasPrefix(name, "tr") || HasPrefix(name, "/tr"))
        {
            if (withinLimit)
            {
                sb.Append(tag);
            }
            else if (!closingRowKept && HasPrefix(name, "/tr"))
            {
                // Past the limit only the first </tr> is copied.
                sb.Append(tag);
                closingRowKept = true;
            }
        }
        else if (HasPrefix(name, "td") || HasPrefix(name, "/td"))
        {
            // Cell tags keep being copied past the limit until that </tr> has been kept.
            if (withinLimit || !closingRowKept)
            {
                sb.Append(tag);
            }
        }
        else
        {
            // Every other tag is always copied, even past the limit
            // (presumably so that already opened elements still get closed).
            sb.Append(tag);
        }

        pos = tagEnd;
    }

    return sb.ToString();
}

/// <summary>
/// Ordinal (culture-independent) prefix test.
/// </summary>
private static bool HasPrefix(string value, string prefix)
{
    return value.StartsWith(prefix, System.StringComparison.Ordinal);
}

/// <summary>
/// Replaces, in place, the value of one column in every row of a table of a
/// <see cref="DataSet"/> with its summary.
/// </summary>
/// <param name="ds">The data set that contains the table.</param>
/// <param name="TableName">Name of the table that holds the column.</param>
/// <param name="column">Name of the column whose values are summarized.</param>
/// <param name="length">Maximum number of characters to keep.</param>
/// <param name="StripHTML">
/// When <c>true</c>, tags are removed from the result; when <c>false</c>, markup is kept.
/// </param>
/// <remarks>The table is modified in place, so nothing is returned.</remarks>
/// <exception cref="System.ArgumentNullException"><paramref name="ds"/> is null.</exception>
/// <exception cref="System.ArgumentException">The data set has no table named <paramref name="TableName"/>.</exception>
public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML)
{
    if (ds == null)
    {
        throw new System.ArgumentNullException("ds");
    }

    DataTable dt = ds.Tables[TableName];
    if (dt == null)
    {
        throw new System.ArgumentException("The DataSet does not contain the requested table.", "TableName");
    }

    GetContentSummary(dt, column, length, StripHTML);
}

/// <summary>
/// Replaces, in place, the value of one column in every row of a
/// <see cref="DataTable"/> with its summary.
/// </summary>
/// <param name="dt">The table to update.</param>
/// <param name="column">Name of the column whose values are summarized.</param>
/// <param name="length">Maximum number of characters to keep.</param>
/// <param name="StripHTML">
/// When <c>true</c>, tags are removed from the result; when <c>false</c>, markup is kept.
/// </param>
/// <remarks>The table is modified in place, so nothing is returned.</remarks>
/// <exception cref="System.ArgumentNullException"><paramref name="dt"/> is null.</exception>
public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML)
{
    if (dt == null)
    {
        throw new System.ArgumentNullException("dt");
    }

    int rowCount = dt.Rows.Count;
    for (int i = 0; i < rowCount; i++)
    {
        string content = dt.Rows[i][column].ToString();

        // Honor the caller's StripHTML choice (it used to be hard-coded to true).
        dt.Rows[i][column] = GetContentSummary(content, length, StripHTML);
    }
}

转载于:https://www.cnblogs.com/lihuanhuan/archive/2010/04/26/10612238.html

weixin_30952535

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
C#中，截取Html字符串的函数（参数可以是string、DataSet或者DataTable）

最近我正在做一个博客系统，其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见，请告诉我。重载的3个方法列表如下：public static string GetContentSummary(string content, int length, bool StripHTML){}public static void...
复制链接

扫一扫