最近我正在做一个博客系统，其中有两个页面需要截取和处理HTML字符串。现在将用到的几个函数写出来和大家交流。如果大家有什么好的修改建议或者意见，请告诉我。
重载的3个方法列表如下:
public static string GetContentSummary(string content, int length, bool StripHTML){}
public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML){}
public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML){}
/// <summary>
/// Builds a summary of <paramref name="content"/> limited to <paramref name="length"/> counted
/// characters, either as plain text or with the HTML markup kept.
/// </summary>
/// <param name="content">The text, possibly containing HTML markup, to shorten.</param>
/// <param name="length">
/// Maximum number of characters to keep. Characters (UTF-16 code units) are counted, not bytes.
/// </param>
/// <param name="StripHTML">
/// When true, all tags are removed and a plain-text summary is returned; when false, the markup
/// is kept and only the visible text (plus image tags) counts against <paramref name="length"/>.
/// </param>
/// <returns>
/// An empty string when <paramref name="content"/> is null or empty or <paramref name="length"/>
/// is not positive; the input unchanged when it already fits; otherwise the shortened text with
/// "..." marking the cut.
/// </returns>
public static string GetContentSummary(string content, int length, bool StripHTML)
{
    // A non-positive budget cannot hold any text; rejecting it here also keeps Substring
    // below from ever being called with a negative length.
    if (string.IsNullOrEmpty(content) || length <= 0)
    {
        return "";
    }

    if (StripHTML)
    {
        // The static Regex.Replace reuses the framework's pattern cache instead of
        // constructing a new Regex object on every call.
        content = System.Text.RegularExpressions.Regex.Replace(content, "<[^>]*>", "");

        // Remove spacing that tag stripping leaves behind: the &nbsp; entity, ASCII spaces,
        // non-breaking spaces and full-width (ideographic) spaces.
        content = content.Replace("&nbsp;", "")
                         .Replace(" ", "")
                         .Replace("\u00A0", "")
                         .Replace("\u3000", "");

        if (content.Length <= length)
        {
            return content;
        }
        return content.Substring(0, length) + "...";
    }

    if (content.Length <= length)
    {
        return content;
    }

    int pos = 0;
    int size = 0;                   // characters charged against the budget so far
    bool ellipsisAdded = false;     // "..." is written only once
    bool rowClosed = false;         // an over-budget </tr> has already been written
    bool itemClosed = false;        // an over-budget </li> has already been written
    bool noCloserLeft = false;      // no '>' remains, so later '<' are plain text
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    while (pos < content.Length)
    {
        char cur = content[pos];

        int close = -1;
        if (cur == '<' && !noCloserLeft)
        {
            close = content.IndexOf('>', pos + 1);
            noCloserLeft = close < 0;
        }

        if (close < 0)
        {
            // Ordinary text, or a '<' that is never closed and is therefore treated as text.
            if (size < length)
            {
                sb.Append(cur);
                size++;
            }
            else if (!ellipsisAdded)
            {
                sb.Append("...");
                ellipsisAdded = true;
            }
            pos++;
            continue;
        }

        string tag = content.Substring(pos, close - pos + 1);
        bool closing;
        string name = GetSummaryTagName(content, pos, out closing);
        bool withinBudget = size < length;

        if (name == "p" && !closing)
        {
            // Opening paragraph tags are dropped; the matching </p> becomes a line break.
        }
        else if (name == "p" || (name == "br" && !closing))
        {
            if (withinBudget)
            {
                sb.Append("<br />");
            }
        }
        else if (name == "img" && !closing)
        {
            if (withinBudget)
            {
                sb.Append(tag);
                // Images are charged against the budget: the tag's length plus one.
                size += tag.Length + 1;
            }
        }
        else if (name == "li")
        {
            if (withinBudget)
            {
                sb.Append(tag);
            }
            else if (!itemClosed && closing)
            {
                // Close the list item that was open when the budget ran out.
                sb.Append(tag);
                itemClosed = true;
            }
        }
        else if (name == "tr")
        {
            if (withinBudget)
            {
                sb.Append(tag);
            }
            else if (!rowClosed && closing)
            {
                // Close the table row that was open when the budget ran out.
                sb.Append(tag);
                rowClosed = true;
            }
        }
        else if (name == "td")
        {
            // Cells keep being written until the over-budget row has been closed.
            if (withinBudget || !rowClosed)
            {
                sb.Append(tag);
            }
        }
        else
        {
            // Every other tag is copied through so surrounding markup stays balanced.
            sb.Append(tag);
        }

        pos = close + 1;
    }

    return sb.ToString();
}

/// <summary>
/// Reads the lower-cased element name of the tag whose '&lt;' is at <paramref name="lt"/>.
/// </summary>
/// <param name="content">The text being scanned.</param>
/// <param name="lt">Index of the '&lt;' that starts the tag.</param>
/// <param name="closing">Set to true when the tag starts with "&lt;/".</param>
/// <returns>The run of letters and digits after the optional '/', lower-cased; may be empty.</returns>
private static string GetSummaryTagName(string content, int lt, out bool closing)
{
    int i = lt + 1;
    closing = i < content.Length && content[i] == '/';
    if (closing)
    {
        i++;
    }

    int nameStart = i;
    while (i < content.Length && char.IsLetterOrDigit(content[i]))
    {
        i++;
    }

    // Invariant casing keeps "IMG"/"LI" recognisable under cultures such as Turkish.
    return content.Substring(nameStart, i - nameStart).ToLowerInvariant();
}
/// <summary>
/// Replaces, in place, every value in one column of a named table of a DataSet with the
/// summary produced by the string overload of GetContentSummary.
/// </summary>
/// <param name="ds">The DataSet that contains the table to update.</param>
/// <param name="TableName">Name of the table that holds the text.</param>
/// <param name="column">Name of the column that holds the text.</param>
/// <param name="length">Maximum summary length, passed through to the string overload.</param>
/// <param name="StripHTML">
/// Passed through to the string overload: true removes the HTML tags, false keeps them.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="ds"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The DataSet has no table named <paramref name="TableName"/>.
/// </exception>
/// <remarks>The rows are modified in place, so there is no return value.</remarks>
public static void GetContentSummary(DataSet ds, string TableName, string column, int length, bool StripHTML)
{
    if (ds == null)
    {
        throw new System.ArgumentNullException("ds");
    }

    // The table indexer yields null for an unknown name; report that explicitly instead of
    // failing later with a NullReferenceException.
    DataTable dt = ds.Tables[TableName];
    if (dt == null)
    {
        throw new System.ArgumentException("The DataSet does not contain the requested table.", "TableName");
    }

    foreach (DataRow row in dt.Rows)
    {
        string content = row[column].ToString();
        // Forward the caller's StripHTML choice (it used to be hard-coded to true).
        row[column] = HTML.FormatString.GetContentSummary(content, length, StripHTML);
    }
}
/// <summary>
/// Replaces, in place, every value in one column of a DataTable with the summary produced
/// by the string overload of GetContentSummary.
/// </summary>
/// <param name="dt">The DataTable whose rows are updated.</param>
/// <param name="column">Name of the column that holds the text.</param>
/// <param name="length">Maximum summary length, passed through to the string overload.</param>
/// <param name="StripHTML">
/// Passed through to the string overload: true removes the HTML tags, false keeps them.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="dt"/> is null.</exception>
/// <remarks>The rows are modified in place, so there is no return value.</remarks>
public static void GetContentSummary(DataTable dt, string column, int length, bool StripHTML)
{
    if (dt == null)
    {
        throw new System.ArgumentNullException("dt");
    }

    foreach (DataRow row in dt.Rows)
    {
        string content = row[column].ToString();
        // Forward the caller's StripHTML choice (it used to be hard-coded to true).
        row[column] = HTML.FormatString.GetContentSummary(content, length, StripHTML);
    }
}
原文章:http://blog.csdn.net/byygyy/article/details/5531921?reload