using System;
using System.Text.RegularExpressions;

namespace Kwanhong.Utilities
{
    /// <summary>
    /// Converts HTML markup into approximate plain text by removing
    /// non-visible sections, mapping a few structural tags to whitespace,
    /// stripping all remaining tags and decoding a handful of entities.
    /// </summary>
    /// <remarks>
    /// This is a regex-based approximation, not an HTML parser: malformed
    /// markup, comments containing '&gt;' and nested constructs are handled
    /// only on a best-effort basis.
    /// </remarks>
    public class HtmlToText
    {
        /// <summary>
        /// Produces a plain-text rendering of <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The HTML text to convert.</param>
        /// <returns>
        /// The text with tags removed. A &lt;br&gt; or &lt;li&gt; tag becomes a
        /// single carriage return; each run of paragraph/row breaks
        /// (&lt;p&gt;, &lt;tr&gt;) becomes one CR LF pair; table cells are
        /// separated by a space.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> is <c>null</c>.
        /// </exception>
        public string Convert(string source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            string result;

            // Flatten the markup onto one line: source line breaks and tabs are
            // not significant in HTML, and the '.' used by the block-removal
            // patterns below does not match '\n'.
            result = source.Replace("\r", " ");
            result = result.Replace("\n", " ");
            result = result.Replace("\t", " ");

            // Remove the document header. The quantifier is lazy so that only
            // the header itself is consumed.
            result = Regex.Replace(result, "(<head>).*?(</head>)", string.Empty, RegexOptions.IgnoreCase);

            // Remove all scripts: first normalise the opening tag by dropping
            // its attributes, then delete each block. Lazy matching keeps the
            // visible text that sits between two separate script blocks.
            result = Regex.Replace(result, @"<( )*script([^>])*>", "<script>", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"(<script>).*?(</script>)", string.Empty, RegexOptions.IgnoreCase);

            // Remove all styles, using the same normalise-then-delete approach.
            result = Regex.Replace(result, @"<( )*style([^>])*>", "<style>", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, "(<style>).*?(</style>)", string.Empty, RegexOptions.IgnoreCase);

            // Separate table cells: every <td> opening tag becomes a space.
            result = Regex.Replace(result, @"<( )*td([^>])*>", " ", RegexOptions.IgnoreCase);

            // Insert a line break in place of <br> (including the self-closing
            // forms <br/> and <br />) and of attribute-less <li> tags.
            result = Regex.Replace(result, @"<( )*br( )*(/)?( )*>", "\r", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"<( )*li( )*>", "\r", RegexOptions.IgnoreCase);

            // Insert a paragraph break in place of <tr> and <p> opening tags.
            // Note: these patterns match any tag whose name merely starts with
            // "tr" or "p" (for example <pre>), which then also yields a break.
            result = Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", RegexOptions.IgnoreCase);

            // Remove anything else that is enclosed inside < >.
            result = Regex.Replace(result, @"<[^>]*>", string.Empty, RegexOptions.IgnoreCase);

            // Decode/remove character entities in a single pass. Doing this in
            // one pass means already-decoded text is never re-scanned, so
            // "&amp;lt;" correctly yields the literal text "&lt;".
            result = Regex.Replace(result, @"&(#?[a-z0-9]{2,6});", new MatchEvaluator(DecodeEntity), RegexOptions.IgnoreCase);

            // Collapse runs of spaces, drop spaces caught between two line
            // breaks, then fold each run of paragraph breaks into one CR LF.
            result = Regex.Replace(result, @" ( )+", " ");
            result = Regex.Replace(result, "(\r)( )+(\r)", "\r\r");
            result = Regex.Replace(result, @"(\r\r)+", "\r\n");

            return result;
        }

        /// <summary>
        /// Maps one matched entity to its replacement text: the four supported
        /// entities are decoded, every other entity is removed.
        /// </summary>
        /// <param name="match">
        /// A match whose first group holds the entity name without the
        /// surrounding '&amp;' and ';'.
        /// </param>
        /// <returns>The decoded character, or an empty string.</returns>
        private static string DecodeEntity(Match match)
        {
            // Names are compared case-insensitively, mirroring the
            // RegexOptions.IgnoreCase used by the caller.
            switch (match.Groups[1].Value.ToLowerInvariant())
            {
                case "amp":
                    return "&";
                case "nbsp":
                    return " ";
                case "lt":
                    return "<";
                case "gt":
                    return ">";
                default:
                    return string.Empty;
            }
        }
    }//end class
}//end namespace