/// <summary>
/// Strips HTML markup from the supplied text and returns the remaining plain text.
/// </summary>
/// <remarks>
/// Processing order: script elements (including their content) are removed, then every
/// remaining tag, then line breaks followed by whitespace, then comment remnants.
/// A small set of character entities is decoded, any other numeric entity is dropped,
/// and a few typographic characters are removed. Finally every remaining angle bracket
/// and CRLF pair is deleted, so the result never contains an angle bracket, including
/// ones produced by decoding the lt/gt entities.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean. May be null or empty.</param>
/// <returns>The cleaned text; an empty string when the input is null or empty.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets ".*?" cross
    // line breaks so the body of a multi-line script is not left behind as text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Remove leftovers of HTML comments: stray terminators and unterminated openings
    // (the latter up to the end of the line).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    // Quotation-mark entities are dropped rather than decoded.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "", RegexOptions.IgnoreCase);
    // Decode the common named/numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);
    // Any other numeric character reference is discarded.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    // Drop typographic quotes, em dashes and ellipses.
    Htmlstring = Regex.Replace(Htmlstring, @"”", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"“", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"—", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"…", "", RegexOptions.IgnoreCase);
    // string.Replace returns a new string; the result must be assigned back or the
    // call has no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");
    return Htmlstring;
}
/// <summary>
/// Cleans Word-style formatting markup (font sizes and related tags) from an HTML fragment.
/// </summary>
/// <remarks>
/// Removes font tags that start with a size or face attribute, bare font tags, span tags
/// that start with a style or lang attribute, bare span tags, st1-prefixed tags and o:p
/// tags. Div tags starting with a style attribute, and p tags starting with a style or
/// class attribute, are rewritten to a bare div opening tag; "p align" openings become
/// "div align". Matching is case-insensitive and the rules run in the order listed.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean.</param>
/// <returns>The fragment after all rules have been applied.</returns>
public static string CleanWORD(string Htmlstring)
{
    // Each row is { pattern, replacement }.
    string[][] rules =
    {
        new[] { @"<font size[^>]*?>", "" },
        new[] { @"<font face[^>]*?>", "" },
        new[] { @"<font>", "" },
        new[] { @"</font>", "" },
        new[] { @"<span style[^>]*?>", "" },
        new[] { @"<span lang[^>]*?>", "" },
        new[] { @"</span>", "" },
        new[] { @"<span>", "" },
        new[] { @"<div style[^>]*?>", "<div>" },
        new[] { @"<p style[^>]*?>", "<div>" },
        new[] { @"<p class[^>]*?>", "<div>" },
        new[] { @"<p align", "<div align" },
        new[] { @"<st1[^>]*?>", "" },
        new[] { @"</st1[^>]*?>", "" },
        new[] { @"<o:p>", "" },
        new[] { @"</o:p>", "" },
    };

    string cleaned = Htmlstring;
    foreach (string[] rule in rules)
    {
        cleaned = Regex.Replace(cleaned, rule[0], rule[1], RegexOptions.IgnoreCase);
    }

    return cleaned;
}
/// <summary>
/// Cleans non-breaking-space entities from an HTML fragment.
/// </summary>
/// <param name="Htmlstring">HTML fragment to clean.</param>
/// <returns>
/// The fragment with every nbsp / #160 entity (matched case-insensitively) replaced by
/// a single space character.
/// </returns>
public static string CleanWORDA(string Htmlstring)
{
    return Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
}
/// <summary>
/// Cleans empty lines from an HTML fragment and normalises paragraphs to div elements.
/// </summary>
/// <remarks>
/// In order: empty (or space-only) p, div and strong elements are removed; each
/// "br /" tag becomes a paragraph close followed by a paragraph open; bare p tags are
/// rewritten to div tags; finally empty div elements, with or without a leading align
/// attribute, are removed. Matching is case-insensitive.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean.</param>
/// <returns>The fragment after all rules have been applied.</returns>
public static string CleanWORDB(string Htmlstring)
{
    // Column 0 is the pattern, column 1 the replacement. Order matters: the p-to-div
    // rewrite must run before the last two rules can see the empty divs it creates.
    string[,] steps =
    {
        { @"<p>[ ]*</p>", "" },
        { @"<div>[ ]*</div>", "" },
        { @"<strong>[ ]*</strong>", "" },
        { @"<br />", "</p><p>" },
        { @"<p>", "<div>" },
        { @"</p>", "</div>" },
        { @"<div align[^>]*?></div>", "" },
        { @"<div></div>", "" },
    };

    string output = Htmlstring;
    for (int i = 0; i < steps.GetLength(0); i++)
    {
        output = Regex.Replace(output, steps[i, 0], steps[i, 1], RegexOptions.IgnoreCase);
    }

    return output;
}
/// <summary>
/// Removes empty div elements from an HTML fragment.
/// </summary>
/// <remarks>
/// Empty divs whose opening tag starts with an align attribute are removed first, then
/// bare empty divs. The two passes are sequential, so a bare div left empty by the first
/// pass is removed by the second. Matching is case-insensitive.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean.</param>
/// <returns>The fragment without the matched empty div elements.</returns>
public static string CleanCSS(string Htmlstring)
{
    const RegexOptions options = RegexOptions.IgnoreCase;

    string withoutAlignedDivs = Regex.Replace(Htmlstring, @"<div align[^>]*?></div>", "", options);
    return Regex.Replace(withoutAlignedDivs, @"<div></div>", "", options);
}
/// <summary>
/// Cleans hyperlinks from an HTML fragment by removing anchor tags while keeping the link text.
/// </summary>
/// <remarks>
/// Only the anchor element is targeted: the opening-tag pattern requires the character
/// after the tag name to be whitespace, a slash or the closing bracket, so other elements
/// whose names merely start with the letter a (abbr, article, audio, address, ...) are
/// left intact. Matching is case-insensitive.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean.</param>
/// <returns>The fragment with anchor opening and closing tags removed.</returns>
public static string CleanHTML(string Htmlstring)
{
    // The lookahead pins the tag name to exactly "a" without consuming anything.
    Htmlstring = Regex.Replace(Htmlstring, @"<a(?=[\s/>])[^>]*>", "", RegexOptions.IgnoreCase);
    // Allow optional whitespace before the closing bracket of the end tag.
    Htmlstring = Regex.Replace(Htmlstring, @"</a\s*>", "", RegexOptions.IgnoreCase);
    return Htmlstring;
}
/// <summary>
/// Cleans keywords from the text: replaces every case-insensitive match of a
/// caller-supplied regular expression with the given replacement.
/// </summary>
/// <param name="Htmlstring">Text to process.</param>
/// <param name="rule">Regular-expression pattern to search for.</param>
/// <param name="newstr">
/// Replacement text; it is passed to Regex.Replace as a replacement pattern, so
/// substitutions such as $1 are honoured.
/// </param>
/// <returns>The text with all matches replaced.</returns>
public static string CleanX(string Htmlstring, string rule, string newstr)
{
    return Regex.Replace(Htmlstring, rule, newstr, RegexOptions.IgnoreCase);
}