1.只保留p img a b strong标签
//Remove every tag (opening or closing) except p, img, a, b and strong.
//The inner lookahead (?=[\s/>]) requires the allowed name to be the WHOLE tag name,
//so look-alike tags such as <pre>, <param>, <article> or <audio> are stripped too
//instead of being kept merely because they start with "p" or "a".
var regex2 = new Regex("<(?!/?(?:p|img|a|b|strong)(?=[\\s/>]))[^<>]*>", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
content = regex2.Replace(content, "");
2.去掉标签属性(不包括img a)
//Strip all attributes from every opening tag except <img> and <a>.
//  (?!(?:img|a)[\s/>])      skip img and a; any whitespace, '/' or '>' ends the tag name,
//                           so <abbr> is processed while <a href> (even with a tab or newline) is not
//  ([a-zA-Z][a-zA-Z0-9]*)   $1 = tag name; requiring a leading letter skips closing tags and <!...>
//  (?:\s[^>]*?)?            the attributes to drop; they may contain '/' (e.g. url(/x.png))
//  (/?)                     $2 = optional self-closing slash, preserved in the output
//A single Regex.Replace pass rewrites each tag in place, instead of re-scanning the whole
//string with String.Replace once per match.
var regex5 = new Regex("<(?!(?:img|a)[\\s/>])([a-zA-Z][a-zA-Z0-9]*)(?:\\s[^>]*?)?(/?)>", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
content = regex5.Replace(content, "<$1$2>");
3.img标签保留src alt属性(a标签保留href和这个一样,故省略)
//Match a whole <img ...> tag. [^>]* (unlike '.') also spans line breaks inside the tag,
//and the lookahead keeps look-alike names such as <imgfoo> from matching.
var regex6 = new Regex("<img(?=[\\s/>])[^>]*>", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
//Match the src attribute with a double- or single-quoted value.
//(?<![\w-]) rejects attributes that merely end in "src", e.g. data-src="...".
var regex7 = new Regex("(?<![\\w-])src\\s*=\\s*(?:\"[^\"]*\"|'[^']*')", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
//Match the alt attribute, same rules as src.
var regex8 = new Regex("(?<![\\w-])alt\\s*=\\s*(?:\"[^\"]*\"|'[^']*')", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
//Rebuild every img tag in one pass, keeping only its src and alt attributes.
//A MatchEvaluator rewrites each tag where it was found, so unrelated text that happens
//to equal a matched tag is never touched and the string is scanned only once.
content = regex6.Replace(content, imgTag =>
{
    var src = regex7.Match(imgTag.Value);
    var alt = regex8.Match(imgTag.Value);
    //An unsuccessful Match has an empty Value, so a missing src yields "<img  >".
    //The output shape is "<img src=... alt=...>" or "<img src=... >" (trailing space kept).
    var rep = "<img " + src.Value + " ";
    if (alt.Success)
    {
        rep += alt.Value;
    }
    return rep + ">";
});
4.&字符开头,之后的1-5个字符全部删除,比如&nbsp;
//Delete complete HTML character references: named (&nbsp;), decimal (&#160;) and hex (&#xA0;).
//The terminating ';' is required so a bare '&' in text or URLs ("AT&T", "?a=1&b=2") is left
//alone, and names of any length are accepted so longer entities such as &hellip; or &middot;
//are removed whole instead of leaving a fragment like "p;" behind.
var regex4 = new Regex("&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
content = regex4.Replace(content, "");
5.移除空标签
//Matches an element with nothing but optional whitespace between its opening tag
//(attributes allowed) and the closing tag of the same name.
var emptyTagPattern = new Regex("<([a-z]*)(?:\\s+?[^>]*?)?>\\s*?<\\/\\1>", RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase);
//Removing an empty element can leave its parent empty, so repeat until a pass changes nothing.
string beforePass;
do
{
    beforePass = content;
    content = emptyTagPattern.Replace(beforePass, "");
}
while (content.Length != beforePass.Length);
完结撒花~