/// <summary>
/// Two-stage regular-expression replacement.
/// </summary>
/// <remarks>
/// When both patterns are supplied, <paramref name="splitKey1"/> selects spans of
/// <paramref name="content"/> and <paramref name="splitKey2"/> (case-insensitive) is
/// replaced with <paramref name="newChars"/> only inside those spans. When only
/// <paramref name="splitKey2"/> is supplied it is applied to the whole input.
/// When <paramref name="splitKey2"/> is null or empty the input is returned unchanged.
/// </remarks>
/// <param name="content">Text to process; null is returned as-is.</param>
/// <param name="splitKey1">Outer pattern selecting the spans to rewrite (case-sensitive unless the pattern says otherwise); may be null or empty.</param>
/// <param name="splitKey2">Inner pattern whose matches are replaced (always case-insensitive).</param>
/// <param name="newChars">Replacement text (regex substitution syntax applies); null is treated as empty.</param>
/// <returns>The processed text.</returns>
private static string GetReplace(string content, string splitKey1, string splitKey2, string newChars)
{
    // Nothing to replace with, or nothing to replace in.
    if (content == null || string.IsNullOrEmpty(splitKey2))
    {
        return content;
    }

    string replacement = newChars ?? "";

    // Built once instead of once per outer match.
    System.Text.RegularExpressions.Regex inner = new System.Text.RegularExpressions.Regex(
        splitKey2, System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    if (string.IsNullOrEmpty(splitKey1))
    {
        return inner.Replace(content, replacement);
    }

    // A match evaluator rewrites exactly the matched spans. The previous
    // string.Replace approach also rewrote identical text elsewhere in the
    // input and threw on an empty match.
    System.Text.RegularExpressions.Regex outer = new System.Text.RegularExpressions.Regex(splitKey1);
    return outer.Replace(content, m => inner.Replace(m.Value, replacement));
}

/// <summary>
/// Removes every case-insensitive match of <paramref name="pattern"/> from <paramref name="input"/>.
/// </summary>
private static string RemoveIgnoringCase(string input, string pattern)
{
    return System.Text.RegularExpressions.Regex.Replace(
        input, pattern, "", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}

/// <summary>
/// Strips commonly abused markup from an HTML fragment: script, iframe, frameset,
/// frame and form tags, inline <c>on*</c> event-handler attributes, and
/// <c>javascript:</c>/<c>vbscript:</c> URLs (replaced with <c>#</c>).
/// </summary>
/// <remarks>
/// NOTE(review): regex-based filtering is best-effort and is NOT a complete XSS
/// defense. For untrusted input, prefer an allow-list HTML sanitizer and
/// context-aware output encoding.
/// </remarks>
/// <param name="html">HTML fragment to filter; may be null.</param>
/// <returns>The filtered markup, or an empty string when <paramref name="html"/> is null.</returns>
public string FilterHTML(string html)
{
    if (html == null)
    {
        return "";
    }

    // <script>: remove whole elements first, then any leftover unclosed opening tag.
    html = RemoveIgnoringCase(html, @"<script[\s\S]*?/script\s*>");
    html = RemoveIgnoringCase(html, @"<script[\s\S]*?>");

    // <iframe>: lazy match up to the closing tag, then leftover opening tags.
    html = RemoveIgnoringCase(html, @"<iframe\b[\s\S]*?</iframe\s*>");
    html = RemoveIgnoringCase(html, @"<iframe\b[^>]*>");

    // <frameset>: same treatment as <iframe>.
    html = RemoveIgnoringCase(html, @"<frameset\b[\s\S]*?</frameset\s*>");
    html = RemoveIgnoringCase(html, @"<frameset\b[^>]*>");

    // <frame> has no content of its own; drop the tags themselves.
    html = RemoveIgnoringCase(html, @"</?frame\b[^>]*>");

    // <form>: drop opening and closing tags but keep the enclosed content.
    html = RemoveIgnoringCase(html, @"</?form\b[^>]*>");

    // Inline event handlers: single-quoted, double-quoted, then unquoted values.
    // (?i) makes the outer selection case-insensitive so ONCLICK= is caught too.
    html = GetReplace(
        html,
        @"(?i)<[\s\S]+ (on)[a-zA-Z]{4,20} *= *[\S ]{3,}>",
        @"((on)[a-zA-Z]{4,20} *= *'[^']{3,}')|((on)[a-zA-Z]{4,20} *= *""[^""]{3,}"")|((on)[a-zA-Z]{4,20} *= *[^>/ ]{3,})",
        "");

    // Script URLs in link-like attributes are neutralized to "#".
    html = GetReplace(
        html,
        @"(?i)<[\s\S]+ (href|src|background|url|dynsrc|expression|codebase) *= *[ ""']? *(javascript:)[\S]{1,}>",
        @"(' *(javascript|vbscript):([\S^'])*')|("" *(javascript|vbscript):[\S^""]*"")|([^=]*(javascript|vbscript):[^/> ]*)",
        "#");

    return html;
}