(来自QQ群"MSDN 微软开发者网络",作者:DEVIN )
代码
using System;
using System.Text;
using System.Text.RegularExpressions;

namespace Onfly.Common.Utility
{
    /// <summary>
    /// Regex-based helpers that strip scripts, selected HTML elements, markup
    /// and configurable unwanted words from text.
    /// </summary>
    /// <remarks>
    /// NOTE(review): regular expressions cannot parse HTML reliably, so these
    /// helpers are a best-effort clean-up and should not be the only defence
    /// against script injection; encode output as well.
    /// </remarks>
    public class Filter
    {
        /// <summary>
        /// Options shared by the patterns that must span line breaks.
        /// </summary>
        private const RegexOptions HtmlOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        /// <summary>
        /// Options used when applying the user supplied bad-word patterns.
        /// </summary>
        private const RegexOptions BadWordOptions =
            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline;

        /// <summary>
        /// Names of the inline event-handler attributes that are stripped.
        /// NOTE(review): the list is not exhaustive (for example "onerror" is absent).
        /// </summary>
        private const String EventAttributeNames =
            @"on(blur|c(hange|lick)|dblclick|focus|keypress|(key|mouse)(down|up)|(un)?load|mouse(move|o(ut|ver))|reset|s(elect|ubmit))";

        /// <summary>
        /// File extensions treated as script or server-side resources by <see cref="FilterSrc"/>.
        /// </summary>
        private const String ScriptFileExtensions = @"\.(js|vbs|aspx?|php|jsp)";

        /// <summary>
        /// Matches an HTML comment or a complete script element.
        /// A single lazy ".*?" replaces the former nested "(.*?X?)*" construct,
        /// which could backtrack exponentially on an unclosed script tag.
        /// </summary>
        private static readonly Regex ScriptAndCommentRegex = new Regex(
            @"<!--.*?--[ \n\r]*>|<[ \n\r]*script[^>]*>.*?<[ \n\r]*/script[^>]*>",
            HtmlOptions);

        /// <summary>
        /// Matches one opening tag and captures every event-handler attribute and
        /// every href="javascript..." attribute into the "attribute" group.
        /// The alternation is atomic so a tag that never closes cannot be
        /// re-partitioned over and over while backtracking.
        /// </summary>
        private static readonly Regex TagWithScriptAttributeRegex = new Regex(
            @"(?inx)
            <\w+\s+
            (?>
                (?'attribute'
                    (" + EventAttributeNames + @") \s*=\s*
                    ( ""[^"">]*"" | '[^'>]*' | [^\s'"">]+ )
                )
              |
                (?'attribute'
                    href \s*=\s*
                    ( ""\s*javascript[^"">]*"" | '\s*javascript[^'>]*' | javascript[^\s'"">]+ )
                )
              |
                [^>]
            )*
            >");

        /// <summary>
        /// Matches a whole href attribute whose value starts with a "...script:" scheme.
        /// </summary>
        private static readonly Regex ScriptHrefRegex = new Regex(
            @"href\s*=\s*(""\s*\w*script\s*:[^""]*""|'\s*\w*script\s*:[^']*'|\w*script\s*:[^\s>]*)",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches a whole src attribute that points at a script or server-side file.
        /// </summary>
        private static readonly Regex ScriptSrcRegex = new Regex(
            @"src\s*=\s*(""[^""<>]*" + ScriptFileExtensions
            + @"""|'[^'<>]*" + ScriptFileExtensions
            + @"'|[^\s'""<>]+" + ScriptFileExtensions + @"(?=[\s>]|$))",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches any single tag.
        /// </summary>
        private static readonly Regex AnyTagRegex = new Regex(@"<[^>]*>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Words or patterns to mask, separated by '|'.
        /// </summary>
        public static String keyWord = "";

        /// <summary>
        /// Words or patterns to mask, separated by '|'.
        /// </summary>
        public static String KeyWord
        {
            get { return keyWord; }
            set { keyWord = value; }
        }

        /// <summary>
        /// Removes HTML comments, script elements and inline script attributes.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterScript(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            String withoutScripts = ScriptAndCommentRegex.Replace(content, String.Empty);
            return StripScriptAttributesFromTags(withoutScripts);
        }

        /// <summary>
        /// Removes event-handler attributes (onclick and friends) and
        /// href="javascript..." attributes from every opening tag.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The text with the matching attributes removed.</returns>
        private static String StripScriptAttributesFromTags(String content)
        {
            return TagWithScriptAttributeRegex.Replace(content, new MatchEvaluator(StripAttributesHandler));
        }

        /// <summary>
        /// Rebuilds a matched tag without any of its captured script attributes.
        /// </summary>
        /// <param name="m">Match produced by the tag pattern.</param>
        /// <returns>The tag text with every captured attribute removed.</returns>
        private static String StripAttributesHandler(Match m)
        {
            Group attributes = m.Groups["attribute"];
            if (!attributes.Success)
            {
                return m.Value;
            }

            // Group.Value only exposes the last capture, so walk all captures.
            // Removing from right to left keeps the remaining offsets valid.
            StringBuilder tag = new StringBuilder(m.Value);
            for (int i = attributes.Captures.Count - 1; i >= 0; i--)
            {
                Capture attribute = attributes.Captures[i];
                tag.Remove(attribute.Index - m.Index, attribute.Length);
            }
            return tag.ToString();
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes href attributes whose
        /// value starts with a script scheme such as "javascript:" or "vbscript:".
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterAHrefScript(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return ScriptHrefRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes src attributes that
        /// reference .js, .vbs, .asp, .aspx, .php or .jsp files.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterSrc(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return ScriptSrcRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes every remaining tag.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterHtml(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return AnyTagRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Removes object elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterObject(String content)
        {
            return RemoveElement(content, "Object");
        }

        /// <summary>
        /// Removes iframe elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterIframe(String content)
        {
            return RemoveElement(content, "Iframe");
        }

        /// <summary>
        /// Removes frameset elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterFrameset(String content)
        {
            return RemoveElement(content, "Frameset");
        }

        /// <summary>
        /// Removes each element with the given tag name, from its opening tag to
        /// the nearest closing tag. The lazy quantifier keeps text that sits
        /// between two separate elements.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <param name="tagName">Tag name, matched case-insensitively.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        private static String RemoveElement(String content, String tagName)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            String pattern = "<" + tagName + "[^>]*>.*?</" + tagName + "[^>]*>";
            return Regex.Replace(content, pattern, String.Empty, HtmlOptions);
        }

        /// <summary>
        /// Masks every occurrence of the configured words with asterisks.
        /// Each '|'-separated entry of <see cref="KeyWord"/> is used as a
        /// case-insensitive regular expression, and each match is replaced by as
        /// many '*' characters as that match is long.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>
        /// The masked text; the input is returned unchanged when it, or the
        /// keyword list, is null or empty.
        /// </returns>
        public static String FilterBadWords(String content)
        {
            if (String.IsNullOrEmpty(content) || String.IsNullOrEmpty(keyWord))
            {
                return content;
            }

            MatchEvaluator mask = new MatchEvaluator(MaskMatch);
            foreach (String entry in keyWord.Split('|'))
            {
                String word = entry.Trim();
                if (word.Length == 0)
                {
                    // Blank entries (for example from "a||b") would match everywhere.
                    continue;
                }
                content = Regex.Replace(content, word, mask, BadWordOptions);
            }
            return content;
        }

        /// <summary>
        /// Produces a run of asterisks as long as the matched text.
        /// </summary>
        /// <param name="m">Match to mask.</param>
        /// <returns>A string of '*' with the same length as the match.</returns>
        private static String MaskMatch(Match m)
        {
            return new String('*', m.Length);
        }

        /// <summary>
        /// Applies every filter of this class.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterAll(String content)
        {
            // Element-level filters run before FilterHtml: once all tags are
            // stripped they would have nothing left to match, and the content of
            // object/iframe/frameset elements would leak into the result.
            content = FilterScript(content);
            content = FilterAHrefScript(content);
            content = FilterSrc(content);
            content = FilterObject(content);
            content = FilterIframe(content);
            content = FilterFrameset(content);
            content = FilterHtml(content);
            content = FilterBadWords(content);
            return content;
        }
    }
}
下载 /Files/llbofchina/codes/Filter.cs.txt
using System.Text;
using System.Text.RegularExpressions;

namespace Onfly.Common.Utility
{
    /// <summary>
    /// Regex-based helpers that strip scripts, selected HTML elements, markup
    /// and configurable unwanted words from text.
    /// </summary>
    /// <remarks>
    /// NOTE(review): regular expressions cannot parse HTML reliably, so these
    /// helpers are a best-effort clean-up and should not be the only defence
    /// against script injection; encode output as well.
    /// </remarks>
    public class Filter
    {
        /// <summary>
        /// Options shared by the patterns that must span line breaks.
        /// </summary>
        private const RegexOptions HtmlOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        /// <summary>
        /// Options used when applying the user supplied bad-word patterns.
        /// </summary>
        private const RegexOptions BadWordOptions =
            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline;

        /// <summary>
        /// Names of the inline event-handler attributes that are stripped.
        /// NOTE(review): the list is not exhaustive (for example "onerror" is absent).
        /// </summary>
        private const String EventAttributeNames =
            @"on(blur|c(hange|lick)|dblclick|focus|keypress|(key|mouse)(down|up)|(un)?load|mouse(move|o(ut|ver))|reset|s(elect|ubmit))";

        /// <summary>
        /// File extensions treated as script or server-side resources by <see cref="FilterSrc"/>.
        /// </summary>
        private const String ScriptFileExtensions = @"\.(js|vbs|aspx?|php|jsp)";

        /// <summary>
        /// Matches an HTML comment or a complete script element.
        /// A single lazy ".*?" replaces the former nested "(.*?X?)*" construct,
        /// which could backtrack exponentially on an unclosed script tag.
        /// </summary>
        private static readonly Regex ScriptAndCommentRegex = new Regex(
            @"<!--.*?--[ \n\r]*>|<[ \n\r]*script[^>]*>.*?<[ \n\r]*/script[^>]*>",
            HtmlOptions);

        /// <summary>
        /// Matches one opening tag and captures every event-handler attribute and
        /// every href="javascript..." attribute into the "attribute" group.
        /// The alternation is atomic so a tag that never closes cannot be
        /// re-partitioned over and over while backtracking.
        /// </summary>
        private static readonly Regex TagWithScriptAttributeRegex = new Regex(
            @"(?inx)
            <\w+\s+
            (?>
                (?'attribute'
                    (" + EventAttributeNames + @") \s*=\s*
                    ( ""[^"">]*"" | '[^'>]*' | [^\s'"">]+ )
                )
              |
                (?'attribute'
                    href \s*=\s*
                    ( ""\s*javascript[^"">]*"" | '\s*javascript[^'>]*' | javascript[^\s'"">]+ )
                )
              |
                [^>]
            )*
            >");

        /// <summary>
        /// Matches a whole href attribute whose value starts with a "...script:" scheme.
        /// </summary>
        private static readonly Regex ScriptHrefRegex = new Regex(
            @"href\s*=\s*(""\s*\w*script\s*:[^""]*""|'\s*\w*script\s*:[^']*'|\w*script\s*:[^\s>]*)",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches a whole src attribute that points at a script or server-side file.
        /// </summary>
        private static readonly Regex ScriptSrcRegex = new Regex(
            @"src\s*=\s*(""[^""<>]*" + ScriptFileExtensions
            + @"""|'[^'<>]*" + ScriptFileExtensions
            + @"'|[^\s'""<>]+" + ScriptFileExtensions + @"(?=[\s>]|$))",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches any single tag.
        /// </summary>
        private static readonly Regex AnyTagRegex = new Regex(@"<[^>]*>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Words or patterns to mask, separated by '|'.
        /// </summary>
        public static String keyWord = "";

        /// <summary>
        /// Words or patterns to mask, separated by '|'.
        /// </summary>
        public static String KeyWord
        {
            get { return keyWord; }
            set { keyWord = value; }
        }

        /// <summary>
        /// Removes HTML comments, script elements and inline script attributes.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterScript(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            String withoutScripts = ScriptAndCommentRegex.Replace(content, String.Empty);
            return StripScriptAttributesFromTags(withoutScripts);
        }

        /// <summary>
        /// Removes event-handler attributes (onclick and friends) and
        /// href="javascript..." attributes from every opening tag.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The text with the matching attributes removed.</returns>
        private static String StripScriptAttributesFromTags(String content)
        {
            return TagWithScriptAttributeRegex.Replace(content, new MatchEvaluator(StripAttributesHandler));
        }

        /// <summary>
        /// Rebuilds a matched tag without any of its captured script attributes.
        /// </summary>
        /// <param name="m">Match produced by the tag pattern.</param>
        /// <returns>The tag text with every captured attribute removed.</returns>
        private static String StripAttributesHandler(Match m)
        {
            Group attributes = m.Groups["attribute"];
            if (!attributes.Success)
            {
                return m.Value;
            }

            // Group.Value only exposes the last capture, so walk all captures.
            // Removing from right to left keeps the remaining offsets valid.
            StringBuilder tag = new StringBuilder(m.Value);
            for (int i = attributes.Captures.Count - 1; i >= 0; i--)
            {
                Capture attribute = attributes.Captures[i];
                tag.Remove(attribute.Index - m.Index, attribute.Length);
            }
            return tag.ToString();
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes href attributes whose
        /// value starts with a script scheme such as "javascript:" or "vbscript:".
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterAHrefScript(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return ScriptHrefRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes src attributes that
        /// reference .js, .vbs, .asp, .aspx, .php or .jsp files.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterSrc(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return ScriptSrcRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Runs <see cref="FilterScript"/> and then removes every remaining tag.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterHtml(String content)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            return AnyTagRegex.Replace(FilterScript(content), String.Empty);
        }

        /// <summary>
        /// Removes object elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterObject(String content)
        {
            return RemoveElement(content, "Object");
        }

        /// <summary>
        /// Removes iframe elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterIframe(String content)
        {
            return RemoveElement(content, "Iframe");
        }

        /// <summary>
        /// Removes frameset elements together with their content.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterFrameset(String content)
        {
            return RemoveElement(content, "Frameset");
        }

        /// <summary>
        /// Removes each element with the given tag name, from its opening tag to
        /// the nearest closing tag. The lazy quantifier keeps text that sits
        /// between two separate elements.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <param name="tagName">Tag name, matched case-insensitively.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        private static String RemoveElement(String content, String tagName)
        {
            if (String.IsNullOrEmpty(content))
            {
                return content;
            }

            String pattern = "<" + tagName + "[^>]*>.*?</" + tagName + "[^>]*>";
            return Regex.Replace(content, pattern, String.Empty, HtmlOptions);
        }

        /// <summary>
        /// Masks every occurrence of the configured words with asterisks.
        /// Each '|'-separated entry of <see cref="KeyWord"/> is used as a
        /// case-insensitive regular expression, and each match is replaced by as
        /// many '*' characters as that match is long.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>
        /// The masked text; the input is returned unchanged when it, or the
        /// keyword list, is null or empty.
        /// </returns>
        public static String FilterBadWords(String content)
        {
            if (String.IsNullOrEmpty(content) || String.IsNullOrEmpty(keyWord))
            {
                return content;
            }

            MatchEvaluator mask = new MatchEvaluator(MaskMatch);
            foreach (String entry in keyWord.Split('|'))
            {
                String word = entry.Trim();
                if (word.Length == 0)
                {
                    // Blank entries (for example from "a||b") would match everywhere.
                    continue;
                }
                content = Regex.Replace(content, word, mask, BadWordOptions);
            }
            return content;
        }

        /// <summary>
        /// Produces a run of asterisks as long as the matched text.
        /// </summary>
        /// <param name="m">Match to mask.</param>
        /// <returns>A string of '*' with the same length as the match.</returns>
        private static String MaskMatch(Match m)
        {
            return new String('*', m.Length);
        }

        /// <summary>
        /// Applies every filter of this class.
        /// </summary>
        /// <param name="content">Text to filter.</param>
        /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
        public static String FilterAll(String content)
        {
            // Element-level filters run before FilterHtml: once all tags are
            // stripped they would have nothing left to match, and the content of
            // object/iframe/frameset elements would leak into the result.
            content = FilterScript(content);
            content = FilterAHrefScript(content);
            content = FilterSrc(content);
            content = FilterObject(content);
            content = FilterIframe(content);
            content = FilterFrameset(content);
            content = FilterHtml(content);
            content = FilterBadWords(content);
            return content;
        }
    }
}
下载 /Files/llbofchina/codes/Filter.cs.txt