/**
* XSS 清除处理
*/
function xssClean($data, $htmlentities = 0)
{
$htmlentities && $data = htmlentities($data, ENT_QUOTES, 'utf-8');
// Fix &entity\n;
$data = str_replace(array('&','<','>'), array('&','<','>'), $data);
$data = preg_replace('/(*\w+)[\x00-\x20]+;/u', '$1;', $data);
$data = preg_replace('/(*[0-9A-F]+);*/iu', '$1;', $data);
$data = html_entity_decode($data, ENT_COMPAT, 'UTF-8');
// Remove any attribute starting with "on" or xmlns
$data = preg_replace('#(]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $data);
// Remove javascript: and vbscript: protocols
$data = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"\\\\]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $data);
$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"\\\\]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $data);
$data = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"\\\\]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $data);
// Only works in IE:
$data = preg_replace('#(]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"\\\\]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
$data = preg_replace('#(]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"\\\\]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $data);
$data = preg_replace('#(]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"\\\\]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $data);
// Remove namespaced elements (we do not need them)
$data = preg_replace('#*\w+:\w[^>]*+>#i', '', $data);
do
{
// Remove really unwanted tags
$old_data = $data;
$data = preg_replace('#*(?:applet|b(?:ase|gsound|link)|embed|frame(?:set)?|i(?:frame|layer)|l(?:ayer|ink)|meta|object|s(?:cript|tyle)|title|xml)[^>]*+>#i', '', $data);
}
while ($old_data !== $data);
// we are done...
$data = filter_remote_img_type($data, FALSE);
return $data;
}
/**
* 过滤内容中有问题网络图片
* @author phpseyo
* @param string $text 过滤文本
* @param boolean $bbcode 是否为BBCODE类型
* @return string
*/
function filter_remote_img_type($text, $bbcode = TRUE)
{
$pattern = $bbcode ? "/\[img[^\]]*\]\s*(.*?)+\s*\[\/img\]/is" : "/]+src=[\'|\"]([^\'|\"]+)[\'|\"][^>]*[\/]?>/is";
preg_match_all($pattern, $text, $matches);
foreach ($matches[1] as $k => $src) {
$data = get_headers($src);
$header_str = implode('', $data);
if (FALSE === strpos($header_str, 'Content-Type: image') || FALSE !== strpos($header_str, 'HTTP/1.1 401') || FALSE !== strpos($header_str, 'HTTP/1.1 404')) {
$text = str_replace($matches[0][$k], '', $text);
}
}
return $text;
}