主要使用了 int substr_count ( string haystack, string needle [, int offset [, int length]] ) 这个函数,它返回 needle 在 haystack 中出现的次数。下面的代码遍历 allergicWord 数组,逐个检查待测的字符串 str 中是否包含其中的敏感词:
[php] view plain copy
// Words that must not appear in the checked text.
$allergicWord = array('脏话','骂人话');
// Text to be checked.
$str = '这句话里包含了脏话和骂人话';
// Occurrence count of the first sensitive word that is found. Initialised to 0 so
// the final check never reads an undefined variable when the text is clean.
$info = 0;
foreach ($allergicWord as $word) {
    // substr_count() rejects an empty needle, so skip blank entries.
    if ($word === '') {
        continue;
    }
    $content = substr_count($str, $word);
    if ($content > 0) {
        // One hit is enough to flag the text; stop scanning.
        $info = $content;
        break;
    }
}
if ($info > 0) {
    // The text contains a forbidden word.
    return TRUE;
} else {
    // The text contains no forbidden word.
    return FALSE;
}
如果需要将出现的敏感词替换掉,比如替换成 ### 或者 *,可以结合 substr_replace ( mixed string, string replacement, int start [, int length] ) 函数使用。
关键字存放在 txt 文件中,各关键字之间用 | 分隔,txt 文件中以这样的形式存放:|赌博机|卖血|出售肾|出售器官|眼角膜
<?php
/**
 * Replace every sensitive word found in $str with "***".
 *
 * The word list is read from $fileName, a text file whose entries are separated
 * by "|" (for example "|赌博机|卖血|出售肾"). Matching is case-insensitive and
 * the words are treated as literal text, not as regular expressions. The case of
 * the surrounding text is left untouched.
 *
 * The script is terminated with "file read error!" when the file cannot be read.
 *
 * @param string $str      Text to filter.
 * @param string $fileName Path of the word-list file.
 * @return string|null The filtered text; $str unchanged when the list holds no
 *                     usable word; null only if PCRE itself fails.
 */
function Filter_word( $str, $fileName )
{
    $words = file_get_contents( $fileName );
    // Compare strictly: an empty (or "0") word file is not a read error.
    if ( $words === false ){
        die('file read error!');
    }

    // Same list normalisation as before: drop CRLF line breaks, together with a
    // "1", "2", "3" or "," that sits directly in front of them.
    $word = preg_replace("/[1,2,3]\r\n|\r\n/i", '', $words);

    // Split into individual words and discard blanks. A leading or doubled "|"
    // would otherwise become an empty alternative that matches at every position.
    $terms = array();
    foreach ( explode('|', (string) $word) as $term ){
        $term = trim($term);
        if ( $term !== '' ){
            $terms[] = $term;
        }
    }
    if ( !$terms ){
        return $str;
    }

    // Longest words first, so a short word never pre-empts a longer one that
    // starts with it (alternation takes the first branch that matches).
    usort($terms, function ($a, $b) {
        return strlen($b) - strlen($a);
    });

    // Escape each word so characters such as "/" or "." are matched literally.
    $quoted = array();
    foreach ( $terms as $term ){
        $quoted[] = preg_quote($term, '/');
    }
    $pattern = '/'.implode('|', $quoted).'/i';

    // Prefer UTF-8 aware matching; it yields null when the input is not valid
    // UTF-8, in which case fall back to plain byte-wise matching.
    $matched = preg_replace($pattern.'u', '***', $str);
    if ( $matched === null ){
        $matched = preg_replace($pattern, '***', $str);
    }
    return $matched;
}
// Demo: run a sample HTML fragment through the filter, using the word list
// stored in ./words.txt, and report the outcome.
$content = "<a href='#'>我要卖血fsdf卖血d 赌博机wo眼口交膜</a>";
$result = Filter_word($content, './words.txt');
if (!$result) {
    // A falsy result means the filter returned nothing usable.
    echo "替换失败! ";
} else {
    // Print the filtered text followed by the success message.
    echo $result;
    echo "替换成功 ";
}
?>