PHP 关键词过滤(基于字典树 Trie)

<?php

/**
 * Keyword filter backed by a trie (prefix tree) of UTF-8 characters.
 *
 * Every node is an associative array mapping one character to its child node.
 * A node that owns the TERMINATOR key marks the end of a stored keyword.
 */
class TrieTree
{

    /**
     * Key that marks "a keyword ends at this node".
     *
     * The empty string is what PHP coerces a null array key to, so trees
     * built with a null terminator use exactly the same key.
     * UTF8Util::getChars() never yields an empty string, so the key cannot
     * collide with a real character.
     */
    const TERMINATOR = '';

    /**
     * Root node of the trie. Starts empty: keywords are only ever stored as
     * nested arrays, never as plain values of the root.
     *
     * @var array
     */
    public $tree = array();

    /**
     * Adds a keyword to the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function add($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::TERMINATOR;

        // Walk by reference so missing nodes are created inside $this->tree.
        $node = &$this->tree;
        foreach ($chars as $char) {
            if (! array_key_exists($char, $node)) {
                $node[$char] = array();
            }
            $node = &$node[$char];
        }
        return $this;
    }

    /**
     * Removes a keyword from the trie.
     *
     * Only the given keyword is removed; keywords that share a prefix with it,
     * or that are a prefix of it, stay in place. Removing a keyword that is
     * not stored is a no-op.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function remove($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::TERMINATOR;

        // Make sure the full path (including the terminator) exists first.
        if ($this->_find($chars)) {
            $this->_prune($this->tree, $chars, 0);
        }
        return $this;
    }

    /**
     * Tells whether the exact keyword is stored in the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return boolean
     */
    public function exists($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::TERMINATOR;
        return $this->_find($chars);
    }

    /**
     * Tells whether the whole key path exists, starting at the root.
     *
     * @param array $chars characters followed by the terminator
     * @return boolean
     */
    private function _find(array $chars)
    {
        // Read-only walk: plain assignment is enough, no references needed.
        $node = $this->tree;
        foreach ($chars as $char) {
            if (! array_key_exists($char, $node)) {
                return false;
            }
            $node = $node[$char];
        }
        return true;
    }

    /**
     * Deletes the path $chars[$depth..] below $node and drops every node that
     * becomes empty on the way back up.
     *
     * The caller must have verified that the path exists.
     *
     * @param array $node  node to descend from (modified in place)
     * @param array $chars characters followed by the terminator
     * @param int   $depth index of the key to follow in $chars
     */
    private function _prune(array &$node, array $chars, $depth)
    {
        $char = $chars[$depth];
        if ($depth < count($chars) - 1) {
            $this->_prune($node[$char], $chars, $depth + 1);
            if (count($node[$char]) > 0) {
                // The child still carries other keywords: keep it.
                return;
            }
        }
        // Either the terminator itself or a child that just became empty.
        unset($node[$char]);
    }

    /**
     * Looks for stored keywords inside a text.
     *
     * Every start position is tried, and every keyword ending on the way down
     * is a hit, so overlapping and nested keywords are all counted.
     *
     * @param string  $utf8_str text to scan, UTF-8 encoded
     * @param boolean $do_count when true, count all hits instead of stopping
     *                          at the first one
     * @return boolean|number true/false when $do_count is false, otherwise the
     *                        number of hits
     */
    public function contain($utf8_str, $do_count = false)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $len = count($chars);
        $count = 0;

        for ($start = 0; $start < $len; $start ++) {
            $node = $this->tree;
            for ($pos = $start; $pos < $len; $pos ++) {
                $char = $chars[$pos];
                if (! array_key_exists($char, $node)) {
                    break;
                }
                $node = $node[$char];
                if (array_key_exists(self::TERMINATOR, $node)) {
                    if (! $do_count) {
                        return true;
                    }
                    $count ++;
                }
            }
        }
        return $do_count ? $count : false;
    }

    /**
     * Checks several texts at once.
     *
     * @param array $str_array texts to scan; any non-array argument yields false
     * @return boolean true as soon as one text contains a keyword
     */
    public function containMulti($str_array)
    {
        if (\is_array($str_array)) {
            foreach ($str_array as $str) {
                if ($this->contain($str)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Exports the trie as a serialized string.
     *
     * @return string
     */
    public function export()
    {
        return serialize($this->tree);
    }

    /**
     * Replaces the trie with a previously exported one.
     *
     * NOTE(security): unserialize() must not be fed untrusted input. Object
     * instantiation is disabled here as a safeguard, but prefer importing only
     * data produced by export().
     *
     * @param string $str serialized trie
     * @throws \InvalidArgumentException when $str does not decode to an array;
     *                                   the current trie is left untouched
     */
    public function import($str)
    {
        $data = unserialize($str, array('allowed_classes' => false));
        if (! \is_array($data)) {
            throw new \InvalidArgumentException('Serialized trie must decode to an array.');
        }
        $this->tree = self::_sanitize($data);
    }

    /**
     * Keeps only array children, recursively, so that every node that can be
     * reached during a lookup is a valid trie node.
     *
     * @param array $node
     * @return array
     */
    private static function _sanitize(array $node)
    {
        $clean = array();
        foreach ($node as $char => $child) {
            if (\is_array($child)) {
                $clean[$char] = self::_sanitize($child);
            }
        }
        return $clean;
    }
}

/**
 * Helpers for byte-level handling of UTF-8 strings.
 */
class UTF8Util
{

    /**
     * Splits a UTF-8 string into a list of characters (one string per code
     * point, 1 to 4 bytes each).
     *
     * Bytes that cannot be part of a well-formed sequence are dropped one at a
     * time: stray continuation bytes, invalid lead bytes, truncated sequences,
     * and lead bytes that are not followed by the required continuation bytes.
     * A malformed lead byte therefore never swallows the bytes after it.
     * Only the byte structure is checked; overlong encodings and surrogate
     * code points are not rejected.
     *
     * @param string $utf8_str
     * @return array list of characters, empty for an empty string
     */
    public static function getChars($utf8_str)
    {
        $s = (string) $utf8_str;
        $len = strlen($s);
        $chars = array();

        $i = 0;
        while ($i < $len) {
            $n = ord($s[$i]);

            if ($n < 0x80) {
                // 0xxx xxxx: ASCII, single byte
                $width = 1;
            } elseif (($n & 0xE0) === 0xC0) {
                // 110x xxxx: lead of a two-byte sequence
                $width = 2;
            } elseif (($n & 0xF0) === 0xE0) {
                // 1110 xxxx: lead of a three-byte sequence
                $width = 3;
            } elseif (($n & 0xF8) === 0xF0) {
                // 1111 0xxx: lead of a four-byte sequence
                $width = 4;
            } else {
                // 10xx xxxx (stray continuation) or 1111 1xxx (never valid)
                $i ++;
                continue;
            }

            if ($i + $width > $len || ! self::isContinuationRun($s, $i + 1, $width - 1)) {
                // Truncated or malformed: drop the lead byte only and resume
                // with the very next byte.
                $i ++;
                continue;
            }

            $chars[] = substr($s, $i, $width);
            $i += $width;
        }
        return $chars;
    }

    /**
     * Tells whether $count bytes starting at $start are all continuation
     * bytes (10xx xxxx). The caller guarantees the range lies inside $s.
     *
     * @param string $s
     * @param int    $start
     * @param int    $count
     * @return boolean
     */
    private static function isContinuationRun($s, $start, $count)
    {
        for ($k = 0; $k < $count; $k ++) {
            if ((ord($s[$start + $k]) & 0xC0) !== 0x80) {
                return false;
            }
        }
        return true;
    }
}

// Demo: break a sample sentence into its individual UTF-8 characters.
// The instance is kept because $utf8_str stays a script-level variable;
// getChars() is static, so it is invoked through the class name.
$utf8_str = new UTF8Util();
$utf_char = UTF8Util::getChars('佛教飞机撒方式的回复回复后我我认为回复日无法核实的回复我五花肉覅福热火么光和热规划局狂热韩国关乎二后过过过过过过过群军过军若绿可我让我陪我二骗人富可敌国及时来构架了');

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值