PHP汉字转拼音

<?php
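/**
 * Chinese character (Hanzi) to pinyin conversion
 * @author Dworry
 * The dictionary files mutil_pinyin.txt and pinyin.txt were extracted from the Java library JPinyin: http://www.oschina.net/p/jpinyin
 * The contents of both files have been modified slightly.
 * Parts of the logic here are also ported from JPinyin; see JPinyin for more pinyin-related features.
 */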
 
/**
 * Converts Chinese text to pinyin.
 *
 * Two serialized dictionaries are used: a per-character table and a phrase
 * table that disambiguates polyphonic characters by looking at the characters
 * that follow them. Both are loaded lazily from files under APPPATH.
 */
class lpinyin {

    /** Phrase dictionary: phrase => separator-joined pinyin (one syllable per character). NULL until loaded. */
    public $MUTIL_PINYIN_TABLE = NULL;
    /** Character dictionary: character => comma-joined candidate pinyin. NULL until loaded. */
    public $PINYIN_TABLE = NULL;
    /** Separator between syllables inside a phrase dictionary value. */
    public $PINYIN_SEPARATOR = ',';

    /**
     * Lazily loads the dictionaries from disk.
     *
     * Each table is loaded on its own, so a caller that pre-populated only one
     * of the public properties still gets the other one from its file.
     */
    public function _init() {
        if ($this->MUTIL_PINYIN_TABLE === NULL) {
            $this->MUTIL_PINYIN_TABLE = $this->_load_table(APPPATH . "libraries/pinyin/mutil_pinyin.txt");
        }
        if ($this->PINYIN_TABLE === NULL) {
            $this->PINYIN_TABLE = $this->_load_table(APPPATH . "libraries/pinyin/pinyin.txt");
        }
    }

    /**
     * Reads one serialized dictionary file.
     *
     * NOTE(review): unserialize() must only ever see trusted data; these files
     * are expected to be written exclusively by this class.
     *
     * @param string $path Dictionary file path.
     * @return array The decoded table, or an empty array when the file cannot
     *               be read or does not contain a serialized array. Be aware
     *               that a later add/adjust call will then persist only the
     *               new entry.
     */
    private function _load_table($path) {
        $raw = file_get_contents($path);
        $table = ($raw === false) ? false : unserialize($raw);
        return is_array($table) ? $table : array();
    }

    /**
     * Adds (or replaces) a phrase in the phrase dictionary and persists it.
     * The stored pinyin is also echoed to the output.
     * eg: $this->add_mulit_pinyin('瞿塘峡', 'qu,tang,xia');
     *
     * @param string $hj Phrase in Chinese characters.
     * @param string $py Pinyin, one syllable per character, joined by the separator.
     * @return int|false Result of file_put_contents(): bytes written, or false on failure.
     */
    public function add_mulit_pinyin($hj, $py) {
        $this->_init();

        $this->MUTIL_PINYIN_TABLE[$hj] = $py;
        $written = file_put_contents(APPPATH . "libraries/pinyin/mutil_pinyin.txt", serialize($this->MUTIL_PINYIN_TABLE));
        echo $this->MUTIL_PINYIN_TABLE[$hj];
        return $written;
    }

    /**
     * Adds or changes an entry of the character dictionary and persists it.
     * eg: $this->adjust_pinyin('南', 'nan,na');
     *
     * @param string $key   A single Chinese character.
     * @param string $value Comma-joined candidate pinyin; the first one is the default reading.
     * @return int|false Result of file_put_contents(): bytes written, or false on failure.
     */
    public function adjust_pinyin($key, $value) {
        $this->_init();

        $this->PINYIN_TABLE[$key] = $value;
        return file_put_contents(APPPATH . "libraries/pinyin/pinyin.txt", serialize($this->PINYIN_TABLE));
    }

    /**
     * Returns the abbreviated pinyin (first letter of every converted
     * character) and the full pinyin of a string.
     * eg: list($jp, $py) = $this->get_jp_py('中国人');
     *
     * Characters that could not be converted are copied into the full pinyin
     * unchanged and contribute nothing to the abbreviation.
     *
     * @param string $str UTF-8 input.
     * @return array array($jp, $py)
     */
    public function get_jp_py($str) {
        $jp = '';
        $py = '';
        foreach ($this->topinyin($str) as $row) {
            if ($row[0]) {
                // substr() instead of $row[1][0]: an empty syllable must not raise a warning.
                $jp .= substr($row[1], 0, 1);
            }
            $py .= $row[1];
        }
        return array($jp, $py);
    }

    /**
     * Converts a string into a list of per-character results.
     *
     * @param string $str UTF-8 input.
     * @return array List of array($converted, $text): $converted is true when
     *               $text is a pinyin syllable, false when $text is the
     *               original character (not Chinese, or missing from the
     *               dictionary).
     */
    public function topinyin($str) {
        // Make direct calls work too; get_jp_py() is not the only entry point.
        $this->_init();

        $result = array();
        $str_arr = $this->utf8_str_split($str);
        if (!is_array($str_arr)) {
            return $result;
        }

        $len = count($str_arr);
        $rightMove = 3; // a phrase is the current character plus at most 3 following ones

        for ($key = 0; $key < $len; $key++) {
            $value = $str_arr[$key];

            // Not a Chinese character: pass through unchanged.
            if (!$this->isChinese($value) && $value !== '〇') {
                $result[$key] = array(false, $value);
                continue;
            }

            // Chinese, but unknown to the dictionary: pass through unchanged.
            if (!isset($this->PINYIN_TABLE[$value])) {
                $result[$key] = array(false, $value);
                continue;
            }

            $py = explode(',', $this->PINYIN_TABLE[$value]);
            if (!isset($py[1])) {
                // Only one reading, nothing to disambiguate.
                $result[$key] = array(true, $py[0]);
                continue;
            }

            // Polyphonic character: try the longest phrase first, then shorter ones.
            $isFoundFlag = false;
            $last = min($key + $rightMove, $len - 1);
            for ($rightIndex = $last; $rightIndex > $key; $rightIndex--) {
                // array_slice() takes a LENGTH, so convert the inclusive end index.
                $cizu = implode('', array_slice($str_arr, $key, $rightIndex - $key + 1));
                if (isset($this->MUTIL_PINYIN_TABLE[$cizu])) {
                    $pinyinArray = explode($this->PINYIN_SEPARATOR, $this->MUTIL_PINYIN_TABLE[$cizu]);
                    foreach ($pinyinArray as $pk => $pv) {
                        $result[$key + $pk] = array(true, $pv);
                    }

                    // Skip the characters consumed by the phrase.
                    $key = $rightIndex;
                    $isFoundFlag = true;
                    break;
                }
            }

            if (!$isFoundFlag) {
                // No phrase matched: fall back to the first (default) reading.
                $result[$key] = array(true, $py[0]);
            }
        }

        return $result;
    }

    /**
     * UTF-8 aware counterpart of str_split().
     *
     * @param string $str       UTF-8 input.
     * @param int    $split_len Characters per chunk, a positive integer.
     * @return array|false List of chunks; FALSE when $split_len is invalid or
     *                     the pattern match fails (e.g. $str is not valid UTF-8).
     */
    public function utf8_str_split($str, $split_len = 1) {
        if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1) {
            return FALSE;
        }

        $len = mb_strlen($str, 'UTF-8');
        if ($len <= $split_len) {
            return array($str);
        }

        $matched = preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);
        if ($matched === false) {
            return FALSE;
        }
        return $ar[0];
    }

    /**
     * Tells whether $char consists only of characters in U+4E00..U+9FA5.
     *
     * @param string $char Text to check.
     * @return bool
     */
    public function isChinese($char) {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $char) === 1;
    }

    /**
     * Manual smoke test: dumps the conversion of a sample word together with
     * the elapsed time, then terminates the script.
     */
    public function test() {
        $s = microtime(true);
        var_dump($this->get_jp_py('南天门'));
        echo sprintf("%.8f", (microtime(true) - $s));
        exit();
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值