<?php
/**
* 汉字转拼音
* @author Dworry
* mutil_pinyin.txt pinyin.txt 提取至JAVA类库JPinyin http://www.oschina.net/p/jpinyin
* (mutil_pinyin.txt pinyin.txt)内容有些修改
* 代码中部分逻辑也是复制 JPinyin中的。更多拼音相关功能请看 JPinyin
*/
/**
 * Converts Chinese text to pinyin using two serialized dictionaries:
 * a per-character table and a phrase table that disambiguates characters
 * with several pronunciations.
 */
class lpinyin {
    // Phrase dictionary: phrase => pinyin of each character joined by PINYIN_SEPARATOR.
    public $MUTIL_PINYIN_TABLE = NULL;
    // Character dictionary: character => one or more pinyin joined by PINYIN_SEPARATOR.
    public $PINYIN_TABLE = NULL;
    // Separator used between pinyin entries inside the dictionaries.
    public $PINYIN_SEPARATOR = ',';

    /**
     * Lazily loads the dictionaries from disk.
     *
     * Each table is loaded only when it is still NULL, so a table assigned
     * by the caller beforehand is left untouched.
     */
    function _init() {
        if ($this->MUTIL_PINYIN_TABLE === NULL) {
            $this->MUTIL_PINYIN_TABLE = $this->_load_table(APPPATH . "libraries/pinyin/mutil_pinyin.txt");
        }
        if ($this->PINYIN_TABLE === NULL) {
            $this->PINYIN_TABLE = $this->_load_table(APPPATH . "libraries/pinyin/pinyin.txt");
        }
    }

    /**
     * Reads one serialized dictionary file.
     *
     * NOTE(review): unserialize() must only ever be fed trusted data; these
     * files are expected to be application-owned dictionaries.
     *
     * @param string $file Path of the serialized dictionary.
     * @return array The dictionary, or an empty array when the file cannot
     *               be read or does not contain a serialized array.
     */
    function _load_table($file) {
        $raw = file_get_contents($file);
        $table = ($raw === FALSE) ? FALSE : unserialize($raw);
        return is_array($table) ? $table : array();
    }

    /**
     * Adds (or replaces) a phrase in the phrase dictionary and persists it.
     * eg: $this->add_mulit_pinyin('瞿塘峡', 'qu,tang,xia');
     *
     * @param string $hj Phrase in Chinese characters.
     * @param string $py Pinyin of each character, joined by the separator.
     * @return int|false Result of file_put_contents().
     */
    function add_mulit_pinyin($hj, $py) {
        $this->_init();
        $this->MUTIL_PINYIN_TABLE[$hj] = $py;
        $written = file_put_contents(APPPATH . "libraries/pinyin/mutil_pinyin.txt", serialize($this->MUTIL_PINYIN_TABLE));
        // Echo kept for backward compatibility with existing callers.
        echo $this->MUTIL_PINYIN_TABLE[$hj];
        return $written;
    }

    /**
     * Adds or changes an entry of the character dictionary and persists it.
     * eg: $this->adjust_pinyin('南', 'nan,na');
     *
     * @param string $key   A single Chinese character.
     * @param string $value Its pinyin reading(s), joined by the separator.
     * @return int|false Result of file_put_contents().
     */
    function adjust_pinyin($key, $value){
        $this->_init();
        $this->PINYIN_TABLE[$key] = $value;
        return file_put_contents(APPPATH . "libraries/pinyin/pinyin.txt", serialize($this->PINYIN_TABLE));
    }

    /**
     * Returns the abbreviated pinyin (initial letters) and the full pinyin.
     * eg: list($jp, $py) = $this->get_jp_py('中国人');
     *
     * Only characters that were converted contribute an initial to the
     * abbreviation; unconverted characters are copied verbatim into the
     * full pinyin only.
     *
     * @param string $str UTF-8 input text.
     * @return array array($abbreviation, $fullPinyin)
     */
    function get_jp_py($str) {
        $this->_init();
        $jp = '';
        $py = '';
        foreach ($this->topinyin($str) as $row) {
            // Guard against an empty dictionary value before taking its first byte.
            if ($row[0] && $row[1] !== '') {
                $jp .= $row[1][0];
            }
            $py .= $row[1];
        }
        return array($jp, $py);
    }

    /**
     * Converts a string into a list of per-character results.
     *
     * @param string $str UTF-8 input text.
     * @return array List of array($converted, $text): $converted is true
     *               when $text is a pinyin taken from a dictionary, false
     *               when $text is the original character passed through.
     */
    function topinyin($str){
        $this->_init();
        $result = array();
        $str_arr = $this->utf8_str_split($str);
        if (!is_array($str_arr)) {
            return $result;
        }
        $len = count($str_arr);
        for ($key = 0; $key < $len; $key++) {
            $value = $str_arr[$key];

            // Not a Chinese character, or one the dictionary does not know: pass through.
            $isHan = $this->isChinese($value) || $value === '〇';
            if (!$isHan || !isset($this->PINYIN_TABLE[$value])) {
                $result[$key] = array(false, $value);
                continue;
            }

            $py = explode($this->PINYIN_SEPARATOR, $this->PINYIN_TABLE[$value]);
            if (!isset($py[1])) {
                // Single pronunciation, nothing to disambiguate.
                $result[$key] = array(true, $py[0]);
                continue;
            }

            // Several pronunciations: combine the character with the next
            // 3, 2, then 1 characters and look the phrase up in the phrase table.
            $isFoundFlag = false;
            $rightMove = 3;
            for ($rightIndex = min($key + $rightMove, $len - 1); $rightIndex > $key; $rightIndex--) {
                // array_slice() takes a length, not an end index.
                $cizu = implode('', array_slice($str_arr, $key, $rightIndex - $key + 1));
                if (isset($this->MUTIL_PINYIN_TABLE[$cizu])) {
                    $pinyinArray = explode($this->PINYIN_SEPARATOR, $this->MUTIL_PINYIN_TABLE[$cizu]);
                    foreach ($pinyinArray as $pk => $pv) {
                        $result[$key + $pk] = array(true, $pv);
                    }
                    // Skip the characters consumed by the matched phrase.
                    $key = $rightIndex;
                    $isFoundFlag = true;
                    break;
                }
            }
            if (!$isFoundFlag) {
                // No phrase matched: fall back to the first listed pronunciation.
                $result[$key] = array(true, $py[0]);
            }
        }
        return $result;
    }

    /**
     * Works like str_split() but is aware of UTF-8 characters.
     *
     * @param string $str       UTF-8 input text.
     * @param int    $split_len Number of characters per chunk (>= 1).
     * @return array|false Chunks of the string, or FALSE when $split_len
     *                     is invalid or the string cannot be matched as UTF-8.
     */
    function utf8_str_split($str,$split_len=1)
    {
        if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1) {
            return FALSE;
        }
        $len = mb_strlen($str, 'UTF-8');
        if ($len <= $split_len) {
            return array($str);
        }
        $matched = preg_match_all('/.{'.$split_len.'}|[^\x00]{1,'.$split_len.'}$/us', $str, $ar);
        if ($matched === FALSE) {
            // preg_match_all() failed, e.g. on malformed UTF-8.
            return FALSE;
        }
        return $ar[0];
    }

    /**
     * Tells whether the string consists only of characters in U+4E00..U+9FA5.
     *
     * @param string $char Text to check.
     * @return bool
     */
    function isChinese($char) {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $char) === 1;
    }

    /**
     * Ad-hoc smoke test: dumps the conversion of a sample string together
     * with the elapsed time, then terminates the script.
     */
    function test(){
        $s = microtime(true);
        var_dump($this->get_jp_py('南天门'));
        echo sprintf("%.8f", (microtime(true) - $s));
        exit();
    }
}
// PHP汉字转拼音
// 最新推荐文章于 2024-07-29 22:50:36 发布