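<?php
// Original project: https://github.com/overtrue/pinyin
// Download the original project and place its `data` dictionary directory in the same directory as this file.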
/*
### 拼音数组
```php
$pinyin = new Pinyin(); // 默认
$pinyin->convert('带着希望去旅行,比到达终点更美好');
// ["dai", "zhe", "xi", "wang", "qu", "lv", "xing", "bi", "dao", "da", "zhong", "dian", "geng", "mei", "hao"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', Pinyin::UNICODE);
// ["dài","zhe","xī","wàng","qù","lǚ","xíng","bǐ","dào","dá","zhōng","diǎn","gèng","měi","hǎo"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', Pinyin::ASCII);
//["dai4","zhe","xi1","wang4","qu4","lv3","xing2","bi3","dao4","da2","zhong1","dian3","geng4","mei3","hao3"]
```
选项:
| 选项 | 描述 |
| ------------- | ---------------------------------------------------|
| `Pinyin::NONE` | 不带音调输出: `mei hao` |
| `Pinyin::ASCII` | 带数字式音调: `mei3 hao3` |
| `Pinyin::UNICODE` | UNICODE 式音调:`měi hǎo` |
### 生成用于链接的拼音字符串
```php
$pinyin->permalink('带着希望去旅行'); // dai-zhe-xi-wang-qu-lv-xing
$pinyin->permalink('带着希望去旅行', '.'); // dai.zhe.xi.wang.qu.lv.xing
```
### 获取首字符字符串
```php
$pinyin->abbr('带着希望去旅行'); // dzxwqlx
$pinyin->abbr('带着希望去旅行', '-'); // d-z-x-w-q-l-x
```
### 翻译整段文字为拼音
将会保留中文字符:`,。 ! ? : “ ” ‘ ’` 并替换为对应的英文符号。
```php
$pinyin->sentence('带着希望去旅行,比到达终点更美好!');
// dai zhe xi wang qu lv xing, bi dao da zhong dian geng mei hao!
$pinyin->sentence('带着希望去旅行,比到达终点更美好!', true);
// dài zhe xī wàng qù lǚ xíng, bǐ dào dá zhōng diǎn gèng měi hǎo!
```
### 翻译姓名
姓名的姓的读音有些与普通字不一样,比如 ‘单’ 常见的音为 `dan`,而作为姓的时候读 `shan`。
```php
$pinyin->name('单某某'); // ['shan', 'mou', 'mou']
$pinyin->name('单某某', Pinyin::UNICODE); // ["shàn","mǒu","mǒu"]
```
## 在 Laravel 中使用
独立的包在这里:[overtrue/laravel-pinyin](https://github.com/overtrue/laravel-pinyin)
## Contribution
欢迎提意见及完善补充词库 [`overtrue/pinyin-dictionary-maker`](https://github.com/overtrue/pinyin-dictionary-maker/tree/master/patches) :kiss:
## 参考
- [详细参考资料](https://github.com/overtrue/pinyin-resources)
# License
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin?ref=badge_large)
*/
/**
 * Chinese-to-pinyin converter backed by dictionary files.
 *
 * Dictionaries are PHP files in the `data` directory next to this file:
 * `words_0` .. `words_99` (each is included and used as a strtr() replacement
 * map) and an optional `surnames` file used by name().
 */
class Pinyin
{
    /** Output option: pinyin without tones, e.g. "mei hao". */
    const NONE = 'none';

    /** Output option: tone appended as a digit, e.g. "mei3 hao3". */
    const ASCII = 'ascii';

    /** Output option: tone marks kept as in the dictionary, e.g. "měi hǎo". */
    const UNICODE = 'unicode';

    /**
     * sprintf() pattern for dictionary segment file names.
     *
     * @var string
     */
    protected $segmentName = 'words_%s';

    /**
     * Directory that holds the dictionary files.
     *
     * @var string
     */
    protected $path = '';

    /**
     * Punctuations map (Chinese punctuation => ASCII counterpart).
     *
     * @var array
     */
    protected $punctuations = array(
        ',' => ',',
        '。' => '.',
        '!' => '!',
        '?' => '?',
        ':' => ':',
        '“' => '"',
        '”' => '"',
        '‘' => "'",
        '’' => "'",
    );

    /**
     * Constructor. Points the dictionary path at the `data` directory beside this file.
     */
    public function __construct()
    {
        $this->path = dirname(__FILE__).'/data';
    }

    /**
     * Convert string to pinyin.
     *
     * @param string $string
     * @param string $option one of self::NONE, self::ASCII, self::UNICODE
     *
     * @return array list of pinyin words
     */
    public function convert($string, $option = self::NONE)
    {
        return $this->splitWords($this->romanize($string), $option);
    }

    /**
     * Convert string (person name) to pinyin.
     *
     * Same as convert(), except that a leading surname found in the
     * `surnames` dictionary is replaced with its surname reading first.
     *
     * @param string $stringName
     * @param string $option     one of self::NONE, self::ASCII, self::UNICODE
     *
     * @return array list of pinyin words
     */
    public function name($stringName, $option = self::NONE)
    {
        return $this->splitWords($this->romanize($stringName, true), $option);
    }

    /**
     * Return a pinyin permalink from string.
     *
     * @param string $string
     * @param string $delimiter glue placed between the tone-less words
     *
     * @return string
     */
    public function permalink($string, $delimiter = '-')
    {
        return implode($delimiter, $this->convert($string, self::NONE));
    }

    /**
     * Return first letters.
     *
     * @param string $string
     * @param string $delimiter glue placed between the initials
     *
     * @return string
     */
    public function abbr($string, $delimiter = '')
    {
        $initials = array_map(function ($word) {
            // Take the first character, not the first byte: a word may start
            // with a multibyte letter such as "ü".
            return mb_substr($word, 0, 1, 'UTF-8');
        }, $this->convert($string, self::NONE));

        return implode($delimiter, $initials);
    }

    /**
     * Chinese phrase to pinyin.
     *
     * @param string $string
     * @param string $delimiter glue placed between the words
     * @param string $option    one of self::NONE, self::ASCII, self::UNICODE
     *
     * @return string
     */
    public function phrase($string, $delimiter = ' ', $option = self::NONE)
    {
        return implode($delimiter, $this->convert($string, $option));
    }

    /**
     * Chinese to pinyin sentence.
     *
     * Keeps the punctuation listed in $punctuations (converted to its ASCII
     * counterpart) and separates words with single spaces.
     *
     * @param string $sentence
     * @param bool   $withTone true keeps tone marks, false strips them
     *
     * @return string
     */
    public function sentence($sentence, $withTone = false)
    {
        $marks = array_keys($this->punctuations);
        $punctuationsRegex = preg_quote(implode('', array_merge($marks, $this->punctuations)), '/');
        $regex = '/[^üāēīōūǖáéíóúǘǎěǐǒǔǚàèìòùǜɑa-z0-9'.$punctuationsRegex.'\s_]+/iu';

        // Drop everything that is neither pinyin, known punctuation nor whitespace.
        $pinyin = (string) preg_replace($regex, '', $this->romanize($sentence));

        // Translate punctuation to ASCII and turn the tab separators into spaces.
        $map = array_merge($this->punctuations, array("\t" => ' '));
        $pinyin = str_replace(array_keys($map), array_values($map), $pinyin);

        // A tab next to an existing space yields consecutive spaces; collapse them.
        $pinyin = trim(preg_replace('/ {2,}/', ' ', $pinyin));

        return $withTone ? $pinyin : $this->format($pinyin, false);
    }

    /**
     * Preprocess.
     *
     * Prefixes every run of ASCII letters, digits, `_` and `-` with a tab
     * (a character splitWords() treats as a separator), then removes every
     * character that is not Han, punctuation, separator, mark, number,
     * letter or tab.
     *
     * @param string $string
     *
     * @return string
     */
    protected function prepare($string)
    {
        $string = preg_replace_callback('~[a-z0-9_-]+~i', function ($matches) {
            return "\t".$matches[0];
        }, $string);

        return preg_replace("~[^\p{Han}\p{P}\p{Z}\p{M}\p{N}\p{L}\t]~u", '', $string);
    }

    /**
     * Convert Chinese to pinyin.
     *
     * Applies every existing dictionary segment (`words_0` .. `words_99`)
     * in order; segments that do not exist are skipped.
     *
     * @param string $string
     * @param bool   $isName when true, a leading surname is converted first
     *
     * @return string the input with dictionary entries replaced
     */
    protected function romanize($string, $isName = false)
    {
        $string = $this->prepare($string);

        if ($isName) {
            $string = $this->convertSurname($string);
        }

        for ($i = 0; $i < 100; ++$i) {
            $segment = $this->path.'/'.sprintf($this->segmentName, $i);

            if (file_exists($segment)) {
                $dictionary = (array) include $segment;
                $string = strtr($string, $dictionary);
            }
        }

        return $string;
    }

    /**
     * Convert Chinese Surname to pinyin.
     *
     * Replaces the first `surnames` dictionary key that the string starts
     * with by its dictionary value. The string is returned unchanged when
     * the dictionary file is missing or no key matches.
     *
     * @param string $string
     *
     * @return string
     */
    protected function convertSurname($string)
    {
        $surnames = $this->path.'/surnames';

        if (!file_exists($surnames)) {
            return $string;
        }

        $dictionary = (array) include $surnames;

        foreach ($dictionary as $surname => $pinyin) {
            $surname = (string) $surname;

            // The match is at byte offset 0, so slicing by bytes removes
            // exactly the surname.
            if ('' !== $surname && 0 === strpos($string, $surname)) {
                return $pinyin.substr($string, strlen($surname));
            }
        }

        return $string;
    }

    /**
     * Split pinyin string to words.
     *
     * Every run of characters that is not a pinyin letter or digit acts as
     * a separator. Unless $option is self::UNICODE, each word is passed
     * through format().
     *
     * @param string $pinyin
     * @param string $option one of self::NONE, self::ASCII, self::UNICODE
     *
     * @return array list of words, re-indexed from 0
     */
    public function splitWords($pinyin, $option)
    {
        // PREG_SPLIT_NO_EMPTY removes only empty tokens; a bare array_filter()
        // would also discard the token "0" because it is falsy.
        $split = preg_split(
            '/[^üāēīōūǖáéíóúǘǎěǐǒǔǚàèìòùǜɑa-z\d]+/iu',
            $pinyin,
            -1,
            PREG_SPLIT_NO_EMPTY
        );

        // preg_split() returns false on failure (e.g. malformed UTF-8).
        if (false === $split) {
            return array();
        }

        if (self::UNICODE !== $option) {
            $withTone = self::ASCII === $option;

            foreach ($split as $index => $word) {
                $split[$index] = $this->format($word, $withTone);
            }
        }

        return array_values($split);
    }

    /**
     * Format.
     *
     * Replaces tone-marked vowels with plain ASCII letters ("ü" with a tone
     * becomes "v"). When $tone is true the tone digit of each replaced
     * vowel is appended to the end of the string, so tone output is only
     * meaningful for a single word.
     *
     * @param string $pinyin
     * @param bool   $tone   append the tone number
     *
     * @return string
     */
    protected function format($pinyin, $tone = false)
    {
        $replacements = array(
            'üē' => array('ue', 1), 'üé' => array('ue', 2), 'üě' => array('ue', 3), 'üè' => array('ue', 4),
            'ā' => array('a', 1), 'ē' => array('e', 1), 'ī' => array('i', 1), 'ō' => array('o', 1), 'ū' => array('u', 1), 'ǖ' => array('v', 1),
            'á' => array('a', 2), 'é' => array('e', 2), 'í' => array('i', 2), 'ó' => array('o', 2), 'ú' => array('u', 2), 'ǘ' => array('v', 2),
            'ǎ' => array('a', 3), 'ě' => array('e', 3), 'ǐ' => array('i', 3), 'ǒ' => array('o', 3), 'ǔ' => array('u', 3), 'ǚ' => array('v', 3),
            'à' => array('a', 4), 'è' => array('e', 4), 'ì' => array('i', 4), 'ò' => array('o', 4), 'ù' => array('u', 4), 'ǜ' => array('v', 4),
        );

        foreach ($replacements as $unicode => $replacement) {
            if (false === strpos($pinyin, $unicode)) {
                continue;
            }

            $pinyin = str_replace($unicode, $replacement[0], $pinyin);

            if ($tone) {
                $pinyin .= $replacement[1];
            }
        }

        return $pinyin;
    }
}