拆分中文字符串为数组
/**
 * Split a UTF-8 string into an array of characters or fixed-length chunks.
 *
 * @param string $str UTF-8 encoded input string.
 * @param int    $l   Chunk length in characters. Any value that is not
 *                    greater than 0 (including the default) splits the
 *                    string into single characters.
 *
 * @return array Chunks in their original order; an empty array for an empty string.
 */
function str_split_unicode($str, $l = 0) {
    if (!($l > 0)) {
        $chars = preg_split("//u", $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }
        // preg_split() returns false when the subject is not valid UTF-8.
        // Fall back to the mb_* loop below so callers always get an array.
        $l = 1;
    }

    $ret = array();
    $len = mb_strlen($str, "UTF-8");
    for ($i = 0; $i < $len; $i += $l) {
        $ret[] = mb_substr($str, $i, $l, "UTF-8");
    }
    return $ret;
}
// Demo input. Single quotes keep "\x01" as four literal characters
// (backslash, x, 0, 1) rather than the control byte 0x01.
$s = '中华人民abc共和\x01国';
// str_split_unicode() is declared as a plain function, so call it directly;
// $this is not available outside an object context.
print_r(str_split_unicode($s));
/**
* 结果:
*/
Array
(
[0] => 中
[1] => 华
[2] => 人
[3] => 民
[4] => a
[5] => b
[6] => c
[7] => 共
[8] => 和
[9] => \
[10] => x
[11] => 0
[12] => 1
[13] => 国
)
将中文字符与非中文字符分别提取到独立的数组
// Demo input. Single quotes keep "\x01" as four literal characters
// (backslash, x, 0, 1) rather than the control byte 0x01.
$str = '中华人民abc共和\x01国';
// Collect every character whose code point lies in U+4E00..U+9FA5
// (the range this snippet treats as "Chinese"); matches land in $chinese[0].
preg_match_all('/[\x{4e00}-\x{9fa5}]/u', $str, $chinese);
// Collect every character outside that range. The negated class is used
// instead of [a-zA-Z] so that digits, backslashes and other symbols are
// captured too, which is what "non-Chinese characters" requires.
preg_match_all('/[^\x{4e00}-\x{9fa5}]/u', $str, $other);
var_dump($chinese);
var_dump($other);
// Stop the script here so nothing after the dumps is executed.
die;
/**
* 结果: $chinese是纯中文字符数组,$other是其他字符数组
*/
array(1) {
[0]=>
array(7) {
[0]=>
string(3) "中"
[1]=>
string(3) "华"
[2]=>
string(3) "人"
[3]=>
string(3) "民"
[4]=>
string(3) "共"
[5]=>
string(3) "和"
[6]=>
string(3) "国"
}
}
array(1) {
[0]=>
array(7) {
[0]=>
string(1) "a"
[1]=>
string(1) "b"
[2]=>
string(1) "c"
[3]=>
string(1) "\"
[4]=>
string(1) "x"
[5]=>
string(1) "0"
[6]=>
string(1) "1"
}
}
出处: