以前在截取字符串的时候,字母和数字都不会出现太多问题;但是中文一截就容易出问题:某个汉字被截成两半,这时就会出现乱码!
因此向大家提供两个函数,绝对好用:
GBK截取函数:
/**
 * Cut a slice out of a GBK/GB2312 encoded string without splitting a
 * double-byte character in half.
 *
 * Offsets are expressed in BYTES. Every character whose first byte lies in
 * the window [$start, $start + $len) is returned whole, so the result may be
 * one byte longer than $len when the window ends in the middle of a
 * double-byte character.
 *
 * @param string $str   GBK encoded input.
 * @param int    $start Byte offset at which the slice begins (values below 0
 *                      are treated as 0).
 * @param int    $len   Width of the window in bytes.
 * @return string The selected characters, or '' when the window is empty.
 */
function GBsubstr($str, $start, $len) {
    $str   = (string) $str;
    $total = strlen($str);
    $from  = max(0, (int) $start);
    // Never walk past the end of the string.
    $to    = min($total, (int) $start + (int) $len);

    $out = '';
    $pos = 0;
    // Always scan from byte 0 so character boundaries stay in sync, even when
    // $start itself points into the middle of a double-byte character.
    while ($pos < $to) {
        // GBK lead bytes start at 0x81 (GB2312 alone starts at 0xA1); any
        // high byte is treated as the first half of a two-byte character.
        $width = (ord($str[$pos]) > 0x80) ? 2 : 1;
        if ($pos >= $from) {
            $out .= substr($str, $pos, $width);
        }
        $pos += $width;
    }

    return $out;
}
UTF-8字符截取(返回由各个字符组成的数组,可用 implode('', ...) 拼接成字符串):
/**
 * Extract up to $lenth characters from a UTF-8 string, starting at character
 * index $start, without cutting a multi-byte sequence apart.
 *
 * Offsets are expressed in CHARACTERS, not bytes. The result is an array
 * with one element per character; use implode('', ...) to get a string.
 *
 * A byte that is not a valid UTF-8 lead byte (for example a stray
 * continuation byte) occupies one position and is returned as an empty
 * string.
 *
 * @param string $str   UTF-8 encoded input.
 * @param int    $start Zero-based index of the first character to return.
 * @param int    $lenth Maximum number of characters to return; values <= 0
 *                      yield an empty array.
 * @return array List of characters (each a 1-4 byte string, or '' for an
 *               invalid byte).
 */
function subString_UTF8($str, $start, $lenth) {
    $str   = (string) $str;
    $chars = array();
    if ($lenth <= 0) {
        return $chars;
    }

    $total = strlen($str);
    $index = 0; // character index of the sequence starting at $pos
    $pos   = 0; // byte offset into $str

    while ($pos < $total) {
        $byte = ord($str[$pos]);

        // Derive the sequence width from the lead byte's high bits.
        if ($byte < 0x80) {
            $width = 1;                     // 0xxxxxxx
        } elseif (($byte & 0xE0) === 0xC0) {
            $width = 2;                     // 110xxxxx
        } elseif (($byte & 0xF0) === 0xE0) {
            $width = 3;                     // 1110xxxx
        } elseif (($byte & 0xF8) === 0xF0) {
            $width = 4;                     // 11110xxx (e.g. emoji)
        } else {
            $width = 0;                     // not a valid lead byte
        }

        if ($index >= $start) {
            $chars[] = ($width > 0) ? substr($str, $pos, $width) : '';
            if (count($chars) >= $lenth) {
                break;
            }
        }

        $index++;
        // An invalid byte still consumes one byte of input.
        $pos += max(1, $width);
    }

    return $chars;
} // End subString_UTF8