今天遇到了一个中英文混合字符串截取的问题。在 GBK 编码里每个中文字符占两个字节，如果字符串全是中文，用 substr() 函数按字节截取就可以实现；但中英文混排时就麻烦了，因为按字节截取可能把一个汉字从中间截断，产生乱码。在以前收藏的代码里找到了一个不错的函数，很好地实现了截取功能：
/**
 * Truncate a mixed CJK/ASCII string without splitting a multi-byte character.
 *
 * Strings whose byte length is already <= $length are returned untouched.
 * Longer strings first have a few HTML entities decoded and spaces removed,
 * then are cut and suffixed with $dot (only if something was actually cut).
 *
 * @param string $string  Text to shorten, encoded as named by $charset.
 * @param int    $length  Limit. In the UTF-8 branch this is a display width:
 *                        an ASCII character counts 1, any multi-byte character
 *                        counts 2. In the double-byte branch it is a byte count.
 * @param string $dot     Suffix appended when the text was cut.
 * @param string $charset 'utf-8' (or 'utf8', case-insensitive) selects the
 *                        UTF-8 branch; any other value is treated as a
 *                        double-byte encoding such as GBK.
 *
 * @return string The shortened text. In the double-byte branch the result may
 *                be $length + 1 bytes long when a two-byte character straddles
 *                the limit (the character is kept whole rather than split).
 *
 * NOTE: entities are decoded and not re-encoded, so the result can contain
 * raw `<`, `>`, `"` and `&`; callers must escape it before emitting HTML.
 */
function get_word($string, $length, $dot = '..', $charset = 'gbk')
{
    // Short enough already: return as-is (no decoding, no suffix).
    if (strlen($string) <= $length) {
        return $string;
    }

    // Decode entities so that "&amp;" is measured as one character, not five.
    // strtr() with an array works in a single pass, so "&amp;lt;" becomes
    // "&lt;" and is not decoded a second time.
    // NOTE(review): the published snippet lists two blank-looking entries that
    // are replaced by ''. ASCII space is kept here as shown; '&nbsp;' is a
    // reconstruction of the second entry - confirm against the original code.
    $string = strtr($string, array(
        ' '      => '',
        '&nbsp;' => '',
        '&amp;'  => '&',
        '&quot;' => '"',
        '&lt;'   => '<',
        '&gt;'   => '>',
    ));

    $total    = strlen($string);
    $encoding = strtolower(trim($charset));

    if ($encoding === 'utf-8' || $encoding === 'utf8') {
        $n   = 0; // byte offset of the next character
        $tn  = 0; // byte size of the character consumed last
        $noc = 0; // display width accumulated so far

        while ($n < $total) {
            $t = ord($string[$n]);

            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $tn = 1;
                $noc += 1;
            } elseif (194 <= $t && $t <= 223) {
                $tn = 2;
                $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                // 0xE0-0xEF inclusive: 0xEF starts U+F000-U+FFFF, which
                // includes fullwidth punctuation common in Chinese text.
                $tn = 3;
                $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                $tn = 4;
                $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                $tn = 5;
                $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                $tn = 6;
                $noc += 2;
            } else {
                // Control byte or stray continuation byte: step over it
                // without counting any width.
                $tn = 1;
            }

            $n += $tn;

            if ($noc >= $length) {
                break;
            }
        }

        // The last character pushed the width past the limit: drop it.
        if ($noc > $length) {
            $n -= $tn;
        }

        // substr() clamps $n if the input ended in an incomplete sequence.
        $strcut = substr($string, 0, $n);
    } else {
        // Double-byte encodings: a byte > 127 is a lead byte followed by one
        // trail byte. Walk whole characters until $length bytes are covered,
        // never reading beyond the end of the (possibly shortened) string.
        $i = 0;
        while ($i < $length && $i < $total) {
            $i += (ord($string[$i]) > 127 && $i + 1 < $total) ? 2 : 1;
        }
        $strcut = substr($string, 0, $i);
    }

    // Only mark the text as truncated when bytes were really removed.
    return strlen($strcut) < $total ? $strcut . $dot : $strcut;
}