1. 截取GB2312中文字符串
<?php
/**
 * Cut a slice out of a GB2312-encoded string without splitting a
 * double-byte character.
 *
 * Offsets are counted in bytes. Any byte greater than 0xA0 is treated as the
 * first byte of a two-byte character, and such a character is always copied
 * (or skipped) as a whole:
 *  - a character whose first byte lies inside the window is copied entirely,
 *    so the result can be one byte longer than $len;
 *  - a character that begins before $start is skipped entirely, even if its
 *    second byte lies inside the window.
 *
 * @param string $str   GB2312-encoded input.
 * @param int    $start Byte offset at which the slice begins.
 * @param int    $len   Maximum number of bytes to take from $start.
 * @return string The slice; empty when the window is empty or lies past the end.
 */
function mysubstr($str, $start, $len)
{
    $str = (string) $str;
    $start = (int) $start;
    // Never walk past the end of the input.
    $stop = min($start + (int) $len, strlen($str));

    $slice = '';
    $pos = 0;
    // The scan has to begin at byte 0 so that character boundaries are known
    // by the time $start is reached.
    while ($pos < $stop) {
        $width = ord($str[$pos]) > 0xa0 ? 2 : 1;
        if ($pos >= $start) {
            $slice .= substr($str, $pos, $width);
        }
        $pos += $width;
    }

    return $slice;
}
?>
2. 截取utf8编码的多字节字符串
<?php
/**
 * Cut a slice out of a UTF-8 string, counting characters instead of bytes.
 *
 * The input is split into units with a byte-level pattern: an ASCII byte, or
 * a lead byte (0xC0-0xFF) followed by its continuation bytes (0x80-0xBF).
 * Any other stray byte counts as a unit of its own, so malformed input is
 * never silently dropped. The units are then sliced with array_slice(), which
 * means negative $from / $len behave as they do for array_slice().
 *
 * @param string   $str  UTF-8 encoded input.
 * @param int      $from Number of characters to skip.
 * @param int|null $len  Maximum number of characters to return; null means
 *                       "up to the end of the string".
 * @return string The selected characters, or '' when $from is past the end.
 */
function utf8Substr($str, $from, $len)
{
    preg_match_all(
        '/[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+|[\x80-\xFF]/',
        (string) $str,
        $units
    );

    // Slicing an array avoids building a {0,N} quantifier from caller input,
    // which fails for N > 65535 and for non-integer values.
    $length = $len === null ? null : (int) $len;

    return implode('', array_slice($units[0], (int) $from, $length));
}
?>
3. UTF-8、GB2312都支持的汉字截取函数
<?php
/**
 * Character-aware truncation for UTF-8 and GB2312/GBK strings.
 *
 * Usage: cut_str(string, length, start, encoding)
 *
 * The string is split into characters according to $code, $sublen characters
 * are taken starting at character index $start, and '...' is appended when
 * characters remain after the returned slice.
 *
 * NOTE(review): the truncation marker literal was blank in the pasted source;
 * '...' is assumed here - confirm against the intended output.
 *
 * @param string $string Input text, encoded as named by $code.
 * @param int    $sublen Number of characters to return.
 * @param int    $start  Index of the first character to return (default 0).
 * @param string $code   'UTF-8' (default, compared case-insensitively, 'UTF8'
 *                       also accepted); any other value selects double-byte
 *                       GB2312/GBK splitting.
 * @return string The slice, followed by '...' if the input continues past it.
 */
function cut_str($string, $sublen, $start = 0, $code = 'UTF-8')
{
    $sublen = (int) $sublen;
    $start = (int) $start;
    $encoding = strtoupper(trim((string) $code));

    if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
        // One alternative per well-formed UTF-8 sequence length (1 to 4 bytes);
        // bytes that fit none of them are not matched and therefore dropped.
        $pattern = '/[\x01-\x7f]'
            . '|[\xc2-\xdf][\x80-\xbf]'
            . '|\xe0[\xa0-\xbf][\x80-\xbf]'
            . '|[\xe1-\xef][\x80-\xbf][\x80-\xbf]'
            . '|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]'
            . '|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/';
    } else {
        // A lead byte 0x81-0xFE plus its trail byte is one character; every
        // other byte stands alone. Counting characters here (rather than a
        // byte window) keeps $sublen and $start consistent with the UTF-8 branch.
        $pattern = '/[\x81-\xfe][\x40-\xfe]|[\x00-\xff]/';
    }

    preg_match_all($pattern, (string) $string, $matches);
    $chars = $matches[0];

    $piece = implode('', array_slice($chars, $start, $sublen));

    // Mark the cut only when something is actually left after the slice.
    if (count($chars) - $start > $sublen) {
        $piece .= '...';
    }

    return $piece;
}

// Demo. The literal is split as GB2312 only if this file itself is saved in
// GB2312; in a UTF-8 encoded file pass 'UTF-8' as the last argument instead.
$str = 'abcd需要截取的字符串';
echo cut_str($str, 8, 0, 'gb2312');
?>
4. BugFree 的字符截取函数
<?php
/**
 * @package BugFree
 * @version $Id: FunctionsMain.inc.php,v 1.32 2005/09/24 11:38:37 wwccss Exp $
 *
 * Return the leading part of a UTF-8 string (an enhanced substr()).
 *
 * $Length is a byte budget. Characters are copied whole for as long as the
 * next character starts before byte $Length, so a multi-byte character that
 * begins inside the budget is kept intact and the result may exceed $Length
 * by up to three bytes. The width of each character is taken from its lead
 * byte: >= 0xF0 four bytes, >= 0xE0 three, >= 0xC0 two, otherwise one.
 *
 * NOTE(review): the marker literal was blank in the pasted source; '...' is
 * assumed here - confirm against the intended output.
 *
 * @author Chunsheng Wang <wwccss@263.net>
 * @param string $String the string to cut.
 * @param int    $Length the byte budget for the returned string.
 * @param bool   $Append whether to append "..." after a cut: false|true
 * @return string the cut string; $String unchanged when it already fits.
 */
function sysSubStr($String, $Length, $Append = false)
{
    $String = (string) $String;
    $Length = (int) $Length;

    if (strlen($String) <= $Length) {
        return $String;
    }

    // Starts as an empty string so a zero or negative budget yields ''
    // instead of failing on an undefined variable.
    $head = '';
    $offset = 0;
    while ($offset < $Length) {
        $lead = ord($String[$offset]);
        if ($lead >= 0xF0) {
            $width = 4;
        } elseif ($lead >= 0xE0) {
            $width = 3;
        } elseif ($lead >= 0xC0) {
            $width = 2;
        } else {
            $width = 1;
        }
        $head .= substr($String, $offset, $width);
        $offset += $width;
    }

    if ($Append) {
        $head .= '...';
    }

    return $head;
}

$String = 'CodeBit.cn -- 简单、精彩、通用';
$Length = 18;
$Append = false;
echo sysSubStr($String, $Length, $Append);
?>
UTF-8编码的字符由1~4个字节组成, 具体数目可以由第一个字节判断出来。(首字节大于等于240的是4字节字符;下面只列出常见的1~3字节情况)
第一个字节大于等于224且小于240的,它与它之后的2个字节一起组成一个UTF-8字符
第一个字节大于等于192且小于224的,它与它之后的1个字节组成一个UTF-8字符
否则第一个字节本身就是一个英文字符(包括数字和一小部分标点符号)。
原文地址http://www.ccvita.com/27.html