PHP截取各种编码的汉字字符串
2010-11-06
虽然PHP有现成的截取字符串函数substr(),但是这个函数按字节截取,遇到占多个字节的汉字时可能把一个字符从中间截断而产生乱码,要正确截取汉字字符串还需要我们自己编写相应的函数。汉字有多种编码,比如GB2312、UTF-8等,不同编码下每个汉字所占的字节数不同,截取时需要按编码区别处理,下面给出几个解决方案。
截取GB2312中文字符串
01 | <?php |
/**
 * Extract part of a double-byte (GB2312-style) string without splitting a
 * two-byte character.
 *
 * Offsets are measured in bytes. Any byte above 0xA0 is treated as the lead
 * byte of a two-byte character (assumes GB2312 input; other encodings are not
 * detected). The scan always begins at byte 0 so that it stays aligned on
 * character boundaries; only characters whose first byte lies in
 * [$start, $start + $len) are copied.
 *
 * @param string $str   Input string.
 * @param int    $start Byte offset of the first character to return. If it
 *                      falls on the second byte of a character, output starts
 *                      with the next character.
 * @param int    $len   Byte budget. The result may be one byte longer when the
 *                      last selected character is double-byte.
 * @return string The selected characters, or '' when nothing is selected.
 */
function mysubstr($str, $start, $len)
{
    $str    = (string) $str;
    $total  = strlen($str);
    // Never scan past the real end of the string.
    $stop   = min($start + $len, $total);
    $result = '';

    $i = 0;
    while ($i < $stop) {
        $width = ord($str[$i]) > 0xa0 ? 2 : 1;
        // Characters before $start are walked over (to keep alignment) but not copied.
        if ($i >= $start) {
            $result .= substr($str, $i, $width);
        }
        $i += $width;
    }

    return $result;
}
15 | ?> |
截取utf8编码的多字节字符串
1 | <?php |
/**
 * Extract a run of characters from a UTF-8 string, counting characters
 * rather than bytes.
 *
 * A "character" is either a single byte 0x00-0x7F, or a lead byte 0xC0-0xFF
 * followed by one or more continuation bytes 0x80-0xBF. Scanning stops at the
 * first byte that fits neither form (a stray continuation byte, or a lead byte
 * with no continuation), so malformed input yields a shorter result rather
 * than an error.
 *
 * The string is scanned byte by byte instead of building a regular expression
 * from $from and $len: PCRE rejects `{0,N}` quantifiers above 65535, and
 * concatenating unvalidated values into a pattern lets the caller alter it.
 *
 * @param string $str  UTF-8 input.
 * @param int    $from Number of characters to skip (negative values are treated as 0).
 * @param int    $len  Maximum number of characters to return (negative values are treated as 0).
 * @return string The selected characters, or '' when nothing is selected.
 */
function utf8Substr($str, $from, $len)
{
    $str   = (string) $str;
    $skip  = max(0, (int) $from);
    $take  = max(0, (int) $len);
    $bytes = strlen($str);

    $pos   = 0;              // byte offset of the next unread character
    $begin = 0;              // byte offset where the returned slice starts
    $count = 0;              // characters consumed so far
    $stop  = $skip + $take;  // total characters to consume

    while ($count < $stop && $pos < $bytes) {
        $lead = ord($str[$pos]);
        $next = $pos + 1;

        if ($lead >= 0xC0) {
            // Multi-byte character: swallow every continuation byte that follows.
            while ($next < $bytes && (ord($str[$next]) & 0xC0) === 0x80) {
                $next++;
            }
            if ($next === $pos + 1) {
                break; // lead byte without any continuation byte
            }
        } elseif ($lead >= 0x80) {
            break; // stray continuation byte
        }

        $pos = $next;
        $count++;
        if ($count === $skip) {
            $begin = $pos;
        }
    }

    // The input ended (or became malformed) before $skip characters were passed.
    if ($count < $skip) {
        $begin = $pos;
    }

    if ($pos === $begin) {
        return '';
    }

    return substr($str, $begin, $pos - $begin);
}
9 | ?> |
UTF-8、GB2312都支持的汉字截取函数
01 | <?php |
/**
 * Cut a piece out of a UTF-8 or GB2312/GBK string and append "..." when text
 * remains after the cut.
 *
 * Usage: cut_str(string, length, start, encoding)
 *
 * UTF-8 mode: $start and $sublen count characters as matched by the pattern
 * below; bytes the pattern does not match (NUL, malformed sequences) are
 * dropped before slicing.
 *
 * Double-byte mode (any other $code): $start and $sublen are doubled into byte
 * offsets, so a two-byte character costs one unit and an ASCII character costs
 * half a unit. A character is never split; the result may run one byte past the
 * budget when the last selected character is double-byte.
 *
 * @param string $string Input text, encoded as named by $code.
 * @param int    $sublen Number of units to keep.
 * @param int    $start  Number of units to skip first. Defaults to 0.
 * @param string $code   'UTF-8' (default, compared case-insensitively, 'UTF8'
 *                       also accepted); anything else selects double-byte mode.
 * @return string The selected text, followed by "..." if input remains after it.
 */
function cut_str($string, $sublen, $start = 0, $code = 'UTF-8')
{
    if (strcasecmp($code, 'UTF-8') === 0 || strcasecmp($code, 'UTF8') === 0) {
        $pa = "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/";
        preg_match_all($pa, $string, $t_string);

        $piece = join('', array_slice($t_string[0], $start, $sublen));
        if (count($t_string[0]) - $start > $sublen) {
            $piece .= "...";
        }
        return $piece;
    }

    $string    = (string) $string;
    $startByte = $start * 2;
    $endByte   = $startByte + $sublen * 2;
    $total     = strlen($string);
    $piece     = '';

    // Walk from byte 0 so the scan stays aligned on character boundaries.
    $i = 0;
    while ($i < $total && $i < $endByte) {
        // GBK lead bytes start at 0x81; anything above 0x80 opens a two-byte character.
        $width = ord($string[$i]) > 0x80 ? 2 : 1;
        if ($i >= $startByte) {
            $piece .= substr($string, $i, $width);
        }
        $i += $width;
    }

    // Only mark truncation when input is left after the cut point, matching
    // the UTF-8 branch (skipping a prefix alone does not add an ellipsis).
    if ($i < $total) {
        $piece .= "...";
    }
    return $piece;
}

// NOTE: the bytes of this literal depend on the encoding this file is saved
// in; that encoding must match the $code argument passed below.
$str = "abcd需要截取的字符串";
echo cut_str($str, 8, 0, 'gb2312');
40 | ?> |
BugFree 的字符截取函数
01 | <?php |
/**
 * @package BugFree
 * @version $Id: FunctionsMain.inc.php,v 1.32 2005/09/24 11:38:37 wwccss Exp $
 *
 *
 * Return the leading part of a UTF-8 string (an enhanced substr()) without
 * splitting a multi-byte character.
 *
 * Lengths are measured in bytes. A string that already fits in $Length bytes
 * is returned unchanged and never gets the "..." suffix. Otherwise whole
 * characters are copied while the running byte offset is below $Length, so the
 * result can exceed $Length by up to three bytes when the last copied
 * character is multi-byte.
 *
 * @author Chunsheng Wang <wwccss@263.net>
 * @param string $String the string to cut.
 * @param int    $Length the byte budget of the returned string.
 * @param bool   $Append whether to append "..." to a cut string: false|true
 * @return string the cut string.
 */
function sysSubStr($String, $Length, $Append = false)
{
    $String = (string) $String;
    $Length = (int) $Length;

    if (strlen($String) <= $Length) {
        return $String;
    }

    // Start from an empty string so a zero or negative $Length yields ''
    // instead of operating on an undefined variable.
    $StringLast = '';
    $I = 0;
    while ($I < $Length) {
        $Lead = ord($String[$I]);
        if ($Lead >= 0xF0) {
            $Width = 4; // four-byte sequence (e.g. emoji)
        } elseif ($Lead >= 0xE0) {
            $Width = 3;
        } elseif ($Lead >= 0xC0) {
            $Width = 2;
        } else {
            $Width = 1; // ASCII, or a stray continuation byte copied as-is
        }
        $StringLast .= substr($String, $I, $Width);
        $I += $Width;
    }

    if ($Append) {
        $StringLast .= "...";
    }
    return $StringLast;
}

$String = "www.at0915.cn";
$Length = "18";
$Append = false;
echo sysSubStr($String, $Length, $Append);
54 | ?> |