<?php
/**
 * Truncate a UTF-8 or GBK string to a display width without splitting a
 * multi-byte character, then append a suffix.
 *
 * Width is counted in columns: in the UTF-8 branch a printable ASCII byte
 * (plus TAB and LF) counts as 1 and every multi-byte character counts as 2.
 * So $length is neither a number of Chinese characters nor a number of
 * bytes.
 *
 * If the byte length of $string is already <= $length, the string is
 * returned untouched and no suffix is appended.
 *
 * The characters & " < > are temporarily wrapped in chr(1) markers while the
 * string is cut. If the cut lands inside such a wrapped token, the partial
 * token is dropped from the result so that no marker byte is ever returned.
 * NOTE(review): this guard scheme looks designed for multi-character HTML
 * entities rather than single characters - confirm the intended token list.
 *
 * @param string $string  String to truncate.
 * @param int    $length  Maximum display width to keep (see above).
 * @param string $charset 'utf-8' (case-insensitive) selects the UTF-8 walker;
 *                        any other value is processed as a double-byte
 *                        charset such as GBK. Defaults to 'utf-8'.
 * @param string $dot     Suffix appended to a truncated result. Defaults to '...'.
 *
 * @return string The original string, or the truncated string followed by $dot.
 */
function cutstr($string, $length, $charset = 'utf-8', $dot = '...')
{
    // Short enough already (measured in bytes): nothing to cut, no suffix.
    if (strlen($string) <= $length) {
        return $string;
    }

    $pre = chr(1);
    $end = chr(1);

    $plainTokens = array('&', '"', '<', '>');
    $guardedTokens = array(
        $pre . '&' . $end,
        $pre . '"' . $end,
        $pre . '<' . $end,
        $pre . '>' . $end,
    );

    // Wrap the special characters so a half-cut token can be detected later.
    $string = str_replace($plainTokens, $guardedTokens, $string);

    $strcut = '';

    if (strtolower($charset) === 'utf-8') {
        $total = strlen($string);

        // $n   : byte offset of the next unread byte
        // $tn  : byte length of the last character that contributed width
        // $noc : display width accumulated so far
        $n = $tn = $noc = 0;

        while ($n < $total) {
            $t = ord($string[$n]);

            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                // TAB, LF or printable ASCII: one byte, one column.
                $tn = 1;
                $n++;
                $noc++;
            } elseif (194 <= $t && $t <= 223) {
                // Lead byte of a 2-byte sequence.
                $tn = 2;
                $n += 2;
                $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                // Lead byte of a 3-byte sequence (most CJK characters).
                $tn = 3;
                $n += 3;
                $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                // Lead byte of a 4-byte sequence.
                $tn = 4;
                $n += 4;
                $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                // Lead byte of a legacy 5-byte sequence.
                $tn = 5;
                $n += 5;
                $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                // Lead byte of a legacy 6-byte sequence.
                $tn = 6;
                $n += 6;
                $noc += 2;
            } else {
                // Any other byte (including the chr(1) markers) is skipped
                // without consuming width.
                $n++;
            }

            if ($noc >= $length) {
                break;
            }
        }

        // A 2-column character overshot the limit by one: give it back.
        if ($noc > $length) {
            $n -= $tn;
        }

        $strcut = substr($string, 0, $n);
    } else {
        // Double-byte charset: a byte above 127 is taken together with the
        // byte that follows it so the pair is never split.
        for ($i = 0; $i < $length; $i++) {
            $byte = $string[$i];
            if (ord($byte) > 127) {
                $strcut .= $byte . $string[++$i];
            } else {
                $strcut .= $byte;
            }
        }
    }

    // Unwrap every token that survived the cut intact.
    $strcut = str_replace($guardedTokens, $plainTokens, $strcut);

    // Any marker still present belongs to a token that was cut in half;
    // drop that partial token. (This previously inspected an undefined
    // variable, so the cleanup never ran and marker bytes leaked out.)
    $pos = strrpos($strcut, $pre);
    if ($pos !== false) {
        $strcut = substr($strcut, 0, $pos);
    }

    return $strcut . $dot;
}
/**
 * Demo: send a UTF-8 content-type header, then print the sample string
 * truncated with cutstr() at three different limits, each followed by an
 * HTML line break.
 */
header('Content-type:text/html; charset=utf-8');

$str = '我爱中华1213我爱中华人民共和国';

// The limit passed to cutstr() is a display width, not a count of Chinese
// characters or of ASCII characters.
echo cutstr($str, 6), '<br />',
    cutstr($str, 10), '<br />',
    cutstr($str, 14), '<br />';
?>