http://www.nowamagic.net/php/php_SubMixedString.php
http://waiting.iteye.com/blog/581888
/**
 * Shorten a string to roughly $length units and append $dot when text was cut off.
 *
 * A string whose byte length is already <= $length is returned untouched. Otherwise
 * the characters listed in the str_replace() call below are stripped first, then:
 *
 * - charset 'utf-8' (case-insensitive): the budget is counted in display columns.
 *   Tab, newline and printable ASCII count 1; every multi-byte sequence counts 2.
 *   A sequence that would push the count past $length is dropped as a whole, so a
 *   character is never split.
 * - any other charset (default 'gbk'): the string is treated as a single/double-byte
 *   encoding. $length is a byte budget and a byte above 127 is always kept together
 *   with the byte that follows it (the result may therefore be one byte longer).
 *
 * @param string $string  Text to shorten.
 * @param int    $length  Budget: display columns for UTF-8, bytes otherwise.
 * @param string $dot     Suffix appended only when something was actually removed.
 * @param string $charset Encoding of $string; only 'utf-8' selects the UTF-8 logic.
 *
 * @return string The (possibly shortened) string.
 */
function get_word($string, $length, $dot = '..',$charset='gbk') {
    if (strlen($string) <= $length) {
        return $string;
    }

    // Strips spaces. NOTE(review): the remaining pairs map a character to itself;
    // they look like HTML entities (e.g. &amp;) that were decoded when this snippet
    // was copied -- confirm against the upstream source before relying on them.
    $string = str_replace(array(' ',' ', '&', '"', '<', '>'), array('','','&', '"', '<', '>'), $string);

    // The replacement above may have shortened the string, so measure it again.
    $len = strlen($string);
    $strcut = '';

    if (strtolower($charset) === 'utf-8') {
        // $n: byte offset, $tn: byte size of the last accepted character,
        // $noc: display columns consumed so far.
        $n = $tn = $noc = 0;
        while ($n < $len) {
            $t = ord($string[$n]);
            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $tn = 1; $n++; $noc++;
            } elseif (194 <= $t && $t <= 223) {
                $tn = 2; $n += 2; $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                // 0xE0-0xEF are all 3-byte lead bytes; 0xEF covers the fullwidth forms.
                $tn = 3; $n += 3; $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                $tn = 4; $n += 4; $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                $tn = 5; $n += 5; $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                $tn = 6; $n += 6; $noc += 2;
            } else {
                // Control, continuation or invalid byte: skip it without counting.
                $n++;
            }
            if ($noc >= $length) {
                break;
            }
        }
        // The last character overshot the budget: give it back in full.
        if ($noc > $length) {
            $n -= $tn;
        }
        $strcut = substr($string, 0, $n);
    } else {
        // Bound the loop by the real string length as well as the budget so no
        // offset beyond the end of the string is ever read.
        for ($i = 0; $i < $length && $i < $len; $i++) {
            if (ord($string[$i]) > 127 && $i + 1 < $len) {
                $strcut .= $string[$i].$string[++$i];
            } else {
                $strcut .= $string[$i];
            }
        }
    }

    // Only mark the result as truncated when bytes were really dropped.
    return strlen($strcut) < $len ? $strcut.$dot : $strcut;
}
// Demo: shorten a mixed Chinese/English sample to a budget of 12 using the
// default charset argument ('gbk'), then print the result.
// NOTE(review): if this file is saved as UTF-8 the call presumably needs
// 'utf-8' as the fourth argument -- confirm the file encoding.
$str = '欢迎 visit 简明 nowamagic';
$str_result = get_word($str, 12);
print $str_result;
/**
 * Cut a UTF-8 string, starting at its first byte, to an approximate display
 * width (not a character count). Intended for equal-width truncation of short
 * strings such as news titles.
 *
 * Works best on mixed Chinese/English text. The rendered width differs most
 * between an all-Chinese and an all-English result, and the gap grows with
 * the requested width.
 *
 * The string is returned unchanged when $width <= 0 or $width is not smaller
 * than the byte length of $str. The cut happens after the first character
 * that reaches or exceeds the width, so the result can be slightly wider than
 * requested.
 *
 * @param string $str   UTF-8 encoded text.
 * @param int    $width Optional. Target width.
 * @param string $end   Optional. Suffix appended when the text was cut.
 * @param float  $x3    Optional. Width coefficient (a decimal) of a 3-byte
 *                      (Chinese) character relative to the width of a Greek
 *                      letter. Chinese is usually rendered in a fixed SimSun
 *                      font; tune the value against the ASCII font in use, as
 *                      browsers may render differently. When not positive,
 *                      the global $CFG['cf3'] is used if it is positive,
 *                      otherwise 2.1.
 *
 * @return string
 * @author waiting
 * http://waiting.iteye.com
 */
function u8_title_substr($str, $width = 0, $end = '...', $x3 = 0) {
    global $CFG; // global configuration; $CFG['cf3'] may hold the default $x3

    $len = strlen($str);
    if ($width <= 0 || $width >= $len) {
        return $str;
    }

    // Widths are tracked in tenths so the per-character weights stay integers.
    $width *= 10;

    // Width weight per character class, keyed by UTF-8 sequence length.
    $x1 = 11; // ASCII
    $x2 = 16; // 2-byte sequences
    if ($x3 > 0) {
        $x3 *= 10;
    } elseif (isset($CFG['cf3']) && $CFG['cf3'] > 0) {
        $x3 = $CFG['cf3'] * 10;
    } else {
        $x3 = 21;
    }
    $x4 = $x3; // 4-byte sequences share the 3-byte weight

    $w = 0;  // accumulated width
    $e = ''; // suffix; stays empty unless the loop stops early
    // http://zh.wikipedia.org/zh-cn/UTF8
    for ($i = 0; $i < $len; $i++) {
        if ($w >= $width) {
            // Budget used up while bytes remain: the text is being cut here.
            $e = $end;
            break;
        }
        $c = ord($str[$i]);
        if ($c <= 127) {
            $w += $x1;
        } elseif ($c >= 192 && $c <= 223) { // 2-byte lead
            $w += $x2;
            $i += 1;
        } elseif ($c >= 224 && $c <= 239) { // 3-byte lead
            $w += $x3;
            $i += 2;
        } elseif ($c >= 240 && $c <= 247) { // 4-byte lead
            $w += $x4;
            $i += 3;
        }
        // Any other byte (stray continuation, 0xF8+) adds no width.
    }

    // $i may run past the end on a truncated sequence; substr() clamps it.
    return substr($str, 0, $i) . $e;
}