以下程序主要处理在UTF-8编码下PHP程序截取中文、日文、韩文等字符串,程序基本逻辑与之前写过的PHP处理GB2312下中文字符串的代码类似,主要不同在于UTF-8对于中文、日文字符的编码字节数不同,具体可以参考unicode 编码的说明。不多做解释,希望能对大家有所帮助!程序仅供参考,细节和性能方面不做推敲。转载请注明出处,多谢!
/**
 * Cut a UTF-8 byte string on character boundaries, treating HTML entities
 * as single characters so that neither a multi-byte sequence nor an entity
 * is ever split in the middle.
 *
 * powered by Smartpig
 * mailto:d.einstein@263.net
 *
 * Counting rules:
 *  - $magic = false: every character or entity counts as 1, and $len is the
 *    number of characters to keep.
 *  - $magic = true:  $len is measured in "wide" character cells. Multi-byte
 *    characters, numeric entities (&#NNN;) and the upper-case letters A-Z
 *    count as one cell; every other ASCII character and the named entities
 *    &lt; &gt; &amp; &quot; count as half a cell. If a multi-byte character
 *    or numeric entity would overshoot the limit by half a cell, it is
 *    dropped; a one-byte character that overshoots is kept.
 *
 * @param string     $title String to cut (assumed to be UTF-8).
 * @param int        $start Byte offset to start from. If it points into the
 *                          middle of a multi-byte character it is moved back
 *                          to that character's lead byte. A negative value is
 *                          counted from the end of the string.
 * @param int|string $len   Number of characters/cells to keep. "", null or 0
 *                          means "no limit" (legacy behaviour).
 * @param bool       $magic Selects the width-aware counting described above.
 *
 * @return string The selected part of $title ("" when nothing is selected).
 */
function FSubstr($title,$start,$len="",$magic=true)
{
    $title = (string)$title;
    $total = strlen($title);

    // Legacy contract: an empty length (historically anything == "") means "everything".
    if($len === "" || $len === null || $len === 0) $len = $total;

    // Normalise a negative offset instead of letting the scan loop walk through index 0.
    $start = (int)$start;
    if($start < 0) $start = max(0, $total + $start);

    // If $start lands on a continuation byte (10xxxxxx), walk back to the lead byte.
    if($start > 0 && $start < $total)
    {
        $first = ord($title[$start]);
        if($first >= 128 && $first < 192)
        {
            do
            {
                $start--;
            }
            while($start > 0 && ord($title[$start]) < 192);
        }
    }

    // Every character needs at least one byte, so a string this short always fits.
    if($total <= $len) return (string)substr($title,$start,(int)$len);

    $length = 0; // bytes selected so far
    $width  = 0; // half-cells used so far (magic mode)
    $chars  = 0; // characters counted so far (plain mode)
    $i      = $start;

    while($i < $total)
    {
        $step      = 1;     // bytes occupied by the current character
        $units     = 1;     // half-cells it occupies: 0 = not counted, 1 = narrow, 2 = wide
        $droppable = false; // whether it is removed again on a half-cell overshoot
        $byte      = ord($title[$i]);

        if($byte == 38) // "&": possibly the start of an HTML entity
        {
            // 10 bytes is enough for the longest decimal reference, "&#1114111;".
            $tail = substr($title,$i,10);
            if(strncmp($tail,"&lt;",4) == 0 || strncmp($tail,"&gt;",4) == 0)
            {
                $step = 4;
            }
            elseif(strncmp($tail,"&amp;",5) == 0)
            {
                $step = 5;
            }
            elseif(strncmp($tail,"&quot;",6) == 0)
            {
                $step = 6;
            }
            elseif(preg_match('/^&#\d+;?/',$tail,$match))
            {
                // Anchored so that only an entity starting at $i is consumed.
                $step      = strlen($match[0]);
                $units     = 2;
                $droppable = true;
            }
            // Otherwise it is a bare ampersand: one narrow one-byte character.
        }
        elseif($byte >= 192)
        {
            // Lead byte of a multi-byte sequence; the sequence length follows from its high bits.
            if($byte >= 252)     $step = 6;
            elseif($byte >= 248) $step = 5;
            elseif($byte >= 240) $step = 4;
            elseif($byte >= 224) $step = 3;
            else                 $step = 2;
            $units     = 2;
            $droppable = true;
        }
        elseif($byte >= 128)
        {
            // Stray continuation byte: keep the byte but do not count it as a character.
            $units = 0;
        }
        elseif($byte >= 65 && $byte <= 90)
        {
            // Upper-case A-Z is counted as wide, but is never dropped on overshoot.
            $units = 2;
        }

        // A truncated sequence at the end of the string must not run past it.
        if($step > $total - $i) $step = $total - $i;

        $length += $step;
        $i      += $step;
        if($units > 0) $chars++;

        if($magic)
        {
            $width += $units;
            if($width == $len*2) break;
            if($width == $len*2+1)
            {
                // Overshot by half a cell: give a wide character back.
                if($droppable) $length -= $step;
                break;
            }
        }
        else
        {
            if($chars == $len) break;
        }
    }

    return (string)substr($title,$start,$length);
}