计算 UTF-8 编码字符串的字符个数,支持所有语言的文字。
汇编版(32 位 ARM):注重时间效率
//
// LenUTF8.s
// TestASM
//
// Created by doorxp on 7/5/13.
// Copyright (c) 2013 doorxp. All rights reserved.
//
//extern int lenutf(const char *src, bool noAsciiAs2);
//
// Counts the characters of a NUL-terminated UTF-8 string.
//
// @src        UTF-8 string to measure; a NULL pointer yields 0.
// @noAsciiAs2 true:  every multi-byte (non-ASCII) character counts as 2,
//             false: every character counts as 1.
//
// Returns the count in r0.
//
// A lead byte of the form 110xxxxx .. 1111110x announces 1 .. 5 continuation
// bytes; those bytes are skipped without being inspected. Every other byte
// (ASCII, a stray continuation byte, 0xFE, 0xFF) counts as one character.
// The skip stops at the NUL terminator, so a truncated sequence at the end of
// the string can no longer push the cursor past the end of the buffer.
//
// Register use (all caller-saved, no stack needed):
//   r0  = read cursor, then the result
//   r1  = noAsciiAs2
//   r2  = running character count
//   r3  = byte just loaded
//   r12 = continuation bytes still to skip
//
// NOTE(review): the conditionally executed ADDs assume this file is assembled
// in ARM (A32) state, exactly as the previous version did - confirm the build
// settings.
        .text
        .globl  _lenutf
        .align  2
_lenutf:
        mov     r2, #0                  // count = 0
        cmp     r0, #0
        beq     Llenutf_done            // NULL string -> 0

Llenutf_next:
        ldrb    r3, [r0], #1            // r3 = *src++
        cmp     r3, #0
        beq     Llenutf_done            // terminator reached
        add     r2, r2, #1              // every sequence counts at least once
        cmp     r3, #0xC0
        blo     Llenutf_next            // ASCII or stray continuation byte
        cmp     r3, #0xFE
        bhs     Llenutf_next            // 0xFE / 0xFF never start a sequence

        // r3 is a lead byte in 0xC0 .. 0xFD.
        cmp     r1, #0
        addne   r2, r2, #1              // non-ASCII counts twice when requested

        // r12 = number of continuation bytes implied by the lead byte.
        mov     r12, #1                 // 110xxxxx
        cmp     r3, #0xE0
        addhs   r12, r12, #1            // 1110xxxx
        cmp     r3, #0xF0
        addhs   r12, r12, #1            // 11110xxx
        cmp     r3, #0xF8
        addhs   r12, r12, #1            // 111110xx
        cmp     r3, #0xFC
        addhs   r12, r12, #1            // 1111110x

Llenutf_skip:
        ldrb    r3, [r0], #1            // consume one continuation byte
        cmp     r3, #0
        beq     Llenutf_done            // string ended inside the sequence
        subs    r12, r12, #1
        bne     Llenutf_skip
        b       Llenutf_next

Llenutf_done:
        mov     r0, r2                  // return the count
        bx      lr                      // interworking-safe return
C++ 版:
// Returns the number of continuation bytes announced by a UTF-8 lead byte
// (1..5 for the patterns 110xxxxx .. 1111110x), or 0 when `lead` is not a
// lead byte (ASCII, a continuation byte, 0xFE or 0xFF).
static size_t utf8TrailingBytes(unsigned char lead)
{
    if ((lead & 0xE0) == 0xC0) return 1;
    if ((lead & 0xF0) == 0xE0) return 2;
    if ((lead & 0xF8) == 0xF0) return 3;
    if ((lead & 0xFC) == 0xF8) return 4;
    if ((lead & 0xFE) == 0xFC) return 5;
    return 0;
}

// Counts the characters in the UTF-8 encoded string `src`.
//
// src          UTF-8 bytes; embedded NUL bytes are counted like any other
//              single-byte character because the length comes from size().
// noAsciiHas2  true:  every multi-byte (non-ASCII) character counts as 2,
//              false: every character counts as 1.
//
// Returns the accumulated count. Continuation bytes that follow a lead byte
// are skipped without being validated; a byte that is not a lead byte counts
// as one character on its own.
size_t lengthOf(const string& src, bool noAsciiHas2)
{
    const size_t total = src.size();
    size_t count = 0;

    // size_t index: avoids the signed/unsigned comparison against size().
    for (size_t pos = 0; pos < total; ++pos)
    {
        const size_t trailing =
            utf8TrailingBytes(static_cast<unsigned char>(src[pos]));

        ++count;
        if (trailing != 0)
        {
            if (noAsciiHas2)
            {
                ++count;
            }
            // Jump over the continuation bytes; overshooting the end simply
            // terminates the loop.
            pos += trailing;
        }
    }
    return count;
}
简化版代码(只统计字符个数,不支持"非 ASCII 字符按 2 计"的选项):
// Counts the characters in a NUL-terminated UTF-8 string by counting every
// byte that is not a continuation byte (10xxxxxx).
//
// pszText  string to measure; a null pointer is treated as an empty string.
//
// Returns the number of characters.
int calcCharCount(const char * pszText)
{
    if (!pszText)
    {
        return 0;
    }

    int n = 0;
    for (; *pszText; ++pszText)
    {
        // Only the first byte of each sequence contributes to the count.
        if (0x80 != (0xC0 & *pszText))
        {
            ++n;
        }
    }
    return n;
}