应群友 cjc 的要求，改写了一个 PosChar（单字符查找）的快速算法。实测 CGPosChar 比系统自带的 Pos 快 40%–60%，CGPosCharSSE 比系统自带的 Pos 快 550%–1000%。最新版本增加了倒序查找（Order = False 时从后向前查找）。
SSE优化算法:
function CGPosCharSSE(SubChar: Char; SrcString: PChar; Len: Integer; Order: Bool = True): Integer;
// Searches the first Len bytes at SrcString for the byte SubChar using SSE2
// 16-byte compares.
//
// Calling convention (Delphi register): SubChar -> AL, SrcString -> EDX,
// Len -> ECX, Order -> stack (read by name below).
//   Order <> 0 : forward search, returns the 1-based index of the first match.
//   Order =  0 : reverse search, returns the 1-based index of the last match.
// Returns 0 when there is no match, SrcString is nil, or Len <= 0.
//
// The comparison is byte-wise, so this assumes a single-byte Char/PChar
// (AnsiChar) -- TODO confirm for Unicode compilers.
//
// Memory-safety invariants:
//   * unaligned 16-byte loads are only issued when Len >= 16, so they lie
//     entirely inside the buffer;
//   * every aligned 16-byte load covers at least one in-range byte, so it never
//     touches a 16-byte line the buffer does not already occupy;
//   * buffers shorter than 16 bytes are scanned byte by byte.
// Clobbers: EAX, ECX-independent scratch ESI (saved/restored), XMM0, XMM1, flags.
asm
        push    esi
        test    edx, edx
        jz      @NotFound                 // nil buffer
        test    ecx, ecx
        jle     @NotFound                 // empty or negative length
        mov     esi, Order                // esi = Order (0 selects reverse search)
        cmp     ecx, $10
        jl      @Short                    // too short for a 16-byte load

        // xmm1 = SubChar replicated into all 16 bytes
        mov     ah, al
        movd    xmm1, eax
        pshuflw xmm1, xmm1, 0
        pshufd  xmm1, xmm1, 0
        test    esi, esi
        jz      @Reverse

        {--------------- forward search, Len >= 16 ---------------}
        xor     esi, esi                  // esi = offset of the block being tested
        movups  xmm0, [edx]               // bytes 0..15, all in range
        pcmpeqb xmm0, xmm1
        pmovmskb eax, xmm0
        test    eax, eax
        jnz     @OrderFound
        mov     esi, edx
        and     esi, $F
        neg     esi
        add     esi, $10                  // esi = offset of first 16-aligned address after SrcString (1..16)
@OrderCmp:
        cmp     esi, ecx
        jge     @NotFound                 // block would start at or past the end
        movaps  xmm0, [edx+esi]           // aligned; contains in-range byte at offset esi
        pcmpeqb xmm0, xmm1
        pmovmskb eax, xmm0
        test    eax, eax
        jnz     @OrderFound
        add     esi, $10
        jmp     @OrderCmp
@OrderFound:
        bsf     eax, eax                  // lowest matching byte in the block
        lea     eax, [eax+esi+1]          // 1-based position
        cmp     eax, ecx
        jg      @NotFound                 // first match lies past the end => no in-range match
        jmp     @Exit

        {--------------- reverse search, Len >= 16 ---------------}
@Reverse:
        lea     esi, [ecx-$10]            // last 16 bytes of the buffer, all in range
        movups  xmm0, [edx+esi]
        pcmpeqb xmm0, xmm1
        pmovmskb eax, xmm0
        test    eax, eax
        jnz     @ReverseFound
        // Continue with aligned blocks strictly below the end. The aligned block
        // that contains the end is skipped: its in-range bytes were covered by the
        // load above and its remaining bytes are outside the buffer.
        lea     eax, [edx+ecx]
        and     eax, $F                   // eax = misalignment of the end address
        sub     esi, eax                  // edx+esi = align_down(end) - 16, esi >= -15
@ReverseCmp:
        movaps  xmm0, [edx+esi]           // aligned; overlaps the buffer because esi > -16
        pcmpeqb xmm0, xmm1
        pmovmskb eax, xmm0
        test    eax, eax
        jnz     @ReverseFound
        sub     esi, $10
        cmp     esi, -$10
        jg      @ReverseCmp               // loop while the block still overlaps offset 0
        jmp     @NotFound
@ReverseFound:
        bsr     eax, eax                  // highest matching byte in the block
        lea     eax, [eax+esi+1]          // 1-based position, never above Len here
        cmp     eax, 1
        jl      @NotFound                 // highest match lies before the buffer start
        jmp     @Exit

        {--------------- Len < 16: byte-wise scan ---------------}
@Short:
        test    esi, esi
        jz      @ShortReverse
        xor     esi, esi                  // esi = 0-based index
@ShortOrderCmp:
        cmp     al, [edx+esi]
        je      @ShortOrderFound
        add     esi, 1
        cmp     esi, ecx
        jl      @ShortOrderCmp
        jmp     @NotFound
@ShortOrderFound:
        lea     eax, [esi+1]
        jmp     @Exit
@ShortReverse:
        mov     esi, ecx                  // esi = 1-based index, counts down to 1
@ShortReverseCmp:
        cmp     al, [edx+esi-1]
        je      @ShortReverseFound
        sub     esi, 1
        jnz     @ShortReverseCmp
        jmp     @NotFound
@ShortReverseFound:
        mov     eax, esi
        jmp     @Exit

@NotFound:
        xor     eax, eax
@Exit:
        pop     esi
end;
function CGPosChar(SubChar: Char; SrcString: PChar; Len: Integer; Order: Bool = True): Integer;
// Searches the first Len bytes at SrcString for the byte SubChar, testing four
// bytes at a time with the "does this dword contain a zero byte" bit trick.
//
// Calling convention (Delphi register): SubChar -> AL, SrcString -> EDX,
// Len -> ECX, Order -> stack (read by name below).
//   Order <> 0 : forward search, returns the 1-based index of the first match.
//   Order =  0 : reverse search, returns the 1-based index of the last match.
// Returns 0 when there is no match, SrcString is nil, or Len <= 0.
//
// The comparison is byte-wise, so this assumes a single-byte Char/PChar
// (AnsiChar) -- TODO confirm for Unicode compilers.
//
// A dword is only loaded when all four of its bytes are inside the buffer; the
// remaining 0..3 bytes are compared one at a time, so no byte outside
// [SrcString, SrcString+Len) is ever read.
// Register roles: EBX = SubChar replicated x4 (BL = SubChar), ESI = cursor,
// EDI = scratch. EBX/ESI/EDI are saved and restored.
asm
        push    esi
        push    edi
        push    ebx
        test    edx, edx
        jz      @NotFound                 // nil buffer
        test    ecx, ecx
        jle     @NotFound                 // empty or negative length
        movzx   ebx, al
        imul    ebx, ebx, $01010101       // ebx = SubChar in every byte
        mov     eax, Order
        test    eax, eax
        jz      @Reverse                  // Order = False -> reverse search

        {--------------- forward search ---------------}
        xor     esi, esi                  // esi = 0-based offset of next unread byte
@OrderCmp:
        mov     edi, ecx
        sub     edi, esi                  // edi = bytes remaining
        cmp     edi, 4
        jl      @OrderTail
        mov     eax, [edx+esi]
        xor     eax, ebx                  // matching bytes become $00
        lea     edi, [eax-$01010101]
        not     eax
        and     eax, edi
        and     eax, $80808080            // bit 7 set for zero bytes; lowest set flag is exact
        jnz     @OrderFound
        add     esi, 4
        jmp     @OrderCmp
@OrderFound:
        bsf     eax, eax                  // lowest flag = first matching byte
        shr     eax, 3                    // bit index -> byte index
        lea     eax, [eax+esi+1]          // 1-based position
        jmp     @Exit
@OrderTail:
        cmp     esi, ecx
        jge     @NotFound
        cmp     bl, [edx+esi]
        je      @OrderTailFound
        add     esi, 1
        jmp     @OrderTail
@OrderTailFound:
        lea     eax, [esi+1]
        jmp     @Exit

        {--------------- reverse search ---------------}
@Reverse:
        mov     esi, ecx                  // esi = count of leading bytes not yet examined
@ReverseCmp:
        cmp     esi, 4
        jl      @ReverseTail
        mov     eax, [edx+esi-4]          // bytes esi-4 .. esi-1, all in range
        xor     eax, ebx
        lea     edi, [eax-$01010101]
        not     eax
        and     eax, edi
        and     eax, $80808080
        jnz     @ReverseFound
        sub     esi, 4
        jmp     @ReverseCmp
@ReverseFound:
        // Flags above the lowest zero byte can be spurious (borrow propagation),
        // so locate the highest real match with explicit byte compares.
        mov     eax, esi
        cmp     bl, [edx+esi-1]
        je      @Exit
        lea     eax, [esi-1]
        cmp     bl, [edx+esi-2]
        je      @Exit
        lea     eax, [esi-2]
        cmp     bl, [edx+esi-3]
        je      @Exit
        lea     eax, [esi-3]
        cmp     bl, [edx+esi-4]
        je      @Exit
        jmp     @NotFound
@ReverseTail:
        test    esi, esi
        jz      @NotFound
        cmp     bl, [edx+esi-1]
        je      @ReverseTailFound
        sub     esi, 1
        jmp     @ReverseTail
@ReverseTailFound:
        mov     eax, esi
        jmp     @Exit

@NotFound:
        xor     eax, eax
@Exit:
        pop     ebx
        pop     edi
        pop     esi
end;
常规优化算法:
function CGPosChar(SubChar: Char; SrcString: PChar; Len: Integer; Order: Bool = True): Integer;
// Searches the first Len bytes at SrcString for the byte SubChar, testing four
// bytes at a time with the "does this dword contain a zero byte" bit trick.
//
// Calling convention (Delphi register): SubChar -> AL, SrcString -> EDX,
// Len -> ECX, Order -> stack (read by name below).
//   Order <> 0 : forward search, returns the 1-based index of the first match.
//   Order =  0 : reverse search, returns the 1-based index of the last match.
// Returns 0 when there is no match, SrcString is nil, or Len <= 0.
//
// The comparison is byte-wise, so this assumes a single-byte Char/PChar
// (AnsiChar) -- TODO confirm for Unicode compilers.
//
// A dword is only loaded when all four of its bytes are inside the buffer; the
// remaining 0..3 bytes are compared one at a time, so no byte outside
// [SrcString, SrcString+Len) is ever read.
// Register roles: EBX = SubChar replicated x4 (BL = SubChar), ESI = cursor,
// EDI = scratch. EBX/ESI/EDI are saved and restored.
asm
        push    esi
        push    edi
        push    ebx
        test    edx, edx
        jz      @NotFound                 // nil buffer
        test    ecx, ecx
        jle     @NotFound                 // empty or negative length
        movzx   ebx, al
        imul    ebx, ebx, $01010101       // ebx = SubChar in every byte
        mov     eax, Order
        test    eax, eax
        jz      @Reverse                  // Order = False -> reverse search

        {--------------- forward search ---------------}
        xor     esi, esi                  // esi = 0-based offset of next unread byte
@OrderCmp:
        mov     edi, ecx
        sub     edi, esi                  // edi = bytes remaining
        cmp     edi, 4
        jl      @OrderTail
        mov     eax, [edx+esi]
        xor     eax, ebx                  // matching bytes become $00
        lea     edi, [eax-$01010101]
        not     eax
        and     eax, edi
        and     eax, $80808080            // bit 7 set for zero bytes; lowest set flag is exact
        jnz     @OrderFound
        add     esi, 4
        jmp     @OrderCmp
@OrderFound:
        bsf     eax, eax                  // lowest flag = first matching byte
        shr     eax, 3                    // bit index -> byte index
        lea     eax, [eax+esi+1]          // 1-based position
        jmp     @Exit
@OrderTail:
        cmp     esi, ecx
        jge     @NotFound
        cmp     bl, [edx+esi]
        je      @OrderTailFound
        add     esi, 1
        jmp     @OrderTail
@OrderTailFound:
        lea     eax, [esi+1]
        jmp     @Exit

        {--------------- reverse search ---------------}
@Reverse:
        mov     esi, ecx                  // esi = count of leading bytes not yet examined
@ReverseCmp:
        cmp     esi, 4
        jl      @ReverseTail
        mov     eax, [edx+esi-4]          // bytes esi-4 .. esi-1, all in range
        xor     eax, ebx
        lea     edi, [eax-$01010101]
        not     eax
        and     eax, edi
        and     eax, $80808080
        jnz     @ReverseFound
        sub     esi, 4
        jmp     @ReverseCmp
@ReverseFound:
        // Flags above the lowest zero byte can be spurious (borrow propagation),
        // so locate the highest real match with explicit byte compares.
        mov     eax, esi
        cmp     bl, [edx+esi-1]
        je      @Exit
        lea     eax, [esi-1]
        cmp     bl, [edx+esi-2]
        je      @Exit
        lea     eax, [esi-2]
        cmp     bl, [edx+esi-3]
        je      @Exit
        lea     eax, [esi-3]
        cmp     bl, [edx+esi-4]
        je      @Exit
        jmp     @NotFound
@ReverseTail:
        test    esi, esi
        jz      @NotFound
        cmp     bl, [edx+esi-1]
        je      @ReverseTailFound
        sub     esi, 1
        jmp     @ReverseTail
@ReverseTailFound:
        mov     eax, esi
        jmp     @Exit

@NotFound:
        xor     eax, eax
@Exit:
        pop     ebx
        pop     edi
        pop     esi
end;
完.