FReplace.pas单元升级

网络上流传的FReplace.pas文件已经使用了N年,最近发现FastReplace函数有问题:处理一些带中文的字符串时,居然无法替换。比如将“177草2”里面的“7草”替换成“新内容”,无法成功替换;研究发现,只要连续出现两个字母数字,后面紧跟着汉字,就无法查找到由一个字母数字+汉字拼成的字符串并替换。跟踪代码发现,这是FastPos和FastPosNoCase没有正确地查到位置、返回0导致的。这两个函数大同小异,里面都是汇编代码,跟踪到下面这一段(判断中文字符处)时发现少了一句,导致数据的比较不准确,从而没找到字符串的位置。目前只发现这个问题,两个函数分别添加了一句汇编以后,函数马上恢复正常使用。

  @Nextchar:
    //This   is   where   I   jump   to   when   I   want   to   continue   searching   for   the   first   Char
    //acter   of   aFindString   in   aSearchString:
    //Point   EDI   (aFindString[X])   to
    //the   next   character.
    Inc     EDI
    //Dec   ECX   tells   us   that   we've   checked
    //another   character,   and   that   we're
    //fast   running   out   of   AnsiString   to   check!
    dec     ECX
    //If   EBX   <>   0,   then   continue   scanning
    //for   the   first   character.


    //add   by   shengquanhu
    //if   ah   is   chinese   Char,jump   again
    jz       @Result0
//******************************************
    {TODO -o 添加了下面一句 -c : zhunanhui  2014/9/15 00:25:36}
    mov     Ah, [EDI-1]
//******************************************
    cmp     Ah,   $80
    jb       @ScaSB
    Inc     EDI
    Dec     ECX
    //add   by   shengquanhu   end


    jnz     @ScaSB


    //add   by   shengquanhu

这里贴上FReplace.pas全文件更新后的源代码

unit FReplace;

interface

type
  // Common signature of the two search routines declared below (FastPos and
  // FastPosNoCase). FastReplace stores one of them in a variable of this type
  // so the case-sensitivity decision is made once, outside its search loop.
  TFastPosProc = function(
    const aSourceString, aFindString: AnsiString;
    const aSourceLen, aFindLen, StartPos: integer
    ): integer;

// Returns a copy of aSourceString in which the occurrences of aFindString
// found by the search routine are replaced with aReplaceString.
// CaseSensitive selects FastPos (True) or FastPosNoCase (False, the default).
function FastReplace(
  const aSourceString: AnsiString;
  const aFindString, aReplaceString: AnsiString;
  CaseSensitive: Boolean = False): AnsiString; overload;

//function FastReplace(
//  const aSourceString: WideString;
//  const aFindString, aReplaceString: WideString; Flags: TReplaceFlags;
//  CaseSensitive: Boolean = False): WideString; overload;

// Case-sensitive search. Returns the 1-based position of aFindString in
// aSourceString, scanning from StartPos, or 0 when it is not found.
// aSourceLen and aFindLen are the string lengths, supplied by the caller so
// that Length() is not re-evaluated on every call.
function FastPos(
  const aSourceString, aFindString: AnsiString;
  const aSourceLen, aFindLen, StartPos: integer
  ): integer;

// Same contract as FastPos, but the bytes 'a'..'z' are upper-cased on both
// sides before they are compared.
function FastPosNoCase(
  const aSourceString, aFindString: AnsiString;
  const aSourceLen, aFindLen, StartPos: integer
  ): integer;

implementation

//This TYPE declaration will become apparent later.
//The first thing to note here is that I'm passing the SourceLength and
//FindLength. As neither Source nor Find will alter at any point during
//FastReplace, there's no need to call the LENGTH subroutine each time!

function FastPos(
  const aSourceString, aFindString: AnsiString;
  const aSourceLen, aFindLen, StartPos: integer
  ): integer;
// Case-sensitive substring search.
//
// Parameters:
//   aSourceString - string to search in.
//   aFindString   - string to search for.
//   aSourceLen    - length of aSourceString in bytes.
//   aFindLen      - length of aFindString in bytes.
//   StartPos      - 1-based position in aSourceString to start scanning from.
//
// Returns the 1-based position of the match, or 0 when there is none.
// Returns 0 as well when aFindLen < 1 or StartPos < 1.
//
// While scanning, a source byte >= $80 is treated as the first byte of a
// two-byte character and the byte after it is skipped as well.
// NOTE(review): this presumably assumes a double-byte ANSI code page such as
// GBK (the original comments call it a "chinese Char") - confirm before using
// it on UTF-8 or single-byte code page data.
var
  SourceLen: integer;
begin
  // An empty find string is a nil pointer, so reading its first byte in the
  // asm below would fault; a StartPos below 1 would read in front of the
  // source string. Neither can produce a match.
  if (aFindLen < 1) or (StartPos < 1) then
  begin
    Result := 0;
    Exit;
  end;
  // Work out how many start positions have to be examined.
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos - 1) > SourceLen then
  begin
    // Not enough room left for aFindString.
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen + 2;
  asm
    // Preserve the registers used below.
    push   ESI
    push   EDI
    push   EBX
    // EDI := address of aSourceString[StartPos].
    mov   EDI,   aSourceString
    add   EDI,   StartPos
    Dec   EDI
    // ESI := address of aFindString[1].
    mov   ESI,   aFindString
    // ECX := number of candidate start positions still to examine.
    mov   ECX,   SourceLen
    // AL := first byte of aFindString; loaded once, outside the scan loop.
    Mov     Al,   [ESI]
  @ScaSB:
    // Scan loop: compare the current source byte with the first find byte.
    Mov     Ah,   [EDI]
    cmp     Ah,Al
    jne     @Nextchar
  @CompareStrings:
    // First byte matched. EBX := index of the last byte of aFindString.
    mov     EBX,   aFindLen
    dec     EBX
    // aFindLen = 1: the single byte already matched.
    Jz   @EndOfMatch
  @CompareNext:
    // Compare from the last byte backwards; ESI and EDI are left untouched
    // so nothing has to be restored when the comparison fails.
    mov     Al,   [ESI+EBX]
    mov     Ah,   [EDI+EBX]
    cmp     Al,   Ah
    Jz       @Matches
    // Mismatch: AL was overwritten above, so reload the first find byte
    // before going back to the scan loop.
    Mov     Al,   [ESI]
    Jmp     @Nextchar
  @Matches:
    Dec     EBX
    // Index 0 was already compared in the scan loop, so stop at zero.
    Jnz     @CompareNext
  @EndOfMatch:
    // Full match. Result := (EDI - address of aSourceString[1]) + 1.
    mov     EAX,   EDI
    sub     EAX,   aSourceString
    inc     EAX
    mov     Result,   EAX
    jmp     @TheEnd
  @Nextchar:
    // Advance to the next source byte and count it.
    Inc     EDI
    dec     ECX
    jz       @Result0
    // AH may hold a byte loaded by @CompareNext, so reload the byte that was
    // just stepped over (statement added 2014-09-15 by zhunanhui).
    mov     Ah, [EDI-1]
    // A byte below $80 is a single-byte character: resume scanning.
    cmp     Ah,   $80
    jb       @ScaSB
    // Otherwise skip the second byte of the two-byte character as well.
    Inc     EDI
    Dec     ECX
    jnz     @ScaSB
  @Result0:
    // Ran out of positions: no match.
    mov     Result,0
  @TheEnd:
    // Restore the saved registers in reverse order.
    pop     EBX
    pop     EDI
    pop     ESI
  end;
end;
//This routine is an identical copy of FastPos except where commented! The idea
//is that when grabbing bytes in 'a'..'z', it ANDs them with $df, effectively
//making them uppercase before comparing. Maybe this would be quicker if
//aFindString was made uppercase in one fell swoop at the beginning of the
//function, saving an AND instruction each time.

function FastPosNoCase(
  const aSourceString, aFindString: AnsiString;
  const aSourceLen, aFindLen, StartPos: integer
  ): integer;
// Case-insensitive substring search: bytes in 'a'..'z' ($61..$7A) are
// upper-cased (AND $DF) on both sides before comparing; all other bytes are
// compared unchanged.
//
// Parameters:
//   aSourceString - string to search in.
//   aFindString   - string to search for.
//   aSourceLen    - length of aSourceString in bytes.
//   aFindLen      - length of aFindString in bytes.
//   StartPos      - 1-based position in aSourceString to start scanning from.
//
// Returns the 1-based position of the match, or 0 when there is none.
// Returns 0 as well when aFindLen < 1 or StartPos < 1.
//
// While scanning, a source byte >= $80 is treated as the first byte of a
// two-byte character and the byte after it is skipped as well.
// NOTE(review): this presumably assumes a double-byte ANSI code page such as
// GBK - confirm before using it on UTF-8 or single-byte code page data.
var
  SourceLen: integer;
begin
  // An empty find string is a nil pointer, so reading its first byte in the
  // asm below would fault; a StartPos below 1 would read in front of the
  // source string. Neither can produce a match.
  if (aFindLen < 1) or (StartPos < 1) then
  begin
    Result := 0;
    Exit;
  end;
  // Work out how many start positions have to be examined.
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos - 1) > SourceLen then
  begin
    // Not enough room left for aFindString.
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen + 2;
  asm
    // Preserve the registers used below.
    push   ESI
    push   EDI
    push   EBX

    // EDI := address of aSourceString[StartPos], ESI := address of
    // aFindString[1], ECX := number of candidate start positions.
    mov   EDI,   aSourceString
    add   EDI,   StartPos
    Dec   EDI
    mov   ESI,   aFindString
    mov   ECX,   SourceLen
    // AL := first byte of aFindString, upper-cased only if it is 'a'..'z'.
    Mov     Al,   [ESI]

    cmp   Al,   $7A
    ja   @ScaSB

    cmp   Al,   $61
    jb   @ScaSB

    and     Al,   $df

  @ScaSB:
    // Scan loop: AH := current source byte, upper-cased only if 'a'..'z'.
    Mov     Ah,   [EDI]

    cmp   Ah,   $7A
    ja   @Comparechar

    cmp   Ah,   $61
    jb   @Comparechar

    and     Ah,   $df

  @Comparechar:
    cmp     Ah,Al
    jne     @Nextchar
  @CompareStrings:
    // First byte matched. EBX := index of the last byte of aFindString.
    mov     EBX,   aFindLen
    dec     EBX

    // aFindLen = 1: the single byte already matched.
    Jz       @EndOfMatch

  @CompareNext:
    // Compare from the last byte backwards.
    mov     Al,   [ESI+EBX]
    mov     Ah,   [EDI+EBX]

    // Upper-case AL only if it is 'a'..'z'. Both range checks must test AL:
    // the previous code tested AH against $7A here, so find bytes $7B..$7F
    // were masked too and e.g. '{' compared equal to '['.
    cmp   Al,   $7A
    ja   @LowerAh

    cmp   Al,   $61
    jb   @LowerAh

    and     Al,   $df

  @LowerAh:
    // Upper-case AH only if it is 'a'..'z'.
    cmp   Ah,   $7A
    ja   @Comparechar2

    cmp   Ah,   $61
    jb   @Comparechar2

    and     Ah,   $df

  @Comparechar2:
    cmp     Al,   Ah
    Jz       @Matches
    // Mismatch: AL was overwritten above, so reload the first find byte
    // (upper-cased if needed) before going back to the scan loop.
    Mov     Al,   [ESI]

    cmp   Al,   $7A
    ja   @Nextchar

    cmp   Al,   $61
    jb   @Nextchar

    and     Al,   $df
    Jmp     @Nextchar
  @Matches:
    Dec     EBX
    // Index 0 was already compared in the scan loop, so stop at zero.
    Jnz     @CompareNext

  @EndOfMatch:
    // Full match. Result := (EDI - address of aSourceString[1]) + 1.
    mov     EAX,   EDI
    sub     EAX,   aSourceString
    inc     EAX
    mov     Result,   EAX
    jmp     @TheEnd
  @Nextchar:
    // Advance to the next source byte and count it.
    Inc     EDI
    dec     ECX
    jz       @Result0
    // AH may hold a byte loaded by @CompareNext, so reload the byte that was
    // just stepped over (statement added 2014-09-15 by zhunanhui).
    mov     Ah, [EDI-1]
    // A byte below $80 is a single-byte character: resume scanning.
    cmp     Ah,   $80
    jb       @ScaSB
    // Otherwise skip the second byte of the two-byte character as well.
    Inc     EDI
    Dec     ECX
    jnz     @ScaSB
  @Result0:
    // Ran out of positions: no match.
    mov     Result,0
  @TheEnd:
    // Restore the saved registers in reverse order.
    pop     EBX
    pop     EDI
    pop     ESI
  end;
end;

//My move isn't as fast as MOVE when source and destination are both DWord
//aligned, but it's certainly faster when they're not. As we're moving
//characters in an AnsiString, it isn't very likely at all that both source and
//destination are DWord aligned, so moving bytes avoids the cycle penalty of
//reading/writing DWords across physical boundaries.

procedure MyMove(
  const Source; var Dest; Count: integer);
// Copies Count bytes from Source to Dest one byte at a time, from the lowest
// address upwards. Does nothing when Count <= 0.
// The copy always runs forward, so overlapping regions where Dest starts
// inside Source are not handled.
asm
  // On entry (as stated by the original author for this calling convention):
  //   EAX = address of Source
  //   EDX = address of Dest
  //   ECX = Count
  // Nothing to copy: leave before any register is pushed. A signed test is
  // used so that a negative Count exits as well; with the previous
  // equality-only test a negative Count entered the loop and only stopped
  // after ECX had wrapped all the way round to zero.
  cmp       ECX,0
  jle       @JustQuit
  // Preserve the registers used as copy pointers.
  push     ESI
  push     EDI
  // ESI := source pointer, EDI := destination pointer.
  mov       ESI,   EAX
  mov       EDI,   EDX
@Loop:
  // Load one byte; the pointer increments are placed as far as possible from
  // the next use of the same register, which the original author describes
  // as avoiding a pipeline stall.
  Mov       AL,   [ESI]
  Inc       ESI
  mov       [EDI],   AL
  Inc       EDI
  // One byte fewer to copy; loop until none are left.
  Dec       ECX
  Jnz       @Loop
  // Restore the saved registers in reverse order.
  pop       EDI
  pop       ESI
@JustQuit:
end;

//Point 1: I pass aSourceString as CONST rather than by value (the declaration
//below uses const). This is because I'll just be passed a pointer to the data
//rather than a 10M copy of the data itself, which is much quicker!

function FastReplace(
  const aSourceString: AnsiString;
  const aFindString, aReplaceString: AnsiString;
  CaseSensitive: Boolean = False): AnsiString;
// Returns a copy of aSourceString in which every non-overlapping occurrence
// of aFindString (searched left to right) is replaced by aReplaceString.
//
// Parameters:
//   aSourceString  - text to process.
//   aFindString    - text to look for; when empty, the source is returned
//                    unchanged.
//   aReplaceString - replacement text; when empty, matches are deleted.
//   CaseSensitive  - True searches with FastPos, False (the default) with
//                    FastPosNoCase.
var
  // Capacity reserved for Result before the replace loop starts.
  ActualResultLen,
  // Position of the current match in aSourceString (0 = no more matches).
  CurrentPos,
  // Position just after the previous match; start of the text not yet copied.
  LastPos,
  // Number of unchanged source bytes between the previous and current match.
  BytesToCopy,
  // Number of bytes written to Result so far.
  ResultLen,
  // Cached string lengths.
  FindLen,
  ReplaceLen,
  SourceLen: integer;
  // Search routine chosen once, outside the loops.
  FastPosProc: TFastPosProc;
begin
  FindLen := Length(aFindString);
  ReplaceLen := Length(aReplaceString);
  SourceLen := Length(aSourceString);
  // Nothing to search for, or nothing to search in. Previously an empty find
  // string ended in "div FindLen" with FindLen = 0, or in the search routine
  // reading through the nil pointer of the empty string.
  if (FindLen = 0) or (SourceLen = 0) then
  begin
    Result := aSourceString;
    Exit;
  end;
  if CaseSensitive then
    FastPosProc := FastPos
  else
    FastPosProc := FastPosNoCase;
  Result := '';
  if ReplaceLen <= FindLen then
    // The result can never be longer than the source.
    ActualResultLen := SourceLen
  else
    // Worst case: the source holds SourceLen div FindLen matches and each one
    // grows by ReplaceLen - FindLen bytes. This replaces the earlier
    // SourceLen * ReplaceLen estimate, whose product could exceed the
    // 32-bit integer range for large inputs and yield a buffer that was too
    // small.
    ActualResultLen :=
      SourceLen + (SourceLen div FindLen) * (ReplaceLen - FindLen);
  SetLength(Result, ActualResultLen);
  CurrentPos := 1;
  ResultLen := 0;
  LastPos := 1;
  // The loop is written twice so that the "is there anything to insert"
  // test stays outside of it.
  if ReplaceLen > 0 then
  begin
    repeat
      CurrentPos :=
        FastPosProc(aSourceString, aFindString,
        SourceLen, FindLen, CurrentPos);
      if CurrentPos = 0 then
        break;
      // Copy the unchanged text in front of the match ...
      BytesToCopy := CurrentPos - LastPos;
      MyMove(aSourceString[LastPos],
        Result[ResultLen + 1], BytesToCopy);
      // ... followed by the replacement.
      MyMove(aReplaceString[1],
        Result[ResultLen + 1 + BytesToCopy], ReplaceLen);
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      // Continue searching right after the match.
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until False;
  end
  else
  begin
    // Deleting: same loop without the copy from aReplaceString.
    repeat
      CurrentPos := FastPosProc(aSourceString,
        aFindString, SourceLen, FindLen, CurrentPos);
      if CurrentPos = 0 then
        break;
      BytesToCopy := CurrentPos - LastPos;
      MyMove(aSourceString[LastPos],
        Result[ResultLen + 1], BytesToCopy);
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until False;
  end;
  // LastPos now becomes the number of source bytes already consumed.
  Dec(LastPos);
  // Final size = bytes written so far + the source text after the last match.
  SetLength(Result, ResultLen + (SourceLen - LastPos));
  // Append that remaining text, if there is any.
  if LastPos + 1 <= SourceLen then
    MyMove(aSourceString[LastPos + 1],
      Result[ResultLen + 1], SourceLen - LastPos);
end;

//function FastReplace(
//  const aSourceString: WideString;
//  const aFindString, aReplaceString: WideString; Flags: TReplaceFlags;
//  CaseSensitive: Boolean = False): WideString; overload;
//begin
//  Result := FastReplace(aSourceString, aFindString, aReplaceString, CaseSensitive);
//end;

end.


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值