- {************************************
- *功能:我的处理字符串的一些类和函数
- *说明:SubStrPos---简单字串定位
- * SubStrPosEx---有通配符子串定位
- * SubStrFind---统计子串数量,支持通配符
- * MidSubStr---取中间子串,支持通配符
- *版本:1.0
- *版权:Big One Tech Co. 2008
- *作者:乌龙哈里
- *语言:Delphi 2007 for Win32
- *初写日期:2008.6.12
- *最后修改:2008.6.17
- ************************************}
- unit MyStr;
- interface
- uses math;
type
  // Location of one match inside a string: 1-based start index and length
  // in characters.
  TStrMsg = record
    Pos, Len: Integer;
  end;

  // Growable list of match locations, filled by SubStrFind and MidSubStr.
  TStrMsgArray = array of TStrMsg;
//----Search S for SubStr starting at StartPos and return the position of the
//    match (0 when there is none). Revised 2008-06-17.
function SubStrPos(const SubStr, S: WideString; const StartPos: Integer = 1): Integer;

//----Search S for SubStr starting at StartPos and report the match in
//    SubStrMsg. '*' in SubStr is a wildcard. Revised 2008-06-17.
procedure SubStrPosEx(const SubStr, S: WideString; var SubStrMsg: TStrMsg; const StartPos: Integer = 1);

//----Find the occurrences of SubStr in S; StrMsgArray receives the position
//    and length of each one and the count is returned. Revised 2008-06-17.
function SubStrFind(const SubStr, S: WideString; var StrMsgArray: TStrMsgArray): Integer;

//----Collect the text lying between a left and a right delimiter.
//    Wildcards are supported. Revised 2008-06-17.
function MidSubStr(const LStr, RStr, S: WideString; var StrMsgArray: TStrMsgArray; const ExcludeStr: widestring = ''): integer;
type
  // Splits HTML text into cells (the text found between delimiter tags) and
  // gives indexed access to them.
  THtmlSplit = class
  private
    HtmlText: WideString;      // text handed to the last Split call
    CellSplit: TStrMsgArray;   // position/length of every cell in HtmlText
    fCount: integer;           // number of cells found by the last Split
    fCellNo: integer;          // current cell number (1-based)
    function GetfCount: integer;
    function GetfCellNo: integer;
    procedure SetfCellNo(Value: integer);
  public
    // Splits S into cells; Symbol selects the delimiters.
    procedure Split(const S: widestring; const Symbol: widestring = 'td');
    // Returns the text of cell n (1-based).
    function Cell(n: integer): WideString;
    constructor Create; virtual;
    destructor Destroy; override;
    // Number of cells produced by the last Split.
    property Count: integer read GetfCount;
    // Current cell number.
    property CellNo: integer read GetfCellNo write SetfCellNo;
  end;
- implementation
- //===============================================
- //*******************HtmlSplit类*****************
- //===============================================
// Creates a splitter that holds no cells yet; the current cell number is
// preset to 1.
constructor THtmlSplit.Create;
begin
  inherited;
  fCellNo := 1;
  fCount := 0;
end;
// Releases the cell table, then runs the inherited destructor.
destructor THtmlSplit.Destroy;
begin
  SetLength(CellSplit, 0);
  inherited;
end;
// Read accessor of the Count property.
function THtmlSplit.GetfCount: integer;
begin
  Result := Self.fCount;
end;
// Read accessor of the CellNo property.
function THtmlSplit.GetfCellNo: integer;
begin
  Result := Self.fCellNo;
end;
// Write accessor of the CellNo property. A value outside 1..fCount is
// replaced by fCount.
procedure THtmlSplit.SetfCellNo(Value: integer);
begin
  if (Value < 1) or (Value > fCount) then
    fCellNo := fCount
  else
    fCellNo := Value;
end;
// Splits S into cells and stores their positions in CellSplit.
//   S      - HTML text to split; it is kept in HtmlText so Cell can copy
//            from it later.
//   Symbol - selects the delimiters:
//              '<>'      text lying between a '>' and the next '<';
//              any tag   text between '<tag...>' and '</tag>'
//                        (the default 'td' gives '<td*>' .. '</td>').
//            An empty Symbol yields zero cells.
// Text matching '<*>' inside a cell is excluded from the result.
// fCount receives the number of cells found.
procedure THtmlSplit.Split(const S: widestring; const Symbol: widestring = 'td');
var
  LStr, RStr: WideString;
begin
  HtmlText := S;
  if Symbol = '' then
  begin
    // No delimiter can be built from an empty tag name; searching with an
    // empty pattern is not meaningful, so report "nothing found".
    CellSplit := nil;
    fCount := 0;
    Exit;
  end;
  if Symbol = '<>' then
  begin
    LStr := '>';
    RStr := '<';
  end
  else
  begin
    // Generic tag form; for 'td' this is exactly '<td*>' and '</td>'.
    LStr := '<' + Symbol + '*>';
    RStr := '</' + Symbol + '>';
  end;
  fCount := MidSubStr(LStr, RStr, HtmlText, CellSplit, '<*>');
end;
// Returns the text of cell n (1-based) and makes n the current cell number.
// When n is outside 1..fCount the result is an empty string and the current
// cell number is left unchanged.
function THtmlSplit.Cell(n: integer): WideString;
begin
  // A WideString result is not guaranteed to start out empty, so it must be
  // set explicitly for the out-of-range case.
  Result := '';
  if (n > 0) and (n <= fCount) then
  begin
    Result := Copy(HtmlText, CellSplit[n - 1].Pos, CellSplit[n - 1].Len);
    fCellNo := n;
  end;
end;
- //===============================================
- //*******************独立函数********************
- //===============================================
//----Plain (no wildcard) substring search.
//    SubStr   - text to look for.
//    S        - text to search in.
//    StartPos - 1-based index in S where the search begins; values below 1
//               are treated as 1.
//    Returns the 1-based index of the first occurrence at or after StartPos,
//    or 0 when SubStr is empty or does not occur.
function SubStrPos(const SubStr, S: WideString; const StartPos: Integer = 1): Integer;
var
  LenSub, LastStart, i, j: Integer;
begin
  Result := 0;
  LenSub := Length(SubStr);
  if LenSub = 0 then Exit;              // nothing to look for; avoids SubStr[1] on ''
  LastStart := Length(S) - LenSub + 1;  // last index where a full match still fits
  i := StartPos;
  if i < 1 then i := 1;                 // avoids reading S[0] and below
  while i <= LastStart do
  begin
    // Count how many leading characters agree at this position.
    j := 0;
    while (j < LenSub) and (S[i + j] = SubStr[j + 1]) do Inc(j);
    if j = LenSub then                  // complete match
    begin
      Result := i;
      Exit;
    end;
    Inc(i);
  end;
end;
//----Substring search with '*' wildcard support.
//    SubStr    - pattern; every '*' stands for any run of characters
//                (possibly empty). Consecutive '*' count as one.
//    S         - text to search in.
//    SubStrMsg - receives the match: Pos is the 1-based start, Len the
//                length. Pos = 0 and Len = 0 mean "not found".
//    StartPos  - 1-based index where the search begins; values below 1 are
//                treated as 1.
//    The literal pieces of the pattern are located one after another, each
//    at its first occurrence after the previous piece. A leading '*' anchors
//    the match at StartPos; a trailing '*' extends it to the end of S.
procedure SubStrPosEX(const SubStr, S: WideString; var SubStrMsg: TStrMsg; const StartPos: Integer = 1);
const
  Widecard: widechar = '*'; // wildcard character
var
  i, SegCount: integer;
  LenS, LenSub, nPos, SearchFrom: integer;
  WidecardSplit: array of WideString; // pattern cut into '*' and literal pieces
begin
  SubStrMsg.Pos := 0;
  SubStrMsg.Len := 0;
  LenS := Length(S);
  LenSub := Length(SubStr);
  if LenSub = 0 then Exit;            // empty pattern: report "not found"

  // Cut the pattern into pieces. There can never be more pieces than
  // characters, so LenSub slots are enough.
  SetLength(WidecardSplit, LenSub);
  SegCount := 0;
  for i := 1 to LenSub do
  begin
    if SubStr[i] = Widecard then
    begin
      // A run of wildcards produces a single wildcard piece.
      if (i = 1) or (SubStr[i - 1] <> Widecard) then
      begin
        WidecardSplit[SegCount] := Widecard;
        Inc(SegCount);
      end;
    end
    else
    begin
      // Open a new literal piece at the start and after every wildcard.
      if (i = 1) or (SubStr[i - 1] = Widecard) then
        Inc(SegCount);
      WidecardSplit[SegCount - 1] := WidecardSplit[SegCount - 1] + SubStr[i];
    end;
  end;

  // Match the pieces from left to right.
  SearchFrom := StartPos;
  if SearchFrom < 1 then SearchFrom := 1;
  for i := 0 to SegCount - 1 do
  begin
    if WidecardSplit[i] = Widecard then
    begin
      if i = 0 then
      begin
        // A leading wildcard starts the match where the search starts.
        // Past the end of S there is nothing left to match.
        if SearchFrom > LenS then Exit;
        SubStrMsg.Pos := SearchFrom;
      end;
      if i = SegCount - 1 then        // trailing wildcard: run to the end of S
        SubStrMsg.Len := LenS + 1 - SubStrMsg.Pos;
    end
    else
    begin
      nPos := SubStrPos(WidecardSplit[i], S, SearchFrom);
      if nPos = 0 then
      begin
        // One literal piece is missing, so the whole pattern fails.
        SubStrMsg.Pos := 0;
        SubStrMsg.Len := 0;
        Exit;
      end;
      SearchFrom := nPos + Length(WidecardSplit[i]);
      if i = 0 then SubStrMsg.Pos := nPos;   // leading literal fixes the start
      SubStrMsg.Len := SearchFrom - SubStrMsg.Pos;
    end;
  end;
end;
//----Find the occurrences of SubStr in S ('*' wildcards supported).
//    SubStr      - pattern to look for.
//    S           - text to search in.
//    StrMsgArray - receives one entry (position, length) per occurrence; its
//                  length is set to the number of occurrences.
//    Returns the number of occurrences.
//    Each new search starts one character after the start of the previous
//    match, so overlapping occurrences are reported.
function SubStrFind(const SubStr, S: WideString; var StrMsgArray: TStrMsgArray): Integer;
var
  nStrMsgNum, NextStart: integer;
  strMsg: TStrMsg;
begin
  nStrMsgNum := 0;
  SetLength(StrMsgArray, 0);
  SubStrPosEx(SubStr, S, strMsg, 1);
  while strMsg.Pos > 0 do
  begin
    // Grow the buffer on demand instead of relying on a fixed upper bound.
    if nStrMsgNum >= Length(StrMsgArray) then
      SetLength(StrMsgArray, nStrMsgNum * 2 + 16);
    StrMsgArray[nStrMsgNum] := strMsg;
    Inc(nStrMsgNum);
    NextStart := strMsg.Pos + 1;
    SubStrPosEx(SubStr, S, strMsg, NextStart);
    // A match that does not lie at or after NextStart would repeat forever;
    // stop in that case so the loop always terminates.
    if strMsg.Pos < NextStart then Break;
  end;
  SetLength(StrMsgArray, nStrMsgNum); // trim to the number of matches
  Result := nStrMsgNum;
end;
//----Collect the pieces of S that lie between a left and a right delimiter.
//    LStr, RStr  - left and right delimiter patterns ('*' wildcards allowed).
//    S           - text to search in.
//    StrMsgArray - receives position and length of every piece found; its
//                  length is set to the number of pieces.
//    ExcludeStr  - optional pattern. When given, text matching it inside a
//                  delimited region is left out and the remaining non-empty
//                  stretches are reported as separate pieces.
//    Returns the number of pieces. Empty LStr or RStr yields 0.
//    Without ExcludeStr the search for the next left delimiter restarts at
//    the right delimiter itself; with ExcludeStr it restarts one character
//    after the start of the right delimiter.
function MidSubStr(const LStr, RStr, S: WideString; var StrMsgArray: TStrMsgArray; const ExcludeStr: widestring = ''): integer;
var
  LPos, RPos, NextStart: integer;
  nStrMsgNum: integer;
  strMsg: TStrMsg;

  // Appends one (position, length) entry, growing the array when needed.
  procedure AddSpan(APos, ALen: integer);
  begin
    if nStrMsgNum >= Length(StrMsgArray) then
      SetLength(StrMsgArray, nStrMsgNum * 2 + 16);
    StrMsgArray[nStrMsgNum].Pos := APos;
    StrMsgArray[nStrMsgNum].Len := ALen;
    Inc(nStrMsgNum);
  end;

begin
  Result := 0;
  nStrMsgNum := 0;
  SetLength(StrMsgArray, 0);
  if (LStr = '') or (RStr = '') then Exit;   // nothing sensible to delimit with

  SubStrPosEx(LStr, S, strMsg, 1);
  while strMsg.Pos > 0 do
  begin
    LPos := strMsg.Pos + strMsg.Len;         // first character after the left delimiter
    SubStrPosEx(RStr, S, strMsg, LPos);
    if strMsg.Pos = 0 then Break;            // no closing delimiter: done
    RPos := strMsg.Pos;

    if ExcludeStr = '' then
    begin
      AddSpan(LPos, RPos - LPos);
      NextStart := RPos;
    end
    else
    begin
      // Walk through LPos..RPos-1 and report the stretches between matches
      // of ExcludeStr.
      while LPos < RPos do
      begin
        SubStrPosEx(ExcludeStr, S, strMsg, LPos);
        if (strMsg.Pos < LPos) or (strMsg.Pos >= RPos) then
        begin
          // No further excluded text inside this region (not found, or found
          // outside it): the rest up to the right delimiter is one piece.
          AddSpan(LPos, RPos - LPos);
          Break;
        end;
        // Text in front of the excluded match, if any (one character counts).
        if strMsg.Pos > LPos then
          AddSpan(LPos, strMsg.Pos - LPos);
        // Continue after the excluded match; always move forward.
        if strMsg.Len > 0 then
          LPos := strMsg.Pos + strMsg.Len
        else
          LPos := strMsg.Pos + 1;
      end;
      NextStart := RPos + 1;
    end;

    SubStrPosEx(LStr, S, strMsg, NextStart);
    // A match before NextStart would make the loop spin on the same spot.
    if strMsg.Pos < NextStart then Break;
  end;

  SetLength(StrMsgArray, nStrMsgNum);        // trim to the number of pieces
  Result := nStrMsgNum;
end;
- end.
字符串处理函数
最新推荐文章于 2020-09-09 11:09:40 发布