第四个版本。对编译的部分进行了整理和重构。第三个版本 $(0|1)居然会编译出错 -____-b
持续维护中。CSDN竟然不能插入Delphi代码,B4一个先。
第一个版本竟然连e-mail都不能匹配-___-b
{***************************************************************************
Delphi Regular Expression Library
By superarhow (superarhow@hotmail.com)
The author does not answer for the code explain, upgrade or harmful uses.
<<=== History ===>>
Version: 1.2.0.1
Commit: 2006/09/09 12:19
Reconstructed the compiling codes, fixed important bugs
and in [] now supports /w /W, etc
Now: <(.*)>.*<///1>|<(.*) //>,
/w+([-+.]/w+)*@/w+([-.]/w+)*/./w+([-.]/w+)* ,... can work
( TODO: decrease the stack cost ^^b )
Version: 1.1.0.2
Commit: 2006/09/08 9:00
Fixed the end '$' bug
Version: 1.1.0.1
Commit: 2006/09/07 20:39
Fixed Important bugs, including E-mail bug: (.*)@(.*)/.(.*) cannot
match "superarhow@hotmail.com" -___-|||
Version: 1.0.0.1
Commit: 2006/09/06 22:27
***************************************************************************}
unit RegExp;
(***************************************************************************
语法:本正则表达式分析器是基于JScript的正则表达式语法的,不同点是本分析器
使用ANSI字符集(MBCS)而JScript使用Unicode。因此在/u的处理上略有不同。
另外的不同是:
JScript (a|bc)+ 匹配 "abc" 会得到 $1=bc
而本分析器会得到: values[0]=abc
例:
RE := TRegExp.Create('办证/s*(?:/+86)?(/d{11,14})', []);
try
if RE.Match('办证13689636999') then ShowMessage(RE.Values[0]);
finally
RE.Free;
end;
***************************************************************************)
interface
uses
SysUtils, Classes, StrUtils, Windows, Dialogs;
{*************** Compiled Regular Expression Data Model *******************
Head --- Next --- Next(A) --- Next --- ... --- Tail
|
|(represents the 'OR' relativity)
Link
|
|
Proxy(start of '(') --- Link --- Next --- Tail --- Proxy(end of ')', next points to A)
**************************************************************************}
{***************************************************************************
*** 广告位招租***
***************************************************************************}
type
TLinkedList = class;
{ Linked item class }
TLinkedItem = class
private
FNext: TLinkedItem;
FOwner: TLinkedList;
procedure SetOwner(const Value: TLinkedList);
public
constructor Create(AOwner: TLinkedList);
destructor Destroy; override;
property Next: TLinkedItem read FNext;
property Owner: TLinkedList read FOwner write SetOwner;
end;
{ Linked list class, container of linked items }
TLinkedList = class
private
FHead: TLinkedItem;
FTail: TLinkedItem;
FList: TList;
public
constructor Create;
destructor Destroy; override;
procedure Clear;
{ remove the node from the list, not change the linked relationships }
procedure Remove(AItem: TLinkedItem);
{ add the node into the list, not change the linked relationships }
procedure Add(AItem: TLinkedItem);
property Head: TLinkedItem read FHead;
property Tail: TLinkedItem read FTail;
function IsEmpty: Boolean;
end;
EREException = class(Exception)
end;
TRENodeType = (
rentUnknown, { Uninitialized }
rentControl, { Control(^, $, /b, /B) }
rentChar, { Char/WideChar }
rentCharRange, { Any Char in a set (ASCII/MBCS) }
rentOrStart, { Start of 'or' }
rentOrEnd, { End of 'or' }
rentSubStart, { '(' }
rentSubEnd, { ')' }
rentCaptured { A captured string(backreference) }
);
TRENodeList = class;
TREOption = ( reoIgnoreCase, reoGlobal, reoMultiLine );
TREOptions = set of TREOption;
TRegExp = class;
TREMatchContext = class
private
FOptions: TREOptions;
FValues: TStrings;
FOwner: TRegExp;
public
constructor Create;
destructor Destroy; override;
function IsCaseSensitive: Boolean;
function IsMultiLine: Boolean;
property Options: TREOptions read FOptions;
{ $1..$9 }
property Values: TStrings read FValues;
property Owner: TRegExp read FOwner;
end;
TRENodeItem = class(TLinkedItem)
private
FNodeType: TRENodeType;
FMinTimes: Integer;
FMaxTimes: Integer;
FGreedy: Boolean;
function GetNext: TRENodeItem;
public
{ match the next chars, then matches the next item, inc the pointer. }
function Match(var Src: PChar; var Context: TREMatchContext): Boolean; virtual;
function MatchTimes(var Src: PChar; var Context: TREMatchContext; N: Integer): Boolean; virtual;
function MatchNext(var Src: PChar; var Context: TREMatchContext): Boolean; virtual;
function MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean; virtual;
function GetOwner: TRENodeList; reintroduce;
public
procedure AfterConstruction; override;
destructor Destroy; override;
property NodeType: TRENodeType read FNodeType;
property Owner: TRENodeList read GetOwner;
property MinTimes: Integer read FMinTimes;
property MaxTimes: Integer read FMaxTimes;
property Greedy: Boolean read FGreedy;
property Next: TRENodeItem read GetNext;
end;
TREControlType = (rectStart, rectEnd, rectWordBound, rectNonWordBound);
TRENodeItemControl = class(TRENodeItem)
private
FControlType: TREControlType;
public
function MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean; override;
property ControlType: TREControlType read FControlType;
end;
TRENodeItemChar = class(TRENodeItem)
private
FPattern: string;
public
function MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean; override;
property Pattern: string read FPattern;
end;
{ char/dbcs char . dbcs stores as: char1(bit8-15), char2(bit0-7) }
TRENodeItemCharRange = class(TRENodeItem)
private
FList: TList;
FNegative: Boolean;
private
{ adds a char or a dbcs char into the range }
procedure AddChar(P: PChar);
{ find a code(0-65535). must be sorted }
function FindChar(Code: Integer): Integer;
procedure SortIt;
public
procedure AfterConstruction; override;
destructor Destroy; override;
{ combines to range, and destruct the range eaten by the other one }
class function Combine(Src, Dest: TRENodeItemCharRange): TRENodeItemCharRange;
{ removes a char or a dbcs char from the range }
procedure DeleteChar(P: PChar);
function MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean; override;
{ pattern includes DBCS or not }
property Negative: Boolean read FNegative;
end;
TRENodeItemOrStart = class(TRENodeItem)
private
FChilds: array of TRENodeItem;
public
function Match(var Src: PChar; var Context: TREMatchContext): Boolean; override;
end;
TRENodeItemOrEnd = class(TRENodeItem)
private
FStart: TRENodeItemOrStart;
public
function Match(var Src: PChar; var Context: TREMatchContext): Boolean; override;
end;
TRENodeItemSubEnd = class;
TRENodeItemSubStart = class(TRENodeItem)
private
FSavePoint: PChar;
FSubEnd: TRENodeItemSubEnd;
public
function Match(var Src: PChar; var Context: TREMatchContext): Boolean; override;
end;
TRENodeItemSubEnd = class(TRENodeItem)
private
FStart: TRENodeItemSubStart;
FSlotIndex: Integer;
FLookAhead: Boolean;
FNegative: Boolean;
FCapture: Boolean;
FMatchCount: Integer;
public
procedure AfterConstruction; override;
function Match(var Src: PChar; var Context: TREMatchContext): Boolean; override;
property Capture: Boolean read FCapture;
property LookAhead: Boolean read FLookAhead;
property Negative: Boolean read FNegative;
end;
TRENodeItemCaptured = class(TRENodeItem)
private
FIndex: Integer;
FMatchPattern: Boolean;
public
procedure AfterConstruction; override;
function MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean; override;
property MatchPattern: Boolean read FMatchPattern;
end;
TREParseContext = class
private
FSlotCount: Integer;
public
property SlotCount: Integer read FSlotCount;
end;
{***************************************************************************
exp = strip [ "|" strip ["|" strip ...] ... ]
strip = node|strip [ [node|strip...] ... ]
| "(" exp ")" [times]
node = nodeitem [times]
***************************************************************************}
TRENodeList = class(TLinkedList)
private
FOwner: TRegExp;
function GetHead: TRENodeItem;
protected
function Match(var Src: PChar; Context: TREMatchContext): Boolean;
function GetOwner: TRegExp; reintroduce;
public
constructor Create; reintroduce;
function ParseNextNode(var P: PChar): TRENodeItem;
procedure ParseNextStrip(var P: PChar; var StartItem, EndItem: TRENodeItem; ParseContext: TREParseContext);
procedure ParseExp(var P: PChar; var StartItem, EndItem: TRENodeItem; ParseContext: TREParseContext);
function CreateItem(AType: TRENodeType): TRENodeItem;
property Head: TRENodeItem read GetHead;
end;
TRegExp = class
private
FNodes: TRENodeList;
{ pattern of the regexp }
FPattern: string;
{ text to match }
FText: string;
{ options in match }
FOptions: TREOptions;
{ captured values }
FValues: TStrings;
procedure SetPattern(const Value: string);
protected
procedure Compile; virtual;
function IsStartOfText(Src: PChar): Boolean;
function IsEndOfText(Src: PChar): Boolean;
public
constructor Create(APattern: string = ''; AOptions: TREOptions = []);
destructor Destroy; override;
{ matches a string }
function Match(S: string): Boolean;
{ replaces a single match or all matches in src to replaceto. depends on
whether reoGlobal is in options }
function Replace(Src: string; ReplaceTo: string): string;
property Nodes: TRENodeList read FNodes;
property Pattern: string read FPattern write SetPattern;
property Options: TREOptions read FOptions write FOptions;
property Values: TStrings read FValues;
end;
implementation
{*************************** 传说中滴分隔线(之一) *******************************}
const
{ spaces }
WhiteSpaces = [' ', #9, #13, #10];
{ characters have special meaning }
ControlChars = ['/', '^', '$', '*', '+', '?', '{', '}', '.', '|', '(', ')', '[', ']'];
{*************************** 传说中滴分隔线(之二) *******************************}
{ TRegExp }
function TRegExp.IsEndOfText(Src: PChar): Boolean;
begin
Result := Src = @PChar(FText)[Length(FText)];
end;
function TRegExp.IsStartOfText(Src: PChar): Boolean;
begin
Result := Src = PChar(FText);
end;
procedure TRegExp.SetPattern(const Value: string);
begin
FPattern := Value;
Compile;
end;
function TRegExp.Match(S: string): Boolean;
var
P, Q: PChar;
Context: TREMatchContext;
I: Integer;
begin
FText := S;
P := PChar(FText);
Context := TREMatchContext.Create;
try
for I := 0 to FValues.Count - 1 do
begin
FValues[I] := '';
Context.Values.Add('');
end;
Context.FOptions := FOptions;
Context.FOwner := Self;
repeat
{ parameter will be modified }
Q := P;
Result := Self.FNodes.Match(Q, Context);
if not Result then
begin
P := CharNext(P);
if P^ = #0 then Break;
end;
until Result;
if Result then FValues.Assign(Context.Values);
finally
FreeAndNil(Context);
end;
end;
constructor TRegExp.Create(APattern: string; AOptions: TREOptions);
begin
FNodes := TRENodeList.Create;
FValues := TStringList.Create;
Pattern := APattern;
FOptions := AOptions;
end;
destructor TRegExp.Destroy;
begin
FreeAndNil(FNodes);
FreeAndNil(FValues);
inherited;
end;
function TRegExp.Replace(Src, ReplaceTo: string): string;
var
P, Q: PChar;
Context: TREMatchContext;
Match: Boolean;
I: Integer;
begin
FText := Src;
P := PChar(FText);
Context := TREMatchContext.Create;
try
for I := 0 to FValues.Count - 1 do
begin
FValues[I] := '';
Context.Values.Add('');
end;
Context.FOptions := FOptions;
Context.FOwner := Self;
{ pre-allocate captured values }
repeat
{ parameter will be modified }
Q := P;
Match := Self.FNodes.Match(Q, Context);
if Match then
begin
Result := Result + ReplaceTo;
P := Q; { point to next char }
if not (reoGlobal in FOptions) then
begin
Result := Result + P;
Break;
end;
end
else
begin
if IsDBCSLeadByte(Byte(P^)) then
begin
Result := Result + P^ + P[1];
Inc(P, 2);
end
else
begin
Result := Result + P^;
Inc(P);
end;
if P^ = #0 then Break;
end;
until False;
finally
FreeAndNil(Context);
end;
end;
{ TRENodeList }
function TRENodeList.CreateItem(AType: TRENodeType): TRENodeItem;
begin
case AType of
rentControl: Result := TRENodeItemControl.Create(Self);
rentChar: Result := TRENodeItemChar.Create(Self);
rentCharRange: Result := TRENodeItemCharRange.Create(Self);
rentCaptured: Result := TRENodeItemCaptured.Create(Self);
rentOrStart: Result := TRENodeItemOrStart.Create(Self);
rentOrEnd: Result := TRENodeItemOrEnd.Create(Self);
rentSubStart: Result := TRENodeItemSubStart.Create(Self);
rentSubEnd: Result := TRENodeItemSubEnd.Create(Self);
else raise EREException.Create('unknown item type');
end;
Result.FNodeType := AType;
end;
constructor TRENodeList.Create;
begin
inherited Create;
end;
function TRENodeList.GetHead: TRENodeItem;
begin
Result := inherited Head as TRENodeItem;
end;
function TRENodeList.GetOwner: TRegExp;
begin
Result := FOwner;
end;
function TRENodeList.Match(var Src: PChar; Context: TREMatchContext): Boolean;
begin
if Head = nil then Result := True
else
begin
Result := Head.Match(Src, Context);
end;
end;
procedure TLinkedList.Add(AItem: TLinkedItem);
begin
if FList.IndexOf(AItem) > 0 then Exit;
FList.Add(AItem);
end;
procedure TLinkedList.Remove(AItem: TLinkedItem);
var
I: Integer;
begin
I := FList.IndexOf(AItem);
if I >= 0 then FList.Delete(I);
end;
{ TRENodeItem }
procedure TRENodeItem.AfterConstruction;
begin
inherited;
FGreedy := False;
FMinTimes := 1;
FMaxTimes := 1;
end;
destructor TRENodeItem.Destroy;
begin
inherited;
end;
function TRENodeItem.GetOwner: TRENodeList;
begin
Result := inherited Owner as TRENodeList;
end;
function TRENodeItem.GetNext: TRENodeItem;
begin
Result := inherited Next as TRENodeItem;
end;
function TRENodeItem.Match(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
Result := MatchTimes(Src, Context, 0);
end;
function TRENodeItem.MatchTimes(var Src: PChar;
var Context: TREMatchContext; N: Integer): Boolean;
var
SaveBeforeOnce: PChar;
begin
if N >= MaxTimes then
begin
Result := MatchNext(Src, Context);
Exit;
end;
if (N >= MinTimes) and (not Greedy) then
begin
{ non-greedy, and matched enough: direct try next }
Result := MatchNext(Src, Context);
if Result then Exit;
end;
SaveBeforeOnce := Src;
Result := MatchOnce(Src, Context);
if Result then
begin
{ match once success }
if Greedy or (N + 1 < MinTimes) then
begin
{ try once more }
Result := MatchTimes(Src, Context, N + 1);
if Result then Exit; { result = true }
end;
end;
{ match once success, or match moretimes success }
if Result and (not Greedy) and (N + 1 >= MinTimes) then
begin
{ already enough for non-greedy match }
Result := MatchNext(Src, Context);
if not Result then Result := MatchTimes(Src, Context, N + 1); { try once more }
Exit;
end;
{ match once failed, if already enough, we can try next }
Src := SaveBeforeOnce;
Result := Result or (N >= MinTimes);
if Result then
begin
if Next = nil then Exit;
if Next.FNodeType = rentSubEnd then
begin
TRENodeItemSubEnd(Next).FMatchCount := Next.FMaxTimes - 1; { prevent to try any more match }
Result := Next.Match(Src, Context);
end
else if Next.FNodeType = rentSubStart then
begin
TRENodeItemSubStart(Next).FSubEnd.FMatchCount := TRENodeItemSubStart(Next).FSubEnd.FMaxTimes - 1; { prevent to try any more match }
Result := TRENodeItemSubStart(Next).Match(Src, Context);
end
else Result := Next.MatchTimes(Src, Context, 0); { prevent to try any more match }
end;
end;
function TRENodeItem.MatchOnce(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
{ raise EREException.Create('not implemented:' + Self.ClassName); }
Result := True;
end;
{ TRENodeItemCharRange }
function TRENodeItem.MatchNext(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
if Next = nil then
begin
Result := True;
Exit;
end;
Result := Next.Match(Src, Context);
end;
{ TRENodeItemControl }
function TRENodeItemControl.MatchOnce(var Src: PChar; var Context: TREMatchContext): Boolean;
begin
case FControlType of
rectStart:
begin
Result := False;
if Context.Owner.IsStartOfText(Src) then Result := True
else if Context.IsMultiLine and (Src[-1] in [^M, ^J]) then Result := True;
end;
rectEnd:
begin
Result := False;
if Context.Owner.IsEndOfText(Src) then Result := True
else if Context.IsMultiLine and (Src[0] in [^M, ^J]) then Result := True;
end;
rectWordBound:
begin
Result := (Src^ in WhiteSpaces) or (Src^ = #0)
or Context.Owner.IsStartOfText(Src) or Context.Owner.IsEndOfText(Src);
end;
rectNonWordBound:
begin
Result := not ((Src^ in WhiteSpaces)or (Src^ = #0)
or Context.Owner.IsStartOfText(Src) or Context.Owner.IsEndOfText(Src));
end
else raise EREException.Create('unknown control!');
end;
end;
procedure TRegExp.Compile;
var
P: PChar;
Context: TREParseContext;
I: Integer;
StartItem, EndItem, LastEnd: TRENodeItem;
begin
P := PChar(FPattern);
Context := TREParseContext.Create;
try
Context.FSlotCount := 0;
FNodes.Clear;
FNodes.ParseExp(P, StartItem, EndItem, Context);
FNodes.FHead := StartItem;
LastEnd := EndItem;
while P^ <> #0 do
begin
FNodes.ParseExp(P, StartItem, EndItem, Context);
if LastEnd <> nil then LastEnd.FNext := StartItem;
LastEnd := EndItem;
end;
FValues.Clear;
for I := 0 to Context.SlotCount - 1 do FValues.Add('');
finally
FreeAndNil(Context);
end;
end;
{ TLinkedList }
constructor TLinkedList.Create;
begin
inherited;
FHead := nil;
FTail := nil;
FList := TList.Create;
end;
destructor TLinkedList.Destroy;
begin
Clear;
FreeAndNil(FList);
inherited;
end;
function TLinkedList.IsEmpty: Boolean;
begin
Result := FHead <> nil;
end;
procedure TLinkedList.Clear;
begin
while FList.Count > 0 do TLinkedItem(FList[0]).Free;
end;
{ TLinkedItem }
var
_l: Integer = 0;
constructor TLinkedItem.Create(AOwner: TLinkedList);
begin
FNext := nil;
FOwner := AOwner;
if FOwner <> nil then FOwner.Add(Self);
Inc(_l);
// ShowMessage(IntToStr(_l));
end;
destructor TLinkedItem.Destroy;
begin
if FOwner <> nil then FOwner.Remove(Self);
Dec(_l);
// ShowMessage(IntToStr(_l));
inherited;
end;
procedure TLinkedItem.SetOwner(const Value: TLinkedList);
begin
if FOwner = Value then Exit;
FOwner.Remove(Self);
FOwner := Value;
if FOwner <> nil then FOwner.Add(Self);
end;
{ TREMatchContext }
constructor TREMatchContext.Create;
begin
FValues := TStringList.Create;
end;
destructor TREMatchContext.Destroy;
begin
FreeAndNil(FValues);
inherited;
end;
function TREMatchContext.IsCaseSensitive: Boolean;
begin
Result := not (reoIgnoreCase in FOptions);
end;
function TREMatchContext.IsMultiLine: Boolean;
begin
Result := reoMultiLine in FOptions;
end;
{ TRENodeItemChar }
function TRENodeItemChar.MatchOnce(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
if Context.IsCaseSensitive then
begin
if Length(FPattern) = 1 then Result := Src^ = PChar(FPattern)^
else Result := StrLComp(Src, PChar(FPattern), Length(FPattern)) = 0;
if Result then Inc(Src, Length(FPattern));
end
else // case insensitive
begin
if Length(FPattern) = 1 then Result := Upcase(Src^) = Upcase(PChar(FPattern)^)
else Result := StrLIComp(Src, PChar(FPattern), Length(FPattern)) = 0;
if Result then Inc(Src, Length(FPattern));
end;
end;
{ TRENodeItemCharRange }
procedure TRENodeItemCharRange.AddChar(P: PChar);
begin
if IsDBCSLeadByte(Byte(P^)) then FList.Add(Pointer(Integer(Ord(P^) * 256 + Ord(P[1]))))
else FList.Add(Pointer(Ord(P^)));
end;
procedure TRENodeItemCharRange.AfterConstruction;
begin
inherited;
FList := TList.Create;
end;
class function TRENodeItemCharRange.Combine(Src, Dest: TRENodeItemCharRange): TRENodeItemCharRange;
var
I, Index: Integer;
P: Pointer;
Tmp: TRENodeItemCharRange;
begin
if Src.Negative = Dest.Negative then
begin
{ 1. a or b }
{ 2. (not a) or (not b) }
for I := Dest.FList.Count - 1 downto 0 do
begin
P := Dest.FList[I];
if Src.FindChar(Integer(P)) >= 0 then Continue;
Src.FList.Add(P);
end;
FreeAndNil(Dest);
Result := Src;
end
else
begin
if Dest.Negative then
begin
Tmp := Src; Src := Dest; Dest := Tmp;
end;
for I := Dest.FList.Count - 1 downto 0 do
begin
P := Dest.FList[I];
Index := Src.FindChar(Integer(P));
if Index < 0 then Continue;
Src.FList.Delete(Index);
end;
Src.FNegative := True;
FreeAndNil(Dest);
Result := Src;
end;
end;
procedure TRENodeItemCharRange.DeleteChar(P: PChar);
var
Index: Integer;
begin
if IsDBCSLeadByte(Byte(P^)) then Index := FindChar(Integer(Ord(P^) * 256 + Ord(P[1])))
else Index := FindChar(Ord(P^));
if Index >= 0 then FList.Delete(Index);
end;
destructor TRENodeItemCharRange.Destroy;
begin
FreeAndNil(FList);
inherited;
end;
function TRENodeItemCharRange.FindChar(Code: Integer): Integer;
var
L, H, I, C: Integer;
P: Pointer;
Found: Boolean;
begin
P := Pointer(Code);
Found := False;
L := 0;
H := FList.Count - 1;
while L <= H do
begin
I := (L + H) shr 1;
if P = FList.List^[I] then C := 0
else if Cardinal(P) > Cardinal(FList.List^[I]) then C := 1
else C := -1;
if C < 0 then L := I + 1 else
begin
H := I - 1;
if C = 0 then
begin
Found := True;
end;
end;
end;
if Found then Result := L else Result := -1;
end;
function TRENodeItemCharRange.MatchOnce(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
if Src^ = #0 then
begin
Result := False;
Exit;
end;
if IsDBCSLeadByte(Byte(Src^)) then
begin
Result := FindChar(Integer(Ord(Src^) * 256 + Ord(Src[1]))) >= 0;
if Negative then Result := not Result;
if Result then
begin
Inc(Src);
if Src = #0 then raise EREException.Create('unterminated string');
Inc(Src);
end;
end
else
begin
Result := FindChar(Ord(Src^)) >= 0;
if Negative then Result := not Result;
if Result then Inc(Src);
end;
end;
function _CompareByValue(Item1, Item2: Pointer): Integer;
begin
if Item1 = Item2 then Result := 0
else if Cardinal(Item1) < Cardinal(Item2) then Result := 1
else Result := -1;
end;
procedure TRENodeItemCharRange.SortIt;
begin
FList.Sort(_CompareByValue);
end;
{ TRENodeItemCaptured }
procedure TRENodeItemCaptured.AfterConstruction;
begin
inherited;
FIndex := 0;
FMatchPattern := True;
end;
function TRENodeItemCaptured.MatchOnce(var Src: PChar;
var Context: TREMatchContext): Boolean;
var
L: Integer;
begin
if FMatchPattern and ((FIndex < 1) or (FIndex + 1 >= Context.Values.Count)) then
begin
// match number
Result := Ord(Src^) = FIndex;
if Result then Inc(Src);
end
else
begin
// match backpreference
L := Length(Context.Values[FIndex - 1]);
Result := StrLComp(Src, PChar(Context.Values[FIndex - 1]), L) = 0;
if Result then Inc(Src, L);
end;
end;
procedure CheckNonGreedy(var P: PChar; Item: TRENodeItem);
begin
if P^ = '?' then
begin
Item.FGreedy := False;
Inc(P);
end;
end;
procedure CheckEOS(P: PChar);
begin
if P^ = #0 then raise EREException.Create('unexpected end of regular expression!');
end;
procedure GetMinMaxTimes(var P: PChar; Item: TRENodeItem);
var
N: Integer;
begin
Inc(P); { '{' }
while P^ in WhiteSpaces do Inc(P);
CheckEOS(P);
N := 0;
while P^ in ['0'..'9'] do
begin
N := N * 10 + Ord(P^) - Ord('0');
Inc(P);
end;
Item.FMinTimes := N;
while P^ in WhiteSpaces do Inc(P);
CheckEOS(P);
if P^ = '}' then
begin
Item.FMaxTimes := Item.FMinTimes;
Inc(P);
Exit;
end;
if P^ = ',' then Inc(P);
while P^ in WhiteSpaces do Inc(P);
CheckEOS(P);
if P^ = '}' then
begin
Item.FMaxTimes := MaxInt;
Inc(P);
Exit;
end;
if not (P^ in ['0'..'9']) then raise EREException.Create('number required!');
N := 0;
while P^ in ['0'..'9'] do
begin
N := N * 10 + Ord(P^) - Ord('0');
Inc(P);
end;
Item.FMaxTimes := N;
if Item.FMinTimes > Item.FMaxTimes then raise EREException.Create('{m,n} number mistake');
while P^ in WhiteSpaces do Inc(P);
if P^ <> '}' then raise EREException.Create('} required!');
Inc(P); (* '}' *)
end;
procedure ParseTimes(var P: PChar; Item: TRENodeItem);
begin
case P^ of
'*':
begin
Item.FMinTimes := 0;
Item.FMaxTimes := MaxInt;
Item.FGreedy := True;
Inc(P);
CheckNonGreedy(P, Item);
end;
'+':
begin
Item.FMinTimes := 1;
Item.FMaxTimes := MaxInt;
Item.FGreedy := True;
Inc(P);
CheckNonGreedy(P, Item);
end;
'?':
begin
Item.FMinTimes := 0;
Item.FMaxTimes := 1;
Item.FGreedy := True;
Inc(P);
CheckNonGreedy(P, Item);
end;
'{':
begin
GetMinMaxTimes(P, Item);
Item.FGreedy := True;
CheckNonGreedy(P, Item);
end;
end;
end;
{ TRENodeItemSubEnd }
procedure TRENodeItemSubEnd.AfterConstruction;
begin
inherited;
FSlotIndex := -1;
FStart := nil;
FLookAhead := False;
FNegative := False;
FCapture := True;
end;
{ TRENodeItemOrStart }
function TRENodeItemOrStart.Match(var Src: PChar;
var Context: TREMatchContext): Boolean;
var
I: Integer;
Save: PChar;
begin
Result := False;
Save := Src;
for I := 0 to High(FChilds) do
begin
Src := Save;
Result := FChilds[I].Match(Src, Context);
if Result then Break;
end;
end;
{ TRENodeItemOrEnd }
function TRENodeItemOrEnd.Match(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
Result := MatchNext(Src, Context);
end;
{ TRENodeItemSubEnd }
function TRENodeItemSubEnd.Match(var Src: PChar;
var Context: TREMatchContext): Boolean;
var
EndPoint: PChar;
procedure AddResult;
var
L: Integer;
S: string;
begin
if not Capture then Exit;
L := EndPoint - FStart.FSavePoint;
SetLength(S, L);
Move(FStart.FSavePoint^, S[1], L);
Context.Values[FSlotIndex] := S;
end;
begin
EndPoint := Src;
{ successfully reached here, so 1 match increased }
Inc(FMatchCount);
if FMatchCount >= FMinTimes then AddResult;
if FMatchCount >= FMaxTimes then
begin
if FLookAhead then Src := FStart.FSavePoint;
Result := MatchNext(Src, Context);
Exit;
end;
if Greedy then
begin
if FStart.FSavePoint <> EndPoint then
begin
Result := FStart.MatchNext(Src, Context);
end else Result := False;
if not Result then
begin
{ more time failed }
Src := EndPoint;
if FLookAhead then Src := FStart.FSavePoint;
Result := MatchNext(Src, Context);
end;
Exit;
end;
if FLookAhead then Src := FStart.FSavePoint;
{ non greedy, and FMatchCount < FMaxTimes }
if MatchNext(Src, Context) then
begin
Result := True;
end
else
begin
{ current match not enough? match a more time }
if FStart.FSavePoint = EndPoint then
begin
Result := False;
Exit; { no pointer advanced, give up }
end;
Result := FStart.MatchNext(Src, Context);
end;
end;
{ TRENodeItemSubStart }
function TRENodeItemSubStart.Match(var Src: PChar;
var Context: TREMatchContext): Boolean;
begin
FSavePoint := Src;
FSubEnd.FMatchCount := 0;
if FSubEnd.Greedy or (FSubEnd.FMinTimes > 0) then
begin
{ at least match once }
Result := MatchNext(Src, Context);
if FSubEnd.FNegative then
begin
Result := not Result;
if Result then Result := FSubEnd.Match(Src, Context);
end;
if not Result then
begin
if FSubEnd.FMinTimes = 0 then
begin
{ (..)* }
Src := FSavePoint;
Result := FSubEnd.MatchNext(Src, Context);
Exit;
end;
end;
Exit;
end;
{ non greedy, and FMinTimes = 0 }
Result := FSubEnd.MatchNext(Src, Context);
if FSubEnd.FNegative then
begin
Result := not Result;
if Result then Result := FSubEnd.Match(Src, Context);
end;
if not Result then
begin
{ 0 time not success, try at least once }
Src := FSavePoint;
Result := MatchNext(Src, Context);
end;
end;
function TRENodeList.ParseNextNode(var P: PChar): TRENodeItem;
const
_d: PChar = '[0-9]';
_Dx: PChar = '[^0-9]';
_f: PChar = '/x0c';
_n: PChar = '/x0a';
_r: PChar = '/x0d';
_s: PChar = '[ /x0c/x0a/x0d/x09/x0b]';
_Sx: PChar = '[^ /x0c/x0a/x0d/x09/x0b]';
_t: PChar = '/x09';
_v: PChar = '/x0b';
_w: PChar = '[A-Za-z0-9_]';
_Wx: PChar = '[^A-Za-z0-9_]';
procedure SkipWhiteSpaces;
begin
while P^ in WhiteSpaces do Inc(P);
end;
procedure ProcessEnclosed(var P: PChar; var Result: TRENodeItem); forward;
function ProcessCode(PCode: PChar; var Item: TRENodeItem): PChar;
var
S: string;
Ch, Ch64: Integer;
procedure CheckEOS;
begin
if Result^ = #0 then raise EREException.Create('unexpected end of regular expression!');
end;
begin
Result := PCode;
{ ProcessCode only processes /x,/num,/u, other /... are processed by ProcessTransferMean }
case Result^ of
'/':
begin
Inc(Result);
case Result^ of
'x':
begin
Inc(Result);
CheckEOS;
S := '$';
S := S + Result^;
Inc(Result);
CheckEOS;
S := S + Result^;
Inc(Result);
Ch := StrToIntDef(S, -1);
if Ch = -1 then raise EREException.CreateFmt('not a valid hex number:"%s"', [S]);
Item := CreateItem(rentChar);
TRENodeItemChar(Item).FPattern := Chr(Ch);
end;
'u':
begin
Inc(Result);
CheckEOS;
S := '$';
{ 1 }
Inc(Result);
CheckEOS;
S := S + Result^;
{ 2 }
Inc(Result);
CheckEOS;
S := S + Result^;
{ 3 }
Inc(Result);
CheckEOS;
S := S + Result^;
{ 4 }
Inc(Result);
CheckEOS;
S := S + Result^;
Inc(Result);
Ch64 := StrToInt64Def(S, -1);
if Ch64 = -1 then EREException.CreateFmt('not a valid hex number:"%s"', [S]);
Item := CreateItem(rentChar);
TRENodeItemChar(Result).FPattern := Chr(Ch64 shr 16) + Chr(Ch64 and $FFFF);
end
else if Result^ in ['0'..'9'] then
begin
if (Result^ in ['0'..'3']) and (Result[1] in ['0'..'7']) and (Result[2] in ['0'..'7']) then
begin
Item := CreateItem(rentCaptured);
TRENodeItemCaptured(Result).FIndex := Ord(Result^) * 64 + Ord(Result[1]) * 8 + Ord(Result[2]);
Inc(Result, 3);
end
else if (Result^ in ['0'..'7']) and (Result[1] in ['0'..'7']) then
begin
Item := CreateItem(rentCaptured);
TRENodeItemCaptured(Result).FIndex := Ord(Result^) * 8 + Ord(Result[1]);
Inc(Result, 2);
end
else
begin
Ch := 0;
while Result^ in ['0'..'9'] do
begin
Ch := Ch * 10 + Ord(Result^) - Ord('0');
Inc(Result);
end;
Item := CreateItem(rentCaptured);
TRENodeItemCaptured(Item).FIndex := Ch;
TRENodeItemCaptured(Item).FMatchPattern := False;
end;
end;
end;
end;
'[':
begin
ProcessEnclosed(PCode, Item);
end;
end;
end;
procedure ProcessCtrlChar(var Item: TRENodeItem);
begin
Inc(P);
CheckEOS(P);
Item := CreateItem(rentChar);
case P^ of
'a'..'z': TRENodeItemChar(Item).FPattern := Char(Ord(P^) - Ord('a') + 1);
'A'..'Z': TRENodeItemChar(Item).FPattern := Char(Ord(P^) - Ord('A') + 1)
else
begin
TRENodeItemChar(Item).FPattern := 'c';
Dec(P);
end;
end;
Inc(P);
end;
procedure ProcessTransferMean(var Item: TRENodeItem);
begin
Inc(P);
CheckEOS(P);
case P^ of
'b':
begin
Item := TRENodeItemControl(CreateItem(rentControl));
TRENodeItemControl(Item).FControlType := rectWordBound;
Inc(P);
end;
'B':
begin
Item := TRENodeItemControl(CreateItem(rentControl));
TRENodeItemControl(Item).FControlType := rectNonWordBound;
Inc(P);
end;
'c': ProcessCtrlChar(Item);
'd': ProcessCode(_d, Item);
'D': ProcessCode(_Dx, Item);
'f': ProcessCode(_f, Item);
'n': ProcessCode(_n, Item);
'r': ProcessCode(_r, Item);
's': ProcessCode(_s, Item);
'S': ProcessCode(_Sx, Item);
't': ProcessCode(_t, Item);
'v': ProcessCode(_v, Item);
'w': ProcessCode(_w, Item);
'W': ProcessCode(_Wx, Item);
'x', 'u', '0'..'9': P := ProcessCode(P - 1, Item) - 1
else
begin
Item := CreateItem(rentChar);
TRENodeItemChar(Item).FPattern := P^;
end;
end;
Inc(P);
end;
procedure ProcessEnclosed(var P: PChar; var Result: TRENodeItem);
procedure AddRange(StartCh, Ch: Char; Item: TRENodeItemCharRange);
var
ChBuf: array[0..1] of Char;
TmpCh: Char;
begin
ChBuf[1] := #0;
for TmpCh := StartCh to Ch do
begin
ChBuf[0] := TmpCh;
Item.AddChar(@ChBuf);
end;
end;
var
Item: TRENodeItemCharRange;
Item1: TRENodeItem;
ChBuf: array[0..1] of Char;
HasDot: Boolean;
CanStart, Started: Boolean;
StartCh, Ch, TmpCh: Char;
begin
StartCh := #0;
Started := False;
Inc(P);
HasDot := False;
Item := TRENodeItemCharRange(CreateItem(rentCharRange));
Item.FNegative := False;
Result := Item;
if P^ = '^' then
begin
Item.FNegative := True;
Inc(P);
end;
while P^ <> ']' do
begin
if P^ = #0 then raise EREException.Create('] expected before end of the expression');
if P^ = '.' then
begin
HasDot := True;
Inc(P);
end
else if IsDBCSLeadByte(Byte(P^)) then
begin
Item.AddChar(P);
Inc(P, 2);
end
else
begin
CanStart := True;
if P^ = '-' then
begin
{ raise EREException.Create('not expected -'); }
{ we treat this - as a normal character(because JScript does so) }
Item.AddChar(P);
CanStart := False;
Inc(P);
end;
if P^ = '/' then
begin
Ch := #0;
Inc(P);
CheckEOS(P);
case P^ of
'x':
begin
if not (P[1] in ['0'..'9', 'a'..'z', 'A'..'Z']) then
raise EREException.CreateFmt('hex char need but "%S" found', [P[1]]);
if not (P[2] in ['0'..'9', 'a'..'z', 'A'..'Z']) then
raise EREException.CreateFmt('hex char need but "%S" found', [P[2]]);
Ch := Chr(StrToInt('$' + P[1] + P[2]));
Inc(P, 3);
end;
'c':
begin
if P[1] in ['a'..'z'] then Ch := Chr(Ord(P[1]) - Ord('a'))
else if P[1] in ['A'..'Z'] then Ch := Chr(Ord(P[1]) - Ord('A'))
else
begin
Ch := 'c';
Dec(P);
end;
Inc(P, 2);
end;
'f':
begin
Ch := #$C;
Inc(P);
end;
'n':
begin
Ch := #$A;
Inc(P);
end;
'r':
begin
Ch := #$D;
Inc(P);
end;
't':
begin
Ch := #$9;
Inc(P);
end;
'v':
begin
Ch := #$B;
Inc(P);
end
else
begin
CanStart := False;
case P^ of
'd', 'D', 's', 'S', 'w', 'W':
begin
Dec(P);
ProcessTransferMean(Item1);
Item := TRENodeItemCharRange.Combine(
Item, Item1 as TRENodeItemCharRange
);
Result := Item;
end
else
begin
Ch := P^;
Inc(P);
CanStart := True;
end;
end;
end;
end;
end
else
begin
Ch := P^;
Inc(P);
end;
if Started then
begin
{ end of ?-? }
Started := False;
if Ch = ' ' then
begin
{ ' ' followed by '-', we treat '-' as a normal character }
Item.AddChar('-');
Item.AddChar(' ');
end
else
begin
if Ord(StartCh) > Ord(Ch) then
begin
TmpCh := Ch; Ch := StartCh; StartCh := TmpCh;
end;
AddRange(StartCh, Ch, Item);
end;
end
else if P^ = '-' then
begin
if not CanStart then Item.AddChar('-') { cannot start, treat '-' as a normal character }
else
begin
{ start of ?-? }
Started := True;
Inc(P);
StartCh := Ch;
end;
end
else
begin
ChBuf[0] := Ch;
ChBuf[1] := #0;
Item.AddChar(@ChBuf);
end;
end;
end;
if P^ = ']' then Inc(P);
if Started then
begin
{ started but not end, we treat last '-' as a normal character }
Item.AddChar('-');
end;
if HasDot then
begin
Item.SortIt;
if Item.FindChar(Ord(^M)) >= 0 then
begin
if Item.FNegative then
begin
{ [^/n.] : all next patterns have no effect, always false }
Item.FList.Clear;
Item.FNegative := False;
end
else
begin
{ [/n.] : all next patterns have no effect, always true }
Item.FList.Clear;
Item.FNegative := True;
end;
end
else
begin
if Item.FNegative then
begin
{ [^.] : only ^M has effect }
Item.FList.Clear;
Item.FList.Add(Pointer(Ord(^M)));
Item.FNegative := False;
end
else
begin
{ [.] : matches all except ^M }
Item.FList.Clear;
Item.FList.Add(Pointer(Ord(^M)));
Item.FNegative := True;
end;
end;
end;
Item.SortIt;
end;
const
CReturn: PChar = ^M;
var
L: Integer;
begin
Result := nil;
if P^ = #0 then Exit;
{ STEP I: parse pattern }
if P^ in ControlChars then
begin
case P^ of
'/':
begin
ProcessTransferMean(Result);
end;
'^':
begin
Result := CreateItem(rentControl);
TRENodeItemControl(Result).FControlType := rectStart;
Inc(P);
Exit; { no modifier need }
end;
'$':
begin
Result := CreateItem(rentControl);
TRENodeItemControl(Result).FControlType := rectEnd;
Inc(P);
Exit; { no modifier need }
end;
'.':
begin
Inc(P);
Result := CreateItem(rentCharRange);
TRENodeItemCharRange(Result).AddChar(CReturn);
TRENodeItemCharRange(Result).FNegative := True;
end;
'[':
begin
ProcessEnclosed(P, Result);
end;
'(':
begin
raise Exception.Create('"(" not expected here');
end
{ '|' is processed outside }
else raise EREException.CreateFmt('unexpected char:"%S"', [P^]);
end;
end
else
begin
Result := CreateItem(rentChar);
while not (P^ in ControlChars) do
begin
if IsDBCSLeadByte(Byte(P^)) then
begin
L := Length(TRENodeItemChar(Result).FPattern);
SetLength(TRENodeItemChar(Result).FPattern, L + 2);
Move(P^, TRENodeItemChar(Result).FPattern[L + 1], 2);
Inc(P);
end else TRENodeItemChar(Result).FPattern := TRENodeItemChar(Result).FPattern + P^;
Inc(P);
end;
end;
{ STEP II: parse modifiers }
ParseTimes(P, Result);
end;
procedure TRENodeList.ParseNextStrip(var P: PChar; var StartItem,
EndItem: TRENodeItem; ParseContext: TREParseContext);
var
Q, QE: TRENodeItem;
SubSignal: Char;
begin
StartItem := nil;
EndItem := nil;
if P^ = ')' then Exit;
if P^ = #0 then Exit;
if P^ = '(' then
begin
{ "(" exp ")" }
Inc(P);
SubSignal := #0;
if P^ = '?' then
begin
Inc(P);
SubSignal := P^;
if not (SubSignal in [':', '=', '!']) then raise EREException.CreateFmt('":", "=", or "!" required but %S found', [SubSignal]);
Inc(P);
end;
StartItem := CreateItem(rentSubStart);
ParseExp(P, Q, EndItem, ParseContext);
if Q = nil then raise EREException.Create('empty () not allowed');
StartItem.FNext := Q;
if P^ <> ')' then raise EREException.CreateFmt('")" expected but "%S" found', [P^]);
Inc(P);
Q:= CreateItem(rentSubEnd);
EndItem.FNext := Q;
EndItem := Q;
TRENodeItemSubStart(StartItem).FSubEnd := TRENodeItemSubEnd(Q);
TRENodeItemSubEnd(Q).FStart := TRENodeItemSubStart(StartItem);
{ modifiers of '(...)' }
ParseTimes(P, Q);
case SubSignal of
':': TRENodeItemSubEnd(Q).FCapture := False;
'=':
begin
TRENodeItemSubEnd(Q).FCapture := False;
TRENodeItemSubEnd(Q).FLookAhead := True;
end;
'!':
begin
TRENodeItemSubEnd(Q).FCapture := False;
TRENodeItemSubEnd(Q).FLookAhead := True;
TRENodeItemSubEnd(Q).FNegative := True;
end;
end;
if TRENodeItemSubEnd(Q).Capture then
begin
TRENodeItemSubEnd(Q).FSlotIndex := ParseContext.FSlotCount;
Inc(ParseContext.FSlotCount);
end;
end
else
begin
{ node|strip [ [node|strip...] ... ] }
if P^ = '(' then
begin
ParseNextStrip(P, StartItem, EndItem, ParseContext);
end
else
begin
StartItem := ParseNextNode(P);
EndItem := StartItem;
end;
while not (P^ in [#0, '|', ')']) do
begin
if P^ = '(' then
begin
ParseNextStrip(P, Q, QE, ParseContext);
end
else
begin
Q := ParseNextNode(P);
QE := Q;
end;
if Q <> nil then
begin
EndItem.FNext := Q;
EndItem := QE;
end else Break;
end;
end;
end;
procedure TRENodeList.ParseExp(var P: PChar; var StartItem,
EndItem: TRENodeItem; ParseContext: TREParseContext);
var
CurStart, CurEnd: TRENodeItem;
L: Integer;
begin
if P^ = #0 then
begin
StartItem := nil;
EndItem := nil;
Exit;
end;
{ strip [ "|" strip ["|" strip ...] ... ] }
ParseNextStrip(P, StartItem, EndItem, ParseContext);
if P^ = '|' then
begin
CurStart := StartItem;
CurEnd := EndItem;
if (CurStart = nil) or (CurEnd = nil) then raise EREException.Create('unexpected "|"');
StartItem := CreateItem(rentOrStart);
EndItem := CreateItem(rentOrEnd);
StartItem.FNext := EndItem;
TRENodeItemOrEnd(EndItem).FStart := TRENodeItemOrStart(StartItem);
L := Length(TRENodeItemOrStart(StartItem).FChilds);
SetLength(TRENodeItemOrStart(StartItem).FChilds, L + 1);
TRENodeItemOrStart(StartItem).FChilds[L] := CurStart;
CurEnd.FNext := EndItem;
while P^ = '|' do
begin
Inc(P);
if CurStart = nil then raise Exception.Create('unexpected end of expression');
ParseNextStrip(P, CurStart, CurEnd, ParseContext);
L := Length(TRENodeItemOrStart(StartItem).FChilds);
SetLength(TRENodeItemOrStart(StartItem).FChilds, L + 1);
TRENodeItemOrStart(StartItem).FChilds[L] := CurStart;
CurEnd.FNext := EndItem;
end;
end;
end;
{*************************** 非常低调滴分隔线 ********************************}
end.