delphi解析html字符串,delphi,几个实用的HTML解析函数

1)HTML 标签值攫取函数,任意标签哦,纯字符串分析,可以配合IDHTTP编程

uses StrUtils;

function ExtractHtmlTagValues(const HtmlText: string; TagName,

AttribName: string; var Values: TStringList): integer;

function FindFirstCharAfterSpace(const Line: string; StartPos:

integer): Integer;

var i: integer;

begin

Result := -1;

for i := StartPos to Length(Line) do

begin

if (Line[i] <> ' ') then

begin

Result := i;

exit;

end;

end;

end;

function FindFirstSpaceAfterChars(const Line: string; StartPos:

integer): Integer;

begin

Result := PosEx(' ', Line, StartPos);

end;

function FindFirstSpaceBeforeChars(const Line: string; StartPos:

integer): Integer;

var i: integer;

begin

Result := 1;

for i := StartPos downto 1 do

begin

if (Line[i] = ' ') then

begin

Result := i;

exit;

end;

end;

end;

var InnerTag: string;

LastPos, LastInnerPos: Integer;

SPos, LPos, RPos: Integer;

AttribValue: string;

ClosingChar: char;

TempAttribName: string;

begin

Result := 0;

LastPos := 1;

while (true) do

begin

// find outer tags '

'>'

LPos := PosEx('

if (LPos <= 0) then break;

RPos := PosEx('>', HtmlText, LPos+1);

if (RPos <= 0) then

LastPos := LPos + 1

else

LastPos := RPos + 1;

// get inner tag

InnerTag := Copy(HtmlText, LPos+1, RPos-LPos-1);

InnerTag := Trim(InnerTag); // remove spaces

if (Length(InnerTag) < Length(TagName)) then

continue;

// check tag name

if (SameText(Copy(InnerTag, 1, Length(TagName)), TagName))

then

begin

// found tag

AttribValue := '';

LastInnerPos := Length(TagName)+1;

while (LastInnerPos < Length(InnerTag)) do

begin

// find first '=' after LastInnerPos

RPos := PosEx('=', InnerTag, LastInnerPos);

if (RPos <= 0) then break;

// this way you can check for multiple attrib names and not a

specific attrib

SPos := FindFirstSpaceBeforeChars(InnerTag, RPos);

TempAttribName := Trim(Copy(InnerTag, SPos, RPos-SPos));

if (true) then

begin

// found correct tag

LPos := FindFirstCharAfterSpace(InnerTag, RPos+1);

if (LPos <= 0) then

begin

LastInnerPos := RPos + 1;

continue;

end;

LPos := FindFirstCharAfterSpace(InnerTag, LPos); // get to first

char after '='

if (LPos <= 0) then continue;

if ((InnerTag[LPos] <> '"') and

(InnerTag[LPos] <> '''')) then

begin

// AttribValue is not between '"' or ''' so get it

RPos := FindFirstSpaceAfterChars(InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos, Length(InnerTag)-LPos+1)

else

AttribValue := Copy(InnerTag, LPos, RPos-LPos+1);

end

else

begin

// get url between '"' or '''

ClosingChar := InnerTag[LPos];

RPos := PosEx(ClosingChar, InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos+1,

Length(InnerTag)-LPos-1)

else

AttribValue := Copy(InnerTag, LPos+1, RPos-LPos-1)

end;

if (SameText(TempAttribName, AttribName)) and (AttribValue

<> '') then

begin

Values.Add(AttribValue);

inc(Result);

end;

end;

if (RPos <= 0) then

LastInnerPos := Length(InnerTag)

else

LastInnerPos := RPos+1;

end;

end;

end;

end;

用法示例:

取得页面中所有链接

var

Links : TStringList;

LinkFound,i : Integer;

begin

Links := TStringList.Create;

LinkFound := ExtractHtmlTagValues(HtmlText,'A','HREF',Links);

for i:=0 to LinkFound-1 do

begin

//Add your own codes here

end;

Links.Free;

end;

2)表单元素值攫取函数,可以从HTML文本中按照给定的Input名称解析出其Value

function GetValByName(S, Sub: string) : string;

var

EleS,EleE,iPos: Integer;

ELeStr,ValSt: String;

St,Ct : Integer;

function FindEleRange(str: string ; front : boolean; posi :

integer): Integer;

var

i: integer;

begin

if Front then

begin

for i:=posi-1 downto 1 do

if Str[i]='

begin

Result := i;

break;

end;

end else begin

for i := posi+1 to length(Str) do

if Str[i]='>' then

begin

Result := i;

break;

end;

end;

end;

function FindEnd (str : string; posi : integer) : Integer;

var

i: integer;

begin

for i:=posi to length(str) do

begin

if (str[i] ='"') or (str[i] ='''') or (str[i] =' ') then

begin

result := i-1;

break;

end;

end;

end;

begin

iPos := Pos('name="'+lowercase(Sub)+'"',lowercase(S));

if iPos = 0 then iPos :=

Pos('name='+lowercase(Sub),lowercase(S));

if iPos = 0 then iPos :=

Pos('name='''+lowercase(Sub)+'''',lowercase(S));

if iPos = 0 then exit;

EleS := FindEleRange(S,TRUE,iPos);

EleE := FindEleRange(S,FALSE,iPos);

EleStr := Copy(S,EleS,EleE-EleS+1);

ValSt := 'value="';

iPos := Pos(ValSt,EleStr);

if iPos = 0 then

begin

ValSt := 'value=''';

iPos := Pos(ValSt,EleStr);

end;

if iPos = 0 then

begin

ValSt := 'value=';

iPos := Pos(ValSt,EleStr);

end;

St := iPos+length(ValSt);

Ct := FindEnd(EleStr,St)-St+1;

Result := Copy(EleStr,St,Ct);

end;

用法示例:

取得页面中名为 Submit 的表单项的值

var

InputValue : String;

begin

InputValue := GetValByName(HtmlText,'Submit');

end;

3)取某两个字符串中间的字符

function getStrFromHtml(var Source: String; SbStr, bStr, eStr:

String): String;

var

I: Integer;

sbPos, bPos, ePos: Integer;

S: String;

begin

S := Source;

Result := '' ;

if SBStr <> '' then

Begin

sbPos := Pos(UpperCase(SbStr), UpperCase(S));

if sbPos > 0 then

Delete(S, 1, sbPos - 1 + length(sbStr))

Else

Exit;

End;

bPos := Pos(UpperCase(bStr), UpperCase(S));

if bPos > 0 then

Delete(S, 1, bPos - 1 + length(bStr))

Else

Exit;

ePos := pos(UpperCase(eStr), UpperCase(S));

if ePos > 0 then

Delete(S, ePos, length(S));

Result := S;

end;

用法实例:

FUserID := getStrFromHtml(reqStr, 'id="userID"', 'value="',

'"');

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值