delphi html解析函数,delphi 几个实用的HTML解析函数

该楼层疑似违规已被系统折叠 隐藏此楼查看此楼

1)HTML 标签值攫取函数,任意标签哦,纯字符串分析,可以配合IDHTTP编程

uses StrUtils;

function ExtractHtmlTagValues(const HtmlText: string; TagName, AttribName: string; var Values: TStringList): integer;

function FindFirstCharAfterSpace(const Line: string; StartPos: integer): Integer;

var i: integer;

begin

Result := -1;

for i := StartPos to Length(Line) do

begin

if (Line[i] <> ' ') then

begin

Result := i;

exit;

end;

end;

end;

function FindFirstSpaceAfterChars(const Line: string; StartPos: integer): Integer;

begin

Result := PosEx(' ', Line, StartPos);

end;

function FindFirstSpaceBeforeChars(const Line: string; StartPos: integer): Integer;

var i: integer;

begin

Result := 1;

for i := StartPos downto 1 do

begin

if (Line[i] = ' ') then

begin

Result := i;

exit;

end;

end;

end;

var InnerTag: string;

LastPos, LastInnerPos: Integer;

SPos, LPos, RPos: Integer;

AttribValue: string;

ClosingChar: char;

TempAttribName: string;

begin

Result := 0;

LastPos := 1;

while (true) do

begin

// find outer tags ''

LPos := PosEx('

if (LPos <= 0) then break;

RPos := PosEx('>', HtmlText, LPos+1);

if (RPos <= 0) then

LastPos := LPos + 1

else

LastPos := RPos + 1;

// get inner tag

InnerTag := Copy(HtmlText, LPos+1, RPos-LPos-1);

InnerTag := Trim(InnerTag); // remove spaces

if (Length(InnerTag) < Length(TagName)) then continue;

// check tag name

if (SameText(Copy(InnerTag, 1, Length(TagName)), TagName)) then

begin

// found tag

AttribValue := '';

LastInnerPos := Length(TagName)+1;

while (LastInnerPos < Length(InnerTag)) do

begin

// find first '=' after LastInnerPos

RPos := PosEx('=', InnerTag, LastInnerPos);

if (RPos <= 0) then break;

// this way you can check for multiple attrib names and not a specific attrib

SPos := FindFirstSpaceBeforeChars(InnerTag, RPos);

TempAttribName := Trim(Copy(InnerTag, SPos, RPos-SPos));

if (true) then

begin

// found correct tag

LPos := FindFirstCharAfterSpace(InnerTag, RPos+1);

if (LPos <= 0) then

begin

LastInnerPos := RPos + 1;

continue;

end;

LPos := FindFirstCharAfterSpace(InnerTag, LPos); // get to first char after '='

if (LPos <= 0) then continue;

if ((InnerTag[LPos] <> '"') and (InnerTag[LPos] <> '''')) then

begin

// AttribValue is not between '"' or ''' so get it

RPos := FindFirstSpaceAfterChars(InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos, Length(InnerTag)-LPos+1)

else

AttribValue := Copy(InnerTag, LPos, RPos-LPos+1);

end

else

begin

// get url between '"' or '''

ClosingChar := InnerTag[LPos];

RPos := PosEx(ClosingChar, InnerTag, LPos+1);

if (RPos <= 0) then

AttribValue := Copy(InnerTag, LPos+1, Length(InnerTag)-LPos-1)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值