mshtml 解析html c,使用MSHTML解析HTML代码

C++版本:

I have a lot of experience in programming low-level MSHTML and I always see questions on how one can use MSHTML to parse HTML and then access elements via the DOM.

Well, here it is. I use IMarkupServices provided by MSHTML. There is no need for an IOleClientSite or any sort of embedding. I think this is just about as light as anyone can get.

In future articles, I will be concentrating on the reuse of MSHTML in other aspects of programming. Such as using MSHTML as an editor, for example.

This code makes use of simple COM calls and nothing more. It can be easily adapted for ATL, MFC and VB, among other languages. Please don't ask me to provide samples in other languages. In order to build this you need the IE SDK.

/******************************************************************

* ParseHTML.cpp

*

* ParseHTML: Lightweight UI-less HTML parser using MSHTML

*

* Note: This is for accessing the DOM only. No image download,

* script execution, etc...

*

* 8 June 2001 - Asher Kobin (asherk@pobox.com)

*

* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY

* OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT

* LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR

* FITNESS FOR A PARTICULAR PURPOSE.

*

*******************************************************************/

#include <windows.h>

#include <mshtml.h>

// Markup handed to IMarkupServices::ParseString in WinMain.
// NOTE(review): the tags inside this literal were stripped when the article
// was extracted (leaving an unterminated, multi-line string). A minimal
// document wrapping the surviving text is restored here -- confirm against
// the original sample if the exact markup matters.
OLECHAR szHTML[] = OLESTR("<HTML><BODY>Hello World!</BODY></HTML>");

/*
 * WinMain: creates an MSHTML document object without any hosting/UI,
 * parses the global szHTML string through IMarkupServices::ParseString,
 * and reads the resulting body's innerText through the DOM.
 *
 * All four WinMain parameters are unused.
 * Returns TRUE unconditionally (kept from the original sample so the
 * process exit code does not change).
 */
int __stdcall WinMain(HINSTANCE hInst,
                      HINSTANCE hPrev,
                      LPSTR lpCmdLine,
                      int nShowCmd)
{
    IHTMLDocument2 *pDoc = NULL;

    // CoUninitialize() must only balance a successful CoInitialize().
    HRESULT hrInit = CoInitialize(NULL);
    if (FAILED(hrInit))
        return TRUE;

    CoCreateInstance(CLSID_HTMLDocument,
                     NULL,
                     CLSCTX_INPROC_SERVER,
                     IID_IHTMLDocument2,
                     (LPVOID *) &pDoc);

    if (pDoc)
    {
        IPersistStreamInit *pPersist = NULL;

        pDoc->QueryInterface(IID_IPersistStreamInit,
                             (LPVOID *) &pPersist);

        if (pPersist)
        {
            IMarkupServices *pMS = NULL;

            // Put the freshly created document into an initialized state
            // before asking it for markup services.
            pPersist->InitNew();
            pPersist->Release();

            pDoc->QueryInterface(IID_IMarkupServices,
                                 (LPVOID *) &pMS);

            if (pMS)
            {
                IMarkupContainer *pMC = NULL;
                IMarkupPointer *pMkStart = NULL;
                IMarkupPointer *pMkFinish = NULL;

                pMS->CreateMarkupPointer(&pMkStart);
                pMS->CreateMarkupPointer(&pMkFinish);

                // Only parse when both pointers were actually created.
                if (pMkStart && pMkFinish)
                {
                    pMS->ParseString(szHTML,
                                     0,
                                     &pMC,
                                     pMkStart,
                                     pMkFinish);
                }

                if (pMC)
                {
                    IHTMLDocument2 *pNewDoc = NULL;

                    // Request the interface that matches the pointer type;
                    // the original asked for IID_IHTMLDocument while storing
                    // the result in an IHTMLDocument2*.
                    pMC->QueryInterface(IID_IHTMLDocument2,
                                        (LPVOID *) &pNewDoc);

                    if (pNewDoc)
                    {
                        // do anything with pNewDoc, in this case
                        // get the body innerText.
                        IHTMLElement *pBody = NULL;

                        pNewDoc->get_body(&pBody);

                        if (pBody)
                        {
                            BSTR strText = NULL;

                            pBody->get_innerText(&strText);
                            pBody->Release();

                            // SysFreeString accepts NULL, so this is safe
                            // even if get_innerText produced nothing.
                            SysFreeString(strText);
                        }

                        pNewDoc->Release();
                    }

                    pMC->Release();
                }

                if (pMkStart)
                    pMkStart->Release();

                if (pMkFinish)
                    pMkFinish->Release();

                pMS->Release();
            }
        }

        pDoc->Release();
    }

    CoUninitialize();

    return TRUE;
}

Delphi版本1:

( add to uses clause, MSHTML, ActiveX, ComObj )

const

// Interface identifier (GUID) for IPersistStreamInit, passed to QueryInterface
// by the code that follows.
IID_IPersistStreamInit : TGUID = '{7FD52380-4E07-101B-AE2D-08002B2EC713}';

{ Creates a UI-less MSHTML document, parses a fixed HTML string through
  IMarkupServices.ParseString and shows the parsed body's innerText in the
  form's "m" control.

  Interface variables are reference-counted by the compiler, so no manual
  _Release calls are made: the original's explicit _Release calls (including
  two on pPersist) released each object more often than it was acquired. }
procedure TFormMain.FormCreate(Sender: TObject);
var
  pDoc      : IHTMLDocument2;
  pNewDoc   : IHTMLDocument2;
  pPersist  : IPersistStreamInit;
  pMS       : IMarkupServices;
  pMC       : IMarkupContainer;
  pMkStart  : IMarkupPointer;
  pMkFinish : IMarkupPointer;
  pBody     : IHTMLElement;
  strText   : string;
  szHTML    : widestring;
  didInit   : boolean;
begin
  didInit := Succeeded(CoInitialize(nil));
  try
    // NOTE(review): the tags of this literal were stripped when the article
    // was extracted; a minimal document around the surviving text is
    // restored here -- confirm against the original sample.
    szHTML := '<HTML><BODY>Hello World!</BODY></HTML>';

    CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, pDoc);
    if pDoc <> nil then
    begin
      pDoc.QueryInterface(IID_IPersistStreamInit, pPersist);
      if pPersist <> nil then
      begin
        // Initialize the new document before requesting markup services.
        pPersist.InitNew;

        pDoc.QueryInterface(IID_IMarkupServices, pMS);
        if pMS <> nil then
        begin
          pMS.CreateMarkupPointer(pMkStart);
          pMS.CreateMarkupPointer(pMkFinish);

          // The cast passes the first character of the wide string by
          // reference, as in the original call.
          pMS.ParseString(word(szHTML[1]), 0, pMC, pMkStart, pMkFinish);

          if pMC <> nil then
          begin
            // Ask for the interface that matches the variable's type
            // (the original requested IID_IHTMLDocument).
            pMC.QueryInterface(IID_IHTMLDocument2, pNewDoc);
            if pNewDoc <> nil then
            begin
              // do anything with pNewDoc, in this case
              // get the body innerText.
              pBody := pNewDoc.Get_body;
              if pBody <> nil then
              begin
                strText := pBody.Get_innerText;
                m.Text := strText;
              end;
            end;
          end;
        end;
      end;
    end;
  finally
    // Drop every COM reference before COM is shut down; otherwise the
    // compiler-generated releases would run after CoUninitialize.
    pBody := nil;
    pNewDoc := nil;
    pMC := nil;
    pMkStart := nil;
    pMkFinish := nil;
    pMS := nil;
    pPersist := nil;
    pDoc := nil;
    if didInit then CoUninitialize();
  end;
end;

Delphi版本2:

( add to uses clause, MSHTML, ActiveX, ComObj )

const

// Interface identifier (GUID) for IPersistStreamInit, for use with
// QueryInterface calls.
IID_IPersistStreamInit : TGUID = '{7FD52380-4E07-101B-AE2D-08002B2EC713}';

{ Alternative approach: creates an MSHTML document, switches it to design
  mode, waits until it reports readyState 'complete', assigns a fixed HTML
  string to the body's innerHTML and shows the body's innerText in the
  form's "m" control.

  Interface variables are reference-counted by the compiler, so the
  original's manual pDoc._Release (an extra release) has been removed. }
procedure TFormMain.FormCreate(Sender: TObject);
var
  pDoc    : IHTMLDocument2;
  pBody   : IHTMLElement;
  strText : string;
  szHTML  : widestring;
  didInit : boolean;
begin
  didInit := Succeeded(CoInitialize(nil));
  try
    // NOTE(review): the tags of this literal were stripped when the article
    // was extracted; a minimal document around the surviving text is
    // restored here -- confirm against the original sample.
    szHTML := '<HTML><BODY>Hello World!</BODY></HTML>';

    CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, pDoc);
    if pDoc <> nil then
    begin
      pDoc.Set_designMode('On'); //no script execution

      // Pump messages until the document has finished loading.
      while not (pDoc.readyState = 'complete') do Application.ProcessMessages;

      // Fetch the body once and check it before use; the original
      // dereferenced pDoc.body before its nil test.
      pBody := pDoc.Get_body;
      if pBody <> nil then
      begin
        pBody.innerHTML := szHTML;
        strText := pBody.Get_innerText;
      end
      else
        strText := '';

      m.Text := strText;
    end;
  finally
    // Drop COM references before COM is shut down.
    pBody := nil;
    pDoc := nil;
    if didInit then CoUninitialize();
  end;
end;

============== Other Useful Routines ===============

============== Other Useful Routines ===============

============== Other Useful Routines ===============

{ Returns the HTML source of Document by saving it through
  IPersistStreamInit into an in-memory stream and reading the bytes back.

  Document : object expected to support IPersistStreamInit.
  Result   : the saved bytes converted to a string, or '' when Document is
             nil, does not support IPersistStreamInit, or any stream step
             fails or yields no data.

  NOTE(review): the bytes are read into an AnsiString and converted with the
  default conversion; this assumes the document is saved in an ANSI-compatible
  encoding -- confirm for documents using other charsets. }
function GetHTMLSource(Document: IDispatch) : string;
var
  pStream   : IStream;
  pPersist  : IPersistStreamInit;
  li, lo    : int64;
  stat      : STATSTG;
  buf       : AnsiString;
  BytesRead : longint;
begin
  result := '';
  if Document = nil then Exit;

  if SUCCEEDED(CreateStreamOnHGlobal(0, TRUE, pStream)) then
  begin
    if SUCCEEDED(Document.QueryInterface(IID_IPersistStreamInit, pPersist)) then
    begin
      if not SUCCEEDED(pPersist.Save(pStream, FALSE)) then Exit;

      // Rewind so the read starts at the beginning of the saved data.
      li := 0;
      pStream.Seek(li, STREAM_SEEK_SET, lo);

      if not SUCCEEDED(pStream.Stat(stat, 0)) then Exit;

      // An empty stream has nothing to read, and buf[1] would be invalid.
      if stat.cbSize <= 0 then Exit;

      // Size the buffer to exactly the stream size; the original allocated
      // one extra character, which ended up as trailing garbage.
      SetLength(buf, Integer(stat.cbSize));
      BytesRead := 0;
      if SUCCEEDED(pStream.Read(@buf[1], Length(buf), @BytesRead)) then
      begin
        // Keep only what was actually read.
        SetLength(buf, BytesRead);
        result := string(buf);
      end;
    end;
  end;
end;

{ Loads the HTML text in value into Document through IPersistStreamInit.Load,
  using an in-memory stream wrapped in a TStreamAdapter.

  Document : object expected to support IPersistStreamInit; nothing happens
             when it is nil or does not support that interface.
  value    : HTML source; an empty string loads an empty stream.

  NOTE(review): the text is written as single-byte (AnsiString) data, matching
  what the original did on non-Unicode Delphi -- confirm this is the desired
  encoding for your documents. }
procedure SetHTMLSource(Document: IDispatch; value: string);
var
  stm  : TMemoryStream;
  psi  : IPersistStreamInit;
  sa   : IStream;
  ansi : AnsiString;
begin
  if Document = nil then Exit;

  ansi := AnsiString(value);

  stm := TMemoryStream.Create;
  // soOwned + holding the adapter as an interface: the adapter frees the
  // memory stream, and reference counting frees the adapter on every path.
  // The original used soReference and never freed the stream.
  sa := TStreamAdapter.Create(stm, soOwned);

  // Guard the empty case: ansi[1] is invalid for an empty string.
  if Length(ansi) > 0 then
    stm.WriteBuffer(ansi[1], Length(ansi));
  stm.Position := 0;

  if SUCCEEDED(Document.QueryInterface(IID_IPersistStreamInit, psi)) then
    psi.Load(sa);
end;

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值