C++版本:
I have a lot of experience in programming low-level MSHTML and I always see questions on how one can use MSHTML to parse HTML and then access elements via the DOM.
Well, here it is. I use IMarkupServices provided by MSHTML. There is no need for an IOleClientSite or any sort of embedding. I think this is just about as light as anyone can get.
In future articles, I will be concentrating on the reuse of MSHTML in other aspects of programming, such as using MSHTML as an editor.
This code makes use of simple COM calls and nothing more. It can be easily adapted for ATL, MFC and VB, among other languages. Please don't ask me to provide samples in other languages. In order to build this you need the IE SDK (the Internet Explorer headers and libraries).
/******************************************************************
* ParseHTML.cpp
*
* ParseHTML: Lightweight UI-less HTML parser using MSHTML
*
* Note: This is for accessing the DOM only. No image download,
* script execution, etc...
*
* 8 June 2001 - Asher Kobin (asherk@pobox.com)
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY
* OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT
* LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR
* FITNESS FOR A PARTICULAR PURPOSE.
*
*******************************************************************/
#include
#include
OLECHAR szHTML[] = OLESTR("
Hello World!");int __stdcall WinMain(HINSTANCE hInst,
HINSTANCE hPrev,
LPSTR lpCmdLine,
int nShowCmd)
{
IHTMLDocument2 *pDoc = NULL;
CoInitialize(NULL);
CoCreateInstance(CLSID_HTMLDocument,
NULL,
CLSCTX_INPROC_SERVER,
IID_IHTMLDocument2,
(LPVOID *) &pDoc);
if (pDoc)
{
IPersistStreamInit *pPersist = NULL;
pDoc->QueryInterface(IID_IPersistStreamInit,
(LPVOID *) &pPersist);
if (pPersist)
{
IMarkupServices *pMS = NULL;
pPersist->InitNew();
pPersist->Release();
pDoc->QueryInterface(IID_IMarkupServices,
(LPVOID *) &pMS);
if (pMS)
{
IMarkupContainer *pMC = NULL;
IMarkupPointer *pMkStart = NULL;
IMarkupPointer *pMkFinish = NULL;
pMS->CreateMarkupPointer(&pMkStart);
pMS->CreateMarkupPointer(&pMkFinish);
pMS->ParseString(szHTML,
0,
&pMC,
pMkStart,
pMkFinish);
if (pMC)
{
IHTMLDocument2 *pNewDoc = NULL;
pMC->QueryInterface(IID_IHTMLDocument,
(LPVOID *) &pNewDoc);
if (pNewDoc)
{
// do anything with pNewDoc, in this case
// get the body innerText.
IHTMLElement *pBody;
pNewDoc-gt;get_body(&pBody);
if (pBody)
{
BSTR strText;
pBody->get_innerText(&strText);
pBody->Release();
SysFreeString(strText);
}
pNewDoc->Release();
}
pMC->Release();
}
if (pMkStart)
pMkStart->Release();
if (pMkFinish)
pMkFinish->Release();
pMS->Release();
}
}
pDoc->Release();
}
CoUninitialize();
return TRUE;
}
Delphi版本1:
( add to uses clause, MSHTML, ActiveX, ComObj )
const
IID_IPersistStreamInit : TGUID = '{7FD52380-4E07-101B-AE2D-08002B2EC713}';

{ Parses an HTML string with MSHTML's IMarkupServices (no hosting, no UI)
  and shows the body's innerText in the control "m".

  Interface variables are reference counted by the compiler, so _Release is
  never called by hand here (doing so releases the object twice). They are
  instead set to nil in the finally section so that every COM object is
  gone before CoUninitialize runs. }
procedure TFormMain.FormCreate(Sender: TObject);
var
  pDoc      : IHTMLDocument2;
  pNewDoc   : IHTMLDocument2;
  pPersist  : IPersistStreamInit;
  pMS       : IMarkupServices;
  pMC       : IMarkupContainer;
  pMkStart  : IMarkupPointer;
  pMkFinish : IMarkupPointer;
  pBody     : IHTMLElement;
  strText   : string;
  szHTML    : widestring;
  didInit   : boolean;
begin
  didInit := Succeeded(CoInitialize(nil));
  try
    // The tags of the original literal were lost when the listing was
    // published; a minimal document is restored here.
    szHTML := '<HTML><BODY>Hello World!</BODY></HTML>';
    CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, pDoc);
    if pDoc <> nil then
    begin
      pDoc.QueryInterface(IID_IPersistStreamInit, pPersist);
      if pPersist <> nil then
      begin
        // The document must be initialised before markup services are used.
        pPersist.InitNew;
        pPersist := nil;
        pDoc.QueryInterface(IID_IMarkupServices, pMS);
        if pMS <> nil then
        begin
          pMS.CreateMarkupPointer(pMkStart);
          pMS.CreateMarkupPointer(pMkFinish);
          // ParseString takes the first character by reference, i.e. a
          // pointer to the start of the wide string.
          pMS.ParseString(word(szHTML[1]), 0, pMC, pMkStart, pMkFinish);
          if pMC <> nil then
          begin
            // Ask for the interface that matches the variable's type.
            pMC.QueryInterface(IID_IHTMLDocument2, pNewDoc);
            if pNewDoc <> nil then
            begin
              // do anything with pNewDoc, in this case
              // get the body innerText.
              pBody := pNewDoc.Get_body;
              if pBody <> nil then
              begin
                strText := pBody.Get_innerText;
                m.Text := strText;
              end;
            end;
          end;
        end;
      end;
    end;
  finally
    // Drop every interface before COM is shut down.
    pBody := nil;
    pNewDoc := nil;
    pMC := nil;
    pMkStart := nil;
    pMkFinish := nil;
    pMS := nil;
    pPersist := nil;
    pDoc := nil;
    if didInit then CoUninitialize;
  end;
end;
Delphi版本2:
( add to uses clause, MSHTML, ActiveX, ComObj )
const
IID_IPersistStreamInit : TGUID = '{7FD52380-4E07-101B-AE2D-08002B2EC713}';

{ Alternative approach: create an HTMLDocument, switch it to design mode,
  wait until it reports readyState "complete", assign the markup to
  body.innerHTML and show the body's innerText in the control "m".

  Interface variables are reference counted by the compiler, so _Release is
  never called by hand (that would release the object twice). They are set
  to nil in the finally section so they are gone before CoUninitialize. }
procedure TFormMain.FormCreate(Sender: TObject);
var
  pDoc    : IHTMLDocument2;
  pBody   : IHTMLElement;
  strText : string;
  szHTML  : widestring;
  didInit : boolean;
begin
  didInit := Succeeded(CoInitialize(nil));
  try
    // The tags of the original literal were lost when the listing was
    // published; a minimal document is restored here.
    szHTML := '<HTML><BODY>Hello World!</BODY></HTML>';
    CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, pDoc);
    if pDoc <> nil then
    begin
      pDoc.Set_designMode('On'); //no script execution
      // Pump messages until the document has finished initialising.
      while not (pDoc.readyState = 'complete') do Application.ProcessMessages;
      strText := '';
      // Fetch the body once into a named variable: it is nil-checked before
      // use, and no hidden interface temporary outlives CoUninitialize.
      pBody := pDoc.Get_body;
      if pBody <> nil then
      begin
        pBody.innerHTML := szHTML;
        strText := pBody.Get_innerText;
      end;
      m.Text := strText;
    end;
  finally
    // Drop every interface before COM is shut down.
    pBody := nil;
    pDoc := nil;
    if didInit then CoUninitialize;
  end;
end;
============== Other Useful Routines ===============
============== Other Useful Routines ===============
============== Other Useful Routines ===============
{ Returns the HTML source of Document by saving it through
  IPersistStreamInit into an in-memory stream and reading the bytes back.

  Document - the document object; must support IPersistStreamInit.
  Result   - the saved bytes as a string, or '' when Document is nil, does
             not support IPersistStreamInit, or any stream call fails.

  The bytes are read into an AnsiString so the byte count matches the
  buffer size regardless of what "string" maps to in the compiler. }
function GetHTMLSource(Document: IDispatch) : string;
var
  pStream   : IStream;
  pPersist  : IPersistStreamInit;
  li, lo    : int64;
  stat      : STATSTG;
  raw       : AnsiString;
  BytesRead : longint;
begin
  Result := '';
  if Document = nil then Exit;
  if not Succeeded(CreateStreamOnHGlobal(0, TRUE, pStream)) then Exit;
  if not Succeeded(Document.QueryInterface(IID_IPersistStreamInit, pPersist)) then Exit;
  if not Succeeded(pPersist.Save(pStream, FALSE)) then Exit;

  // Rewind so the read starts at the first saved byte.
  li := 0;
  if not Succeeded(pStream.Seek(li, STREAM_SEEK_SET, lo)) then Exit;

  // STATFLAG_NONAME: do not allocate pwcsName, which would otherwise have
  // to be freed with CoTaskMemFree.
  if not Succeeded(pStream.Stat(stat, STATFLAG_NONAME)) then Exit;
  if stat.cbSize = 0 then Exit;

  SetLength(raw, Integer(stat.cbSize));
  BytesRead := 0;
  if not Succeeded(pStream.Read(@raw[1], Length(raw), @BytesRead)) then
    BytesRead := 0;
  // Keep only what was actually read.
  SetLength(raw, BytesRead);
  Result := string(raw);
end;
{ Loads "value" as the HTML source of Document through IPersistStreamInit.

  Document - the document object; nothing happens when it is nil or does
             not support IPersistStreamInit.
  value    - the markup to load; it is written to the stream as single-byte
             (ANSI) text, one byte per character.

  The memory stream is owned by the stream adapter (soOwned) and the adapter
  is held through an IStream reference, so both are freed automatically
  whether or not the load takes place. }
procedure SetHTMLSource(Document: IDispatch; value: string);
var
  stm : TMemoryStream;
  psi : IPersistStreamInit;
  sa  : IStream;
  raw : AnsiString;
begin
  if Document = nil then Exit;
  raw := AnsiString(value);
  stm := TMemoryStream.Create;
  try
    // Indexing raw[1] is only valid for a non-empty string.
    if Length(raw) > 0 then
      stm.WriteBuffer(raw[1], Length(raw));
    stm.Position := 0;
  except
    stm.Free;
    raise;
  end;
  // From here on the adapter owns stm and frees it when released.
  sa := TStreamAdapter.Create(stm, soOwned);
  if Succeeded(Document.QueryInterface(IID_IPersistStreamInit, psi)) then
    psi.Load(sa);
end;