//TestMSHTML.cpp : 定义控制台应用程序的入口点。//#include"stdafx.h"#include"TestMSHTML.h"#ifdef _DEBUG#definenew DEBUG_NEW#endif//唯一的应用程序对象CWinApp theApp;
FILE*fout;usingnamespacestd;//OLECHAR szHTML[] = OLESTR("
Hello World!");typedefintBorderAttribute;voidFindAllElementHavingBg(IHTMLDocument2*pNewDoc,map&borderValue2ElementMap){
IHTMLElement*pBody;
pNewDoc->get_body(&pBody);
pBody->Release();
}voidPrintTabs(intn)
{for(inti=0;i
{//cout <
}
}voidVisitNode(IHTMLElement*pElement,intlevel)
{
BSTR strName,strId,strTag;
PrintTabs(level);
pElement->get_className(&strName);
pElement->get_id(&strId);
pElement->get_tagName(&strTag);if(strTag!=NULL)
{
fwprintf(fout,_T("TagName:%s"),strTag);
}if(strName!=NULL)
{
fwprintf(fout,_T("className:%s"),strName);
}if(strId!=NULL)
{
fwprintf(fout,_T("Id:%s"),strId);
}
SysFreeString(strName);
SysFreeString(strId);
SysFreeString(strTag);
BSTR strAttrName1=_T("border");
BSTR strAttrName2=_T("bgcolor");
VARIANT val;
pElement->getAttribute(strAttrName1,2,&val);if(val.vt!=VT_NULL)
{if(val.bstrVal!=NULL)
{
fwprintf(fout,_T("border:%s"),val.bstrVal);
}
}
pElement->getAttribute(strAttrName2,2,&val);if(val.vt!=VT_NULL)
{if(val.bstrVal!=NULL)
{
fwprintf(fout,_T("bgcolor:%s"),val.bstrVal);
}
}
fwprintf(fout,_T("\n"));
}//将DOM树打印出来voidRun(IHTMLElement*pElement,intlevel)
{
IHTMLElementCollection*children;
VisitNode(pElement,level);
IDispatch*pDisp;
pElement->get_children(&pDisp);
pDisp->QueryInterface(IID_IHTMLElementCollection,(void**)&children);
pDisp->Release();longlen;
children->get_length(&len);
VARIANT dummy;
dummy.vt=VT_I4;for(inti=0;i
{
IHTMLElement*child;
dummy.intVal=i;
children->item(dummy,dummy,(IDispatch**)&pDisp);
pDisp->QueryInterface(IID_IHTMLElement,(void**)&child);
pDisp->Release();
Run(child,level+1);
child->Release();
}
children->Release();
}voidTestParse(IHTMLDocument2*pNewDoc)
{
BSTR strText;
IHTMLElement*pBody;
pNewDoc->get_body(&pBody);
pBody->get_innerText(&strText);
wprintf(_T("%s\n"),strText);
SysFreeString(strText);
pNewDoc->get_title(&strText);
wprintf(_T("%s\n"),strText);
SysFreeString(strText);
cout<
Run(pBody,0);
cout<
pBody->Release();//FindAllElementHavingBg(pNewDoc);}voidTestMSHTML(wchar_t*wcontent)
{
IHTMLDocument2*pDoc=NULL;
CoInitialize(NULL);
CoCreateInstance(CLSID_HTMLDocument,
NULL,
CLSCTX_INPROC_SERVER,
IID_IHTMLDocument2,
(LPVOID*)&pDoc);if(pDoc)
{
IPersistStreamInit*pPersist=NULL;
pDoc->QueryInterface(IID_IPersistStreamInit,
(LPVOID*)&pPersist);if(pPersist)
{
IMarkupServices*pMS=NULL;
pPersist->InitNew();
pPersist->Release();
pDoc->QueryInterface(IID_IMarkupServices,
(LPVOID*)&pMS);if(pMS)
{
IMarkupContainer*pMC=NULL;
IMarkupPointer*pMkStart=NULL;
IMarkupPointer*pMkFinish=NULL;
pMS->CreateMarkupPointer(&pMkStart);
pMS->CreateMarkupPointer(&pMkFinish);
pMS->ParseString(wcontent,0,&pMC,
pMkStart,
pMkFinish);if(pMC)
{
IHTMLDocument2*pNewDoc=NULL;
pMC->QueryInterface(IID_IHTMLDocument,
(LPVOID*)&pNewDoc);if(pNewDoc)
{//do anything with pNewDoc, in this case//get the body innerText.TestParse(pNewDoc);
pNewDoc->Release();
}
pMC->Release();
}if(pMkStart)
pMkStart->Release();if(pMkFinish)
pMkFinish->Release();
pMS->Release();
}
}
pDoc->Release();
}
CoUninitialize();
}
inline wchar_t*AnsiToUnicode(constchar*szStr )
{intnLen=MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr,-1, NULL,0);if(nLen==0)
{returnNULL;
}
wchar_t*pResult=newwchar_t[nLen+1];
MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr,-1, pResult, nLen );
pResult[nLen]=L'\0';returnpResult;
}//调用者负责delete wcontentwchar_t*ReadFromHtmlFile(stringstr,string&content)
{
ifstream fin(str.c_str());stringline;while(getline(fin,line))
{//cout <
}//cout <
fin.clear();returnwcontent;
}int_tmain(intargc, TCHAR*argv[], TCHAR*envp[])
{intnRetCode=0;//初始化 MFC 并在失败时显示错误if(!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(),0))
{//TODO: 更改错误代码以符合您的需要_tprintf(_T("错误: MFC 初始化失败\n"));
nRetCode=1;
}else{
fout=fopen("out.txt","w");stringstr="test.html";stringcontent;
wchar_t*wcontent=ReadFromHtmlFile(str,content);intlen=wcslen(wcontent);//cout <
delete[] wcontent;
fclose(fout);
}returnnRetCode;
}