VC6.0 调用 HTML:在 VC6.0 中使用 MSHTML 解析 HTML

//TestMSHTML.cpp : 定义控制台应用程序的入口点。//#include"stdafx.h"#include"TestMSHTML.h"#ifdef _DEBUG#definenew DEBUG_NEW#endif//唯一的应用程序对象CWinApp theApp;

FILE*fout;usingnamespacestd;//OLECHAR szHTML[] = OLESTR("

Hello World!");typedefintBorderAttribute;voidFindAllElementHavingBg(IHTMLDocument2*pNewDoc,map&borderValue2ElementMap)

{

IHTMLElement*pBody;

pNewDoc->get_body(&pBody);

pBody->Release();

}voidPrintTabs(intn)

{for(inti=0;i

{//cout <

}

}voidVisitNode(IHTMLElement*pElement,intlevel)

{

BSTR strName,strId,strTag;

PrintTabs(level);

pElement->get_className(&strName);

pElement->get_id(&strId);

pElement->get_tagName(&strTag);if(strTag!=NULL)

{

fwprintf(fout,_T("TagName:%s"),strTag);

}if(strName!=NULL)

{

fwprintf(fout,_T("className:%s"),strName);

}if(strId!=NULL)

{

fwprintf(fout,_T("Id:%s"),strId);

}

SysFreeString(strName);

SysFreeString(strId);

SysFreeString(strTag);

BSTR strAttrName1=_T("border");

BSTR strAttrName2=_T("bgcolor");

VARIANT val;

pElement->getAttribute(strAttrName1,2,&val);if(val.vt!=VT_NULL)

{if(val.bstrVal!=NULL)

{

fwprintf(fout,_T("border:%s"),val.bstrVal);

}

}

pElement->getAttribute(strAttrName2,2,&val);if(val.vt!=VT_NULL)

{if(val.bstrVal!=NULL)

{

fwprintf(fout,_T("bgcolor:%s"),val.bstrVal);

}

}

fwprintf(fout,_T("\n"));

}//将DOM树打印出来voidRun(IHTMLElement*pElement,intlevel)

{

IHTMLElementCollection*children;

VisitNode(pElement,level);

IDispatch*pDisp;

pElement->get_children(&pDisp);

pDisp->QueryInterface(IID_IHTMLElementCollection,(void**)&children);

pDisp->Release();longlen;

children->get_length(&len);

VARIANT dummy;

dummy.vt=VT_I4;for(inti=0;i

{

IHTMLElement*child;

dummy.intVal=i;

children->item(dummy,dummy,(IDispatch**)&pDisp);

pDisp->QueryInterface(IID_IHTMLElement,(void**)&child);

pDisp->Release();

Run(child,level+1);

child->Release();

}

children->Release();

}voidTestParse(IHTMLDocument2*pNewDoc)

{

BSTR strText;

IHTMLElement*pBody;

pNewDoc->get_body(&pBody);

pBody->get_innerText(&strText);

wprintf(_T("%s\n"),strText);

SysFreeString(strText);

pNewDoc->get_title(&strText);

wprintf(_T("%s\n"),strText);

SysFreeString(strText);

cout<

Run(pBody,0);

cout<

pBody->Release();//FindAllElementHavingBg(pNewDoc);}voidTestMSHTML(wchar_t*wcontent)

{

IHTMLDocument2*pDoc=NULL;

CoInitialize(NULL);

CoCreateInstance(CLSID_HTMLDocument,

NULL,

CLSCTX_INPROC_SERVER,

IID_IHTMLDocument2,

(LPVOID*)&pDoc);if(pDoc)

{

IPersistStreamInit*pPersist=NULL;

pDoc->QueryInterface(IID_IPersistStreamInit,

(LPVOID*)&pPersist);if(pPersist)

{

IMarkupServices*pMS=NULL;

pPersist->InitNew();

pPersist->Release();

pDoc->QueryInterface(IID_IMarkupServices,

(LPVOID*)&pMS);if(pMS)

{

IMarkupContainer*pMC=NULL;

IMarkupPointer*pMkStart=NULL;

IMarkupPointer*pMkFinish=NULL;

pMS->CreateMarkupPointer(&pMkStart);

pMS->CreateMarkupPointer(&pMkFinish);

pMS->ParseString(wcontent,0,&pMC,

pMkStart,

pMkFinish);if(pMC)

{

IHTMLDocument2*pNewDoc=NULL;

pMC->QueryInterface(IID_IHTMLDocument,

(LPVOID*)&pNewDoc);if(pNewDoc)

{//do anything with pNewDoc, in this case//get the body innerText.TestParse(pNewDoc);

pNewDoc->Release();

}

pMC->Release();

}if(pMkStart)

pMkStart->Release();if(pMkFinish)

pMkFinish->Release();

pMS->Release();

}

}

pDoc->Release();

}

CoUninitialize();

}

inline wchar_t*AnsiToUnicode(constchar*szStr )

{intnLen=MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr,-1, NULL,0);if(nLen==0)

{returnNULL;

}

wchar_t*pResult=newwchar_t[nLen+1];

MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr,-1, pResult, nLen );

pResult[nLen]=L'\0';returnpResult;

}//调用者负责delete wcontentwchar_t*ReadFromHtmlFile(stringstr,string&content)

{

ifstream fin(str.c_str());stringline;while(getline(fin,line))

{//cout <

}//cout <

fin.clear();returnwcontent;

}int_tmain(intargc, TCHAR*argv[], TCHAR*envp[])

{intnRetCode=0;//初始化 MFC 并在失败时显示错误if(!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(),0))

{//TODO: 更改错误代码以符合您的需要_tprintf(_T("错误: MFC 初始化失败\n"));

nRetCode=1;

}else{

fout=fopen("out.txt","w");stringstr="test.html";stringcontent;

wchar_t*wcontent=ReadFromHtmlFile(str,content);intlen=wcslen(wcontent);//cout <

delete[] wcontent;

fclose(fout);

}returnnRetCode;

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值