当需要将一个网页解析成比较直观的DOM树形式时,有以下几种方法。
(1)使用MSHTML中提供的IWebBrowser2接口,输入为网页的URL:
// Method 1, first part: create an Internet Explorer automation object through
// COM, navigate it to the target URL and query its ready state.
// Every early-exit path returns -1 and first releases whatever has been
// acquired so far (browser interface, VARIANTs, COM initialization).
IHTMLDocument2 *pDoc = NULL;   // declared here; not assigned in this part of the code

// COM must be initialized on this thread before CoCreateInstance is called.
HRESULT hr = CoInitialize(NULL);
if( FAILED(hr) )
{
    // Initialization failed, so there is no CoUninitialize to balance.
    return -1;
}

// Fetch the page content
IWebBrowser2* pWebBrowser = NULL;
hr = CoCreateInstance(CLSID_InternetExplorer,NULL,CLSCTX_LOCAL_SERVER,IID_IWebBrowser2,(void**)&pWebBrowser );
if( FAILED(hr) )
{
    MessageBox(NULL,_T("WebBrowser2接口失败"),_T("Error"),MB_OK);
    CoUninitialize();
    return -1 ;
}

string WebUrl("http://beiningsa.blog.sohu.com/130372778.html");
CComVariant varUrl( WebUrl.c_str() );
CComVariant var;               // empty VARIANT passed for all optional Navigate2 arguments

hr = pWebBrowser->Navigate2( &varUrl,&var,&var,&var,&var );
if( FAILED(hr) )
{
    // NOTE(review): the browser was created as a local server; consider
    // calling pWebBrowser->Quit() here as well - confirm the desired behaviour.
    pWebBrowser->Release();
    pWebBrowser = NULL;
    // Free the VARIANT contents before COM is uninitialized.
    varUrl.Clear();
    var.Clear();
    CoUninitialize();
    return -1;
}

READYSTATE readystate;
hr = pWebBrowser->get_ReadyState(&readystate);
if( FAILED( hr ) )
{
    pWebBrowser->Release();
    pWebBrowser = NULL;
    // Free the VARIANT contents before COM is uninitialized.
    varUrl.Clear();
    var.Clear();
    CoUninitialize();
    return -1;
}
DWORD