小发猫

【小发猫】基于AI的NLP伪原创工具

获取IE (控件)的所有链接(包括Frameset, iframe)zz

获取IE (控件)的所有链接(包括Frameset, iframe)zz

IE 顶层 body 节点通过IHTMLElement->get_all 方法无法获取iframe 里面的节点列表

// Illustrates the limitation described above: get_all on the top-level body
// element does not return the nodes that live inside iframes.
// `body` is not assigned anywhere in this fragment; it is assumed to already
// refer to the top-level body element before get_all is called.
CComPtr<IHTMLElement> body;
 
// Receives the element collection from get_all as an IDispatch.
CComPtr<IDispatch> spDispCollection;
body->get_all(&spDispCollection);
所以要获取 iframe/frame(frameset) 里面的节点列表, 需要先根据 body/doc 找到 frames, 再按 frames -> IHTMLWindow2 -> IHTMLDocument2 的顺序取得每个 frame 的文档. 主要有两种方法, 下面是代码片段:
方法一:
IHTMLDocument2 *pDoc = 浏览器的Document(IWebBrowser2->IDispatch->IHTMLDocument2);
IHTMLWindow2 *pHTMLWnd = NULL;
IHTMLDocument2 *pFrameDoc=NULL;
IHTMLFramesCollection2 *pFramesCollection=NULL;
LPDISPATCH lpDispatch;

long p;
VARIANT varindex,varresult;
varresult.vt=VT_DISPATCH;
varindex.vt = VT_I4;
if(pDoc!=NULL)
{
     HRESULT hr=pDoc->get_frames(&pFramesCollection);
    if(SUCCEEDED(hr)&&pFramesCollection!=NULL)
     {
         hr=pFramesCollection->get_length(&p);
        if(SUCCEEDED(hr))
            for(int i=0; i<p; i++)
             {
                 varindex.lVal = i;
                if(pFramesCollection->item(&varindex, &varresult) ==S_OK)
                 {
                     lpDispatch=(LPDISPATCH)varresult.ppdispVal;
                    if (SUCCEEDED(lpDispatch->QueryInterface(IID_IHTMLWindow2, (LPVOID *)&pHTMLWnd)))
                     {
                        if(SUCCEEDED(pHTMLWnd->get_document( &pFrameDoc)))
                         {
                            //work with the pFrameDoc
                         }
                         pHTMLWnd->Release();
                         pHTMLWnd=NULL;
                     }
                 }
             }
             pFramesCollection->Release();
     }
     pDoc->Release();
}
方法二:
// Method 2: starting from a single element, detect whether it is a FRAME or
// IFRAME and, if so, reach the document hosted inside it:
// IHTMLElement -> IHTMLFrameBase2 -> IHTMLWindow2 -> IHTMLDocument2.
//
// pElem can be obtained by recursively walking the CComPtr<IDispatch>
// spDispCollection returned by get_all.
CComQIPtr<IHTMLElement> pElem = /* an element taken from the get_all collection */;
CComBSTR bstrTagName;

// Guard against a missing element and a failed / empty tag-name lookup before
// comparing; the comparison itself is case-insensitive.
if ( pElem != NULL
     && SUCCEEDED( pElem->get_tagName(&bstrTagName) ) && bstrTagName.m_str != NULL
     && ( lstrcmpiW(L"IFRAME", bstrTagName)==0 ||
          lstrcmpiW(L"FRAME", bstrTagName)==0 ) )
{
    CComQIPtr<IHTMLFrameBase2>  spFrameBase;
    CComPtr<IHTMLWindow2>       spFrameWindow;
    CComPtr<IHTMLDocument2>     spFrameDoc;

    // Assigning the element performs a QueryInterface for IHTMLFrameBase2;
    // the smart pointer stays NULL when the element does not support it.
    spFrameBase = pElem;

    if ( spFrameBase
         && SUCCEEDED( spFrameBase->get_contentWindow(&spFrameWindow) ) && spFrameWindow != NULL
         && SUCCEEDED( spFrameWindow->get_document(&spFrameDoc) ) && spFrameDoc != NULL )
    {
        // process the nodes of the frame document (spFrameDoc) here
    }
}

iframe 跨域访问(cross frame)   zz from : http://www.wzszf.com/
由于安全性限制, 为防止跨域脚本攻击, 当frames 跨域的时候, IHTMLWindow2::get_document 调用将返回 E_ACCESSDENIED .
下面的函数 HtmlWindowToHtmlDocument 对于跨域的 frame, 通过 IHTMLWindow2 -> IServiceProvider::QueryService(IID_IWebBrowserApp) -> IWebBrowser2 -> IWebBrowser2::get_Document -> IHTMLDocument2 绕过了这个限制.

// Converts a IHTMLWindow2 object to a IHTMLDocument2. Returns NULL in case of failure.
// It takes into account accessing the DOM across frames loaded from different domains.
CComQIPtr<IHTMLDocument2> HtmlWindowToHtmlDocument(CComQIPtr<IHTMLWindow2> spWindow)
{
      ATLASSERT(spWindow != NULL);

      CComQIPtr<IHTMLDocument2> spDocument;
      HRESULT hRes = spWindow->get_document(&spDocument);
   
     if ((S_OK == hRes) && (spDocument != NULL))
      {
          // The html document was properly retrieved.
          return spDocument;
      }

     // hRes could be E_ACCESSDENIED that means a security restriction that
     // prevents scripting across frames that loads documents from different internet domains.
      CComQIPtr<IWebBrowser2>   spBrws = HtmlWindowToHtmlWebBrowser(spWindow);
     if (spBrws == NULL)
      {
          return CComQIPtr<IHTMLDocument2>();
      }

     // Get the document object from the IWebBrowser2 object.
      CComQIPtr<IDispatch> spDisp;
      hRes = spBrws->get_Document(&spDisp);
      spDocument = spDisp;

     return spDocument;
}


// Resolves the IWebBrowser2 that hosts the given IHTMLWindow2.
//
// The window is queried for IServiceProvider, which is then asked for the
// IID_IWebBrowserApp service as an IWebBrowser2.
// Returns a NULL smart pointer when the window does not expose
// IServiceProvider or when QueryService does not return S_OK.
CComQIPtr<IWebBrowser2> HtmlWindowToHtmlWebBrowser(CComQIPtr<IHTMLWindow2> spWindow)
{
    ATLASSERT(spWindow != NULL);

    // Constructing from the window performs the QueryInterface.
    CComQIPtr<IServiceProvider> provider(spWindow);
    if (!provider)
    {
        return CComQIPtr<IWebBrowser2>();
    }

    CComQIPtr<IWebBrowser2> browser;
    const HRESULT queryResult = provider->QueryService(
        IID_IWebBrowserApp, IID_IWebBrowser2, (void**)&browser);

    // Only an exact S_OK counts as success.
    return (queryResult == S_OK) ? browser : CComQIPtr<IWebBrowser2>();
}

附:
IE(控件/接口)中主要有4个部分, Browser, Document, Frame/IFrame, Element , 其对应接口分别是
Browser          -     IWebBrowser2
Document       -     IHTMLDocument2
Frame/IFrame     -     IHTMLWindow2
Element          -     IHTMLElement
可以通过下面方法互相获取
browser      -> document        IWebBrowser2::get_Document
document     -> frame           IHTMLDocument2::get_parentWindow
frame        -> document        IHTMLWindow2::get_document
frame        -> parent frame    IHTMLWindow2::get_parent
frame        -> children frames IHTMLWindow2::get_frames
element      -> frame           IHTMLElement -> QI(IHTMLFrameBase2) -> IHTMLFrameBase2::get_contentWindow -> IHTMLWindow2

阅读更多
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

加入CSDN,享受更精准的内容推荐,与500万程序员共同成长!
关闭
关闭