vba 爬虫常用对象和方法

最近又要小爬一下动态网页,于是复习了一下常用对象。

Sub WebCrawler(ByRef Item, ByRef DraftPage)

Dim sKey As String
Dim k As Integer

    sKey = "Time In Source Status"
    k = 0

Dim GUrl As String

    GUrl = "https://msjira.ooo.com/browse/" & Item

Dim IE As Object
    Set IE = CreateObject("InternetExplorer.Application")

    With IE

            .Visible = True
            .navigate GUrl
            Do Until .readyState = 4  'Complete
                    DoEvents
            Loop
            If (.document.querySelectorAll("div.aui-group aui-group-split").Length > 0) Then

                    Do Until (.document.querySelectorAll("div.wrap").Length > 0)
                        WaitToIEReady IE
                    Loop
                    GoTo Content
            End If

            On Error GoTo Content

            .document.getElementById("login-form-username").Value = "yiyi"
            .document.getElementById("login-form-password").Value = "yaya"
            .document.getElementById("login-form-submit").Click

            Do Until .readyState = 4  'Complete
                    DoEvents
            Loop
            Do Until (.document.querySelectorAll("div.wrap").Length > 0)
                    WaitToIEReady IE
            Loop

Content:
            .document.getElementById("all-tabpanel").Click
            Do Until (.document.querySelectorAll("div.actionContainer").Length > 0)
                WaitToIEReady IE
            Loop


            Do While k < 1000:  'Favorite part
                Set tables = .document.getElementsBytagname("table")
                k = k + 1
                For Each tabl In tables

                    For Each oRow In tabl.Rows
                        For Each oCell In oRow.Cells
                           If Trim(oCell.innertext) = sKey Then GoTo CopyWork
                        Next
                    Next
                Next
                If k = 999 Then MsgBox "cannot find page": Exit Sub
            Loop



CopyWork:
            'Don't forget to close IE Clipboard warning in IE Option->SecurityTab->Custom Level->Scripting session
            .document.execCommand "SelectAll"
            .document.execCommand "copy"

            'Paste all to put the webpage content
            DraftPage.Range("A1").Select
            Application.ActiveSheet.PasteSpecial Format:="Text", link:=False, DisplayAsIcon:=False



      End With
      IE.Quit


End Sub

Sub WaitToIEReady(ByRef IeObj)
    Do While IeObj.Busy
        DoEvents
    Loop
End Sub

继续增加一点,2017-12-07:这次用xmldom

'URL of XII
            strURL = "http://xii.danager.com/xii.portfolio/main/getData.aspx?entityType=Portfolio.PortfolioXml&entityId=" & ws.Cells(1, 2).Value & "&effectiveDate=" & ws.Cells(2, 2) & "&internalteam=true"


            Set xmlDom = CreateObject("Microsoft.XMLDOM")
            'Set xmlDom = CreateObject("MSXML2.DOMDocument")
            xmlDom.Load (strURL)

            Do Until xmlDom.readystate = 4
                DoEvents
            Loop


            Set holdNodes = xmlDom.SelectNodes("/Portfolio/Holding/HoldingDetail")

            'Confirm holding number
            If holdNodes.Length < 1 Then
                MsgBox "empty portfolio in xml"
                Exit Sub
            End If
     'Start to fill detail value into HoldingSheet
            Osht.UsedRange.Clear
            Osht.Cells(1, 1).Value = "PortfolioDate"
            Osht.Cells(1, 2).Value = "SecurityName"
            Osht.Cells(1, 3).Value = "DTID"
            Osht.Cells(1, 4).Value = "MatchingTypeCode"
            Osht.Cells(1, 5).Value = "LessThan92"
            Osht.Cells(1, 6).Value = "CUSIP"
            Osht.Cells(1, 7).Value = "Share"
            Osht.Cells(1, 8).Value = "MarketValue"
            Osht.Cells(1, 9).Value = "SecId"
            Osht.Cells(1, 10).Value = "LocalCurrencyCode"

            For i = 0 To holdNodes.Length - 1

                Set SecurityName = holdNodes(i).SelectSingleNode("SecurityName")
                Set dtidType = holdNodes(i).Attributes.getNamedItem("_DetailHoldingTypeId")
                Set matchingType = holdNodes(i).SelectSingleNode("MatchingTypeCodeId")
                Set lessThan92 = holdNodes(i).SelectSingleNode("LessThan92DaysBond")
                Set CUSIP_no = holdNodes(i).SelectSingleNode("CUSIP")
                Set marketValue = holdNodes(i).SelectSingleNode("MarketValue")
                Set shareNumber = holdNodes(i).SelectSingleNode("NumberOfShare")
                Set secId = holdNodes(i).Attributes.getNamedItem("_Id")
                Set localCur = holdNodes(i).SelectSingleNode("LocalCurrencyCode")
  • 11
    点赞
  • 64
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

取啥都被占用

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值