vba抓取网页数据

功能实现:自动计算国庆期间在个个城市停留的时间(乱码转码,html截取、日历、下拉,两个网站:携程和火车票网)

Function getTrainInfo_ctrip(rowNum)
    
    If Range("A" & rowNum).Value = "" Then
        Range("B" & rowNum & ":C" & rowNum).Delete
        Exit Function
    End If
    Dim strRespText$, tt$
    Dim URL
    URL = "https://trains.ctrip.com/trainbooking/TrainSchedule/" & Range("A" & rowNum).Value
    'Debug.Print URL
    With CreateObject("Microsoft.XMLHTTP")
        .Open "GET", URL, False                     '要抓取的链接,"GET"尽量用大写,以免某些系统不兼容
        .Send
        
        Index = InStr(.responsetext, "<div id=""ctl00_MainContentPlaceHolder_pnlResult"">")
        If Index <= 0 Then
            Range("B" & rowNum & ":C" & rowNum).Delete
            Range("B" & rowNum) = "查无此车"
            Exit Function
        End If
        
        tt = Mid(.responsetext, Index)
        Index2 = InStr(tt, "ctl00_MainContentPlaceHolder_divStartAndEndSchedule")
        tt = Left(tt, Index2 - 44)
        
        'Set fs = CreateObject("Scripting.FileSystemObject")
        'Set a = fs.CreateTextFile("D:\Document\excel\log.txt", True)
        'a.write (tt)
        'a.Close
    End With
    
    Set oDom = CreateObject("htmlfile")
    oDom.body.innerHTML = tt
    
    'Debug.Print oDom.getElementById("ctl00_MainContentPlaceHolder_pnlResult").innerText
    Set oTbody = oDom.getElementById("ctl00_MainContentPlaceHolder_pnlResult").getElementsByTagName("div")(2).getElementsByTagName("table")(1).getElementsByTagName("tbody")(0)
    'Debug.Print oTbody.innerText
    'Debug.Print oTbody.Rows.Length
    
    Dim allCity As String
    allCity = ""
    'Integer String
    Dim startPlace, endPlace As Integer
    startPlace = 0
    endPlace = 0
    'Debug.Print Range("B" & rowNum).Value
    
    If Range("B" & rowNum).Value = "" Then
            Range("B" & rowNum) = oTbody.Rows(0).Cells(2).innerText
    End If
    If Range("C" & rowNum).Value = "" Then
        Range("C" & rowNum) = oTbody.Rows(oTbody.Rows.Length - 1).Cells(2).innerText
    End If

    For i = 1 To oTbody.Rows.Length
        allCity = allCity & "," & oTbody.Rows(i - 1).Cells(2).innerText
        'Debug.Print oTbody.Rows(i - 1).Cells(2).innerText
        'Debug.Print InStr(Range("B" & rowNum).Value, oTbody.Rows(i - 1).Cells(2).innerText)
        
        If startPlace = 0 And InStr(Range("B" & rowNum).Value, oTbody.Rows(i - 1).Cells(2).innerText) > 0 Then
            startPlace = i
            'Debug.Print startPlace
        End If
        
        If endPlace = 0 And InStr(Range("C" & rowNum).Value, oTbody.Rows(i - 1).Cells(2).innerText) > 0 Then
            endPlace = i
            'Debug.Print endPlace
        End If
        
    Next
    
    If startPlace = 0 Then
        Range("B" & rowNum) = oTbody.Rows(0).Cells(2).innerText
        startPlace = 1
    End If
    
    If endPlace = 0 Then
        Range("C" & rowNum) = oTbody.Rows(oTbody.Rows.Length - 1).Cells(2).innerText
        endPlace = oTbody.Rows.Length
    End If
    
    Set oSelect = Range("B" & rowNum & ":C" & rowNum)
    With oSelect.Validation
        .Delete
        .Add Type:=xlValidateList, AlertStyle:=xlValidAlertStop, Operator:= _
        xlBetween, Formula1:=allCity
    End With
    
    Range("D" & rowNum) = oTbody.Rows(startPlace - 1).Cells(4).innerText
    Range("E" & rowNum) = oTbody.Rows(endPlace - 1).Cells(4).innerText
    
End Function
'http://search.huochepiao.com/checi/C7140
Function getTrainInfo_huochepiao(rowNum)
    
    If Range("A" & rowNum).Value = "" Then
        Range("C" & rowNum & ":D" & rowNum).Delete
        Exit Function
    End If
    If Range("B" & rowNum).Value = "" Then
        Range("B" & rowNum) = Date
    End If
    Dim strRespText$, tt$
    Dim URL
    URL = "http://search.huochepiao.com/checi/" & Range("A" & rowNum).Value
    'Debug.Print URL
    With CreateObject("Microsoft.XMLHTTP")
        .Open "GET", URL, False                     '要抓取的链接,"GET"尽量用大写,以免某些系统不兼容
        .Send
        
        tt = .responsetext
        With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")    'DataObject对象,数据放入剪贴板,记事本观察数据
            .SetText tt                                                    '因为XMLHTTP默认是UTF-8,不能识别gb2312,会发现数据乱码
            .PutInClipboard                                                '所以不能采用.responsetext对象来得到字符串
        End With
        tt = StrConv(.ResponseBody, vbUnicode)
        
        Index = InStr(tt, "</table><br><table border=")
        If Index <= 0 Then
            Range("C" & rowNum & ":D" & rowNum).Delete
            Range("C" & rowNum) = "查无此车"
            Exit Function
        End If
        tt = Mid(tt, Index + 12)
        Index2 = InStr(tt, "</td></tr></table>")
        tt = Left(tt, Index2 + 18)
        
        'Debug.Print tt
        
        Set fs = CreateObject("Scripting.FileSystemObject")
        Set a = fs.CreateTextFile("D:\Document\excel\log.txt", True)
        a.write (tt)
        a.Close
        
    End With
    
    Set oDom = CreateObject("htmlfile")
    oDom.body.innerHTML = tt
    
    Set oTbody = oDom.getElementsByTagName("td")

    'Debug.Print oTbody.Length
    '84-
    'Debug.Print oTbody(14).innerText
    
    Dim allCity As String
    allCity = ""
    'Integer String
    Dim startPlace, endPlace As Integer
    startPlace = 0
    endPlace = 0
    'Debug.Print Range("C" & rowNum).Value
    
    Dim startNum, stepNum, endCityDataNum As Integer
    If oTbody(9).innerText = "硬座" Then
        startNum = 15
        stepNum = 13
        endCityDataNum = 11
    ElseIf oTbody(9).innerText = "硬卧上/中/下" Or oTbody(9).innerText = "商务座" Then
        startNum = 14
        stepNum = 12
        endCityDataNum = 10
    Else
        Exit Function
    End If
    If Range("C" & rowNum).Value = "" Then
            Range("C" & rowNum) = oTbody(startNum).innerText
    End If
    If Range("D" & rowNum).Value = "" Then
        Range("D" & rowNum) = oTbody(oTbody.Length - endCityDataNum).innerText
    End If
    
    'Debug.Print "startNum"; startNum; "oTbody.Length"; oTbody.Length
    For i = startNum To oTbody.Length Step stepNum
        allCity = allCity & "," & oTbody(i).innerText
        'Debug.Print oTbody(i).innerText
        'Debug.Print InStr(Range("C" & rowNum).Value, oTbody.Rows(i - 1).Cells(2).innerText)
        
        If startPlace = 0 And InStr(Range("C" & rowNum).Value, oTbody(i).innerText) > 0 Then
            startPlace = i
            'Debug.Print startPlace
        End If
        
        If endPlace = 0 And InStr(Range("D" & rowNum).Value, oTbody(i).innerText) > 0 Then
            endPlace = i
            'Debug.Print endPlace
        End If
        
    Next
    
    If startPlace = 0 Then
        Range("C" & rowNum) = oTbody(startNum).innerText
        startPlace = startNum
    End If
    
    If endPlace = 0 Then
        Range("D" & rowNum) = oTbody(oTbody.Length - endCityDataNum).innerText
        endPlace = oTbody.Length - endCityDataNum
    End If
    
    Set oSelect = Range("C" & rowNum & ":D" & rowNum)
    With oSelect.Validation
        .Delete
        .Add Type:=xlValidateList, AlertStyle:=xlValidAlertStop, Operator:= _
        xlBetween, Formula1:=allCity
    End With
    
    
    Range("E" & rowNum) = Range("B" & rowNum).Value & " " & oTbody(startPlace + 2).innerText

    Dim startUseTime, endUseTime As Integer
    
    Dim s
    Dim midString As String

    If InStr(oTbody(startPlace + 4).innerText, "小时") > 0 Then
        midString = Replace(oTbody(startPlace + 4).innerText, "小时", ":")
        s = Split(Replace(midString, "分", ""), ":")
        startUseTime = s(0) * 60 + s(1)
        
    Else
        startUseTime = Replace(oTbody(startPlace + 4).innerText, "分", "")
    End If
    
    If InStr(oTbody(endPlace + 4).innerText, "小时") > 0 Then
        midString = Replace(oTbody(endPlace + 4).innerText, "小时", ":")
        s = Split(Replace(midString, "分", ""), ":")
        endUseTime = s(0) * 60 + s(1)
    Else
        endUseTime = Replace(oTbody(endPlace + 4).innerText, "分", "")
    End If
    
    'Debug.Print startUseTime
    'Debug.Print endUseTime
    'Debug.Print Range("E" & rowNum).Value
    'Debug.Print Format$(CDate((endUseTime - startUseTime) / 1440 + CDate(Range("E" & rowNum).Value)), "yyyy-mm-dd hh:mm:ss")
    Range("F" & rowNum) = Format$(CDate((endUseTime - startUseTime) / 1440 + CDate(Range("E" & rowNum).Value)), "yyyy-mm-dd hh:mm:ss")
    
End Function
Sub test()
    Range("B" & 1) = "查询中、、、"
    'getTrainInfo_huochepiao (10)
    For i = 3 To 20
        getTrainInfo_huochepiao (i)
    Next
    
    Range("B" & 1) = "查询完毕"
End Sub

效果截图:(求偶遇哈哈!!!)

源文档已经上传:https://download.csdn.net/download/struggletolife/11723002

  • 4
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值