【Excel VBA】网抓知识(3)-获取当当网图书数据

假设需要根据工作表A2单元格所输入的关键字,查询并获取当当网图书类商品的封面、书名、现价、定价、折扣及链接数据,示例代码如下。​

Sub WebCrawlerDangD()
     Dim objXMLHTTP As Object
     Dim objDOM As Object
     Dim objDOMLi As Object
     Dim objShape As Shape
     Dim strURL As String
     Dim strText As String
     Dim strKey As String
     Dim strMsg As String
     Dim strMsgYesOrNo As String
     Dim strDOMLi As String
     Dim astrResult() As String
     Dim vntShapePic As Variant
     Dim intPageNum As Integer
     Dim intLiLength As Integer
     Dim lngaResult As Long
     Dim i As Long
     Dim k As Long
     strKey = Range("a2").Value
     If Len(strKey) = 0 Then
         MsgBox "未在A2单元格输入查询关键字。"
         Exit Sub
     End If
     Set objXMLHTTP = CreateObject("MSXML2.XMLHTTP")
     Set objDOM = CreateObject("htmlfile")
     For intPageNum = 1 To 100'100页是当当网提供查询结果的最大页数
         strURL = "http://search.dangdang.com/?"
         strURL = strURL & "category_path=01.00.00.00.00.00#J_tab"'指定查询的商品类别为图书类
         strURL = strURL & "&act=input"
         strURL = strURL & "&key=" & strKey'指定查询的关键值
         strURL = strURL & "&page_index=" & intPageNum
         With objXMLHTTP
             .Open "GET", strURL, False
             .send
             strText = .responseText
         End With
         If InStr(strText, "没有找到") Then Exit For
         objDOM.body.innerHTML = strText
         Set objDOMLi = objDOM.getElementById("search_nature_rg").getElementsByTagName("li")'获取li标签的元素集合
         intLiLength = objDOMLi.Length'获取li标签的数量
         lngaResult = lngaResult + intLiLength
         ReDim Preserve astrResult(1 To 7, 1 To lngaResult)
         For i = 0 To intLiLength - 1'遍历HTML DOM对象的li标签
             k = k + 1
             astrResult(1, k) = k
             strDOMLi = objDOMLi(i).innerHTML
             strDOMLi = strDOMLi & "now_price>search_pre_price>search_discount> ("
             astrResult(4, k) = Val(Mid(Split(strDOMLi, "now_price>")(1), 2))'获取图书现价
             astrResult(5, k) = Val(Mid(Split(strDOMLi, "search_pre_price>")(1), 2))'获取图书定价
             If astrResult(5, k) = 0 Then astrResult(5, k) = astrResult(4, k)
             astrResult(6, k) = Val(Split(strDOMLi, "search_discount> (")(1))'获取图书折扣
             If astrResult(6, k) = 0 Then astrResult(6, k) = ""
             With objDOMLi(i).getElementsByTagName("A")(0)'获取图书的标题和网页地址
                 astrResult(3, k) = .Title
                 astrResult(7, k) = .href
             End With
             With objDOMLi(i).getElementsByTagName("IMG")(0)'获取图书封面图片的网页地址
                 astrResult(2, k) = .src
                 If Left(astrResult(2, k), 4) <> "http" Then
                     astrResult(2, k) = .getAttribute("data-original")
                 End If
             End With
         Next i
     Next intPageNum
 If k = 0 Then'判断是否有符合条件的查询结果
         MsgBox "未找到符合条件的查询结果。"
         Exit Sub
     End If
     ActiveSheet.UsedRange.Offset(3).ClearContents
     Application.ScreenUpdating = False
     For Each objShape In ActiveSheet.Shapes
         If objShape.Type = msoLinkedPicture Then objShape.Delete
     Next objShape
     strMsg = "一共有" & k & "张图片需要导入Excel工作表。"
     If k > 50 Then strMsg = strMsg & "耗时过长!不建议导入!"
     strMsgYesOrNo = MsgBox("请选择是否需要导入图书图片!" _
         & vbCrLf & strMsg, vbYesNo)
     If strMsgYesOrNo = vbYes Then
         Const PIC_HEIGHT As Integer = 100
         Const RNG_HEIGHT As Integer = 110
         Const RNG_WIDTH As Integer = 16
         Range("B:B").ColumnWidth = RNG_WIDTH
         Range("A5").Resize(k, 1).EntireRow.RowHeight = RNG_HEIGHT
         For i = 1 To k
             Set vntShapePic = ActiveSheet.Pictures.Insert(astrResult(2, i))
             With Cells(i + 4, 2)
                 vntShapePic.Height = PIC_HEIGHT
                 vntShapePic.Top = (RNG_HEIGHT - PIC_HEIGHT) / 2 + .Top
                 vntShapePic.Left = (.Width - vntShapePic.Width) / 2 + .Left
             End With
             astrResult(2, i) = ""
         Next i
     End If
     Range("a4:g4") = Array("序号", "封面", "书名", "现价", "定价", "折扣", "链接")
     Range("A5").Resize(k, UBound(astrResult)) = Application.Transpose(astrResult)
     Application.ScreenUpdating = True
     Set objXMLHTTP = Nothing
     Set objDOM = Nothing
     Set objDOMLi = Nothing
 End Sub

运行结果如下。

 


微信公众号:VBA168

淘宝店铺地址:https://item.taobao.com/item.htm?spm=a1z10.1-c-s.w4004-21233576391.4.1af0683dzrx3oU&id=584940166162

关注微信公众号,每天及时接收Excel VBA经典示例讲解。

淘宝店铺提供Excel定制服务。

祝你工作和学习更轻松!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值