假设需要根据工作表A2单元格所输入的关键字,查询并获取当当网图书类商品的封面、书名、现价、定价、折扣及链接数据,示例代码如下。
Sub WebCrawlerDangD()
Dim objXMLHTTP As Object
Dim objDOM As Object
Dim objDOMLi As Object
Dim objShape As Shape
Dim strURL As String
Dim strText As String
Dim strKey As String
Dim strMsg As String
Dim strMsgYesOrNo As String
Dim strDOMLi As String
Dim astrResult() As String
Dim vntShapePic As Variant
Dim intPageNum As Integer
Dim intLiLength As Integer
Dim lngaResult As Long
Dim i As Long
Dim k As Long
strKey = Range("a2").Value
If Len(strKey) = 0 Then
MsgBox "未在A2单元格输入查询关键字。"
Exit Sub
End If
Set objXMLHTTP = CreateObject("MSXML2.XMLHTTP")
Set objDOM = CreateObject("htmlfile")
For intPageNum = 1 To 100'100页是当当网提供查询结果的最大页数
strURL = "http://search.dangdang.com/?"
strURL = strURL & "category_path=01.00.00.00.00.00#J_tab"'指定查询的商品类别为图书类
strURL = strURL & "&act=input"
strURL = strURL & "&key=" & strKey'指定查询的关键值
strURL = strURL & "&page_index=" & intPageNum
With objXMLHTTP
.Open "GET", strURL, False
.send
strText = .responseText
End With
If InStr(strText, "没有找到") Then Exit For
objDOM.body.innerHTML = strText
Set objDOMLi = objDOM.getElementById("search_nature_rg").getElementsByTagName("li")'获取li标签的元素集合
intLiLength = objDOMLi.Length'获取li标签的数量
lngaResult = lngaResult + intLiLength
ReDim Preserve astrResult(1 To 7, 1 To lngaResult)
For i = 0 To intLiLength - 1'遍历HTML DOM对象的li标签
k = k + 1
astrResult(1, k) = k
strDOMLi = objDOMLi(i).innerHTML
strDOMLi = strDOMLi & "now_price>search_pre_price>search_discount> ("
astrResult(4, k) = Val(Mid(Split(strDOMLi, "now_price>")(1), 2))'获取图书现价
astrResult(5, k) = Val(Mid(Split(strDOMLi, "search_pre_price>")(1), 2))'获取图书定价
If astrResult(5, k) = 0 Then astrResult(5, k) = astrResult(4, k)
astrResult(6, k) = Val(Split(strDOMLi, "search_discount> (")(1))'获取图书折扣
If astrResult(6, k) = 0 Then astrResult(6, k) = ""
With objDOMLi(i).getElementsByTagName("A")(0)'获取图书的标题和网页地址
astrResult(3, k) = .Title
astrResult(7, k) = .href
End With
With objDOMLi(i).getElementsByTagName("IMG")(0)'获取图书封面图片的网页地址
astrResult(2, k) = .src
If Left(astrResult(2, k), 4) <> "http" Then
astrResult(2, k) = .getAttribute("data-original")
End If
End With
Next i
Next intPageNum
If k = 0 Then'判断是否有符合条件的查询结果
MsgBox "未找到符合条件的查询结果。"
Exit Sub
End If
ActiveSheet.UsedRange.Offset(3).ClearContents
Application.ScreenUpdating = False
For Each objShape In ActiveSheet.Shapes
If objShape.Type = msoLinkedPicture Then objShape.Delete
Next objShape
strMsg = "一共有" & k & "张图片需要导入Excel工作表。"
If k > 50 Then strMsg = strMsg & "耗时过长!不建议导入!"
strMsgYesOrNo = MsgBox("请选择是否需要导入图书图片!" _
& vbCrLf & strMsg, vbYesNo)
If strMsgYesOrNo = vbYes Then
Const PIC_HEIGHT As Integer = 100
Const RNG_HEIGHT As Integer = 110
Const RNG_WIDTH As Integer = 16
Range("B:B").ColumnWidth = RNG_WIDTH
Range("A5").Resize(k, 1).EntireRow.RowHeight = RNG_HEIGHT
For i = 1 To k
Set vntShapePic = ActiveSheet.Pictures.Insert(astrResult(2, i))
With Cells(i + 4, 2)
vntShapePic.Height = PIC_HEIGHT
vntShapePic.Top = (RNG_HEIGHT - PIC_HEIGHT) / 2 + .Top
vntShapePic.Left = (.Width - vntShapePic.Width) / 2 + .Left
End With
astrResult(2, i) = ""
Next i
End If
Range("a4:g4") = Array("序号", "封面", "书名", "现价", "定价", "折扣", "链接")
Range("A5").Resize(k, UBound(astrResult)) = Application.Transpose(astrResult)
Application.ScreenUpdating = True
Set objXMLHTTP = Nothing
Set objDOM = Nothing
Set objDOMLi = Nothing
End Sub
运行结果如下。
微信公众号:VBA168
淘宝店铺地址:https://item.taobao.com/item.htm?spm=a1z10.1-c-s.w4004-21233576391.4.1af0683dzrx3oU&id=584940166162
关注微信公众号,每天及时接收Excel VBA经典示例讲解。
淘宝店铺提供Excel定制服务。
祝你工作和学习更轻松!