用vba程序从网页里抓取想要的内容,如下代码可抓取autopiano.cn网站上的所有曲谱和歌词:
Private Sub MacraGrabTunes()
Dim iBegin, iEnd, iRow As Integer
Dim t0, t1 As Single
Dim Sign1, Sign2 As String
Dim strHtml, strTitle, strTune As String
Dim Web1 As Object
Set Web1 = CreateObject("Msxml2.ServerXMLHTTP.3.0")
Application.ScreenUpdating = False
t0 = Timer
For i = 1 To 5000
strURL = "https://www.autopiano.cn/post/" & i
Web1.Open "GET", strURL, False
On Error Resume Next
Web1.Send
strHtml = Web1.responseText
If Err < 0 Then
strHtml = "检测网址找不到或没返回信息!"
MsgBox strHtml
Else
Sign1 = "<div class=""section-content"">"
iBegin = InStr(strHtml, Sign1)
If iBegin < 1 Then
strHtml = "没找到歌谱"
Else
Sign1 = "<title>"
Sign2 = "- 自由钢琴"
iBegin = InStr(strHtml, Sign1)
iEnd = InStr(strHtml, Sign2)
strTitle = Mid(strHtml, iBegin + Len(Sign1), iEnd - iBegin - Len(Sign1) - 1)
Sign1 = "<div class=""section-content"">"
iBegin = InStr(strHtml, Sign1)
strHtml = Mid(strHtml, iBegin - 1, Len(strHtml))
Sign2 = "</div>"
iBegin = InStr(strHtml, Sign1)
iEnd = InStr(strHtml, Sign2)
strTune = Mid(strHtml, iBegin + Len(Sign1) + 1, iEnd - iBegin - Len(Sign1) - 1)
Sign1 = "<div class=""section lyric-section"">"
iBegin = InStr(strHtml, Sign1)
If iBegin > 0 Then
strHtml = Mid(strHtml, iBegin - 1, Len(strHtml))
Sign1 = "<div class=""section-content"">"
iBegin = InStr(strHtml, Sign1)
iEnd = InStr(strHtml, Sign2)
strHtml = Mid(strHtml, iBegin + Len(Sign1) + 1, iEnd - iBegin - Len(Sign1) - 1)
Else
strHtml = "-NULL-"
End If
If Len(strHtml) < 4 Then strHtml = "-NULL-"
iRow = Cells(Rows.Count, 1).End(xlUp).Row + 1
If iRow < 5 Then iRow = 5
Cells(iRow, 1) = i
Cells(iRow, 2) = strTitle
Cells(iRow, 3) = strTune
Cells(iRow, 4) = strHtml
t1 = Timer
Do
DoEvents
Loop While Timer - t1 < 0.02
End If
End If
Next
Cells.Replace What:="<p>=", Replacement:="'=", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Cells.Replace What:="<p>", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Cells.Replace What:="</p>", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Cells.Replace What:="<br />", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Application.ScreenUpdating = True
t1 = Timer
MsgBox "结束,用时:" & t1 - t0
End Sub