本地html链接提取,如何提取网页源代码中的链接代码?

匿名用户

1级

2018-11-25 回答

Private Sub Command1_Click()
    ' Extracts every <a ... href=...>...</a> element from the HTML source held
    ' in Text1 and writes each distinct link to Text2, one per line.
    ' If no anchor is found, Text2 is left unchanged.
    '
    ' Late binding (CreateObject) is used so the project needs no references.
    ' Early-bound equivalents would be:
    '   Dim oRegEx As RegExp / MatchCollection / Match
    '   Dim colLinks As Scripting.Dictionary

    Dim sHtml As String
    Dim oRegEx As Object            ' VBScript.RegExp
    Dim oMatches As Object          ' match collection returned by Execute
    Dim oMatch As Object
    Dim colLinks As Object          ' Scripting.Dictionary used as a "seen" set
    Dim sHref As String, sInnerText As String
    Dim sLink As String
    Dim sOutput As String

    ' Flatten the source to a single line. Line breaks become a space rather
    ' than nothing, so that "<a" followed by a newline and "href" is not fused
    ' into "<ahref".
    sHtml = Text1.Text
    sHtml = Replace(sHtml, vbCrLf, " ")
    sHtml = Replace(sHtml, vbCr, " ")
    sHtml = Replace(sHtml, vbLf, " ")

    Set oRegEx = CreateObject("VBScript.RegExp")

    With oRegEx
        .Global = True              ' find all matches, not just the first
        .IgnoreCase = True          ' <A HREF=...> is matched as well

        ' SubMatches(0): the href value (double-quoted, single-quoted or bare).
        ' SubMatches(1): everything between the opening tag and </a>.
        ' "(?:[^>]*?\s)?" skips other attributes but requires whitespace
        ' directly before "href", so attributes such as data-href are ignored.
        .Pattern = "<a\s(?:[^>]*?\s)?href\s*=\s*[""']?([^""'\s>]*)[""']?[^>]*>([\s\S]*?)</a>"

        Set oMatches = .Execute(sHtml)
    End With

    If oMatches.Count >= 1 Then
        Set colLinks = CreateObject("Scripting.Dictionary")

        For Each oMatch In oMatches
            sHref = oMatch.SubMatches(0)

            ' Drop nested markup (e.g. <b>, <img>) from the link text,
            ' then remove all spaces from it.
            sInnerText = RemoveTags(oMatch.SubMatches(1))
            sInnerText = Replace(sInnerText, " ", "")

            ' NOTE(review): output is rebuilt as a minimal anchor tag from the
            ' captured href and cleaned text - confirm this is the desired format.
            sLink = "<a href=""" & sHref & """>" & sInnerText & "</a>"

            ' Emit each distinct link only once.
            If Not colLinks.Exists(sLink) Then
                colLinks.Add sLink, sLink
                sOutput = sOutput & sLink & vbNewLine
            End If
        Next

        ' Assign once instead of appending to the control inside the loop.
        Text2.Text = sOutput
    End If

    Set oMatches = Nothing
    Set oMatch = Nothing
    Set oRegEx = Nothing
    Set colLinks = Nothing
End Sub

' Removes HTML tags from a string.
'
' html    - text that may contain markup such as <b>...</b>.
' Returns - the input with every "<...>" sequence deleted; text between
'           tags is kept as is.
Function RemoveTags(ByVal html As String)
    ' Late-bound so no project reference is required.
    ' Early-bound equivalent: Dim oRegEx As RegExp / Set oRegEx = New RegExp
    Dim oRegEx As Object

    Set oRegEx = CreateObject("VBScript.RegExp")

    With oRegEx
        .Global = True          ' replace every tag, not just the first
        .IgnoreCase = True

        ' A tag is "<", any run of characters other than ">", then ">".
        ' Without the leading "<[^>" the pattern would only delete ">".
        .Pattern = "<[^>]*>"

        RemoveTags = .Replace(html, "")
    End With

    Set oRegEx = Nothing
End Function

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值