Public Function HtmlStr$(URL$) '提取网页源码函数
Dim XmlHttp
Set XmlHttp = CreateObject("Microsoft.XMLHTTP")
XmlHttp.Open "GET", URL, False
XmlHttp.Send
If XmlHttp.ReadyState = 4 Then HtmlStr = StrConv(XmlHttp.Responsebody, vbUnicode)
End Function
2.函数调用:
Dim strweb1 As String
strweb1 = HtmlStr("http://www.baidu.com")
二?获取WebBrowser控件中网页源代码
1.函数代码:
Public Function WebDaima(WebBrowser, BuFen) '获取WebBrowser控件中网页源代码
Select Case BuFen
Case "Body" '只获取<body>与</body>之间的代码
WebDaima = WebBrowser.Document.body.innerhtml
Case "All" '获取整个网页源代码
WebDaima = WebBrowser.Document.documentelement.outerhtml
Case Else
WebDaima = WebBrowser.Document.documentelement.outerhtml
End Select
End Function
2.调用
Dim strWeb As String
strWeb = WebDaima(frmIndex.WebBrowser1, "All") '获取整个网页源代码
strWeb = WebDaima(frmIndex.WebBrowser1, "Body") '只获取body中源代码
三?提取字符串或网页源代码中指定的资源 (可利用这一函数做文章采集器)
1.函数代码:
Public Function FindStrMulti$(Strall$, FirstStr$, EndStr$, SplitStr$) '提取字符串或网页源代码中所有指定代码
'参数
'总文本,起始字符串,终止字符串,分隔符
Dim i&, j&
j = 1
Do
i = InStr(j, Strall, FirstStr)
If i = 0 Then
Exit Do
End If
i = i + Len(FirstStr)
j = InStr(i, Strall, EndStr)
If j > 0 Then
FindStrMulti = IIf(Len(FindStrMulti) > 0, FindStrMulti & SplitStr, "") & Mid(Strall, i, j - i)
Else
Exit Do
End If
Loop
End Function
2.函数调用
截取字符串中的内容
Dim str1 As String
Dim str2 As String
str1 = "<table><tr><td>要截取的内容</td></tr></table>"
str2 = FindStrMulti(str1, "<td>", "</td>", "")
MsgBox str2
'此时str2的值就为 要截取的内容
文章列表标题链接采集实例
网页代码
<DIV id=content><SPAN class=navbar><STRONG><A href="/blog/">博客首页</A> > 文章列表</STRONG></SPAN>
<TABLE class=content_table width="100%">
<TBODY>
<TR>
<TD>
<H1>比目鱼博客文章列表</H1>
<P>
<UL>
<LI><SPAN class=list-category>[文坛张望]</SPAN> <A class=list-title href="/blog/archives/119491210.shtml"><STRONG>谁会拿下2010年的诺贝尔文学奖?</STRONG></A> <SPAN class=list-date>(2010-10-01 22:38)</SPAN></LI>
<LI><SPAN class=list-category>[视觉训练]</SPAN> <A class=list-title href="/blog/archives/119247165.shtml"><STRONG>书法练习二幅</STRONG></A> <SPAN class=list-date>(2010-09-29 01:51)</SPAN> </LI>
<LI><SPAN class=list-category>[文坛张望]</SPAN> <A class=list-title href="/blog/archives/118604217.shtml"><STRONG>骆以军对话董启章</STRONG></A> <SPAN class=list-date>(2010-09-21 17:15)</SPAN> </LI>
<LI><SPAN class=list-category>[视觉训练]</SPAN> <A class=list-title href="/blog/archives/118206492.shtml"><STRONG>夜临古画(六) </STRONG></A><SPAN class=list-date>(2010-09-17 01:46)</SPAN> </LI>
<LI><SPAN class=list-category>[我也读书]</SPAN> <A class=list-title href="/blog/archives/117345094.shtml"><STRONG>Jennifer Egan 的《A Visit From the Goon Squad》</STRONG></A> <SPAN class=list-date>(2010-09-07 02:30)</SPAN> </LI>
<LI><SPAN class=list-category>[我也读书]</SPAN> <A class=list-title href="/blog/archives/116446375.shtml"><STRONG>当我们谈论电子书的时候我们在谈论电子书阅读器</STRONG></A> <SPAN class=list-date>(2010-08-27 16:51)</SPAN> </LI>
<LI><SPAN class=list-category>[IT互联网]</SPAN> <A class=list-title href="/blog/archives/116133972.shtml"><STRONG>“读写人”和“比目鱼”网站的手机版</STRONG></A> <SPAN class=list-date>(2010-08-24 02:04)</SPAN> </LI>
</UL>
<P></P>
<P align=center>
<P align=center><STRONG>1 <A href="/blog/list_all_2.shtml">2</A> <A href="/blog/list_all_3.shtml">3</A> <A href="/blog/list_all_4.shtml">4</A> <A href="/blog/list_all_5.shtml">5</A> <A href="/blog/list_all_6.shtml">6</A> <A href="/blog/list_all_7.shtml">7</A> <A href="/blog/list_all_8.shtml">8</A> <A href="/blog/list_all_2.shtml">>></A> </STRONG></P>
<P></P></TD></TR></TBODY></TABLE>
<P> </P></DIV><!-- END CONTENT --><!-- BEGIN SITEBAR -->
<DIV id=sidebar>
<P>
从以上代码中获取<ul>与</ul>之间所有文章的标题链接,实现方法如下:
Dim strWeb As String
Dim i As Integer
Dim strListArea As String
Dim strLink '定义存放列表文章链接的数组
strWeb = WebDaima(Me.WebBrowser1, "Body") '获取网页body代码(具体查看WebDaima函数)
strListArea = FindStrMulti(strWeb, "<H1>比目鱼博客文章列表</H1>", "</UL>", "") '截列表区域代码
'获取列表区域中文章链接,并存在在数组strLink中
strLink = Split(FindStrMulti(strListArea, "href=" & Chr(34), Chr(34) & "><STRONG>", vbCrLf), vbCrLf)
For i = 0 To UBound(strLink) '循环输出链接
Text1.Text = Text1.Text & strLink(i) & vbCrLf
Next i
四?中文汉字转化为URL编码
函数代码:
'以下两个函数用于将文字转化为UTF8或GBK编码:(如在百度中搜索内容时,百度先将搜索词转化为UTF8的编码,再传送给服务器)
'调用:
'KeyWordUtf = UTF8EncodeURI(KeyWord) 或 KeyWordUtf = GBKEncodeURI(KeyWord)
Public Function UTF8EncodeURI(szInput)
Dim wch, uch, szRet
Dim x
Dim nAsc, nAsc2, nAsc3
If szInput = "" Then
UTF8EncodeURI = szInput
Exit Function
End If
For x = 1 To Len(szInput)
wch = Mid(szInput, x, 1)
nAsc = AscW(wch)
If nAsc < 0 Then nAsc = nAsc + 65536
If (nAsc And &HFF80) = 0 Then
szRet = szRet & wch
Else
If (nAsc And &HF000) = 0 Then
uch = "%" & Hex(((nAsc / 2 ^ 6)) Or &HC0) & Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
Else
uch = "%" & Hex((nAsc / 2 ^ 12) Or &HE0) & "%" & _
Hex((nAsc / 2 ^ 6) And &H3F Or &H80) & "%" & _
Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
End If
End If
Next
UTF8EncodeURI = szRet
End Function
Public Function GBKEncodeURI(szInput)
Dim i As Long
Dim x() As Byte
Dim szRet As String
szRet = ""
x = StrConv(szInput, vbFromUnicode)
For i = LBound(x) To UBound(x)
szRet = szRet & "%" & Hex(x(i))
Next
GBKEncodeURI = szRet
End Function
函数调用:
MsgBox UTF8EncodeURI("中文汉字")
MsgBox GBKEncodeURI("中文汉字")
五?获取网页中的验证码
函数代码:
Public Function GetImg(WebBrowser, Img, sxz)
'参数
'WebBrowser:等获取验证码网页所在的WebBrowser控件
'Img:显示验证码的Image控件
'sxz:网页中验证码相应属性的属性值
Dim CtrlRange, x
For Each x In WebBrowser.Document.All
If UCase(x.tagName) = "IMG" Then
'x.src为验证码图片的属性,也可是其他属性 如 x.onload等
If InStr(x.src, sxz) > 0 Then
Set CtrlRange = WebBrowser.Document.body.createControlRange()
CtrlRange.Add (x)
CtrlRange.execCommand ("Copy")
Debug.Print "Copy"
Img.Picture = Clipboard.GetData
End If
End If
Next
End Function
函数调用:
'如获取网页http://www.pceggs.com/login.aspx中的验证码图片代码如下:
'<IMG id=valiCode style="CURSOR: pointer" alt=验证码 src="/VerifyCode_Login.aspx" border=0>
'获取验证码函数调用如下:
Call GetImg(Form1.WebBrowser1, Form1.Image1, "VerifyCode_Login.aspx")
六?WebBrowser控件中网页按钮的点击
'<BUTTON id="WordSearchBtn" class="btn">查询</button>
'此按钮的点击方法
WebBrowser1.Document.getelementsbytagname("BUTTON")("WordSearchBtn").Click
七?WebBrowser控件中网页文本框的赋值
'文本框代码:<input id="WordInput" maxlength="40" type="text" />
WebBrowser1.Document.getelementsbytagname("input")("WordInput").Value = "要在文本框输入的文字"
'此处WordInput为文本框的ID或Name属性值
八、WebBrowser控件中网页列表/菜单表单选项的选取
函数代码
Public Function SelectXq(WebBrowser, SelectName, SelectValue)
'参数
'WebBrowser:WebBrowser控件名称
'SelectName:网页中 列表/菜单 表单名称或ID值
'SelectValue:选中值
WebBrowser.doc.All.Item(SelectName).Value = SelectValue
End Function
函数调用方法:
WebBrowser中网页Select表单代码如下:
<SELECT id=ctl00_ContentPlaceHolder1_DropDownList1 name=ctl00$ContentPlaceHolder1$DropDownList1> <OPTION value=我就读的第一所学校的名称? selected>我就读的第一所学校的名称?</OPTION> <OPTION value=我最喜欢的休闲运动是什么?>我最喜欢的休闲运动是什么?</OPTION> <OPTION value=我最喜欢的运动员是谁?>我最喜欢的运动员是谁?</OPTION> <OPTION value=我最喜欢的物品的名称?>我最喜欢的物品的名称?</OPTION> <OPTION value=我最喜欢的歌曲?>我最喜欢的歌曲?</OPTION> <OPTION value=我最喜欢的食物?>我最喜欢的食物?</OPTION> <OPTION value=我最爱的人的名字?>我最爱的人的名字?</OPTION> <OPTION value=我最爱的电影?>我最爱的电影?</OPTION> <OPTION value=我妈妈的生日?>我妈妈的生日?</OPTION></SELECT>
'让列表表单选中选项值为 我最爱的人的名字 的选项
Call SelectXq(Form1.WebBrowser1, "ctl00_ContentPlaceHolder1_DropDownList1", "我最爱的人的名字?")
八?自动填写注册表单并提交
网页表单代码
<form method="POST" action="result.asp">
<p>请填写下面表单注册(*项为必添项)</p>
<p>*姓名<input type="text" name="Name" size="20"></p>
<p>*男<input type="radio" value="V1" name="R1"></p>
<p>*女<input type="radio" value="V1" name="R2"></p>
<p>*昵称<input type="text" name="NickName" size="20"></p>
<p>*兴趣爱好<select name="aihao">
<option value="计算机">计算机</option>
<option value="游戏">游戏</option>
<option value="逛街">逛街</option>
</select></p>
<p>电子邮件<input type="text" name="EMail" size="20"></p>
<p>*密码<input type="password" name="Password" size="20"></p>
<p><input type="submit" value="提交" name="B1">
<input type="reset" value="全部重写" name="B2"></p>
</form>
填写表单并提交操作代码
Private Sub Form_Load()
WebBrowser1.Navigate2 App.Path & "/test.htm"
End Sub
Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Dim vDoc, vTag
Dim i As Integer
Set vDoc = WebBrowser1.Document
List1.Clear
For i = 0 To vDoc.All.Length - 1
If UCase(vDoc.All(i).tagName) = "INPUT" Or UCase(vDoc.All(i).tagName) = "SELECT" Then
Set vTag = vDoc.All(i)
If vTag.Type = "text" Or vTag.Type = "password" Or vTag.Type = "radio" Or vTag.Name = "aihao" Then
List1.AddItem vTag.Name
Select Case vTag.Name
Case "Name"
vTag.Value = "IMGod"
Case "R2"
vTag.Checked = True
Case "NickName"
vTag.Value = "IMGod"
Case "aihao"
vTag.Value = "逛街"
Case "Password"
vTag.Value = "IMGodpass"
Case "EMail"
vTag.Value = "IMGod@paradise.com"
End Select
ElseIf vTag.Type = "submit" Then
vTag.Click
End If
End If
Next i
End Sub
九?限制WebBrowser控件中网页的所有链接在同一个窗口打开
Private Sub Form_Load()
WebBrowser1.Navigate ("http://www.hywz123.com/tool")
End Sub
Private Sub WebBrowser1_NewWindow2(ppDisp As Object, Cancel As Boolean)
Cancel = True
WebBrowser1.Navigate WebBrowser1.Document.activeelement.href
End Sub
十?控件WebBrowser控件中网页弹窗或新窗口打开的链接在另一个WebBrowser控件中打开
Private Sub WebBrowser1_NewWindow2(ppDisp As Object, Cancel As Boolean)
Set ppDisp = WebPageAd.Object
End Sub
十一?禁止WebBrowser控件中网页弹窗
Private Sub WebBrowser1_NewWindow2(ppDisp As Object, Cancel As Boolean)
Cancel = True
End Sub