vb获取html表格数据,vb 读取网页数据

【实例简介】

【实例截图】

b9fa379902186907ad400ef2f383ee31.png

【核心代码】

'田草博客:www.tiancao.net

'tiancao1001@126.com

'QQ:327750885

'2008.1.16

'Option Explicit

Private Declare Function GetKeyState Lib "user32" (ByVal nVirtKey As Long) As Integer

Const VK_NUMLOCK = &H90

Private Declare Function SendMessage Lib "user32" _

Alias "SendMessageA" (ByVal hwnd As Long, _

ByVal wMsg As Long, ByVal wParam As Long, _

lParam As Any) As Long

Private Const LB_SETHORIZONTALEXTENT = &H194

Private Sub Combo2_Click()

Me.Text2.Text = Me.Combo2.Text

End Sub

Private Sub Command1_Click()

On Error Resume Next

Dim HTML As String

HTML = viewSource(Me.Text2.Text, Me.Combo1.Text)

If HTML = "" Then Exit Sub

Me.Text1.Text = HTML

Dim URLS() As String

Url_In_Html Me.Text2.Text, URLS, Me.Combo1.Text

Dim i As Integer

'Me.List1.Clear

For i = 0 To UBound(URLS)

Me.List1.AddItem URLS(i)

Next

Me.Timer1.Enabled = True

Me.Label1.Caption = Me.List1.ListCount

End Sub

'URL为网页地址

'URLS为网页代码中的URL组

Function Url_In_Html(URL As String, ByRef URLS() As String, CodeType As String)

Dim i As Long, j As Integer

URL = Replace(URL, "\", "/") '将网页地址中可能含有的“\“全部替换成成”/”,这样地址中的分割符合就一致。

i = inStr_n(URL, "/") '比如给的路径是http://www.tiancao.net

If i = 2 Then URL = URL & "/"

i = InStrRev(URL, "/")

Dim URL1 As String

URL1 = Left(URL, i) '查找地址的绝对地址路径

Dim HTML As String

HTML = viewSource(URL, CodeType)

If HTML = "" Then Exit Function

HTML = UCase(HTML) '将网页源码全部转换成大写

Dim N As Integer

Dim index() As Long

N = inStr_n(HTML, "HREF", index)

'MsgBox "总共有" & N & "个href标签"

Dim T As String

Dim T1 As String

Dim Temp As String

Dim Temp1 As String

Dim Temp2 As Integer

Dim Temp3 As Integer

Dim Temp5 As Integer

Dim M As Integer

For i = 0 To N - 1

Temp = Mid(HTML, index(i) 5, 300)

'这里取url的长度为300,如果超过则检测不到,这300个字符中可能包含下一个或几个HREF标签,但这不用担心,程序会分析每个标签的。

'为什么取那么多,是因为很多网页的URL编码可能很长,比如百度推广的广告和陶宝网的网址都很长。

For j = 2 To Len(Temp)

T = Mid(Temp, j, 1)

If T = """" Or T = ">" Or T = "'" Or T = " " Then

Temp1 = Left(Temp, j - 1)

Temp1 = Left(Temp, j - 1)

Temp2 = InStr(Temp1, " ") 'URL中含有 号的(比如)

Temp3 = InStr(Temp1, "#") 'URL中含有#号的(比如)

Temp5 = InStr(Temp1, "MAILTO") 'URL中含有空格的(比如)

'没有能检查所以的情况

If Temp2 = 0 And Temp3 = 0 And Temp5 = 0 Then

ReDim Preserve URLS(M)

If Left(Temp1, 1) = """" Or Left(Temp1, 1) = "'" Then Temp1 = Right(Temp1, Len(Temp1) - 1) 'URL前面可能还有个引号或单引号

If Temp2 = InStr(Temp1, ":") <> 0 Then

'存在冒号,说明是绝对路径(HTTP://),没有用判断HTTP来判断,是因为windows可以用HTTP给文件夹命名,而不可以用冒号

If Left(Temp1, 1) = "/" Or Left(Temp1, 1) = "\" Then

Temp1 = URL1 & Right(Temp1, Len(Temp1) - 1)

Else

Temp1 = URL1 & Temp1

End If

End If

URLS(M) = Temp1

M = M 1

Exit For

End If

End If

Next

Next

End Function

'返回某一字符串在另一个字符串中出现的次数 index返回出现的位置数组

Public Function inStr_n(str As String, StrIn As String, Optional index As Variant) As Long

Dim i As Long

Dim Temp As Long: Temp = 1

Dim N As Long

N = 0

For i = 1 To Len(str)

Temp = InStr(Temp 1, str, StrIn)

If Temp = 0 Then

Exit For

Else

If IsMissing(index) = False Then

ReDim Preserve index(N)

index(N) = Temp

End If

N = N 1

End If

Next i

inStr_n = N

End Function

'查看网页的源码

Function viewSource(URL As String, CodeType As String)

On Error GoTo E:

Dim XmlHttp

Set XmlHttp = CreateObject("Microsoft.XMLHTTP")

XmlHttp.Open "GET", URL, False

XmlHttp.setRequestHeader "Content-Type", "text/XML"

XmlHttp.Send

Dim HTML

HTML = Bytes_to_Unicode(XmlHttp.responseBody, CodeType)

viewSource = HTML

Exit Function

E:

viewSource = ""

End Function

'只能得到西文的字符串,中文只能显示GB2312编码。

Function bytes2BSTR(vIn)

Dim strReturn As String

Dim i As Long

Dim ThisCharCode As Integer

Dim NextCharCode As Integer

Dim ThirdCharCode As Integer

strReturn = ""

For i = 1 To LenB(vIn)

ThisCharCode = AscB(MidB(vIn, i, 1))

If ThisCharCode < &H80 Then

strReturn = strReturn & Chr(ThisCharCode)

Else

NextCharCode = AscB(MidB(vIn, i 1, 1))

ThirdCharCode = AscB(MidB(vIn, i 2, 1))

strReturn = strReturn & UTF8_to_Unicode(ThisCharCode, NextCharCode, ThirdCharCode)

i = i 2

End If

Next

bytes2BSTR = strReturn

End Function

'字节数值转汉字

Function Bytes_to_Unicode(Bytes, CodeType As String)

Dim strReturn As String

Dim i As Long

Dim ThisCharCode As Integer

Dim NextCharCode As Integer

Dim ThirdCharCode As Integer

strReturn = ""

For i = 1 To LenB(Bytes)

ThisCharCode = AscB(MidB(Bytes, i, 1))

If ThisCharCode < &H80 Then

strReturn = strReturn & Chr(ThisCharCode)

Else

If CodeType = "UTF-8" Or CodeType = "UTF8" Then

NextCharCode = AscB(MidB(Bytes, i 1, 1))

ThirdCharCode = AscB(MidB(Bytes, i 2, 1))

strReturn = strReturn & UTF8_to_Unicode(ThisCharCode, NextCharCode, ThirdCharCode)

i = i 2

Else

NextCharCode = AscB(MidB(Bytes, i 1, 1))

strReturn = strReturn & Unicode(ThisCharCode, NextCharCode)

i = i 1

End If

End If

Next

Bytes_to_Unicode = strReturn

End Function

'二字节汉字转换

Function Unicode(BY1, BY2) As String

Unicode = Chr(Int(BY1) * 256 Int(BY2))

End Function

'三字节的UTF-8编码转二字节的Unicode编码

Function UTF8_to_Unicode(BY1, BY2, BY3) As String

Dim BIN_UTF8 As String

BIN_UTF8 = DEC_to_BIN(Int(BY1)) & DEC_to_BIN(Int(BY2)) & DEC_to_BIN(Int(BY3))

Dim BIN_Unicode As String

BIN_Unicode = Mid(BIN_UTF8, 5, 4) & Mid(BIN_UTF8, 11, 6) & Mid(BIN_UTF8, 19, 6)

Dim DEC_Unicode As Long

DEC_Unicode = BIN_to_DEC(BIN_Unicode)

UTF8_to_Unicode = ChrW(DEC_Unicode)

End Function

Private Sub Command2_Click()

Me.Hide

Me.Timer2.Enabled = True

End Sub

Private Sub Command3_Click()

Dim FSO As Object

Dim FSO_File As Object

Set FSO = CreateObject("Scripting.FileSystemObject")

Set FSO_File = FSO.OpenTextFile(App.Path & "/url.txt", ForWriting, True) '读取文件而不创建

Dim i As Long

For i = 0 To Me.List1.ListCount - 1

FSO_File.WriteLine Me.List1.List(i)

Next

FSO_File.Close

End Sub

Private Sub Command4_Click()

Dim FSO As Object

Dim FSO_File As Object

Set FSO = CreateObject("Scripting.FileSystemObject")

Set FSO_File = FSO.OpenTextFile(App.Path & "/url.txt", ForReading, False) '读取文件而不创建

Do While Not FSO_File.AtEndOfStream

Me.List1.AddItem FSO_File.ReadLine

Loop

FSO_File.Close

Me.Timer1.Enabled = True

End Sub

Private Sub Form_Load()

Me.WebBrowser1.Navigate "http://www.tiancao.net/"

Me.WebBrowser1.Silent = True

Me.Timer1.Enabled = False

addHorScrlBarListBox List1

Me.Combo1.AddItem "UTF-8"

Me.Combo1.AddItem "GB2312"

Me.Combo1.AddItem "Unicode"

Me.Combo1.Text = "GB2312"

Me.Combo2.Text = "http://www.tiancao.net/"

Me.Combo2.AddItem "http://www.tiancao.net/"

Me.Combo2.AddItem "http://tiancao.net"

Me.Combo2.AddItem "http://ntsjytfgs.w39.cndns.com/"

Me.Combo2.AddItem "http://tiancao1001.w18.cndns.com/"

End Sub

Private Sub List1_DblClick()

Me.WebBrowser1.Navigate Me.List1.List(Me.List1.ListIndex)

End Sub

Private Sub List1_MouseDown(Button As Integer, Shift As Integer, X As Single, Y As Single)

On Error Resume Next

If Button = 2 Then Me.List1.RemoveItem Me.List1.ListIndex

End Sub

'每一分钟随机打开list中的一个连接

Private Sub Timer1_Timer()

On Error Resume Next

Dim j As Integer

j = Rnd() * Me.List1.ListCount

Me.WebBrowser1.Navigate Me.List1.List(j)

End Sub

' list加横向滚动条

Public Sub addHorScrlBarListBox(ByVal refControlListBox As Object)

Dim nRet As Long

Dim nNewWidth As Integer

nNewWidth = refControlListBox.Width * 4 ' 新宽度,以像素为单位。

nRet = SendMessage(refControlListBox.hwnd, _

LB_SETHORIZONTALEXTENT, nNewWidth, ByVal 0&)

End Sub

Private Sub Timer2_Timer()

Dim i As Long

i = GetKeyState(VK_NUMLOCK)

If i = 0 Then

Me.Show

Me.Timer2.Enabled = False

End If

End Sub

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值