Visual Basic Code | '**************************************************************************************************** ' 'Name.......... WEB Page Read Program 'File.......... WEBRead.frm 'Version....... 1.0.0 'Dependencies.. XMLHTTP Object 'Description... Dynamic read URL html data 'Author........ Zhou Wen Xing 'Date.......... Nov, 5nd 2010 'CSDN Accounts..SupermanKing ' 'Copyright (c) 2008 by www.rljy.com 'LiuZhou city, China ' '**************************************************************************************************** '==================================================================================================== ' API function defining ( API 函数定义 ) '==================================================================================================== Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" ( _ Destination As Any, _ Source As Any, _ ByVal Length As Long)
'==================================================================================================== ' Form event dispose process ( 窗体基本的事件处理过程 ) '==================================================================================================== '==================== 点击按钮1产生的事件 ==================== Private Sub Command1_Click() '==================== 变量定义 ==================== Dim strTemp As String ' 临时字符串变量 Dim strUserList As String ' 最终拼合用户列表的变量 Dim strSearch As String ' 搜索关键内容的字符串变量 Dim lngSearchSize As Long ' 搜索关键内容的字符串大小 Dim lngStart As Long ' 搜索用户字符串时存储开始位置的变量 Dim lngEnd As Long ' 搜索用户字符串时存储结束位置的变量 Dim ComXMLHTTP As Object ' 访问网页的 XMLHTTP 对象 Dim byteHTML() As Byte ' 存储网页内容的字节流数组变量
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 初始化变量 ==================== strUserList = "" strSearch = "class=""dropmenu"" οnmοuseοver=""showMenu(this.id)"">" lngSearchSize = LenB(StrConv(strSearch, vbFromUnicode))
'==================== 开始下载指定 URL 的数据内容 ==================== Set ComXMLHTTP = CreateObject("Microsoft.XMLHTTP") '初始化 XMLHTTP 对象 If Err.Number <> 0 Then MsgBox "错误:" & Err.Number & "," & Err.Description Err.Clear Exit Sub End If ComXMLHTTP.Open "GET", "http://bbs.duowan.com/thread-17408898-2-1.html", False '设置访问方式和URL地址 ComXMLHTTP.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded" '向HTTP头加入参数 ComXMLHTTP.Send '提交HTTP请求 If Err.Number <> 0 Then MsgBox "错误:" & Err.Number & "," & Err.Description Err.Clear Exit Sub End If '---------- 判断下载是否成功 ---------- If ComXMLHTTP.Status <> 200 Then MsgBox "访问URL失败,请您确定。", 64, "提示" Exit Sub End If '==================== 下载 URL 的数据完成后将数据读入字节数组中 ==================== '---------- 将数据读入 byteHTML 这个字节数组中 ---------- ' 因为该网页原来是 UTF-8 编码,所以取得的数据也就是 UTF-8 的编码数据 byteHTML = ComXMLHTTP.ResponseBody Call SaveTextFile("c:/UTF-8.txt", byteHTML, "UTF-8") ' 保存原始数据到磁盘,可以验证数据的完整性
'---------- 将 UTF-8 编码的字节数组转换成 Unicode 编码的字节数组 ---------- byteHTML = UTF8ToUnicode(byteHTML) Call SaveTextFile("c:/Unicode.txt", byteHTML, "Unicode") ' 保存转换 Unicode 后的数据到磁盘,可以验证数据的完整性
'---------- 将 Unicode 编码的字节数组转换成 GB2312 编码的字节数组 ---------- ' 其转换目的是方便用 GB2312 的字符串查找数据,当然直接用 Unicode 也是可以实现的 byteHTML = UnicodeToGB2312(byteHTML) Call SaveTextFile("c:/GB2312.txt", byteHTML) ' 保存转换 GB2312 后的数据到磁盘,可以验证数据的完整性
'==================== 得到完整的 GB2312 编码数组数据后,开始分析网页内容 ==================== ' 第一个找到的被忽略,因为这个不是所需的内容 lngStart = InStr_Array(0, byteHTML, strSearch) ' 如果一个都找不到,就没必要继续下去了 If lngStart >= 0 Then lngStart = lngStart + lngSearchSize '---------- 开始循环查找所有用户内容 ---------- Do ' 这里开始才是要找的东西位置 lngStart = InStr_Array(lngStart, byteHTML, strSearch) If lngStart >= 0 Then lngStart = lngStart + lngSearchSize lngEnd = InStr_Array(lngStart, byteHTML, "") strTemp = Mid_Array(byteHTML, lngStart, lngEnd - lngStart) lngStart = lngEnd strUserList = strUserList & strTemp & vbCrLf End If Loop While lngStart >= 0 End If '==================== 完成工作将用户信息合并内容输出到文本框 ==================== Text1.Text = strUserList End Sub
'==================================================================================================== ' User in the class custom's funtion dispose process ( 自定义函数及处理过程 ) '==================================================================================================== '---------------------------------------------------------------------------------------------------- ' Function Name: UTF8ToUnicode ' Input Parameter: funUTF8(Byte Array) - The UTF-8's byte array ' Return Value: (Byte Array) - Return Unicode's byte array ' Description : Visual Basic compile's conversion the UTF-8 to Unicode dispose process ' Author : SupermanKing '---------------------------------------------------------------------------------------------------- Function UTF8ToUnicode(ByRef funUTF8() As Byte) As Byte() '==================== 变量定义 ==================== Dim lngLength As Long Dim lngLengthB As Long Dim lngUTF8Char As Long Dim intWChar As Integer Dim byteTemp As Byte Dim byteBit As Byte Dim byteUnicode() As Byte Dim lngUTF8Count As Long Dim i As Long Dim j As Long
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 初始化变量 ==================== lngLengthB = 0
'==================== 校验输入参数 ==================== lngLength = UBound(funUTF8) + 1 If Err.Number <> 0 Then Err.Clear Exit Function End If
'==================== 开始循环处理编码转换过程 ==================== For i = 0 To lngLength - 1 '-------------------- 根据 UTF-8 编码规则数 UTF-8 字符的存储个数 -------------------- lngUTF8Count = 0 byteTemp = funUTF8(i) For j = 1 To 7 byteBit = Int(byteTemp / (2 ^ (8 - j))) '二进制位向右偏移 (8 - j) 个二进制位 byteBit = byteBit And 1 '取最后一个二进制位值 If byteBit = 1 Then lngUTF8Count = lngUTF8Count + 1 Else '碰到0就结束数字符数操作 Exit For End If Next j
'-------------------- 判断编码内存储的内容是否是经过编码的 -------------------- If lngUTF8Count < 2 Or lngUTF8Count > 3 Then '---------- 没有经过 UTF-8 格式编码,直接转换成 Unicode 编码 ---------- If lngLengthB = 0 Then lngLengthB = 2 ReDim byteUnicode(lngLengthB - 1) Else lngLengthB = lngLengthB + 2 ReDim Preserve byteUnicode(lngLengthB - 1) End If byteUnicode(lngLengthB - 2) = byteTemp Else '---------- 经过 UTF-8 格式编码,先读出内容后再转换成 Unicode 编码 ---------- ' 读出这几个UTF-8字节内容 For j = 0 To lngUTF8Count - 1 byteTemp = funUTF8(i + j) If j = 0 Then '第一个UTF-8编码含编码字节信息,所以取存储信息特别点 byteTemp = byteTemp And ((2 ^ (8 - (lngUTF8Count + 1))) - 1) lngUTF8Char = byteTemp Else '后面的只取6个二进制位 byteTemp = byteTemp And &H3F lngUTF8Char = lngUTF8Char * &H40 '向左偏移6位好存储后面的6位数据 lngUTF8Char = lngUTF8Char Or byteTemp '将低6位的数据补充到编码中 End If Next j ' 已经取出Unicode编码到 lngUTF8Char 里 If lngLengthB = 0 Then lngLengthB = 2 ReDim byteUnicode(lngLengthB - 1) Else lngLengthB = lngLengthB + 2 ReDim Preserve byteUnicode(lngLengthB - 1) End If byteUnicode(lngLengthB - 2) = lngUTF8Char And 255 byteUnicode(lngLengthB - 1) = Int(lngUTF8Char / (2 ^ 8)) And 255 i = i + (lngUTF8Count - 1) End If If i > (lngLength - 1) Then Exit For End If Next i
'==================== 完成编码转换过程,返回数据 ==================== UTF8ToUnicode = byteUnicode End Function
'---------------------------------------------------------------------------------------------------- ' Function Name: UnicodeToGB2312 ' Input Parameter: funUnicode(Byte Array) - The Unicode's byte array ' Return Value: (Byte Array) - Return GB2312's byte array ' Description : Visual Basic compile's conversion the Unicode to GB2312 dispose process ' Author : SupermanKing '---------------------------------------------------------------------------------------------------- Function UnicodeToGB2312(ByRef funUnicode() As Byte) As Byte() '==================== 变量定义 ==================== Dim lngLength As Long Dim lngLengthB As Long Dim byteGB2312() As Byte Dim i As Long Dim intWChar As Integer Dim intChar As Integer
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 初始化变量 ==================== lngLengthB = 0
'==================== 校验输入参数 ==================== lngLength = UBound(funUnicode) + 1 If Err.Number <> 0 Then Err.Clear Exit Function End If lngLength = lngLength / 2
'==================== 开始循环处理编码转换过程 ==================== For i = 0 To lngLength - 1 CopyMemory intWChar, funUnicode(i * 2), 2 intChar = Asc(StrConv(ChrW(intWChar), vbNarrow)) If intChar < 0 Or intChar > 255 Then If lngLengthB = 0 Then lngLengthB = 2 ReDim byteGB2312(lngLengthB - 1) byteGB2312(lngLengthB - 1) = intChar And 255 byteGB2312(lngLengthB - 2) = Int(CLng("&H" & Hex(intChar)) / (2 ^ 8)) And 255 Else lngLengthB = lngLengthB + 2 ReDim Preserve byteGB2312(lngLengthB - 1) byteGB2312(lngLengthB - 1) = intChar And 255 byteGB2312(lngLengthB - 2) = Int(CLng("&H" & Hex(intChar)) / (2 ^ 8)) And 255 End If Else If lngLengthB = 0 Then lngLengthB = 1 ReDim byteGB2312(lngLengthB - 1) byteGB2312(lngLengthB - 1) = CByte(intChar) Else lngLengthB = lngLengthB + 1 ReDim Preserve byteGB2312(lngLengthB - 1) byteGB2312(lngLengthB - 1) = CByte(intChar) End If End If Next i
'==================== 完成编码转换过程,返回数据 ==================== UnicodeToGB2312 = byteGB2312 End Function
'---------------------------------------------------------------------------------------------------- ' Function Name: InStr_Array ' Input Parameter: funStart(Long) - Search the byte array start's address ' : funBytes(Byte Array) - Want search data's byte array ' : funFind(String) - Search's qualification ' Return Value: (Long) - Find qualification's address ' Description : Imitate InStr function's dispose process ' Author : SupermanKing '---------------------------------------------------------------------------------------------------- Function InStr_Array(ByVal funStart As Long, _ ByRef funBytes() As Byte, _ ByVal funFind As String) As Long '==================== 变量定义 ==================== Dim byteFindArray() As Byte Dim lngBytesCount As Long Dim lngFindCount As Long Dim lngIsFind As Long Dim i As Long Dim j As Long
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 初始化变量 ==================== InStr_Array = -1
'==================== 校验输入参数 ==================== '---------- 校验搜索条件参数 ---------- If Len(funFind) = 0 Then Exit Function End If '---------- 校验搜索内容参数 ---------- lngBytesCount = UBound(funBytes) If Err.Number <> 0 Then Err.Clear Exit Function End If byteFindArray = StrConv(funFind, vbFromUnicode) lngFindCount = UBound(byteFindArray) '---------- 校验搜索位置参数 ---------- If funStart + lngFindCount > lngBytesCount Then Exit Function End If
'==================== 开始搜索数据 ==================== For i = funStart To lngBytesCount lngIsFind = 1 For j = 0 To lngFindCount If funBytes(i + j) < &HA0 And byteFindArray(j) < &HA0 Then If UCase(Chr(funBytes(i + j))) <> UCase(Chr(byteFindArray(j))) Then lngIsFind = 0 Exit For End If Else If funBytes(i + j) <> byteFindArray(j) Then lngIsFind = 0 Exit For End If End If Next j If lngIsFind = 1 Then InStr_Array = i Exit For End If Next i End Function
'---------------------------------------------------------------------------------------------------- ' Function Name: Mid_Array ' Input Parameter: funBytes(Byte Array) - Want get data's byte array ' : funStart(Long) - Want get data's start address ' : funCount(Long) - Want get data's size ' Return Value: (String) - Return want get string ' Description : Imitate Mid function's dispose process ' Author : SupermanKing '---------------------------------------------------------------------------------------------------- Function Mid_Array(ByRef funBytes() As Byte, _ ByVal funStart As Long, _ ByVal funCount As Long) As String '==================== 变量定义 ==================== Dim byteRead() As Byte Dim lngBytesCount As Long
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 初始化变量 ==================== Mid_Array = ""
'==================== 校验输入参数 ==================== lngBytesCount = UBound(funBytes) If Err.Number <> 0 Then Err.Clear Exit Function End If If funStart + funCount > lngBytesCount Then Exit Function End If
'==================== 开始取指定数据内容 ==================== ReDim byteRead(funCount - 1) CopyMemory byteRead(0), funBytes(funStart), funCount Mid_Array = StrConv(byteRead, vbUnicode) End Function
'---------------------------------------------------------------------------------------------------- ' Function Name: SaveTextFile ' Input Parameter: funFileName(String) - Save file's path ' : funBytes(Byte Array) - Save file's data ' : funMode(String) - Data codeing mode ' Return Value: (void) ' Description : Save .txt file dispose process ' Author : SupermanKing '---------------------------------------------------------------------------------------------------- Sub SaveTextFile(ByVal funFileName As String, _ ByRef funBytes() As Byte, _ Optional ByVal funMode As String = "GB2312") '==================== 变量定义 ==================== Dim fs As Integer
On Error Resume Next ' 开始设置错误陷阱,防止程序发生意外错误而崩溃 '==================== 校验输入参数 ==================== ' 判断给定文件地址是否可读写,同时也可进行文件数据初始化操作 fs = FreeFile Open funFileName For Output As #fs If Err.Number <> 0 Then MsgBox "错误:" & Err.Number & "," & Err.Description, 16, "错误" Err.Clear Exit Sub End If Close #fs
'==================== 开始写文件数据 ==================== fs = FreeFile Open funFileName For Binary As #fs '根据编码模式来写 TXT 文件头,这样可让 Windows 记事本识别该文件的编码方式 Select Case UCase(funMode) Case "GB2312": '输出 GB2312 编码的文本文件 Put #1, 1, funBytes
Case "UNICODE": '输出 Unicode 编码的文本文件 Put #1, 1, CByte(&HFF) Put #1, 2, CByte(&HFE) Put #1, 3, funBytes
Case "UTF-8": '输出 UTF-8 编码的文本文件 Put #1, 1, CByte(&HEF) Put #1, 2, CByte(&HBB) Put #1, 3, CByte(&HBF) Put #1, 4, funBytes End Select Close #fs End Sub
| |