转载自:www.csdn.net
抓取网页。偶要实现实时更新天气预报,于是利用XMLHTTP组件抓取网页的指定部分。
需要分析html源代码
此例中的被抓取的html源代码如下
<p align=left>2004年8月24日星期二;白天:晴有时多云南风3—4级;夜间:晴南风3—4级;气温:最高29℃最低19℃ </p>
而程序中是以"2004年8月24日"为关键字开始搜索,直到</p>结束
而抓取的内容就变成了"2004年8月24日星期二;白天:晴有时多云南风3—4级;夜间:晴南风3—4级;气温:最高29℃最低19℃ "
干干净净的了。记录一下。
<%
' Continue with the next statement after a runtime error instead of aborting the page.
On Error Resume Next
' ASP script timeout; the value is expressed in seconds.
Server.ScriptTimeOut = 9999999
' Fetches the resource at Path and returns its body decoded as GB2312 text.
'   Path - URL handed unchanged to GetBody.
' Returns whatever BytesToBstr produces from the raw bytes GetBody returned.
Function getHTTPPage(Path)
    Dim rawBytes
    rawBytes = GetBody(Path)
    ' The charset name is passed without surrounding spaces: a padded name
    ' such as " GB2312 " is not a registered character-set name.
    getHTTPPage = BytesToBstr(rawBytes, "GB2312")
End Function
' Issues a synchronous HTTP GET for url and returns the raw response body.
'   url - address to request.
' Returns the request object's ResponseBody value (undecoded bytes).
' Errors are suppressed inside this function, so when object creation or the
' request fails the return value is simply never assigned.
' NOTE(review): Microsoft.XMLHTTP is the client-side component; consider
' MSXML2.ServerXMLHTTP for server-side use - confirm before switching.
Function GetBody(url)
    On Error Resume Next
    Dim http
    ' ProgID and HTTP verb must not contain padding spaces.
    Set http = CreateObject("Microsoft.XMLHTTP")
    ' Third argument False = synchronous: Send blocks until the response is in.
    http.Open "GET", url, False, "", ""
    http.Send
    GetBody = http.ResponseBody
    Set http = Nothing
End Function
' Decodes raw bytes into a string using the given character set.
'   body - binary data written into the stream (e.g. an XMLHTTP ResponseBody).
'   Cset - character-set name assigned to the stream's Charset property,
'          e.g. "GB2312"; leading/trailing spaces are stripped.
' Returns the text read back from the stream.
Function BytesToBstr(body,Cset)
    Dim stream
    ' ProgID must not contain padding spaces.
    Set stream = Server.CreateObject("ADODB.Stream")
    stream.Type = 1          ' 1 = adTypeBinary: accept the raw bytes
    stream.Mode = 3          ' 3 = adModeReadWrite
    stream.Open
    stream.Write body
    stream.Position = 0      ' rewind before switching the stream type
    stream.Type = 2          ' 2 = adTypeText: read the same bytes back as text
    stream.Charset = Trim(Cset)
    BytesToBstr = stream.ReadText
    stream.Close
    Set stream = Nothing
End Function
' Case-insensitive search for strng inside wstr.
'   wstr  - text to search in.
'   strng - text to look for.
' Returns the 1-based position InStr reports for the lower-cased strings;
' when that position is not greater than zero, returns Len(wstr) instead.
Function Newstring(wstr,strng)
    Dim haystack, needle, foundAt
    haystack = LCase(wstr)
    needle = LCase(strng)
    foundAt = InStr(haystack, needle)
    If foundAt <= 0 Then
        foundAt = Len(wstr)
    End If
    Newstring = foundAt
End Function
%>
' Continue with the next statement after a runtime error instead of aborting.
' NOTE(review): from here on the file repeats the script that precedes it - confirm whether this copy is intentional.
On Error Resume Next
' ASP script timeout; the value is expressed in seconds.
Server.ScriptTimeOut = 9999999
' Fetches the resource at Path and returns its body decoded as GB2312 text.
'   Path - URL handed unchanged to GetBody.
' Returns whatever BytesToBstr produces from the raw bytes GetBody returned.
Function getHTTPPage(Path)
    Dim rawBytes
    rawBytes = GetBody(Path)
    ' The charset name is passed without surrounding spaces: a padded name
    ' such as " GB2312 " is not a registered character-set name.
    getHTTPPage = BytesToBstr(rawBytes, "GB2312")
End Function
' Issues a synchronous HTTP GET for url and returns the raw response body.
'   url - address to request.
' Returns the request object's ResponseBody value (undecoded bytes).
' Errors are suppressed inside this function, so when object creation or the
' request fails the return value is simply never assigned.
' NOTE(review): Microsoft.XMLHTTP is the client-side component; consider
' MSXML2.ServerXMLHTTP for server-side use - confirm before switching.
Function GetBody(url)
    On Error Resume Next
    Dim http
    ' ProgID and HTTP verb must not contain padding spaces.
    Set http = CreateObject("Microsoft.XMLHTTP")
    ' Third argument False = synchronous: Send blocks until the response is in.
    http.Open "GET", url, False, "", ""
    http.Send
    GetBody = http.ResponseBody
    Set http = Nothing
End Function
' Decodes raw bytes into a string using the given character set.
'   body - binary data written into the stream (e.g. an XMLHTTP ResponseBody).
'   Cset - character-set name assigned to the stream's Charset property,
'          e.g. "GB2312"; leading/trailing spaces are stripped.
' Returns the text read back from the stream.
Function BytesToBstr(body,Cset)
    Dim stream
    ' ProgID must not contain padding spaces.
    Set stream = Server.CreateObject("ADODB.Stream")
    stream.Type = 1          ' 1 = adTypeBinary: accept the raw bytes
    stream.Mode = 3          ' 3 = adModeReadWrite
    stream.Open
    stream.Write body
    stream.Position = 0      ' rewind before switching the stream type
    stream.Type = 2          ' 2 = adTypeText: read the same bytes back as text
    stream.Charset = Trim(Cset)
    BytesToBstr = stream.ReadText
    stream.Close
    Set stream = Nothing
End Function
' Case-insensitive search for strng inside wstr.
'   wstr  - text to search in.
'   strng - text to look for.
' Returns the 1-based position InStr reports for the lower-cased strings;
' when that position is not greater than zero, returns Len(wstr) instead.
Function Newstring(wstr,strng)
    Dim haystack, needle, foundAt
    haystack = LCase(wstr)
    needle = LCase(strng)
    foundAt = InStr(haystack, needle)
    If foundAt <= 0 Then
        foundAt = Len(wstr)
    End If
    Newstring = foundAt
End Function
%>