asp数据采集

  ' On Error Resume Next
Server.Scripttimeout = 300

' ---------------------------------------------------------------------
'
采集数据
Function  getHTTPData(url) 
    
dim  http 
    
set  http = Server.createobject( " Msxml2.XMLHTTP " )
    
if   instr (url, " http:// " ) = 0   then  url = " http:// " & url
    Http.open 
" GET " ,url, false  
    Http.send() 
    
if  Http.Status <> 200    then   exit   function  
    getHTTPData
= bytesToBSTR(Http.responseBody, " UTF-8 " )
    
set  http = nothing
    
if  err.number <> 0   then  err.Clear
    sCharset
= ""  
End function
' ---------------------------------------------------------------------        
Function  BytesToBstr(body,Cset)
    
dim  objstream
    
set  objstream  =  Server.CreateObject( " adodb.stream " )
    objstream.Type 
=   1
    objstream.Mode 
= 3
    objstream.Open
    objstream.Write body
    objstream.Position 
=   0
    objstream.Type 
=   2
    objstream.Charset 
=  Cset
    BytesToBstr 
=  objstream.ReadText 
    objstream.Close
    
set  objstream  =   nothing
End Function
' ---------------------------------------------------------------------    
'
服务器登录
Function  login(url) 
    
dim  http 
    
set  http = Server.createobject( " Msxml2.XMLHTTP " )
    
if   instr (url, " http:// " ) = 0   then  url = " http:// " & url
    Http.open 
" GET " ,url, false  
    Http.send() 
    
if  Http.Status <> 200   then   exit   function  
    
set  http = nothing
    
if  err.number <> 0   then  err.Clear
End function
' ---------------------------------------------------------------------
'
正则替换
Function  ReplaceText(fString,patrn, replStr)
    
Set  regEx  =   New  RegExp
    regEx.Pattern 
=  patrn
    regEx.IgnoreCase 
=   True
    regEx.Global 
=   True
    ReplaceText 
=  regEx.Replace(fString, replStr)
End Function
' ---------------------------------------------------------------------
'
去标签 包括内容
Function  ReplaceTag(str, tag)
    
Set  regEx  =   New  RegExp
    regEx.Pattern 
=   " < " & tag & " [^>]*?>.*?<// " & tag & " > "
    regEx.IgnoreCase 
=   True
    regEx.Global 
=   True
    ReplaceTag
= regEx.Replace(str,  "" )
End Function
' ---------------------------------------------------------------------    
'
去标签 不包括内容
Function  ReplaceTab(str, tag)
    
Set  regEx  =   New  RegExp
    regEx.Pattern 
=   " <//? " & tag & " [^>]*> "
    regEx.IgnoreCase 
=   True
    regEx.Global 
=   True
    ReplaceTab
= regEx.Replace(str,  "" )
End Function
' ---------------------------------------------------------------------    
'
去标签属性 保留标签
Function  ReplaceinnerTag(str, tag)
    
Set  regEx  =   New  RegExp
    regEx.Pattern 
=   " (<//? " & tag & " )[^>]*> "
    regEx.IgnoreCase 
=   True
    regEx.Global 
=   True
    ReplaceinnerTag
= regEx.Replace(str,  " $1> " )
End Function
' ---------------------------------------------------------------------    
'
按正则取数据
Function  getText(fString, patrn,n) 
    
dim  Matches, tStr
    tStr 
=  fString
    
Set  re  =   New  Regexp
    re.IgnoreCase 
=   True
    re.Global 
=   True
    re.Pattern 
=   patrn
    
set  Matches  =  re.Execute(tStr)
    
set  re  =   nothing  
    rStr 
=   ""
    
For   Each  Match in Matches
        rStr 
=  Match.SubMatches(n)
        
exit   for
    
Next
    getText 
=  rStr
End Function
' ---------------------------------------------------------------------
'
数据过滤
Function  Encode_text(str)
    
If   Isnull (str)  Then
        Encode_text 
=   ""
        
Exit   Function  
    
End   If
    str 
=  ReplaceText(str,  " <//?br[^>]*> "  , vbCrlf )
    str 
=  ReplaceText(str,  " <//?p[^>]*> "  , vbCrlf )
    str 
=  ReplaceTab(str,  " [a-zA-Z] " )
    str 
=  ReplaceText(str,  " /n/s*/r "  , Chr ( 10 ) & Chr ( 13 ))
    str 
=   Replace (str,  " & "  ,  " &amp; "  )
    str 
=   Replace (str,  " ; "  ,  " ; "  )
    str 
=   Replace (str,  " &amp; "  ,  " &amp; "  )
    str 
=   Replace (str, Chr ( 34 ),  " &quot; "  )
    str 
=   Replace (str,  " ' "  ,  " ' "  )
    str 
=   Replace (str,  " < "  ,  " &lt; "  )
    str 
=   Replace (str,  " > "  ,  " &gt; "  )
    str 
=   Replace (str,  " ( "  ,  " ( "  )
    str 
=   Replace (str,  " ) "  ,  " ) "  )
    str 
=   Replace (str,  " * "  ,  " * "  )
    str 
=   Replace (str,  " % "  ,  " % "  )
    str 
=   Replace (str,vbCrlf,  " <br/> "  )
    Encode_text 
=  str
End Function
' ---------------------------------------------------------------------
'
通过Matches取数据
dim  Matches
sub  setMatches(str,sRe)
    
Set  re  =   New  Regexp
    re.IgnoreCase 
=   True
    re.Global 
=   True
    re.Pattern 
=   sRe
    
set  Matches  =  re.Execute(str)
    
set  re = nothing  
end sub
' ---------------------------------------------------------------------
例子
' 例子
call  setMatches(textcontent, re)
For   Each  Match in Matches
    response.write Match.value
Next
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值