ASP实现的小偷类

以前的一个项目为抓内容写的小偷类,感觉是一个不错的类,便拿出来分享一下。

< %
' ******************************************************************
'
ClassName:    Pub_Thief
'
Name:        通用小偷类
'
Version:    2.0
'
Author:    lg970044
'
Date:        2006-2-16
'
Note:        1.出错标记为"$False$"
'
            2.分隔标记为"$|||$"
'
******************************************************************
Class Pub_Thief
    
Private  PT_URL             ' 目标网址
     Public     PT_RegExp         ' 正则表达式对象
     Public      CharSet             ' 设置字符集(可设置,默认GB2312)
     Public     Str_Html         ' 获取的HTML代码,是加工操作的字符串
     Public     IgnoreCase         ' 设置是否区分大小写,True忽略大小写,False区分大小写(可设置,默认True)
     Public     RequestMethod     ' 设置网页请求方式(可设置,默认GET)
     Public     RequestForm         ' 设置Post请求方式的表单内容(格式为:表单项1=值1&表单项2=值2,当传递的表单值有特殊字符时,应用Server.URLEncode来转换)
     Public   Property   Get  Version
        Version
= " 通用小偷类 V2.0 "
    
End Property
    
    
' 类初始化
     Private   Sub  Class_Initialize()
        PT_URL
= " $False$ "
        Str_Html
= " $False$ "
        
Set  PT_RegExp = New  RegExp
        PT_RegExp.IgnoreCase
= True          ' 忽略大小写
        PT_RegExp.Global = True              ' 全程匹配。
        IgnoreCase = True
        CharSet
= " GB2312 "
        RequestMethod
= " GET "
        RequestForm
= ""
    
End Sub

    
' 注销类
     Private   Sub  Class_Terminate()
        
Set  PT_RegExp = Nothing
    
End Sub
    

    
' 连接网址,获取网页HTML源码(Get_Url为目标网址)
     Public   Sub  Open(Get_Url)
        PT_URL
= Get_Url
        getHttpPage()
    
End Sub

    
' 根据目标网页的HTML代码写入到Str_Html属性中
     Private   Sub  getHttpPage()
        
If  PT_URL = ""   Or  PT_URL = " $False$ "   Then
            Str_Html
= " $False$ "
            
Exit   Sub
        
End   If
        
Dim  Http
        
On   Error   Resume   Next
        
Set  Http = Server.CreateObject( " MSXML2.XMLHTTP " )
        Http.Open RequestMethod,PT_URL,
False
        
If   UCase (RequestMethod) = " POST "   Then      ' 当RequestMethod属性为POST时传送表单
            Http.setRequestHeader  " Content-Type " " application/x-www-form-urlencoded "
            Http.Send(RequestForm)
        
Else
            Http.Send()
        
End   If
        
' 过滤传输失败的情况
         If  Http.ReadyState <> 4   Then
            
Set  Http = Nothing
            Str_Html
= " $False$ "
            
Exit   Sub
        
End   If
        
' 过滤响应错误的情况
         If  Http.Status = 200   Then
            Str_Html
= BytesToBstr(Http.ResponseBody)
        
Else
            Str_Html
= " $False$ "
        
End   If
        
Set  Http = Nothing
        
If  Err.Number <> 0   Then
            Err.Clear
        
End   if
    
End Sub

    
' 编码转换
     Private   Function  BytesToBstr(Body)
        
Dim  ObjStream
        
Set  ObjStream  =  Server.CreateObject( " AdoDB.Stream " )
        ObjStream.Type 
=   1
        ObjStream.Mode 
=   3
        ObjStream.Open
        ObjStream.Write Body
        ObjStream.Position 
=   0
        ObjStream.Type 
=   2
        ObjStream.Charset 
=  CharSet
        BytesToBstr 
=  ObjStream.ReadText
        ObjStream.Close()
        
Set  ObjStream  =   Nothing
    
End Function

    
' 保存HTML源码到文件中
     Public   Sub  saveFile(Out_Path)
        
Dim  ObjStream,Stream
        
Set  ObjStream  =  Server.CreateObject( " AdoDB.Stream " )
        ObjStream.Type 
=   2
        ObjStream.Charset 
=  CharSet
        ObjStream.Open
        ObjStream.WriteText Str_Html
        ObjStream.SaveToFile Server.Mappath(Out_Path),
2
        ObjStream.Close()
        
Set  ObjStream  =   Nothing
    
End Sub


    
' 删除HTML中里面的换行、回车、换页
     Public   Sub  delNRF()
        PT_RegExp.Global
= True              ' 设置全程匹配
        PT_RegExp.Pattern = " | | "
        Str_Html
= PT_RegExp.Replace(Str_Html, "" )
    
End Sub

    
' 替换HTML中里面字符串(Get_StrOld为被替换的字符串,Get_StrNew为替换字符串)
     Public   Sub  replaceText(Get_StrOld,Get_StrNew)
        Str_Html
= Replace (Str_Html,Get_StrOld,Get_StrNew)
    
End Sub

    
' 查找指定首尾字符串的第一个字符串,返回查找到的值(Get_StrS为指定首字符串,Get_StrNew为指定尾字符串,Get_Start为开始查找位置,Get_Include为是否包含首尾字符串)
     Public   Function  SelectTextFirst(Get_StrS,Get_StrE,Get_Start,Get_Include)
        
If  Get_StrS = ""   Or  Get_StrE = ""   Then
            SelectTextFirst
= ""
            
Exit   Function
        
End   If
        
Dim  SN,EN,SL,EL,Total,Start,i,str,str_arr     ' 首字符串位置,尾字符串位置,首字符串长度,尾字符串长度,总长度,开始位置
        Start = Abs (Get_Start)
        
If  Start = 0   Then  Start = 1
        Total
= Len (Str_Html)
        SL
= Len (Get_StrS)
        EL
= Len (Get_StrE)
        SN
= Instr (Start,Str_Html,Get_StrS)
        EN
= Instr (Start,Str_Html,Get_StrE)
        
If  SN > 0   and  EN > 0   Then
            
If  Get_Include  Then
                i
= Total - SN + 1
                str
= Right (Str_Html,i)
                i
= EN - SN + EL
                str
= Left (str,i)
            
Else
                i
= Total - SN - SL + 1
                str
= Right (Str_Html,i)
                i
= EN - SN - SL
                str
= Left (str,i)
            
End   If
            SelectTextFirst
= str
        
Else
            SelectTextFirst
= ""
        
End   If
    
End Function

    
' 查找指定首尾字符串的一组字符串,返回查找到的值的数组(Get_StrS为指定首字符串,Get_StrNew为指定尾字符串,Get_Start为开始查找位置,Get_Include为是否包含首尾字符串)
     Public   Function  SelectText(Get_StrS,Get_StrE,Get_Start,Get_Include)
        
If  Get_StrS = ""   Or  Get_StrE = ""   Then
            SelectText
= Null
            
Exit   Function
        
End   If
        
Dim  SN,EN,SL,EL,Total,Start,i,str,str_arr     ' 首字符串位置,尾字符串位置,首字符串长度,尾字符串长度,总长度,开始位置
        Start = Abs (Get_Start)
        
If  Start = 0   Then  Start = 1
        Total
= Len (Str_Html)
        SL
= Len (Get_StrS)
        EL
= Len (Get_StrE)
        str_arr
= ""
        
Do   While  Start < Total
            SN
= Instr (Start,Str_Html,Get_StrS)
            EN
= Instr (Start,Str_Html,Get_StrE)
            
If  SN < 1   and  EN < 1   Then   Exit   Do      ' 当找不到时退出循环
             If  Get_Include  Then
                i
= Total - SN + 1
                str
= Right (Str_Html,i)
                i
= EN - SN + EL
                str
= Left (str,i)
            
Else
                i
= Total - SN - SL + 1
                str
= Right (Str_Html,i)
                i
= EN - SN - SL
                str
= Left (str,i)
            
End   If
            Start
= EN + Len (Get_StrE)
            str_arr
= str_arr & str & " $|||$ "
        
Loop
        
If  str_arr = ""   Then
            SelectText
= Null
        
Else
            str_arr
= Left (str_arr, Len (str_arr) - Len ( " $|||$ " ))         ' 去掉最后一个分隔符
            SelectText = Split (str_arr, " $|||$ " )         ' 输出数组
         End   If
    
End Function

    
' 正则表达式替换字符串(Get_StrOld为要被替换的匹配模式,Get_StrNew为替换匹配模式)
     Public   Sub  RegExpRepl(Get_PatternOld,Get_PatternNew)
        PT_RegExp.Global
= True              ' 设置全程匹配
        PT_RegExp.IgnoreCase = IgnoreCase
        PT_RegExp.Pattern
= Get_PatternOld
        Str_Html
= PT_RegExp.Replace(Str_Html,Get_PatternNew)
    
End Sub

    
' 正则表达式匹配第一个字符串(Get_Pattern为匹配模式,Get_SubMatches为子匹配[0表示返回匹配,不为0返回相应子匹配])
     Public   Function  RegExpExecFirst(Get_Pattern,Get_SubMatches)
        
Dim     Matches
        PT_RegExp.Global
= False              ' 设置匹配第一个
        PT_RegExp.IgnoreCase = IgnoreCase
        PT_RegExp.Pattern
= Get_Pattern
        
Set  Matches  =  PT_RegExp.Execute(Str_Html)
        
If  Matches.Count  =   0   Then
            RegExpExecFirst
= ""
        
Else
            
If  Get_SubMatches = 0   Then
                RegExpExecFirst
= Matches.Item( 0 ).Value
            
Else
                RegExpExecFirst
= Matches.Item( 0 ).SubMatches(Get_SubMatches - 1 )
            
End   If
        
End   If
        PT_RegExp.Global
= True              ' 设置全程匹配
     End Function

    
' 正则表达式匹配字符串,返回数组(Get_Pattern为匹配模式,Get_SubMatches为子匹配[0表示返回匹配,不为0返回相应子匹配])
     Public   Function  RegExpExec(Get_Pattern,Get_SubMatches)
        
Dim     Matches,Match,str_arr
        PT_RegExp.Global
= True              ' 设置全程匹配
        PT_RegExp.IgnoreCase = IgnoreCase
        PT_RegExp.Pattern
= Get_Pattern
        
Set  Matches  =  PT_RegExp.Execute(Str_Html)
        str_arr
= ""
        
For   Each  Match in Matches         '  遍历 Matches 集合。
             If  Get_SubMatches = 0   Then
                str_arr
= str_arr & Match.Value & " $|||$ "
            
Else
                str_arr
= str_arr & Match.SubMatches(Get_SubMatches - 1 ) & " $|||$ "
            
End   If
        
Next
        
If  str_arr = ""   Then
            RegExpExec
= Null
        
Else
            str_arr
= Left (str_arr, Len (str_arr) - Len ( " $|||$ " ))         ' 去掉最后一个分隔符
            RegExpExec = Split (str_arr, " $|||$ " )         ' 输出数组
         End   If
    
End Function

    
' 正则表达式匹配检测,返回Boolean值指示是否找到(Get_Pattern为匹配模式)
     Public   Function  RegExpTest(Get_Pattern)
        PT_RegExp.Global
= False
        PT_RegExp.Pattern
= Get_Pattern
        RegExpTest
= PT_RegExp.Test(Str_Html)
        PT_RegExp.Global
= True              ' 设置全程匹配
     End Function

    
' 取得目标网址的二进制流
     Private   Function  getBinary (Get_Url)
        
If  Get_Url = ""   Or  Get_Url = " $False$ "   Then
            getBinary
= Null
            
Exit   Function
        
End   If
        
Dim  Binary
        
On   Error   Resume   Next
        
Set  Binary = Server.CreateObject( " MSXML2.XMLHTTP " )
        Binary.Open 
" GET " ,Get_Url, False
        Binary.Send()
        
If  Binary.ReadyState <> 4   Then
            
Set  Binary = Nothing
            getBinary
= Null
            
Exit   Function
        
End   If
        
' 过滤响应错误的情况
         If  Binary.Status = 200   Then
            getBinary
= Binary.ResponseBody
        
Else
            getBinary
= Null
        
End   If
        
Set  Binary = Nothing
        
If  Err.Number <> 0   Then
            Err.Clear
        
End   if
    
End Function

    
' 根据目标网址保存二进制文件,返回保存的文件路径(Get_Url为要保存的远程文件[要求完整URL],Out_Path为保存的路径前缀[使用虚路径])
     Public   Function  saveBinaryFile(Get_Url,Out_Path)
        
Dim  ObjStream,Stream,Matches
        Stream
= getBinary(Get_Url)
        
If   IsNull (Stream)  Then      ' 当无法获取二进制文件时退出函数,并返回远程文件URL
            saveBinaryFile = Get_Url
            
Exit   Function
        
End   If
        
' 从URL中取得文件名
        PT_RegExp.IgnoreCase = IgnoreCase
        PT_RegExp.Pattern
= " ^http://.+?/(.+)$ "
        
Set  Matches  =  PT_RegExp.Execute(Get_Url)
        
If  Matches.Count  =   0   Then      ' 当取没有相匹配文件名时退出函数,并返回远程文件URL
            saveBinaryFile = Get_Url
            
Exit   Function
        
Else
            Out_Path
= Out_Path & Replace (Matches.Item( 0 ).SubMatches( 0 ), " / " , " - " )
        
End   If
        
Set  ObjStream  =  Server.CreateObject( " AdoDB.Stream " )
        ObjStream.Type 
=   1
        ObjStream.Open
        ObjStream.Write Stream
        ObjStream.SaveToFile Server.Mappath(Out_Path),
2
        ObjStream.Close()
        
Set  ObjStream  =   Nothing
        saveBinaryFile
= Out_Path         ' 返回本地保存的文件路径
     End Function
End  Class
%
>
 
详细内容见: http://www.qlolo.com/?m=pc&a=page_fh_diary&target_c_diary_id=879
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值