一个asp采集程序类

'================================================== '函数名:GetHttpPage '作 用:获取网页源码 '参 数:HttpUrl ------网页地址 '================================================== Function GetHttpPage(HttpUrl) If IsNull(HttpUrl)=True Or Len(HttpUrl)<18 Or HttpUrl="$False$" Then GetHttpPage="$False$" Exit Function End If Dim Http Set Http=server.createobject("MSXML2.XMLHTTP") Http.open "GET",HttpUrl,False Http.Send() If Http.Readystate<>4 then Set Http=Nothing GetHttpPage="$False$" Exit function End if GetHTTPPage=bytesToBSTR(Http.responseBody,"GB2312") Set Http=Nothing If Err.number<>0 then Err.Clear End If End Function '================================================== '函数名:BytesToBstr '作 用:将获取的源码转换为中文 '参 数:Body ------要转换的变量 '参 数:Cset ------要转换的类型 '================================================== Function BytesToBstr(Body,Cset) Dim Objstream Set Objstream = Server.CreateObject("adodb.stream") objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function '================================================== '函数名:UrlEncoding '作 用:转换编码 '================================================== Function UrlEncoding(DataStr) Dim StrReturn,Si,ThisChr,InnerCode,Hight8,Low8 StrReturn = "" For Si = 1 To Len(DataStr) ThisChr = Mid(DataStr,Si,1) If Abs(Asc(ThisChr)) < &HFF Then StrReturn = StrReturn & ThisChr Else InnerCode = Asc(ThisChr) If InnerCode < 0 Then InnerCode = InnerCode + &H10000 End If Hight8 = (InnerCode And &HFF00)/ &HFF Low8 = InnerCode And &HFF StrReturn = StrReturn & "%" & Hex(Hight8) & "%" & Hex(Low8) End If Next UrlEncoding = StrReturn End Function '================================================== '函数名:GetBody '作 用:截取字符串 '参 数:ConStr ------将要截取的字符串 '参 数:StartStr ------开始字符串 '参 数:OverStr ------结束字符串 '参 数:IncluL ------是否包含StartStr '参 数:IncluR ------是否包含OverStr '================================================== Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR) If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True Or StartStr="" or IsNull(StartStr)=True Or OverStr="" or IsNull(OverStr)=True Then GetBody="$False$" Exit Function End If Dim ConStrTemp Dim Start,Over ConStrTemp=Lcase(ConStr) StartStr=Lcase(StartStr) OverStr=Lcase(OverStr) Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare) If Start<=0 then GetBody="$False$" Exit Function Else If IncluL=False Then Start=Start+LenB(StartStr) End If End If Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare) If Over<=0 Or Over<=Start then GetBody="$False$" Exit Function Else If IncluR=True Then Over=Over+LenB(OverStr) End If End If GetBody=MidB(ConStr,Start,Over-Start) End Function '================================================== '函数名:GetArray '作 用:提取链接地址,以$Array$分隔 '参 数:ConStr ------提取地址的原字符 '参 数:StartStr ------开始字符串 '参 数:OverStr ------结束字符串 '参 数:IncluL ------是否包含StartStr '参 数:IncluR ------是否包含OverStr '================================================== Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR) If ConStr="$False$" or ConStr="" Or IsNull(ConStr)=True or StartStr="" Or OverStr="" or IsNull(StartStr)=True Or IsNull(OverStr)=True Then GetArray="$False$" Exit Function End If Dim TempStr,TempStr2,objRegExp,Matches,Match TempStr="" Set objRegExp = New Regexp objRegExp.IgnoreCase = True objRegExp.Global = True objRegExp.Pattern = "("&StartStr&").+?("&OverStr&")" Set Matches =objRegExp.Execute(ConStr) For Each Match in Matches TempStr=TempStr & "$Array$" & Match.Value Next Set Matches=nothing If TempStr="" Then GetArray="$False$" Exit Function End If TempStr=Right(TempStr,Len(TempStr)-7) If IncluL=False then objRegExp.Pattern =StartStr TempStr=objRegExp.Replace(TempStr,"") End if If IncluR=False then objRegExp.Pattern =OverStr TempStr=objRegExp.Replace(TempStr,"") End if Set objRegExp=nothing Set Matches=nothing TempStr=Replace(TempStr,"""","") TempStr=Replace(TempStr,"'","") TempStr=Replace(TempStr," ","") TempStr=Replace(TempStr,"(","") TempStr=Replace(TempStr,")","") If TempStr="" then GetArray="$False$" Else GetArray=TempStr End if End Function '================================================== '函数名:DefiniteUrl '作 用:将相对地址转换为绝对地址 '参 数:PrimitiveUrl ------要转换的相对地址 '参 数:ConsultUrl ------当前网页地址 '================================================== Function DefiniteUrl(Byval PrimitiveUrl,Byval ConsultUrl) Dim ConTemp,PriTemp,Pi,Ci,PriArray,ConArray If PrimitiveUrl="" or ConsultUrl="" or PrimitiveUrl="$False$" or ConsultUrl="$False$" Then DefiniteUrl="$False$" Exit Function End If If Left(Lcase(ConsultUrl),7)<>"http://" Then ConsultUrl= "http://" & ConsultUrl End If ConsultUrl=Replace(ConsultUrl,"/","/") ConsultUrl=Replace(ConsultUrl,"://","://") PrimitiveUrl=Replace(PrimitiveUrl,"/","/") If Right(ConsultUrl,1)<>"/" Then If Instr(ConsultUrl,"/")>0 Then If Instr(Right(ConsultUrl,Len(ConsultUrl)-InstrRev(ConsultUrl,"/")),".")>0 then Else ConsultUrl=ConsultUrl & "/" End If Else ConsultUrl=ConsultUrl & "/" End If End If ConArray=Split(ConsultUrl,"/") If Left(LCase(PrimitiveUrl),7) = "http://" then DefiniteUrl=Replace(PrimitiveUrl,"://","://") ElseIf Left(PrimitiveUrl,1) = "/" Then DefiniteUrl=ConArray(0) & PrimitiveUrl ElseIf Left(PrimitiveUrl,2)="./" Then PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-2) If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl End If ElseIf Left(PrimitiveUrl,3)="../" then Do While Left(PrimitiveUrl,3)="../" PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-3) Pi=Pi+1 Loop For Ci=0 to (Ubound(ConArray)-1-Pi) If DefiniteUrl<>"" Then DefiniteUrl=DefiniteUrl & "/" & ConArray(Ci) Else DefiniteUrl=ConArray(Ci) End If Next DefiniteUrl=DefiniteUrl & "/" & PrimitiveUrl Else If Instr(PrimitiveUrl,"/")>0 Then PriArray=Split(PrimitiveUrl,"/") If Instr(PriArray(0),".")>0 Then If Right(PrimitiveUrl,1)="/" Then DefiniteUrl="http://" & PrimitiveUrl Else If Instr(PriArray(Ubound(PriArray)-1),".")>0 Then DefiniteUrl="http://" & PrimitiveUrl Else DefiniteUrl="http://" & PrimitiveUrl & "/" End If End If Else If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl End If End If Else If Instr(PrimitiveUrl,".")>0 Then If Right(ConsultUrl,1)="/" Then If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then DefiniteUrl="http://" & PrimitiveUrl & "/" Else DefiniteUrl=ConsultUrl & PrimitiveUrl End If Else If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then DefiniteUrl="http://" & PrimitiveUrl & "/" Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl End If End If Else If Right(ConsultUrl,1)="/" Then DefiniteUrl=ConsultUrl & PrimitiveUrl & "/" Else DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl & "/" End If End If End If End If If Left(DefiniteUrl,1)="/" then DefiniteUrl=Right(DefiniteUrl,Len(DefiniteUrl)-1) End if If DefiniteUrl<>"" Then DefiniteUrl=Replace(DefiniteUrl,"//","/") DefiniteUrl=Replace(DefiniteUrl,"://","://") Else DefiniteUrl="$False$" End If End Function '================================================== '函数名:ReplaceSaveRemoteFile '作 用:替换、保存远程图片 '参 数:ConStr ------ 要替换的字符串 '参 数:SaveTf ------ 是否保存文件,False不保存,True保存 '参 数: TistUrl------ 当前网页地址 '================================================== Function ReplaceSaveRemoteFile(ConStr,strInstallDir,strChannelDir,SaveTf,TistUrl) If ConStr="$False$" or ConStr="" or strInstallDir="" or strChannelDir="" Then ReplaceSaveRemoteFile=ConStr Exit Function End If Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True Re.Pattern ="<img.+?[^/>]>" Set Matches =Re.Execute(ConStr) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next If TempStr<>"" Then TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) Re.Pattern ="src/s*=/s*.+?/.(gif|jpg|bmp|jpeg|psd|png|svg|dxf|wmf|tiff)" Set Matches =Re.Execute(TempArray(Tempi)) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next Next End if If TempStr<>"" Then Re.Pattern ="src/s*=/s*" TempStr=Re.Replace(TempStr,"") End If Set Matches=nothing Set Re=nothing If TempStr="" or IsNull(TempStr)=True Then ReplaceSaveRemoteFile=ConStr Exit function End if TempStr=Replace(TempStr,"""","") TempStr=Replace(TempStr,"'","") TempStr=Replace(TempStr," ","") Dim RemoteFileurl,SavePath,PathTemp,DtNow,strFileName,strFileType,ArrSaveFileName,RanNum,Arr_Path DtNow=Now() If SaveTf=True then '*********************************** SavePath= strChannelDir & "/" & year(DtNow) & right("0" & month(DtNow),2) & "/"    response.write "链接路径:" & savepath & "<br>" Arr_Path=Split(SavePath,"/") PathTemp="" For Tempi=0 To Ubound(Arr_Path) If Tempi=0 Then PathTemp=Arr_Path(0) & "/" ElseIf Tempi=Ubound(Arr_Path) Then Exit For Else PathTemp=PathTemp & Arr_Path(Tempi) & "/" End If If CheckDir(PathTemp)=False Then If MakeNewsDir(PathTemp)=False Then SaveTf=False Exit For End If End If Next End If '去掉重复图片开始 TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then TempStr=TempStr & "$Array$" & TempArray(Tempi) End If Next TempStr=Right(TempStr,Len(TempStr)-7) TempArray=Split(TempStr,"$Array$") '去掉重复图片结束 '转换相对图片地址开始 TempStr="" For Tempi=0 To Ubound(TempArray) TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl) Next TempStr=Right(TempStr,Len(TempStr)-7) TempStr=Replace(TempStr,Chr(0),"") TempArray2=Split(TempStr,"$Array$") TempStr="" '转换相对图片地址结束 '图片替换/保存 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True For Tempi=0 To Ubound(TempArray2) RemoteFileUrl=TempArray2(Tempi) If RemoteFileUrl<>"$False$" And SaveTf=True Then'保存图片 ArrSaveFileName = Split(RemoteFileurl,".")    strFileType=Lcase(ArrSaveFileName(Ubound(ArrSaveFileName)))'文件类型 If strFileType="asp" or strFileType="asa" or strFileType="aspx" or strFileType="cer" or strFileType="cdx" or strFileType="exe" or strFileType="rar" or strFileType="zip" then UploadFiles="" ReplaceSaveRemoteFile=ConStr Exit Function End If Randomize RanNum=Int(900*Rnd)+100    strFileName = year(DtNow) & right("0" & month(DtNow),2) & right("0" & day(DtNow),2) & right("0" & hour(DtNow),2) & right("0" & minute(DtNow),2) & right("0" & second(DtNow),2) & ranNum & "." & strFileType Re.Pattern =TempArray(Tempi)    If SaveRemoteFile(SavePath & strFileName,RemoteFileUrl)=True Then '******************************** PathTemp=SavePath & strFileName ConStr=Re.Replace(ConStr,PathTemp) Re.Pattern=strInstallDir & strChannelDir & "/" UploadFiles=UploadFiles & "|" & Re.Replace(SavePath &strFileName,"") Else PathTemp=RemoteFileUrl ConStr=Re.Replace(ConStr,PathTemp) 'UploadFiles=UploadFiles & "|" & RemoteFileUrl End If ElseIf RemoteFileurl<>"$False$" and SaveTf=False Then'不保存图片 Re.Pattern =TempArray(Tempi) ConStr=Re.Replace(ConStr,RemoteFileUrl) UploadFiles=UploadFiles & "|" & RemoteFileUrl End If Next Set Re=nothing If UploadFiles<>"" Then UploadFiles=Right(UploadFiles,Len(UploadFiles)-1) End If ReplaceSaveRemoteFile=ConStr End function '================================================== '函数名:ReplaceSwfFile '作 用:解析动画路径 '参 数:ConStr ------ 要替换的字符串 '参 数: TistUrl------ 当前网页地址 '================================================== Function ReplaceSwfFile(ConStr,TistUrl) If ConStr="$False$" or ConStr="" or TistUrl="" or TistUrl="$False$" Then ReplaceSwfFile=ConStr Exit Function End If Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True Re.Pattern ="<object.+?[^/>]>" Set Matches =Re.Execute(ConStr) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next If TempStr<>"" Then TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) Re.Pattern ="value/s*=/s*.+?/.swf" Set Matches =Re.Execute(TempArray(Tempi)) For Each Match in Matches If TempStr<>"" then TempStr=TempStr & "$Array$" & Match.Value Else TempStr=Match.Value End if Next Next End if If TempStr<>"" Then Re.Pattern ="value/s*=/s*" TempStr=Re.Replace(TempStr,"") End If If TempStr="" or IsNull(TempStr)=True Then ReplaceSwfFile=ConStr Exit function End if TempStr=Replace(TempStr,"""","") TempStr=Replace(TempStr,"'","") TempStr=Replace(TempStr," ","") Set Matches=nothing Set Re=nothing '去掉重复文件开始 TempArray=Split(TempStr,"$Array$") TempStr="" For Tempi=0 To Ubound(TempArray) If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then TempStr=TempStr & "$Array$" & TempArray(Tempi) End If Next TempStr=Right(TempStr,Len(TempStr)-7) TempArray=Split(TempStr,"$Array$") '去掉重复文件结束 '转换相对地址开始 TempStr="" For Tempi=0 To Ubound(TempArray) TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl) Next TempStr=Right(TempStr,Len(TempStr)-7) TempStr=Replace(TempStr,Chr(0),"") TempArray2=Split(TempStr,"$Array$") TempStr="" '转换相对地址结束 '替换 Set Re = New Regexp Re.IgnoreCase = True Re.Global = True For Tempi=0 To Ubound(TempArray2) RemoteFileUrl=TempArray2(Tempi) Re.Pattern =TempArray(Tempi) ConStr=Re.Replace(ConStr,RemoteFileUrl) Next Set Re=nothing ReplaceSwfFile=ConStr End function '================================================== '过程名:SaveRemoteFile '作 用:保存远程的文件到本地 '参 数:LocalFileName ------ 本地文件名 '参 数:RemoteFileUrl ------ 远程文件URL '================================================== Function SaveRemoteFile(LocalFileName,RemoteFileUrl) SaveRemoteFile=True   dim Ads,Retrieval,GetRemoteData   Set Retrieval = Server.CreateObject("Microsoft.XMLHTTP")   With Retrieval     .Open "Get", RemoteFileUrl, False, "", ""     .Send If .Readystate<>4 then SaveRemoteFile=False Exit Function End If     GetRemoteData = .ResponseBody   End With   Set Retrieval = Nothing   Set Ads = Server.CreateObject("Adodb.Stream")   With Ads     .Type = 1     .Open     .Write GetRemoteData     .SaveToFile server.MapPath(LocalFileName),2     .Cancel()     .Close()   End With   Set Ads=nothing end Function '================================================== '函数名:FpHtmlEnCode '作 用:标题过滤 '参 数:fString ------字符串 '================================================== Function FpHtmlEnCode(fString) If IsNull(fString)=False or fString<>"" or fString<>"$False$" Then fString=nohtml(fString) fString=FilterJS(fString) fString = Replace(fString," "," ") fString = Replace(fString,""","") fString = Replace(fString,"'","") fString = replace(fString, ">", "") fString = replace(fString, "<", "") fString = Replace(fString, CHR(9), " ")'  fString = Replace(fString, CHR(10), "") fString = Replace(fString, CHR(13), "") fString = Replace(fString, CHR(34), "") fString = Replace(fString, CHR(32), " ")'space fString = Replace(fString, CHR(39), "") fString = Replace(fString, CHR(10) & CHR(10),"") fString = Replace(fString, CHR(10)&CHR(13), "") fString=Trim(fString) FpHtmlEnCode=fString Else FpHtmlEnCode="$False$" End If End Function
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值