google,baidu,alexa,pr查询代码(修正alexa问题)

研究了两天搞出以下代码,共享给各个同行,我只贴过程函数出来,百度是网上找来的,其实其他三个也有代码,只是对应的网页代码改了,数据抓取不对,我重写了这三个函数,尤其是alexa罗索....


sub GoogleRank(strurl,id)
Set R=Server.CreateObject("Microsoft.XmlHttp")
R.Open "GET",Url,False,"",""
R.SetRequestHeader "Referer",Url
R.Send
str1=B2S(R.ResponseBody)
str1=replace(str1,",","")
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<font color=#FB5E3C>"
str_bottom="</font>"
reg.Pattern=""&str_top&"((.|/n)*?)"&str_bottom&""
Set matches = reg.execute(str1)
str1=""
For Each match1 in matches
str1=match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1=replace(replace(str1,str_top,""),str_bottom,"")
conn.execute("update webtable set pr='"&str1&"' where id="&id)
end sub

Sub Error(str)
select case str
case 1
response.write "<BR>&nbsp;&nbsp;搜索引擎为空,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a>"
case 2
response.write "<BR>&nbsp;&nbsp;站点名字为空,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a>"
case 3
response.write "<BR>&nbsp;&nbsp;你输入的搜索引擎本程序不支持,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a></body></html>"
case 4
response.write "<BR>&nbsp;&nbsp;未知错误--抓取不到数据 请<font color=blue><a href=javascript:location.reload();>刷新</a></font>重试</body></html>"
end select
response.end
End Sub
Sub google(strurl,id,all)
url="http://www.google.cn/search?complete=1&hl=zh-CN&q=site%3A"&strUrl&"&meta="
str= getHTTPPage(url)
if str="" then
conn.execute("update webtable set google='0' where id="&id)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true

str_top="<td align=right nowrap><font size=-1>"
str_bottom="</font></td></tr></table>"
reg.Pattern=""&str_top&"((.)*)"&str_bottom&""
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing

if instr(str,"<html>")=1 then
str2=0
else
str=split(str,"</b>")
str1=str(3)
str2=split(str1,"<b>")(1)
end if
if str2="" or len(str2)>200 then
conn.execute("update webtable set google='0' where id="&id)
else
conn.execute("update webtable set google='"&str2&"' where id="&id)
end if
end if
End Sub
Sub baidu(str,id,all)
'call print_do("baidu")
if all="n" then
url="http://www.baidu.com/s?wd=site%3A"&str&"&cl=3"
else
strext=split(str,".")
url="http://www.baidu.com/s?wd="&strext(0)&"&cl=3"
end if
'response.Write("<br>baidu's url:"&url)
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if

if str="" then
Call Error(4)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top="<td align=""right"" nowrap>"
str_bottom="</td>"
reg.Pattern=""&str_top&"((.|/n)*?)"&str_bottom&""
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
Set reg = Nothing
response.write "<BR>"
'response.write "&nbsp;&nbsp;"

if str="" or len(str)>200 then
conn.execute("update webtable set baidu='0' where id="&id)
else
if instr(str,"约")=0 then
keyw="页"
else
keyw="约"
end if
str=Mid(str,(InStr(str,keyw)+1),(InStr(str,"篇")-InStr(str,keyw)-1))
response.write str
conn.execute("update webtable set baidu='"&replace(replace(str,",","")," ","")&"' where id="&id)
end if

end if
End Sub

Sub alexa(strurl,id)
url="http://www.alexa.com/data/details/traffic_details?q=&url="&strurl
str1=GetRemoteData(url)
str1=replace(str1,chr(10),"")
str1=replace(str1,chr(13),"")
str1=replace(str1,"  ","")
str1=replace(str1,vbCrLf,"")
str1=replace(str1,"&nbsp;","")
str1=split(str1,"rank of:</span>")(1)
str1=split(str1,"</span><br>")(0)
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
reg.Pattern = "<!--(.+?)-->"
str1 = reg.Replace(str1,"")
if str1<>"" then
str1=replace(str1,"<span class","")
str1=replace(str1,"</span></span>","</span>")
str1=replace(str1,"""","")
str1=replace(str1,",","")
str1=replace(str1,"=descBold>","")
if isnumeric(str1) then
num=str1
else
csstxt=GetAlexaCss()
num=""
str1=split(str1,"</span>")
for i=0 to ubound(str1)
  str2=str1(i) 
  if instr(str2,">")>0 then
  s1=split(str2,">")(0)
  if split(s1,"=")(0)<>"" then
  num=num&split(s1,"=")(0)
  end if
  if instr(csstxt,split(s1,"=")(1))=0 then
  num=num&split(str2,">")(1)
  end if
  else
  num=num&str2
  end if
next
end if
else
num=0
end if
End Sub

 

Function B2S(Str)
Dim O
Set O = Server.CreateObject("adodb.stream")
O.Type = 1
O.Mode =3
O.Open
O.Write Str
O.Position = 0
O.Type = 2
O.Charset = "GB2312"
B2S=O.ReadText
O.Close
Set O = nothing
End Function

'获取alexa的样式表
Function GetAlexaCss()
url="http://client.alexa.com/common/css/scramble.css"
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
GetAlexaCss=str
end function
Sub print_do(str)
response.write "<script>"
response.write "function HiddenLoad()"
response.write "{"
response.write "parent.do"&str&".style.display='none';"
response.write "}"
response.write "</script>"
response.write "<body leftmargin=0 topmargin=0 marginwidth=0 marginheight=0 bgcolor=#f2f2f2 οnlοad=HiddenLoad()>"
end sub
Function getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject("Microsoft.XMLHTTP")
Http.open "GET",url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End function
Function Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject("ADODB.Stream")
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = "GB2312"
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End Function
Function getaspHTTPPage(url)
if url="" then
exit function
end if
Set HttpObj = Server.CreateObject("AspHTTP.Conn")

'设置代理服务器,通过代理上网的用户需要设置此选项
If ProxyIP=1 Then
HttpObj.Proxy="192.168.5.254:808"
end if

HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = "GET"
getaspHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End function
Function IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString)
If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write "当前组件 ASPHTTP"
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Set xTestObj = Nothing
Err = 0

End Function  

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值