Fetching Baidu Search Results with Python: So Easy, and Decoding Baidu's Encrypted URLs

 

Python
import requests
from bs4 import BeautifulSoup as bs4  # import the modules

url = "http://www.baidu.com/s?ie=UTF-8&wd=%E6%98%8A%E5%A4%A9seo"
res = requests.get(url)
soup = bs4(res.text, "lxml")
for z in soup.select('h3[class="t"]'):  # iterate over Baidu's search-result blocks
    print(z.select('a')[0].get_text())  # the result title
    url = z.select('a')[0]['href']      # Baidu's encrypted redirect address
    print(requests.get(url).url)        # follow the redirect to get the real URL
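Fetching every result URL in full just to learn where it redirects downloads pages that are immediately thrown away. Here is a lighter sketch of the same loop, assuming Baidu still answers its /link?url=... addresses with a 302 redirect and a Location header; the User-Agent string is an arbitrary browser-like value, added because Baidu may serve different markup to the default requests agent:

Python
import requests
from bs4 import BeautifulSoup as bs4

# Assumption: Baidu still serves h3.t result blocks to a browser-like UA
# and still answers /link?url=... with a 302 plus a Location header.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

url = "http://www.baidu.com/s?ie=UTF-8&wd=%E6%98%8A%E5%A4%A9seo"
soup = bs4(requests.get(url, headers=HEADERS).text, "lxml")

for z in soup.select('h3[class="t"]'):
    link = z.select('a')[0]['href']  # encrypted baidu.com/link?url=... address
    # Do not follow the redirect; just read where it points.
    resp = requests.get(link, headers=HEADERS, allow_redirects=False)
    print(z.get_text(strip=True), resp.headers.get('Location'))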

 

Python
# -*- coding: utf-8 -*-
"""
@Time: 2018/5/16
@Author: songhao
@WeChat public account: zeropython
@File: bdspider.py
"""
from urllib.parse import quote

import requests
from scrapy import Selector

url1 = "http://www.baidu.com/s?wd={}".format(quote("雅昌"))

r = requests.get(url1)
selector = Selector(text=r.text, type="html")

for a in selector.xpath('//h3[contains(@class, "t")]'):
    # .//text() also collects the text of nested child tags
    print("".join(a.xpath('.//text()').extract()).strip())
    urlen = a.xpath('./a/@href').extract_first()  # encrypted redirect address
    print(urlen)
    # HEAD requests only the response headers, so the real address can be
    # read from Location without downloading the target page.
    print(requests.head(urlen).headers.get('Location'))

For how the text of nested tags is extracted here, see the sketch below for reference.
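A minimal, self-contained illustration (hypothetical markup, no network access needed) of why the loop above joins `.//text()` instead of taking `./a/text()`: the descendant axis also collects text inside nested tags such as the `<em>` highlighting in result titles.

Python
from scrapy import Selector

# Hypothetical result markup: a matched keyword sits inside a nested <em>.
html = '<h3 class="t"><a href="#">Baidu <em>encrypted</em> link demo</a></h3>'
node = Selector(text=html, type="html").xpath('//h3')[0]

# Direct text children of <a> only; the <em> content is skipped.
print(node.xpath('./a/text()').extract())  # ['Baidu ', ' link demo']

# All descendant text, joined, recovers the full visible title.
print("".join(node.xpath('.//text()').extract()).strip())  # Baidu encrypted link demo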

 

Python
# import the libraries
import requests
import urllib.request

bd_url = "https://www.baidu.com/link?url=mhMx_W4kSIqeHdckh0dvrBt4LDIxvTrf1XqoDQKAptW&" \
         "ck=5341.10.0.0.0.203.232.0&shh=www.baidu.com&sht=baiduhome_pg&wd=&" \
         "eqid=cd90b17a00034b1c000000035a645fd5"

# Decryption method 1: let requests follow the redirect, then read the final URL
r = requests.get(bd_url)
print(r.url)

# Decryption method 2: urllib
# Note: urlopen lives in a different module in Python 3 (urllib.request)
# than in Python 2.7 (urllib2).
r = urllib.request.urlopen(bd_url)
print(r.geturl())
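Both methods above fetch the target page just to learn its address. As a wrap-up, here is a hypothetical helper (the function name and fallback logic are mine, not from the original post) that combines the cheap HEAD trick from the previous script with a GET fallback:

Python
import requests

def resolve_baidu_link(link, timeout=5):
    """Resolve a baidu.com/link?url=... address to its real destination.

    Hypothetical helper: try a cheap HEAD with redirects disabled first,
    and fall back to a full GET if no Location header comes back.
    """
    try:
        r = requests.head(link, allow_redirects=False, timeout=timeout)
        location = r.headers.get('Location')
        if location:
            return location
    except requests.RequestException:
        pass
    # Fallback: let requests follow the redirect and report the final URL.
    return requests.get(link, timeout=timeout).url

print(resolve_baidu_link(bd_url))  # reuses bd_url from the snippet above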

 



