# _*_coding:UTF-8 _*_
import re
import urllib
def get_content(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever the response object's ``read()`` yields: ``bytes`` on
        Python 3, ``str`` on Python 2. No decoding is applied here.

    Raises:
        Any error raised by ``urlopen`` or ``read`` propagates unchanged.
    """
    # urlopen lives in urllib.request on Python 3 and directly in urllib on
    # Python 2; resolve it locally so the function works on either.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails part-way through.
        response.close()
def get_images(info, save_dir='.'):
    """Download every ``.jpg`` post image referenced in a page's HTML.

    An image is picked up when its tag contains ``class="BDE_Image"``
    immediately followed by a ``src="....jpg"`` attribute. Each URL is
    printed and then saved as ``0.jpg``, ``1.jpg``, ... in match order.

    Args:
        info: Page source, as text or as raw bytes.
        save_dir: Existing directory to write the files into. Defaults to
            the current working directory; it is not created here.

    Returns:
        None.
    """
    # Local imports keep the block self-contained. urlretrieve lives in
    # urllib.request on Python 3 and directly in urllib on Python 2.
    import os
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # On Python 3 a downloaded page is bytes, which a text pattern cannot
    # search. On Python 2 bytes is str, so this branch never runs there.
    if isinstance(info, bytes) and not isinstance(info, str):
        info = info.decode('utf-8', 'ignore')

    # The r'' prefix makes a raw string: backslashes are kept literally
    # instead of being read as escapes. For example r'test\tddd' prints
    # test\tddd, while 'test\tddd' prints "test", a tab, then "ddd".
    pattern = re.compile(r'class="BDE_Image" src="(.+?\.jpg)"')

    # Number the files by their position in the page.
    for index, image_url in enumerate(pattern.findall(info)):
        print(image_url)
        urlretrieve(image_url, os.path.join(save_dir, '%s.jpg' % index))
# Download the thread page, then save every image found in it.
# The printed value is get_images()'s return value.
thread_url = 'http://tieba.baidu.com/p/2772656630'
info = get_content(thread_url)
print(get_images(info))
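# Article title: "Scraping Baidu Tieba images with Python" (python爬取百度贴吧图片)
# Blog page footer: latest recommended article published 2021-02-10 18:21:39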