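# In a web page's source, image information lives in <img> tags: <img src=...> gives the image's address, and <img class=...> indicates the kind of image (e.g. emoticon, regular picture, or advertisement).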
#--*coding:utf-8
import urllib
import re
# re: regular expressions, used to pull image URLs out of the page source
def get_content(url):
    """Fetch *url* and return the body of the response.

    Args:
        url: Address of the page to download.

    Returns:
        The data produced by ``read()`` on the opened response.
    """
    html = urllib.urlopen(url)
    try:
        # Read inside try/finally so the response is closed even when the
        # read fails part-way through.
        return html.read()
    finally:
        html.close()
def get_images(info):
    """Download every ``.jpg`` image matched in the page source *info*.

    The pattern targets tags of the form
    ``<img class="BDE_Image" src="http://....jpg" ...>`` and captures the
    ``src`` value. Each captured URL is printed and then saved under a
    relative file name ``0.jpg``, ``1.jpg``, ... in order of appearance.

    Args:
        info: HTML source text to scan.

    Returns:
        The list of image URLs that were found, in order of appearance
        (empty when nothing matches).
    """
    regex = r'class="BDE_Image" src="(.+?\.jpg)"'
    pat = re.compile(regex)
    image_urls = pat.findall(info)
    # enumerate supplies the running number used as the local file name.
    for index, image_url in enumerate(image_urls):
        print(image_url)
        urllib.urlretrieve(image_url, '%s.jpg' % index)
    # Hand the URLs back so a caller printing the result sees something
    # more useful than None.
    return image_urls
# Script entry: fetch one Tieba thread page and download the images found in it.
url = "https://tieba.baidu.com/p/2772656630"
info = get_content(url)
# Print whatever get_images hands back once the downloads have finished.
print(get_images(info))