#coding:utf-8
import urllib
import re
# Page whose embedded images are downloaded.
url = "http://www.ithome.com/html/it/221721.htm"

# Fetch the raw page source. The response object is closed in `finally`
# so the connection is released even if read() raises.
html = urllib.urlopen(url)
try:
    content = html.read()
finally:
    html.close()

# Match the image feature with a regular expression and collect the image
# links: every `data-original="..."` attribute whose value ends in ".jpg".
img_tag = re.compile(r'data-original="(.+?\.jpg)"')
img_links = img_tag.findall(content)
print(img_links)

# Download the images. img_counter is the running index of the image and
# doubles as its file name (0.jpg, 1.jpg, ...).
# NOTE: the target directory is not created here; it must already exist.
for img_counter, img_link in enumerate(img_links):
    img_name = '%s.jpg' % img_counter
    urllib.urlretrieve(img_link, "D://dataset//%s" % img_name)