# 自定义一个 pipeline
# 图片下载类
class ImageDownloadPipeline(object):
def process_item(self, item, spider):
global img_index
#if 'image_urls' in item: # 如何‘图片地址’在项目中
imgPath="/home/abc/image" # 下载图片的保存路径
if not os.path.isdir(imgPath):
os.mkdir(imgPath)
for url in item["image_urls"]:
print("下载:", url)
# 未能正确获得网页 就进行异常处理
try:
res = urllib2.urlopen(url)
if str(res.status) != '200':
print('未下载成功:', url)
continue
except Exception as e:
print('未下载成功:', url)
filename = os.path.join(imgPath, str(img_index) + '.jpg')
with open(filename, 'wb') as f:
f.write(res.read())