官方链接:http://www.robots.ox.ac.uk/~vgg/data/vgg_face/
首先去官网下载 vgg_face_dataset.tar.gz 并解压,得到 ./vgg_face_dataset/files/ 目录(其中的 txt 文件每行列出图片编号和图片 URL)
python3下载程序:
#!/usr/bin/python3
#-*- coding: utf-8 -*-
import os
import socket
import sys
import threading
import time
import urllib.request
# Process-wide default timeout (in seconds) applied to every socket created
# after this call, so a stalled connection fails instead of blocking forever.
timeout = 4
socket.setdefaulttimeout(timeout)
def download_and_save(url,savename):
    """Download the data behind a remote URL and save it to a local file.

    This is a best-effort operation: a failure is reported on stdout and
    swallowed rather than raised, so a single bad URL does not abort the
    caller (typically a worker thread).

    Args:
        url: Address of the resource, passed to ``urllib.request.urlopen``.
        savename: Path of the local file to create or overwrite with the
            downloaded bytes.

    Returns:
        None. Prints ``download succeed: <url>`` or ``download failed: <url>``.
    """
    try:
        # Context managers close the response and the file even when
        # read() or write() raises part-way through.
        with urllib.request.urlopen(url) as response:
            data = response.read()
        # The output file is opened only after the download succeeded, so a
        # failed fetch does not leave an empty file behind.
        with open(savename, 'w+b') as fid:
            fid.write(data)
    except (IOError, ValueError):
        # IOError (alias of OSError) covers URLError, socket timeouts and
        # local file errors; ValueError is what urlopen raises for a
        # malformed URL such as one without a scheme.
        print ("download failed: "+ url)
    else:
        print ("download succeed: "+ url)
def get_all_iamge(filename):
    """Start one download thread for every image listed in ``filename``.

    Each usable line of the list file holds whitespace-separated fields; the
    first is taken as the image id and the second as the image URL, and any
    further fields are ignored. Lines with fewer than two fields (for
    example blank lines) are skipped. Every image is saved as
    ``./vgg_face_dataset/images/<name>/<image_id>.jpg``, where ``<name>`` is
    the list file's base name without its extension.

    The function returns once the last thread has been started; it does not
    wait for the downloads to finish.

    Args:
        filename: Path of the text file listing the images to download.

    Returns:
        None.
    """
    # basename/splitext instead of split('/') and [:-4]: this works with
    # any path separator and any extension length.
    name = os.path.splitext(os.path.basename(filename))[0]
    image_dir = './vgg_face_dataset/images' + '/' + name

    # The context manager closes the list file even if a line fails.
    with open(filename) as fid:
        # Create the target directory once rather than checking per line;
        # makedirs also creates a missing parent directory, and exist_ok
        # tolerates a directory that is already there.
        os.makedirs(image_dir, exist_ok=True)
        for line in fid:
            # split() without arguments drops the trailing newline and
            # collapses repeated whitespace.
            fields = line.split()
            if len(fields) < 2:
                continue
            image_id, image_url = fields[0], fields[1]
            savefile = image_dir + '/' + image_id + '.jpg'
            print(image_url,savefile)
            # Keep fewer than 1000 threads alive; sleep briefly between
            # checks instead of spinning at full CPU while waiting.
            while len(threading.enumerate()) >= 1000:
                time.sleep(0.01)
            t = threading.Thread(target=download_and_save,args=(image_url,savefile,))
            t.start()
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the directory that
    # holds the image list files, and processes every file inside it.
    if len(sys.argv) != 2:
        print ('Usage:python %s faceUrl.txt'%(sys.argv[0]))
        # Non-zero status so a calling shell or script can detect the
        # usage error.
        sys.exit(1)
    fileDir = sys.argv[1]
    # Iterate the entries directly instead of indexing through a variable
    # named ``list``, which shadowed the builtin.
    for entry in os.listdir(fileDir):
        list_path = os.path.join(fileDir, entry)
        # Skip sub-directories and other non-file entries, which
        # get_all_iamge could not open as a list file.
        if not os.path.isfile(list_path):
            continue
        get_all_iamge(list_path)
调用方式:
python3 download.py ./vgg_face_dataset/files/