import os
import time
import urllib
import urllib.request
from multiprocessing import Process
def download(url, filename):
    """Download *url* to local file *filename* (best-effort).

    On any failure the partially-written file is removed so a later
    size-based cleanup pass does not mistake it for a valid image.
    Errors are reported to stdout, never raised, so batch callers
    (one worker process per URL) keep going.
    """
    try:
        # Python 3: urlretrieve lives in urllib.request (Py2 had urllib.urlretrieve).
        urllib.request.urlretrieve(url, filename)
    except Exception:
        # Drop the partial/empty file left behind by a failed transfer.
        if os.path.isfile(filename):
            os.remove(filename)
        print("Failed Downloading... ", filename)
def getphotos(prefix='http://hwcdn.ddstatic.com/fhg/fhg_photos/2011_04_28/11121/roxanna_milan_11121_1-gal-1600-jpg/', count=20):
    """Download *count* sequentially numbered gallery images.

    Images are fetched from *prefix* + '0001.jpg' .. '%04d.jpg' % count
    and saved under the same names in the current directory.  Defaults
    preserve the original hard-coded gallery and image count.
    """
    for i in range(count):
        filename = '%04d.jpg' % (i + 1)
        print(filename)
        download(prefix + filename, filename)
def removepics(min_size=20000):
    """Delete undersized .jpg files from the current directory.

    Files smaller than *min_size* bytes are assumed to be failed or
    truncated downloads (e.g. saved error pages) and are removed.
    The 20000-byte default matches the original hard-coded threshold.

    Returns the number of files deleted.
    """
    count = 0
    for pic in os.listdir('.'):
        # os.path.getsize avoids the original's bugs: it neither decodes
        # binary JPEG data as text nor leaks an open file handle.
        if pic.endswith('.jpg') and os.path.getsize(pic) < min_size:
            os.remove(pic)
            count += 1
    print('remove %d files' % count)
    return count
def down_from_file(urls_path='/Users/chenhaoy/tutorial/piclinks.txt', max_procs=50):
    """Download every URL listed (one per line) in *urls_path*.

    One worker process is spawned per URL, throttled to roughly
    *max_procs* concurrent workers.  Files are named 00001.jpg,
    00002.jpg, ... in list order.  After a grace period, stragglers are
    terminated and undersized results are cleaned up via removepics().
    Defaults preserve the original hard-coded path and process cap.
    """
    procs = []
    itr = 1
    # `with` guarantees the links file is closed (original leaked it).
    with open(urls_path) as links:
        for line in links:
            # Bug fix: file iteration keeps the trailing newline, which the
            # original passed straight into the URL.  Strip it, skip blanks.
            url = line.strip()
            if not url:
                continue
            # Throttle: wait until the live-worker count drops under the cap.
            while len(procs) > max_procs:
                time.sleep(1)
                procs = [p for p in procs if p.is_alive()]
            filename = '%05d.jpg' % itr
            itr += 1
            print('downloading.. %s to %s' % (url, filename))
            p = Process(target=download, args=(url, filename))
            procs.append(p)
            p.start()
    # NOTE(review): source indentation was lost; these lines are assumed to
    # run once after the loop (a grace period before reaping) — confirm.
    time.sleep(5)
    print('Process count=%d' % len(procs))
    print('add all pic to download list..')
    for p in procs:
        if p.is_alive():
            p.terminate()
    for p in procs:
        # join() even after terminate() so finished children are reaped.
        p.join(2)
    removepics()
    print('bye..')
def main():
    """Entry point: run the batch download driven by the links file."""
    down_from_file()


if __name__ == '__main__':
    main()
# python multi process download files
# (Scraped blog footer, translated: "Latest recommended article published 2022-01-10 20:51:26.")