Downloading files with multiple processes in Python

import urllib
import os
from multiprocessing import Process 
import time

def download(url, filename):
  """Fetch ``url`` and store the payload at ``filename``.

  This is a best-effort worker: any exception raised while downloading is
  caught here, a file left at ``filename`` is removed, and a failure message
  (including the error) is printed. Nothing is returned.

  Args:
    url: Address to fetch. Surrounding whitespace, such as the newline kept
      on a line read from a text file, is stripped before use.
    filename: Local path the downloaded data is written to.
  """
  # Function-scope import so the block works on both Python 3
  # (urllib.request.urlretrieve) and Python 2 (urllib.urlretrieve).
  try:
    from urllib.request import urlretrieve
  except ImportError:
    from urllib import urlretrieve

  try:
    urlretrieve(url.strip(), filename)
  except Exception as err:
    # Do not leave a truncated file behind for later steps to pick up.
    if os.path.isfile(filename):
      os.remove(filename)
    # Single formatted argument: prints the same text on Python 2 and 3
    # (a two-argument print(...) shows a tuple on Python 2).
    print("Failed Downloading... %s (%s)" % (filename, err))

def getphotos():
  """Download the 20 numbered JPEGs of one hard-coded gallery.

  The images are named ``0001.jpg`` through ``0020.jpg`` under ``prefix``;
  each one is saved under the same name relative to the current working
  directory via ``download``. Each file name is printed before its download
  starts.
  """
  prefix = 'http://hwcdn.ddstatic.com/fhg/fhg_photos/2011_04_28/11121/roxanna_milan_11121_1-gal-1600-jpg/'

  for number in range(1, 21):
    filename = "%04d.jpg" % number
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print(filename)
    download(prefix + filename, filename)

def removepics(directory='.', min_size=20000):
  """Delete ``.jpg`` files in ``directory`` that are smaller than ``min_size``.

  Prints how many files were removed. Files whose size is exactly
  ``min_size`` bytes are kept.

  Args:
    directory: Folder to scan (not recursive). Defaults to the current
      working directory.
    min_size: Size threshold in bytes; anything strictly smaller is deleted.
  """
  count = 0
  for name in os.listdir(directory):
    if not name.endswith('.jpg'):
      continue
    path = os.path.join(directory, name)
    if not os.path.isfile(path):
      # A directory that happens to end in ".jpg" is not a picture.
      continue
    # Ask the filesystem for the size instead of reading the whole image:
    # no memory spike, no leaked handle, no text-mode decoding of binary data.
    if os.path.getsize(path) < min_size:
      os.remove(path)
      count += 1
  print('remove %d files' % count)

def down_from_file(urls='/Users/chenhaoy/tutorial/piclinks.txt',
                   maxprocess=50, grace_seconds=5):
  """Download every URL listed in a text file, one worker process per URL.

  Each non-blank line of the file is treated as one URL and saved as
  ``NNNNN.jpg`` in the current working directory, where ``NNNNN`` is the
  1-based line number. After all workers have been started they get
  ``grace_seconds`` in total to finish; any still running after that are
  terminated. Finally ``removepics()`` is called to clean up undersized
  images.

  Args:
    urls: Path of the text file holding one URL per line.
    maxprocess: Maximum number of download processes alive at once
      (values below 1 are treated as 1).
    grace_seconds: Total time to wait for outstanding workers once every
      URL has been handed out.
  """
  maxprocess = max(1, maxprocess)
  processlist = []

  # "with" closes the list file even if spawning a worker fails.
  with open(urls) as linkfile:
    for itr, line in enumerate(linkfile, 1):
      url = line.strip()  # drop the trailing newline kept by file iteration
      if not url:
        continue

      # Throttle: ">=" keeps at most maxprocess workers alive. Prune first
      # so no second is wasted when slots are already free.
      while len(processlist) >= maxprocess:
        processlist = [p for p in processlist if p.is_alive()]
        if len(processlist) >= maxprocess:
          time.sleep(1)

      filename = "%05d.jpg" % itr
      print('downloading.. %s to %s' % (url, filename))
      p = Process(target=download, args=(url, filename))
      processlist.append(p)
      p.start()

  print('Process count=%d' % len(processlist))
  print('add all pic to download list..')

  # Wait up to grace_seconds overall, returning early once all are done.
  deadline = time.time() + grace_seconds
  for p in processlist:
    p.join(max(0, deadline - time.time()))

  # Anything still running is considered stalled and is stopped.
  for p in processlist:
    if p.is_alive():
      p.terminate()
  # Join every worker (bounded) so finished and terminated ones are reaped.
  for p in processlist:
    p.join(2)

  removepics()
  print('bye..')
  
def main():
  """Script entry point: run the URL-list download job."""
  down_from_file()

# Run the job only when executed as a script, not when imported.
if __name__ == '__main__':
  main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值