python批量下载网页文件夹_Python 批量文件下载

Python 文件批量下载、图片批量下载、批量请求、爬虫

#====================================================================================

downloadFile.py

#====================================================================================

#!/usr/bin/env python

# -*- coding:utf-8 -*-

from gevent import monkey

monkey.patch_all()

from gevent.pool import Pool

import requests

import sys

import os

def download(url):
    """Fetch *url* and append the raw response body to ``respose.log``.

    The request is sent with a desktop Chrome User-Agent header and the
    body is streamed to disk in 1 KiB chunks, so the whole response is
    never held in memory at once.

    Args:
        url: Address to request; surrounding whitespace is stripped.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            the server does not respond within the timeout.
    """
    chrome = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
    headers = {'User-Agent': chrome}
    # Every call appends to the same file; its name is fixed, not derived
    # from the URL.
    filename = "respose.log"
    # A timeout keeps one unresponsive server from hanging the caller
    # forever.
    r = requests.get(url.strip(), headers=headers, stream=True, timeout=30)
    try:
        # iter_content() yields bytes, so the file must be opened in binary
        # append mode: text mode would translate newlines on Windows and
        # rejects bytes outright on Python 3.
        with open(filename, 'ab') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # With stream=True the connection is only released once the
        # response is closed.
        r.close()
    print("%s is ok" % filename)

def removeLine(key, filename):
    """Delete every line that contains *key* from *filename*, in place.

    The work is done in pure Python instead of shelling out to ``sed``,
    so it does not need an external tool and cannot be subverted by
    shell metacharacters in either argument.

    Args:
        key: Text to look for. It is matched as a literal substring, so
            characters such as ``.`` or ``?`` in a URL fragment only match
            themselves. An empty key is ignored, because it would
            otherwise match (and delete) every line.
        filename: Path of the text file to rewrite.

    Raises:
        IOError / OSError: If the file cannot be read or written.
    """
    if not key:
        return
    with open(filename, 'r') as src:
        lines = src.readlines()
    kept = [line for line in lines if key not in line]
    # Leave the file untouched when nothing matched.
    if len(kept) != len(lines):
        with open(filename, 'w') as dst:
            dst.writelines(kept)

if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the path of a text
    # file that lists one URL per line.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        # At most four downloads run concurrently.
        p = Pool(4)
        # The context manager closes the URL list even if spawning fails.
        with open(filename, "r") as f:
            for line in f:
                url = line.strip()
                # Lines read from a file still carry their newline, so they
                # must be stripped before testing for blank entries;
                # otherwise an empty URL would be requested.
                if url:
                    p.spawn(download, url)
        # Wait for every scheduled download to finish before exiting.
        p.join()
    else:
        print('Usage: python %s urls.txt' % sys.argv[0])

#====================================================================================

#====================================================================================

测试文件 url.txt

#===========================

http://download2.boulder.ibm.com/sar/CMA/RAA/075lj/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075ln/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075lt/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075m7/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075m9/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075mb/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075mf/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075mn/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075ms/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075mv/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075nd/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075nk/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075no/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075nr/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075ns/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075nu/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075ny/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075o0/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075o1/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075p8/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075px/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075py/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075pz/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075q1/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075q3/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075q5/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/075zm/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/0758i/0/

http://download2.boulder.ibm.com/sar/CMA/RAA/0759s/0/

#=================================================

运行结果

G:\test\appscan>python downloadFile.py url.txt

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

respose.log is ok

#================================

结果文件

819771-20181130155739454-1293374319.png

819771-20181130155705316-760536863.png

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值