# -*- coding: utf-8 -*-
__author__ = 'gull'
import os, urllib2, log_factory
from urlparse import urlsplit
def get(url, filePath, fileName = None, buffer = 16 * 1024, timeout = 120):
    """Download ``url`` over HTTP and save the response body under ``filePath``.

    Args:
        url: Address to request.
        filePath: Directory the file is written into; it is joined to the
            file name with ``os.path.sep``.
        fileName: Optional name for the saved file. When empty, the name is
            taken from the response's Content-Disposition header and, failing
            that, from the last path segment of the final URL.
        buffer: Number of bytes read from the response per chunk.
        timeout: Socket timeout in seconds passed to ``urllib2.urlopen``.

    Returns:
        A tuple ``(fileName, totalLength, fullfileName)``. ``totalLength`` is
        the raw Content-Length header value (a string), or None when the
        header is absent.

    Errors raised by ``urllib2.urlopen`` and ``open`` are not caught here.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy fsrc to fdst in ``buffer``-sized chunks, logging progress."""
        # A missing, malformed or zero Content-Length is treated as "unknown"
        # so the percentage computation can never fail or divide by zero.
        try:
            expected = float(totalLength)
        except (TypeError, ValueError):
            expected = 0.0
        bytesRead = 0.0
        while True:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)
            bytesRead += len(buf)
            if expected > 0:
                log.info("%s: %.02f/%.02f kb (%d%%)",
                         fileName,
                         bytesRead / 1024.0,
                         expected / 1024.0,
                         100 * bytesRead / expected)
            else:
                log.info("%s: %.02f/? kb (?%%)",
                         fileName,
                         bytesRead / 1024.0)

    def getFileName(openUrl):
        """Pick a file name from Content-Disposition, else from the URL path."""
        disposition = openUrl.info().getheader('Content-Disposition')
        if disposition:
            for part in disposition.split(';'):
                # partition() keeps any '=' inside the value intact.
                key, _, value = part.strip().partition('=')
                if key.strip().lower() != 'filename':
                    continue
                # The header is server-controlled: keep only the final path
                # component so it cannot point outside filePath.
                candidate = os.path.basename(value.strip().strip("\"'"))
                if candidate:
                    return candidate
        # No usable filename in the header: parse it out of the final URL.
        return os.path.basename(urlsplit(openUrl.url)[2])

    def getFileLength(openUrl):
        """Return the Content-Length header value, or None when absent."""
        return openUrl.info().getheader("Content-Length")

    r = urllib2.urlopen(urllib2.Request(url), timeout = timeout)
    try:
        fileName = fileName or getFileName(r)
        fullfileName = "%s%s%s" % (filePath, os.path.sep, fileName)
        totalLength = getFileLength(r)
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
        return fileName, totalLength, fullfileName
    finally:
        # Always release the connection, whether the download succeeded or not.
        r.close()
        log.info("http request finished.")
参数说明:
- url:即下载路径,如http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz
- filePath:下载文件保存的文件夹
- fileName:下载后保存的文件名,可选参数。若为空,则会取response header中的filename信息(如下图);若仍为空,则取url路径的最后一段作为文件名(如:apache-tomcat-7.0.20.tar.gz)
- buffer:下载缓冲区大小,默认16k
可继续加入以下特性:
- 支持代理
- 文件分块,多线程下载
- 异步下载,回调机制
- ...