#!/usr/bin/python
#coding=gbk
"""Multi-threaded HTTP downloader.

The target file is divided into byte ranges, each range is fetched by its
own thread into a part file named "<output>_<index>", and the parts are
concatenated into the final output once every range is complete.

The code is written so that it runs unchanged on Python 2.6+ and Python 3.

NOTE(review): the non-ASCII string literals below are decoded according to
the coding declaration at the top of the file; confirm that it matches the
encoding the file is actually stored in.
"""
import re
import os
import sys
import time
import glob
import shutil
import string
import socket
import getopt
import urllib
import threading
from optparse import OptionParser

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: urllib2 was folded into urllib.request
    import urllib.request as urllib2

try:  # Python 2 only; sgmllib no longer exists in Python 3
    from sgmllib import SGMLParser
except ImportError:
    SGMLParser = None


# #############################################################################
#
# self-defined exception classes
#
# #############################################################################
class ConnectionError(Exception):
    """Declared for connection failures (not raised by the code in this file)."""


class URLUnreachable(Exception):
    """Raised by MyHttpGet when the size of the remote file cannot be determined."""


class CanotDownload(Exception):
    """Raised by MyHttpGet when at least one part could not be fully downloaded."""


# #############################################################################
#
# multiple threads download module starts here
#
# #############################################################################
class HttpGetThread(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a part file.

    Progress is exposed through the ``downloaded`` (bytes) and ``percent``
    attributes so that another thread can poll them.
    """

    # Maximum number of connection attempts made by run() before giving up.
    MAX_ATTEMPTS = 10

    def __init__(self, name, url, filename, range=0):
        """Prepare the worker.

        Arguments:
            name: thread name.
            url: URL to download from.
            filename: part file the data is appended to; bytes already in
                this file are counted as downloaded, so a previous run is
                resumed rather than restarted.
            range: (first_byte, last_byte) tuple, both inclusive.  The
                default of 0 is kept for signature compatibility only; the
                range is indexed below, so a tuple must be supplied.
        """
        threading.Thread.__init__(self, name=name)
        self.url = url
        self.filename = filename
        self.range = range
        self.totalLength = range[1] - range[0] + 1
        self.bufferSize = 8192
        self.downloaded = self._sizeOnDisk()
        self._updatePercent()
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])

    def _sizeOnDisk(self):
        """Return the current size of the part file, or 0 if it does not exist."""
        try:
            return os.path.getsize(self.filename)
        except OSError:
            return 0

    def _updatePercent(self):
        """Recompute ``percent`` from ``downloaded`` and ``totalLength``."""
        self.percent = self.downloaded / float(self.totalLength) * 100

    def _fetchRemaining(self):
        """Request the missing tail of the range and append it to the part file."""
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        request = urllib2.Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
        conn = urllib2.urlopen(request)
        try:
            startTime = time.time()
            # The file is opened once per connection; leaving the "with"
            # block (normally or through an exception) flushes it, so the
            # size on disk always matches self.downloaded afterwards.
            with open(self.filename, 'ab') as part:
                data = conn.read(self.bufferSize)
                while data:
                    part.write(data)
                    self.time = int(time.time() - startTime)
                    self.downloaded += len(data)
                    self._updatePercent()
                    data = conn.read(self.bufferSize)
        finally:
            conn.close()

    def run(self):
        """Download the range, resuming after failures, for up to MAX_ATTEMPTS tries.

        Nothing is requested when the part file already holds the whole
        range: asking for "bytes=<end+1>-<end>" is an invalid range and
        would only waste every retry.
        """
        self.downloaded = self._sizeOnDisk()
        self._updatePercent()
        attempts = 0
        while self.downloaded < self.totalLength and attempts < self.MAX_ATTEMPTS:
            attempts += 1
            try:
                self._fetchRemaining()
            except Exception:
                # Retry boundary of the worker: any failure (network, HTTP
                # or disk) leads to another attempt that resumes from the
                # bytes already written.
                time.sleep(1)


def Split(size, blocks):
    """Split ``size`` bytes into ``blocks`` inclusive (first, last) byte ranges.

    The ranges are contiguous and of equal length, except that the last one
    also takes the remainder of the division.
    """
    # Floor division keeps the offsets integral on Python 3 as well.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1) for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges


def GetHttpFileSize(url):
    """Return the Content-Length reported for ``url``, or 0 if it is unavailable.

    0 is returned when the URL cannot be opened, when the response carries
    no Content-Length header, or when the header is not a valid integer.
    """
    length = 0
    try:
        conn = urllib2.urlopen(url)
        try:
            header = conn.info().get("Content-Length")
        finally:
            conn.close()
        if header is not None:
            length = int(header)
            print("Get File Length: %d" % length)
    except Exception as err:
        # Callers rely on 0 meaning "unknown", so report the reason
        # instead of propagating it.
        sys.stderr.write("Cannot determine size of %s: %s\n" % (url, err))
    return length


def hasLive(ts):
    """Return True if at least one thread in ``ts`` is still running."""
    return any(t.is_alive() for t in ts)


def MyHttpGet(url, output=None, connections=4):
    """Download ``url`` using several concurrent range requests.

    arguments:
        url, in GBK encoding
        output, default encoding, no conversion is done; when omitted the
            last path component of the URL is used
        connections, integer, number of download threads

    raises:
        URLUnreachable: the size of the remote file could not be determined.
        CanotDownload: some part is still incomplete after all retries; the
            part files are kept so that a later call can resume.
    """
    length = GetHttpFileSize(url)
    if not length:
        raise URLUnreachable(url)
    startTime = time.time()  # start time
    mb = length / 1024.0 / 1024.0

    # Never use more threads than there are bytes: an empty range would
    # make a worker divide by a total length of zero.
    blocks = max(1, min(connections, length))
    filename = output if output else url.split('/')[-1]
    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]

    ts = [HttpGetThread(" 下载线程 " + str(i), url, names[i], ranges[i])
          for i in range(blocks)]
    # Bytes left over from a previous run; sampled before any thread starts
    # so that they are not counted in the transfer rate of this run.
    startSize = sum(t.downloaded for t in ts)
    for t in ts:
        t.daemon = True
        t.start()

    rate = 0  # download speed in KB/s
    try:
        while True:
            # Check liveness first so that the last report is produced
            # after every worker has finished and shows the final numbers.
            live = hasLive(ts)
            etime = time.time() - startTime
            done = sum(t.downloaded for t in ts)
            d = done / float(length) * 100
            if etime > 0:
                rate = (done - startSize) / float(etime) / 1024
            else:
                rate = 0.0
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            if not live:
                break
            time.sleep(0.8)
    except KeyboardInterrupt:
        sys.stdout.write("\n")
        print("Exit...")
        for n in names:
            try:
                os.remove(n)
            except OSError:
                pass
        sys.exit(1)

    sys.stdout.write("\n")
    etime = time.time() - startTime
    print(u'耗时: %d:%d, 平均速度:%.2fKB/s' % (int(etime) // 60, int(etime) % 60, rate))

    # A worker that ran out of retries leaves a short part behind; merging
    # it would silently produce a corrupt file.
    incomplete = [t.filename for t in ts if t.downloaded < t.totalLength]
    if incomplete:
        raise CanotDownload("incomplete parts (kept for resuming): %s"
                            % ", ".join(incomplete))

    with open(filename, 'wb') as merged:
        for n in names:
            with open(n, 'rb') as part:
                shutil.copyfileobj(part, merged)
            try:
                os.remove(n)
            except OSError:
                pass


def main():
    """Command line entry point: -f/--file URL [-o/--output FILE]."""
    parser = OptionParser()
    parser.add_option("-f", "--file", action="store", type="string", dest="url")
    parser.add_option("-o", "--output", action="store", type="string", dest="output")
    (options, args) = parser.parse_args(sys.argv)
    if not options.url:
        parser.error("a download URL is required (-f/--file)")
    connections = 5
    url = options.url
    if options.output:
        output = options.output
    else:
        output = os.path.basename(url)
    MyHttpGet(url, output, connections)


if __name__ == "__main__":
    main()
修改网友写的下载软件
最新推荐文章于 2024-07-20 16:30:05 发布