修改网友写的下载软件

#!/usr/bin/python  
#coding=gbk  
   
import re  
import os  
import sys  
import time  
import glob  
import string  
import socket  
import getopt  
import urllib  
import urllib2  
import threading  
from sgmllib import SGMLParser  
from optparse import OptionParser

   
# #############################################################################  
# #  
# # self-defined exception classes  
# #  
# #############################################################################  
class ConnectionError(Exception):
    """Error type intended for connection failures."""


class URLUnreachable(Exception):
    """Raised when the size of the remote file cannot be determined."""


class CanotDownload(Exception):
    """Error type intended for downloads that cannot be performed."""
   
# #############################################################################  
# #  
# # multiple threads download module starts here  
# #  
# #############################################################################  
class HttpGetThread(threading.Thread):
    """Thread that downloads one inclusive byte range of a URL into a part file.

    Data is appended to ``filename``; whatever that file already holds is
    treated as the beginning of the range, so an interrupted download
    resumes where it stopped.

    Attributes:
        url, filename, range -- the constructor arguments
        totalLength -- number of bytes in ``range``
        downloaded  -- bytes of the range stored in ``filename`` so far
        percent     -- ``downloaded`` as a percentage of ``totalLength``
        headerrange -- (first, last) offsets sent in the Range header
        bufferSize  -- number of bytes requested per read
        time        -- whole seconds spent on the current connection
        error       -- exception from the most recent failed attempt,
                       or None
    """

    # A failing download is attempted at most this many times.
    maxAttempts = 10

    def __init__(self, name, url, filename, range=0):
        """Prepare the worker; nothing is downloaded until the thread runs.

        name     -- thread name
        url      -- address of the resource to download
        filename -- part file the received bytes are appended to
        range    -- (first, last) inclusive byte offsets.  The default of
                    0 is not usable: a two-item sequence is required,
                    anything else raises TypeError.
        """
        threading.Thread.__init__(self, name=name)
        self.name = name
        self.url = url
        self.filename = filename
        self.range = range
        self.totalLength = range[1] - range[0] + 1
        self.downloaded = self._sizeOnDisk()
        self.percent = self._percentDone()
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        self.bufferSize = 8192
        self.time = 0
        self.error = None

    def _sizeOnDisk(self):
        """Return the current size of the part file, or 0 if it is missing."""
        try:
            return os.path.getsize(self.filename)
        except OSError:
            return 0

    def _percentDone(self):
        """Return progress as a percentage; an empty range counts as done."""
        if self.totalLength <= 0:
            return 100.0
        return self.downloaded / float(self.totalLength) * 100

    def _fetchRemainder(self):
        """Request the bytes still missing and append them to the part file.

        Raises IOError when the response ends before the range is complete,
        so that the caller retries instead of accepting a truncated part.
        """
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        request = urllib2.Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
        # NOTE(review): the response status is not inspected, so a server
        # that ignores the Range header is not detected here.
        conn = urllib2.urlopen(request)
        try:
            startTime = time.time()
            data = conn.read(self.bufferSize)
            while data:
                # The file is reopened for every chunk so that its size on
                # disk always matches self.downloaded; resuming relies on
                # that size.
                with open(self.filename, 'ab') as part:
                    part.write(data)
                self.time = int(time.time() - startTime)
                self.downloaded += len(data)
                self.percent = self._percentDone()
                data = conn.read(self.bufferSize)
        finally:
            conn.close()
        if self.downloaded < self.totalLength:
            raise IOError('connection closed after %d of %d bytes'
                          % (self.downloaded, self.totalLength))

    def run(self):
        """Download the missing part of the range, retrying failed attempts.

        Returns without touching the network when the part file already
        holds the whole range.  After ``maxAttempts`` failures the thread
        gives up and leaves the last exception in ``self.error``.
        """
        self.downloaded = self._sizeOnDisk()
        self.percent = self._percentDone()
        attempts = 0
        while self.downloaded < self.totalLength:
            if attempts >= self.maxAttempts:
                return
            attempts += 1
            try:
                self._fetchRemainder()
                self.error = None
            except Exception as err:
                self.error = err
                time.sleep(1)
        # Make sure the part file exists even when nothing had to be
        # fetched (empty range), so that it can be opened for merging.
        open(self.filename, 'ab').close()
   
 # Split the file into byte ranges so that several threads can download it
def Split(size, blocks):
    """Partition ``size`` bytes into ``blocks`` contiguous inclusive ranges.

    Every range except the last covers ``size // blocks`` bytes; the last
    range also takes the remainder and always ends at ``size - 1``.

    size   -- total number of bytes
    blocks -- number of ranges to produce, at least 1

    Returns a list of ``blocks`` tuples ``(first, last)`` of integer byte
    offsets.  Raises ValueError when ``blocks`` is smaller than 1.
    """
    if blocks < 1:
        raise ValueError("blocks must be at least 1, got %r" % (blocks,))
    # Explicit floor division keeps the offsets integral even where "/"
    # means true division.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges
 # Get the size of the remote file
def GetHttpFileSize(url):
    """Return the size in bytes that ``url`` announces, or 0 when unknown.

    The size is taken from the Content-Length response header and is also
    printed.  Any failure (unreachable URL, missing or non-numeric header)
    yields 0 rather than an exception.
    """
    try:
        conn = urllib.urlopen(url)
        try:
            announced = conn.info().getheader("Content-Length")
        finally:
            conn.close()
        # int(None) / int("junk") raise here, before anything is returned,
        # so a bad header can never leak out as the result.
        length = int(announced)
    except Exception:
        # Deliberately best-effort: callers treat 0 as "size unknown".
        return 0
    print("Get File Length: %d" % length)
    return length
   
def hasLive(ts):
    """Return True if at least one thread in ``ts`` is still alive."""
    return any(worker.isAlive() for worker in ts)
 #  
def _removeParts(partNames):
    """Delete the given part files, ignoring any that cannot be removed."""
    for partName in partNames:
        try:
            os.remove(partName)
        except OSError:
            pass


def _mergeParts(partNames, filename):
    """Concatenate the part files, in order, into ``filename``."""
    import shutil  # local import: only this helper needs it
    with open(filename, 'wb') as merged:
        for partName in partNames:
            with open(partName, 'rb') as part:
                # Copies in chunks instead of loading a whole part in memory.
                shutil.copyfileobj(part, merged)


def MyHttpGet(url, output=None, connections=4):
    """
    Download ``url`` through several parallel range requests and merge
    the downloaded parts into one file.

    arguments:
        url, in GBK encoding
        output, default encoding, do no convertion; when omitted, the text
            after the last '/' of url is used as the file name
        connections, integer; capped so that no part is empty

    raises:
        URLUnreachable, when the size of the remote file is unknown
        CanotDownload, when a part is still incomplete after all worker
            threads have stopped; the part files are kept for a later run

    Ctrl-C removes the part files and exits the process with status 1.
    """
    length = GetHttpFileSize(url)
    # Checked before any arithmetic so an unknown size never reaches it.
    if not length:
        raise URLUnreachable(url)
    startTime = time.time()
    mb = length / 1024.0 / 1024.0
    filename = output or url.split('/')[-1]
    # Never use more parts than there are bytes, and always at least one.
    blocks = max(1, min(connections, length))
    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]
    ts = [HttpGetThread(" 下载线程 " + str(i), url, names[i], ranges[i])
          for i in range(blocks)]
    # Bytes left over from an earlier run; sampled before the threads start
    # so they are not counted in the transfer rate.
    startSize = sum(t.downloaded for t in ts)
    for t in ts:
        t.setDaemon(True)
        t.start()

    rate = 0.0
    try:
        while True:
            # Sample liveness first so the last line shows the final counts.
            live = hasLive(ts)
            etime = time.time() - startTime
            received = sum(t.downloaded for t in ts)
            d = received / float(length) * 100
            if etime > 0:
                rate = (received - startSize) / float(etime) / 1024
            else:
                rate = 0.0
            progressStr = u'\rFilesize: %d(%.2fM)  Downloaded: %.2f%%  Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            if not live:
                break
            time.sleep(0.8)
    except KeyboardInterrupt:
        sys.stdout.write("\n")
        print("Exit...")
        _removeParts(names)
        sys.exit(1)

    sys.stdout.write("\n")
    etime = time.time() - startTime
    print(u'耗时: %d:%d, 平均速度:%.2fKB/s' % (int(etime) // 60, int(etime) % 60, rate))

    # Merging an incomplete part would silently produce a corrupt file.
    unfinished = [t for t in ts if t.downloaded < t.totalLength]
    if unfinished:
        raise CanotDownload("%d of %d parts of %s are incomplete"
                            % (len(unfinished), len(ts), url))

    _mergeParts(names, filename)
    _removeParts(names)
   
if __name__ == "__main__":
    # Command-line entry point: -f URL [-o OUTPUT] [-n CONNECTIONS]
    parser = OptionParser()
    parser.add_option("-f", "--file", action="store", type="string",
                      dest="url", help="URL of the file to download")
    parser.add_option("-o", "--output", action="store", type="string",
                      dest="output",
                      help="name of the output file "
                           "(default: base name of the URL)")
    parser.add_option("-n", "--connections", action="store", type="int",
                      dest="connections", default=5,
                      help="number of parallel connections (default: 5)")
    (options, args) = parser.parse_args()
    if not options.url:
        # Print the usage text instead of exiting without doing anything.
        parser.error("no URL given; use -f URL")
    if options.connections < 1:
        parser.error("the number of connections must be at least 1")

    url = options.url
    output = options.output or os.path.basename(url)
    try:
        MyHttpGet(url, output, options.connections)
    except URLUnreachable:
        sys.stderr.write("cannot determine the size of %s\n" % url)
        sys.exit(1)
    except CanotDownload as err:
        sys.stderr.write("download failed: %s\n" % err)
        sys.exit(1)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值