应 hongfu 昨天的要求,写了一个用多线程从 FTP 服务器上分块并行下载文件的简单例子。
- #-*- encoding: gb18030 -*-
- import ftplib, string
- import os, sys
- import threading
class MyFTP:
    """Download a single file from an FTP server over several connections.

    The remote file is split into contiguous byte ranges.  Each range is
    fetched by its own thread over its own FTP connection into a temporary
    ``<name>.part.<index>`` file in the current directory, and the parts are
    then concatenated into ``<name>``.

    NOTE: one connection is opened per thread; servers that limit the number
    of connections per client IP are not taken into account.
    """

    def __init__(self, host='', user='', passwd=''):
        """Remember the credentials and open the control connection.

        host   -- FTP server host name or address
        user   -- login name
        passwd -- login password
        """
        self.host = host
        self.user = user
        self.passwd = passwd
        self.filename = ''

        # Control connection, used only to ask the server for the file size.
        self.ftp = ftplib.FTP(host, user, passwd)

    @staticmethod
    def _plan_chunks(fsize, threadnum):
        """Split ``fsize`` bytes into contiguous ``(begin, size)`` ranges.

        At most ``threadnum`` ranges are produced, never more than one per
        byte and never fewer than one.  All ranges have the same size except
        the last, which also takes the remainder of the division.
        """
        count = max(1, min(int(threadnum), fsize))
        # Floor division: plain "/" would yield a float on Python 3.
        base = fsize // count
        chunks = [(base * i, base) for i in range(count - 1)]
        last_begin = base * (count - 1)
        chunks.append((last_begin, fsize - last_begin))
        return chunks

    def _remote_size(self, filename):
        """Return the size in bytes of ``filename`` as reported by the server.

        Uses the control connection and closes it afterwards; ``self.ftp`` is
        reset to None so that a later call reconnects instead of reusing a
        closed connection.  Raises IOError if the server gives no size.
        """
        if getattr(self, 'ftp', None) is None:
            self.ftp = ftplib.FTP(self.host, self.user, self.passwd)
        ftp = self.ftp
        try:
            # Many servers refuse SIZE while the connection is in ASCII mode.
            ftp.voidcmd('TYPE I')
            fsize = ftp.size(filename)
        finally:
            self.ftp = None
            try:
                ftp.quit()
            except ftplib.all_errors:
                ftp.close()
        if fsize is None:
            raise IOError('cannot determine size of %s' % filename)
        return fsize

    def download_by_thread(self, filename, threadnum=1, blocksize=8192):
        """Download ``filename`` into the current directory using threads.

        filename  -- path of the file on the server
        threadnum -- requested number of parallel downloads; values below 1
                     are treated as 1 and the count is capped at the file
                     size so that no thread gets an empty range
        blocksize -- maximum number of bytes requested per socket read

        Progress is printed to stdout.  'all ok' is printed only when the
        merged file has the size reported by the server.
        """
        self.filename = filename
        # Local output name: the last component of the remote path.
        onlyname = os.path.basename(filename)

        fsize = self._remote_size(filename)
        print('file %s size: %d' % (filename, fsize))

        rest = None
        chunks = self._plan_chunks(fsize, threadnum)

        # One worker thread per byte range.
        threads = []
        for inx, (begin, size) in enumerate(chunks):
            print('%d %d %d' % (inx, begin, size))
            threads.append(threading.Thread(
                target=self.download_file,
                args=(inx, filename, begin, size, blocksize, rest)))

        print('threads: %d' % len(threads))

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # All workers are done: concatenate the part files in order.
        missing = 0
        with open(onlyname, 'wb') as fw:
            for inx in range(len(chunks)):
                fname = '%s.part.%d' % (onlyname, inx)
                print(fname)
                if not os.path.isfile(fname):
                    print('not found %s' % fname)
                    missing += 1
                    continue
                with open(fname, 'rb') as part:
                    while True:
                        data = part.read(8192)
                        if not data:
                            break
                        fw.write(data)
                os.remove(fname)

        # A missing or short part leaves the result smaller than expected;
        # report that instead of claiming success.
        merged = os.path.getsize(onlyname)
        if missing or merged != fsize:
            print('incomplete: got %d of %d bytes' % (merged, fsize))
        else:
            print('all ok')

    def download_file(self, inx, filename, begin=0, size=0, blocksize=8192, rest=None):
        """Fetch one byte range of ``filename`` into ``<name>.part.<inx>``.

        inx       -- index used as the suffix of the part file
        filename  -- path of the file on the server
        begin     -- offset at which to start reading
        size      -- number of bytes to fetch; 0 or less means "to end of file"
        blocksize -- maximum number of bytes requested per socket read
        rest      -- if not None, overrides ``begin`` as the restart offset

        Returns the server's final reply to the transfer, or None when no
        such reply could be read (the failure is printed).
        """
        onlyname = os.path.basename(filename)
        tname = threading.current_thread().name
        offset = begin if rest is None else rest
        ret = None
        haveread = 0

        # Every worker uses its own connection.
        myftp = ftplib.FTP(self.host, self.user, self.passwd)
        try:
            myftp.voidcmd('TYPE I')
            print('%s REST %s' % (tname, offset))
            with open('%s.part.%s' % (onlyname, inx), 'wb') as fp:
                # Let ftplib send REST right before RETR, after the data
                # connection has been set up.
                conn = myftp.transfercmd('RETR ' + filename, offset)
                try:
                    while True:
                        if size > 0:
                            # Never ask for more than what is left of the range.
                            want = min(blocksize, size - haveread)
                        else:
                            want = blocksize
                        data = conn.recv(want)
                        if not data:
                            break
                        fp.write(data)
                        haveread += len(data)
                        if size > 0 and haveread >= size:
                            break
                finally:
                    conn.close()

            if size > 0 and haveread != size:
                # The server closed the data connection early.
                print('%s haveread: %d size: %d' % (tname, haveread, size))
            else:
                print('%s haveread: %d' % (tname, haveread))

            # The data connection is usually closed before the server has
            # sent the whole file, so an error reply is expected here;
            # report it and carry on.
            try:
                ret = myftp.getresp()
            except ftplib.all_errors as e:
                print('%s %s' % (tname, e))
        finally:
            try:
                myftp.quit()
            except ftplib.all_errors:
                myftp.close()

        return ret
# Example run: log in to the FTP server on localhost as 'anonymous' and fetch
# one file using 10 parallel download threads.
filename = '/incoming/cygwin.zip'
ftp = MyFTP(host="127.0.0.1", user='anonymous', passwd='123')
ftp.download_by_thread(filename, threadnum=10)
|