***今天在我BF的帮助下完成了小小的程序,也是我第一次写博客呢,谢谢他哦嘎嘎
1.首先,计算出下载文件的长度。
conn = urllib.urlopen(url)
headers = conn.info()
File_len = int (headers['Content-Length'])
2.根据所得到的文件长度,为每个线程确定要下载的数据范围(起始字节位置和结束字节位置),并通过HTTP的Range请求头只下载这一段数据。
req = urllib2.Request('http://guidetodatamining.com/guide/ch2/BX-Dump.zip')
req.headers['Range'] = 'bytes=%s-%s' % (start, end)
3.在本地建一个文件保存所下载的数据,每个线程写入文件的起始位置与它下载数据的起始位置相同。各线程互斥地访问该文件。
4.检查所下载文件对错。
使用Python中的filecmp模块来比较两个文件(单线程下载的文件与多线程下载的文件)。
import filecmp
filecmp.cmp(r'C:\Users\tana\Desktop\BX-Dump.zip',r'C:\Users\tana\Desktop\multithreads.zip')
具体代码:
import urllib
import urllib2
import threading,time
# Thread worker function
def threadcode(start, end,
               url='http://guidetodatamining.com/guide/ch2/BX-Dump.zip'):
    """Download one byte range of ``url`` and write it into the shared file.

    Sends an HTTP request with a ``Range: bytes=start-end`` header, reads the
    whole response body, then writes it to the module-level file object ``f``
    at offset ``start`` while holding the module-level lock ``l``.

    Args:
        start: Offset of the first byte to fetch; also the file offset
            at which the data is written.
        end: Offset of the last byte to fetch (inclusive).
        url: Resource to download. Defaults to the URL this script was
            written for, so existing ``threadcode(start, end)`` calls
            behave as before.
    """
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, end)
    response = urllib2.urlopen(req)
    try:
        # Read the body BEFORE taking the lock: reading under the lock
        # would serialize the downloads and defeat the multi-threading.
        data = response.read()
    finally:
        response.close()
    # Critical section: seek + write must not interleave between threads,
    # because all threads share one file object (and its file position).
    # ``with`` guarantees the lock is released even if the write fails.
    with l:
        f.seek(start, 0)
        f.write(data)
# Get the size of the remote file
def GetHttpFileSize(url):
    """Return the size in bytes of the resource at ``url``.

    Opens ``url`` and reads the ``Content-Length`` response header.

    Args:
        url: Address of the file whose size is wanted.

    Returns:
        The value of the ``Content-Length`` header as an ``int``.

    Raises:
        KeyError: If the response has no ``Content-Length`` header.
        ValueError: If the header value is not an integer.
        Any error raised while opening the URL propagates unchanged
        (previously it was swallowed and then masked by a ``NameError``
        on the unbound ``headers`` variable).
    """
    conn = urllib.urlopen(url)
    try:
        headers = conn.info()
    finally:
        # Only the headers are needed; do not leave the connection open.
        conn.close()
    return int(headers['Content-Length'])
# Split the file into byte ranges for multi-threaded download
def Split(size, blocks):
    """Partition ``size`` bytes into ``blocks`` contiguous inclusive ranges.

    The first ``blocks - 1`` ranges each cover ``size // blocks`` bytes; the
    last range runs to ``size - 1`` and so absorbs the remainder. The
    resulting list is printed before being returned.

    Note: when ``size < blocks`` the block size is 0, so the leading ranges
    come out as ``(0, -1)``; callers should not request more blocks than
    there are bytes.

    Args:
        size: Total number of bytes to partition.
        blocks: Number of ranges to produce; must be at least 1.

    Returns:
        A list of ``blocks`` tuples ``(start, end)`` with inclusive offsets.

    Raises:
        ValueError: If ``blocks`` is less than 1.
    """
    if blocks < 1:
        raise ValueError('blocks must be >= 1, got %r' % (blocks,))
    # Explicit floor division: identical on Python 2 and Python 3.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocks - 1)]
    # The last block takes whatever is left over after the even split.
    ranges.append((blocksize * (blocks - 1), size - 1))
    print(ranges)
    return ranges
# Create and run the download threads
url = 'http://guidetodatamining.com/guide/ch2/BX-Dump.zip'
thread_num = 5
file_len = GetHttpFileSize(url)
# Lock used by the worker threads to serialize writes to the shared file.
l = threading.Lock()
ranges = Split(file_len, thread_num)
# Shared output file; each worker seeks to its own offset before writing.
f = open("multithreads.zip", 'wb+')
childthreads = []
try:
    for i, (start, end) in enumerate(ranges):
        # Pass the range as an explicit 2-tuple of positional arguments.
        t = threading.Thread(target=threadcode, name="Thread-%d" % i,
                             args=(start, end))
        t.start()
        childthreads.append(t)
finally:
    # Wait for every started worker before closing the file they write to,
    # even if starting a later thread failed.
    for t in childthreads:
        t.join()
    f.close()
print('down')