python计算文件md5_Python : 计算大文件MD5值

buffer = 8192 这个取值来自网络,但不清楚它是如何通过实践得出的。

from hashlib import md5

import time

import os

def calMD5(str):
    """Return the hexadecimal MD5 digest of an in-memory string.

    Args:
        str: The data to hash. Byte strings are hashed as-is; text
            (unicode) strings are encoded as UTF-8 first.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    # The parameter name shadows the builtin ``str`` and is kept only for
    # backward compatibility with keyword callers, so the text type is
    # obtained via ``type(u"")`` rather than by name.
    if isinstance(str, type(u"")):
        str = str.encode("utf-8")
    m = md5()
    m.update(str)
    return m.hexdigest()

def calMD5ForFile(file):
    """Return the hexadecimal MD5 digest of a file's contents.

    Files of 1000 MiB or more are handed to ``calMD5ForBigFile`` so that
    they are hashed chunk by chunk; smaller files are read in one call.

    Args:
        file: Path of the file to hash.

    Returns:
        The 32-character lowercase hexadecimal digest.

    Raises:
        OSError/IOError: Propagated from ``os.stat`` or ``open`` when the
            file cannot be inspected or read.
    """
    # Same cut-off as "size / (1024 * 1024) >= 1000", expressed in bytes so
    # it does not depend on integer-vs-float division semantics.
    big_file_threshold = 1000 * 1024 * 1024
    if os.stat(file).st_size >= big_file_threshold:
        print("File size > 1000, move to big file...")
        return calMD5ForBigFile(file)

    m = md5()
    # The context manager closes the handle even if read() raises.
    with open(file, 'rb') as f:
        m.update(f.read())
    return m.hexdigest()

def calMD5ForFolder(dir, MD5File):
    """Write the MD5 digest of every file under a directory to a manifest.

    The tree rooted at ``dir`` is walked recursively and one line of the
    form ``<path relative to dir> <md5 hex digest>`` is written per file.

    Args:
        dir: Root directory whose files are hashed.
        MD5File: Path of the manifest file to write; an existing file is
            overwritten.
    """
    manifest_path = os.path.abspath(MD5File)
    # The context manager closes the manifest even if hashing a file fails.
    with open(MD5File, 'w') as outfile:
        for root, _subdirs, files in os.walk(dir):
            for name in files:
                filefullpath = os.path.join(root, name)
                # When the manifest lives inside the walked tree, skip it:
                # hashing a file that is still being written is meaningless.
                if os.path.abspath(filefullpath) == manifest_path:
                    continue
                filerelpath = os.path.relpath(filefullpath, dir)
                digest = calMD5ForFile(filefullpath)
                outfile.write(filerelpath + ' ' + digest + "\n")

def calMD5ForBigFile(file, chunk_size=8192):
    """Return the hexadecimal MD5 digest of a file, reading it in chunks.

    Only one chunk is held in memory at a time, so memory use does not grow
    with the size of the file.

    Args:
        file: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. The default of
            8192 is the value the original author measured as faster
            than 2048.

    Returns:
        The 32-character lowercase hexadecimal digest.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
        OSError/IOError: Propagated from ``open``/``read``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    m = md5()
    # The context manager closes the handle even if a read fails midway.
    with open(file, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty byte string that signals end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            m.update(chunk)
    return m.hexdigest()

if __name__ == "__main__":
    # Benchmark driver: for each file, time the read-at-once hasher and the
    # chunked hasher back to back and print each digest with its elapsed
    # seconds.
    import sys

    # Files the original benchmark was run against; used when no paths are
    # given on the command line.
    default_paths = [
        "E:\\OS\\ubuntu-11.04-desktop-i386.iso",
        "E:\\OS\\ubuntu-12.04-desktop-amd64.iso",
        "D:\\Virtual Machines\\Ubuntu 64-bit\\Ubuntu 64-bit-s001.vmdk",
    ]
    paths = sys.argv[1:] or default_paths

    for path in paths:
        t = time.time()
        print(calMD5ForFile(path))
        print(time.time() - t)

        t = time.time()
        print(calMD5ForBigFile(path))
        # Trailing " \n" reproduces the blank separator line the original
        # `print elapsed, "\n"` statement emitted between files.
        print("%s \n" % (time.time() - t))

#output

#8b1085bed498b82ef1485ef19074c281

#2.57500004768

#8b1085bed498b82ef1485ef19074c281

#3.34100008011

#

#128f0c16f4734c420b0185a492d92e52

#2.632999897

#128f0c16f4734c420b0185a492d92e52

#3.39100003242

#

#File size > 1000, move to big file...

#ec1fa4dc1b32569e9da7b4744548a9ef

#5.40100002289

#ec1fa4dc1b32569e9da7b4744548a9ef

#5.42100000381

PS: 纪念下我直接计算3G+文件时的内存使用率

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值