import hashlib
import os
import pickle
import tarfile
from time import localtime, strftime
def check_md5(fname):
    """Return the hex MD5 digest of the file at *fname*.

    The file is read in 4 KiB chunks so arbitrarily large files can be
    hashed without loading them fully into memory.
    """
    digest = hashlib.md5()
    with open(fname, 'rb') as stream:
        # iter() with a b'' sentinel stops cleanly at end-of-file
        for chunk in iter(lambda: stream.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()
def full_backup(src, dst, md5file):
    """Create a full gzip'd tar backup of *src* inside directory *dst*.

    The archive is named ``<basename(src)>_full_<YYYYMMDD>.tar.gz``.
    Per-file MD5 digests of everything under *src* are pickled to
    *md5file* so a later incr_backup() can detect changed/new files.
    """
    fname = '%s_full_%s.tar.gz' % (os.path.basename(src), strftime('%Y%m%d'))
    fname = os.path.join(dst, fname)
    # FIX: use a context manager so the archive is closed (and flushed)
    # even if tar.add() raises — the original leaked the open tarfile.
    with tarfile.open(fname, 'w:gz') as tar:
        tar.add(src)
    # Record the MD5 of every file under src (keyed by full path).
    md5dict = {}
    for path, folders, files in os.walk(src):
        for name in files:  # renamed from 'file': don't shadow the builtin
            key = os.path.join(path, name)
            md5dict[key] = check_md5(key)
    # Persist the digest map for the next incremental run.
    with open(md5file, 'wb') as fobj:
        pickle.dump(md5dict, fobj)
def incr_backup(src, dst, md5file):
    """Create an incremental gzip'd tar backup of *src* inside *dst*.

    Only files whose MD5 digest differs from (or is absent in) the map
    stored in *md5file* are archived, then *md5file* is rewritten with
    the current digests for the next run.
    """
    fname = '%s_incr_%s.tar.gz' % (os.path.basename(src), strftime('%Y%m%d'))
    fname = os.path.join(dst, fname)
    # Compute the current MD5 of every file under src.
    md5dict = {}
    for path, folders, files in os.walk(src):
        for name in files:  # renamed from 'file': don't shadow the builtin
            key = os.path.join(path, name)
            md5dict[key] = check_md5(key)
    # Load the previous run's digests.
    # FIX: if md5file doesn't exist yet (first run on a non-Monday the
    # original crashed with FileNotFoundError) treat it as "no history",
    # which makes this first incremental effectively a full backup.
    # NOTE: pickle.load is only safe because md5file is produced locally
    # by this script — never point it at untrusted data.
    try:
        with open(md5file, 'rb') as fobj:
            old_md5 = pickle.load(fobj)
    except FileNotFoundError:
        old_md5 = {}
    # FIX: context manager guarantees the archive is closed even if
    # tar.add() raises — the original leaked the open tarfile.
    with tarfile.open(fname, 'w:gz') as tar:
        for key in md5dict:
            if old_md5.get(key) != md5dict[key]:
                tar.add(key)
    # Save the current digest map for the next comparison.
    with open(md5file, 'wb') as fobj:
        pickle.dump(md5dict, fobj)
if __name__ == '__main__':
    src = '/tmp/demo/security'
    dst = '/tmp/demo/backup'
    md5file = '/tmp/demo/backup/md5.data'
    # Make sure the backup destination exists before writing into it.
    os.makedirs(dst, exist_ok=True)
    # FIX: strftime('%a') is locale-dependent — it equals 'Mon' only in
    # English locales, so the full backup would silently never run
    # elsewhere. tm_wday == 0 is Monday regardless of locale.
    if localtime().tm_wday == 0:
        full_backup(src, dst, md5file)
    else:
        incr_backup(src, dst, md5file)