import hashlib
import os
import pickle
import tarfile
from time import localtime, strftime
6
def check_md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in 4 KiB chunks so arbitrarily large files can be
    hashed without loading them into memory at once.
    """
    digest = hashlib.md5()
    with open(fname, 'rb') as handle:
        # iter() with a b'' sentinel stops when read() hits end-of-file
        for chunk in iter(lambda: handle.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()
16
def full_backup(src_dir, dst_dir, md5file):
    """Create a full gzip'd tar backup of *src_dir* inside *dst_dir*.

    The archive is named ``<basename>_full_<YYYYMMDD>.tar.gz``.  After the
    archive is written, the MD5 digest of every file under *src_dir* is
    recorded in *md5file* (a pickle of ``{path: hexdigest}``) so that a
    later incremental backup can detect new and changed files.
    """
    # Build the absolute path of the backup archive.
    fname = '%s_full_%s.tar.gz' % \
        (os.path.basename(src_dir.rstrip('/')), strftime('%Y%m%d'))
    fname = os.path.join(dst_dir, fname)

    # Pack and compress the whole source directory.  The context manager
    # closes the archive even if tar.add() raises (the original leaked the
    # open file handle on error).
    with tarfile.open(fname, 'w:gz') as tar:
        tar.add(src_dir)

    # Compute the MD5 digest of every regular file under the source tree.
    md5dict = {}
    for path, folders, names in os.walk(src_dir):
        for name in names:
            key = os.path.join(path, name)
            md5dict[key] = check_md5(key)

    # Persist the digest map as the baseline for the next incremental run.
    with open(md5file, 'wb') as fobj:
        pickle.dump(md5dict, fobj)
38
def incr_backup(src_dir, dst_dir, md5file):
    """Create an incremental gzip'd tar backup of *src_dir* in *dst_dir*.

    Only files that are new or whose MD5 digest differs from the baseline
    stored in *md5file* are archived, into
    ``<basename>_incr_<YYYYMMDD>.tar.gz``.  *md5file* is then rewritten
    with the current digests.  If *md5file* does not exist yet (no full
    backup has run), every file is treated as new and backed up.
    """
    # Build the absolute path of the backup archive.
    fname = '%s_incr_%s.tar.gz' % \
        (os.path.basename(src_dir.rstrip('/')), strftime('%Y%m%d'))
    fname = os.path.join(dst_dir, fname)

    # Compute the current MD5 digest of every file under the source tree.
    md5dict = {}
    for path, folders, names in os.walk(src_dir):
        for name in names:
            key = os.path.join(path, name)
            md5dict[key] = check_md5(key)

    # Load the previous baseline.  A missing file (first incremental run)
    # means everything is new; the original crashed here instead.
    # NOTE: pickle.load must only ever read our own md5file -- never feed
    # it untrusted data.
    try:
        with open(md5file, 'rb') as fobj:
            old_md5 = pickle.load(fobj)
    except FileNotFoundError:
        old_md5 = {}

    # Archive new and changed files FIRST, then update the baseline, so a
    # failed archive does not destroy the comparison data (the original
    # overwrote md5file before writing the archive).
    with tarfile.open(fname, 'w:gz') as tar:
        for key in md5dict:
            if old_md5.get(key) != md5dict[key]:
                tar.add(key)

    with open(md5file, 'wb') as fobj:
        pickle.dump(md5dict, fobj)
66
if __name__ == '__main__':
    src_dir = '/tmp/mydemo/security/'
    dst_dir = '/tmp/demo/'
    md5file = '/tmp/demo/md5.data'
    # strftime('%a') is locale-dependent ('Mon' only under an English
    # locale), so the full backup could silently never run.  tm_wday is a
    # stable integer where Monday == 0.
    if localtime().tm_wday == 0:
        full_backup(src_dir, dst_dir, md5file)
    else:
        incr_backup(src_dir, dst_dir, md5file)