14-1 遍历一个目录,打印其中所有文件的名称,并对其中的子目录递归调用自身。
1 import os
2
def walk(dirname):
    """Print the path of every regular file under dirname, recursively.

    Args:
        dirname: path of the directory to traverse.

    Entries that are neither a regular file nor a directory (for example a
    dangling symbolic link) are skipped.
    """
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)

        if os.path.isfile(path):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(path)
        elif os.path.isdir(path):
            # Recurse only into real directories: os.listdir() raises
            # OSError when handed anything else.
            walk(path)
11
12
def walk2(dirname):
    """Print the path of every file under dirname, using os.walk.

    Args:
        dirname: path of the directory to traverse.
    """
    for root, _dirs, files in os.walk(dirname):
        for filename in files:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(os.path.join(root, filename))
17
18
if __name__ == '__main__':
    # Run both traversals over the current directory, separated by a rule,
    # so that their output can be compared.
    start = '.'
    walk(start)
    print('-------------')
    walk2(start)
14-2 写一个函数sed,接收如下参数:一个模式字符串,一个替换用字符串,以及两个文件名;它应该读取第一个文件,并将内容写入到第二个文件(如果需要则新建它)。如果文件中任何地方出现了模式字符串,它应该被替换。
1 import sys
2
3
def sed(pattern, replace, source, dest):
    """Copy source to dest, replacing every occurrence of pattern.

    Args:
        pattern: literal substring to search for (not a regular expression).
        replace: string substituted for each occurrence of pattern.
        source: name of the file to read.
        dest: name of the file to write; it is created or truncated.

    I/O and decoding failures are reported on standard output instead of
    being raised; any other exception propagates to the caller.
    """
    try:
        # The with-statement closes both files even when a read or write
        # fails part-way through. source is opened first, so dest is not
        # created when source cannot be opened.
        with open(source, 'r') as fin, open(dest, 'w') as fout:
            for line in fin:
                fout.write(line.replace(pattern, replace))
    except (IOError, OSError, UnicodeError):
        print('Something went wrong.')
17
18
def main(name):
    """Run sed on the file called name, writing to name + '.replaced'.

    Every occurrence of 'pattern' in the input is replaced by 'replacendum'.

    Args:
        name: name of the file to read.
    """
    sed('pattern', 'replacendum', name, name + '.replaced')
25
26
if __name__ == '__main__':
    # sys.argv[0], the script's own path, is what main() receives as `name`.
    argv = sys.argv
    main(*argv)
练习14-3
编写一个模块,导入anagram_sets,并提供两个新函数:store_anagrams 应当将变位词字典存储到一个"shelf"中;read_anagrams应当查询一个单词,并返回它的变位词列表。
1 import shelve
2 import sys
3 from anagram_sets import *
4
def store_anagrams(filename, ad):
    """Store every entry of the anagram dictionary ad in a shelf.

    Args:
        filename: name of the shelf file; opened with flag 'c', so it is
            created if it does not exist.
        ad: dictionary whose keys and values are copied into the shelf
            (presumably the result of all_anagrams -- confirm against
            anagram_sets).
    """
    shelf = shelve.open(filename, 'c')
    try:
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for word, word_list in ad.items():
            shelf[word] = word_list
    finally:
        # Close the shelf even if one of the writes fails.
        shelf.close()
12
13
def read_anagrams(filename, word):
    """Look up word in the shelf and return the list stored for it.

    Args:
        filename: name of the shelf file to open.
        word: word to look up; its signature (as computed by the signature
            function imported from anagram_sets) is used as the shelf key.

    Returns:
        The value stored under the word's signature, or an empty list when
        the shelf has no such key.
    """
    shelf = shelve.open(filename)
    try:
        sig = signature(word)
        try:
            return shelf[sig]
        except KeyError:
            return []
    finally:
        # The shelf was previously left open on every path.
        shelf.close()
21
22
def main(name, command='store'):
    """Build the anagram shelf, or look up one word in it.

    Args:
        name: unused; it receives the script name when the function is
            called as main(*sys.argv).
        command: 'store' (the default) builds 'anagrams.db' from
            'words.txt'; any other value is treated as a word to look up,
            and the result of read_anagrams is printed.
    """
    if command == 'store':
        ad = all_anagrams('words.txt')
        store_anagrams('anagrams.db', ad)
    else:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(read_anagrams('anagrams.db', command))
29
30
31
if __name__ == '__main__':
    # sys.argv supplies `name` (the script path) and, optionally, `command`.
    argv = sys.argv
    main(*argv)
练习14-4
在一个庞大的MP3 集合中,有可能同一首歌有多个版本,保存在不同的目录中,或者文件名不同。目的是搜索重复的歌。
1.编写一个程序递归搜索目录及其所有的子目录,并返回所有指定后缀(如.mp3)的文件的完整路径的列表。
2.要发现重复文件,需要使用md5sum来计算每个文件的"校验和"。如果两个文件的校验和相同,它们很可能有相同的内容。
3.可以使用diff来复审检验。
1 import os
2
def walk(dirname):
    """Return the paths of all regular files under dirname, recursively.

    Args:
        dirname: path of the directory to traverse.

    Returns:
        A list of paths, each built by joining dirname with the entry names
        found below it. Entries that are neither a regular file nor a
        directory (for example a dangling symbolic link) are skipped.
    """
    names = []
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)

        if os.path.isfile(path):
            names.append(path)
        elif os.path.isdir(path):
            # Recurse only into real directories: os.listdir() raises
            # OSError when handed anything else.
            names.extend(walk(path))
    return names
13
14
def compute_checksum(filename):
    """Run md5sum on filename through the shell.

    Args:
        filename: path of the file to checksum.

    Returns:
        Whatever pipe() returns for the md5sum command.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in pipes
        from pipes import quote

    # Quote the name so that spaces and shell metacharacters in it reach
    # md5sum intact instead of being interpreted by the shell.
    cmd = 'md5sum ' + quote(filename)
    return pipe(cmd)
18
19
def check_diff(name1, name2):
    """Run diff on two files through the shell.

    Args:
        name1: path of the first file.
        name2: path of the second file.

    Returns:
        Whatever pipe() returns for the diff command.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in pipes
        from pipes import quote

    # Quote both names so that spaces and shell metacharacters in them
    # reach diff intact instead of being interpreted by the shell.
    cmd = 'diff %s %s' % (quote(name1), quote(name2))
    return pipe(cmd)
23
24
def pipe(cmd):
    """Run a shell command and capture its standard output.

    Args:
        cmd: command line handed to os.popen.

    Returns:
        A tuple (res, stat): res is everything the command wrote to
        standard output, and stat is the value returned by closing the
        pipe -- None when the command exited successfully, otherwise its
        exit status as reported by os.popen.
    """
    fp = os.popen(cmd)
    try:
        res = fp.read()
    finally:
        # Always reap the child process, even if reading fails.
        stat = fp.close()
    # A non-zero status is returned, not asserted on: diff exits with
    # status 1 merely because the files differ, and callers need to see it.
    return res, stat
31
32
def compute_checksums(dirname, suffix):
    """Group the files under dirname that end with suffix by checksum.

    Args:
        dirname: directory to search recursively (via walk).
        suffix: only file names ending with this string are considered.

    Returns:
        A dictionary mapping each checksum to the list of file names that
        produced it.

    Raises:
        RuntimeError: if the checksum command reports failure or prints
            nothing for a file.
    """
    d = {}
    for name in walk(dirname):
        if not name.endswith(suffix):
            continue

        res, stat = compute_checksum(name)
        # The checksum is the first whitespace-delimited field. Splitting
        # once keeps file names that contain spaces from breaking the parse.
        fields = res.split(None, 1)
        if stat is not None or not fields:
            raise RuntimeError('md5sum failed for %s' % name)

        d.setdefault(fields[0], []).append(name)

    return d
48
49
def check_pairs(names):
    """Check whether every pair of distinct files in names is identical.

    Args:
        names: list of file names to compare pairwise with check_diff.

    Returns:
        False as soon as a comparison produces output or reports a
        non-success status; True otherwise (including for lists with fewer
        than two distinct names).
    """
    for name1 in names:
        for name2 in names:
            # name1 < name2 visits each unordered pair exactly once and
            # never compares a name with itself.
            if name1 < name2:
                res, stat = check_diff(name1, name2)
                # Empty output alone is not proof of equality: a failed
                # diff can also print nothing on standard output.
                if res or stat is not None:
                    return False
    return True
58
59
def print_duplicates(d):
    """Print each group of files that share a checksum.

    Args:
        d: dictionary mapping a checksum to a list of file names.

    For every list with more than one name, the names are printed, followed
    by a confirmation line when check_pairs reports them identical.
    """
    # values() exists on both Python 2 and 3 (iteritems() is Python 2 only),
    # and the checksum key itself is never used here.
    for names in d.values():
        if len(names) > 1:
            print('The following files have the same checksum:')
            for name in names:
                print(name)

            if check_pairs(names):
                print('And they are identical.')
69
70
if __name__ == '__main__':
    # Group the .py files under the current directory by checksum, then
    # report every group that has more than one member.
    print_duplicates(compute_checksums(dirname='.', suffix='.py'))