1. 需求
把两个文本文件的内容合并到一个文件中,且并不要求文件中行的顺序保持不变。
该方法适用于两个文本文件中大部分数据完全相同、仅存在少量不一致之处的情况。
2. 方案
因为两个文本文件中存在大量相同的行,且不要求保持行的顺序,所以可以利用集合(set)去重:
把两个文件的内容读到list中,然后再转换成set类型,最后取并集即可。
3. 实现代码
def merge_text_file(first_filename, second_filename, merged_filename):
    '''
    Merge the lines of two text files into one file, dropping duplicates.

    Every line is stripped of surrounding whitespace, blank lines are
    discarded, and the remaining distinct lines are written to
    merged_filename in sorted order, one per line. The original order of
    the lines is NOT preserved.

    Both source files are read completely before merged_filename is
    opened for writing.

    Parameters:
        first_filename  -- path of the first source text file
        second_filename -- path of the second source text file
        merged_filename -- path of the file to (over)write with the result

    example:
        source_file_1 = "a.txt"
        source_file_2 = "b.txt"
        merged_file = "merged.txt"
        merge_text_file(source_file_1, source_file_2, merged_file)
    '''
    merged_lines = set()
    for source_filename in (first_filename, second_filename):
        # "with" guarantees the handle is closed even if reading fails.
        with open(source_filename, 'r') as source_file:
            for line in source_file:
                # Strip BEFORE de-duplicating so that lines differing only
                # in surrounding whitespace (e.g. a trailing "\r" or space)
                # collapse into a single entry.
                stripped = line.strip()
                if stripped:
                    merged_lines.add(stripped)

    with open(merged_filename, 'w') as result_file:
        for line in sorted(merged_lines):
            result_file.write(line + '\n')
4. 使用示例
import file_utilities
def _write_temp_file(filename, values):
    '''
    Write each string in values to filename, one per line.

    The file is opened in "w" mode, so any existing content is replaced.
    '''
    # "with" closes the file even if one of the writes raises.
    with open(filename, "w") as f:
        f.writelines(value + '\n' for value in values)
def _test():
    '''
    Smoke-test file_utilities.merge_text_file.

    Writes two small input files into the current directory, merges them,
    and prints "PASS" if the merged file contains exactly the union of the
    input lines, otherwise "FAIL". The three files are left on disk.
    '''
    first_values = ["first", "second"]
    second_values = ["first", "third"]
    first_filename = "first_abcdefghijklmn.xyz"
    second_filename = "second_abcdefghijklmn.xyz"
    merged_filename = "merged_abcdefghijklmn.xyz"
    _write_temp_file(first_filename, first_values)
    _write_temp_file(second_filename, second_values)
    file_utilities.merge_text_file(first_filename, second_filename, merged_filename)

    # assert the result
    with open(merged_filename, "r") as merged_file:
        # Drop every empty piece produced by the split (normally just the
        # one after the trailing newline) without failing when there is none.
        result = [line for line in merged_file.read().split('\n') if line != '']
    # A single pre-formatted argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("result: %s" % (result,))
    result = set(result)
    expected_result = set(['first', 'second', 'third'])
    print("expected result: %s" % (expected_result,))
    if result == expected_result:
        print("PASS")
    else:
        print("FAIL")
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    _test()
运行结果:
D:\examples\python\file_utilities>python test_merge_text_file.py
result: ['first', 'second', 'third']
expected result: set(['second', 'third', 'first'])
PASS
D:\examples\python\file_utilities>