环境:基于 Python 3
脚本名:file_diff.py
测试1:
>> python file_diff.py -f1 E:\work\cfs\diff-cmp\a.txt -f2 E:\work\cfs\diff-cmp\b.txt
输出:
same
测试2:
a 文件夹中: a.txt b.txt c.txt
b 文件夹中: a.txt b.txt d.txt
输出到 diff 文件夹中: b.txt.diff.html
>> python file_diff.py -f1_dir \diff-cmp\a\ -f2_dir \diff-cmp\b\ -d \diff-cmp\diff
输出:
only_src_file_names:1
['c.txt']
only_dest_file_names:1
['d.txt']
same_file_names: 1
['a.txt']
different_file_names: 1
['b.txt']
代码:
# -*- coding: utf8 -*-
import sys
import os
from difflib import HtmlDiff
from argparse import ArgumentParser
import filecmp
def list_file_names(root_dir):
    """Return the names of the regular files directly inside ``root_dir``.

    Only the top level of the directory is inspected (no recursion) and
    sub-directories are skipped. Each element is a bare file name, not a
    full path; the order is whatever ``os.listdir`` yields.
    """
    return [
        entry
        for entry in os.listdir(root_dir)
        if os.path.isfile(os.path.join(root_dir, entry))
    ]
def read_lines(file_name):
    """Read a text file and return its lines, exiting the program on failure.

    Args:
        file_name: Path of the file to read. It is opened in text mode with
            the platform's default encoding.

    Returns:
        The list of lines in the file, line endings included.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read;
            a message naming the file is printed first.
    """
    try:
        with open(file_name, 'r') as f:
            return f.readlines()
    except Exception as ex:
        # str() keeps the report itself from raising TypeError when file_name
        # is not a str (e.g. None or a pathlib.Path), which would otherwise
        # hide the real error and skip the exit below.
        print(str(file_name) + '打开失败1:' + str(ex))
        sys.exit(1)
def file_compare(file_src, file_dest, result_dir):
    """Write an HTML side-by-side diff of two text files into ``result_dir``.

    The report is saved as ``<base name of file_src>.diff.html``.

    Args:
        file_src: Path of the left-hand ("from") file.
        file_dest: Path of the right-hand ("to") file.
        result_dir: Directory that receives the HTML report.

    Returns:
        None. A failure to write the report is printed, not raised, so a
        caller looping over many files can carry on with the next one.
    """
    lines_src = read_lines(file_src)
    lines_dest = read_lines(file_dest)
    report = HtmlDiff().make_file(lines_src, lines_dest)
    result_path = os.path.join(
        result_dir, os.path.basename(file_src) + '.diff' + '.html')
    try:
        # make_file() declares charset=utf-8 in the page's <meta> tag by
        # default, so the bytes on disk must be UTF-8 too; relying on the
        # platform default encoding garbles non-ASCII text in the browser.
        with open(result_path, 'w', encoding='utf-8') as f:
            # The report is one string: write it in a single call instead of
            # letting writelines() iterate it character by character.
            f.write(report)
    except Exception as ex:
        print(result_path + '结果写入失败:' + str(ex))
def file_same(file_src, file_dest):
    """Return True when the two files have identical contents.

    Args:
        file_src: Path of the first file.
        file_dest: Path of the second file.

    Returns:
        bool: True if the files' contents are equal, False otherwise.
    """
    # shallow=False forces a content comparison. The default shallow mode
    # treats files whose type, size and modification time match as equal
    # without reading them, which can report different files as "same".
    return filecmp.cmp(f1=file_src, f2=file_dest, shallow=False)
def _resolve_result_dir(result_dir, source_path):
    """Choose the report directory and create it if it does not exist yet."""
    # './' is the argparse default and doubles as the "not specified" marker:
    # in that case reports go next to the source path.
    if result_dir == './':
        # dirname() is '' for a bare name such as 'a.txt'; fall back to the
        # current directory because os.makedirs('') raises.
        result_dir = os.path.dirname(source_path) or '.'
    if not os.path.exists(result_dir):
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(result_dir, exist_ok=True)
    return result_dir


def _compare_directories(file_src_dir, file_dest_dir, result_dir):
    """Compare same-named files of two directories and print a summary."""
    file_names_src = list_file_names(file_src_dir)
    file_names_dest = list_file_names(file_dest_dir)
    src_name_set = set(file_names_src)
    dest_name_set = set(file_names_dest)

    only_src_file_names = []
    same_file_names = []
    different_file_names = []
    for file_name in file_names_src:
        if file_name not in dest_name_set:
            only_src_file_names.append(file_name)
            continue
        file_src = os.path.join(file_src_dir, file_name)
        file_dest = os.path.join(file_dest_dir, file_name)
        if file_same(file_src, file_dest):
            same_file_names.append(file_name)
        else:
            different_file_names.append(file_name)
            # Only files that differ get an HTML report.
            file_compare(file_src, file_dest, result_dir=result_dir)
    only_dest_file_names = [
        file_name for file_name in file_names_dest
        if file_name not in src_name_set
    ]

    summary = (
        ("only_src_file_names:%d\n", only_src_file_names),
        ('only_dest_file_names:%d\n', only_dest_file_names),
        ('same_file_names: %d\n', same_file_names),
        ('different_file_names: %d\n', different_file_names),
    )
    for header, file_names in summary:
        print(header % len(file_names))
        print(file_names)
        print('\n')


def main():
    """Command-line entry point: diff two files or two directories.

    Single-file mode (``-f1``/``-f2``) prints ``same`` when the files are
    identical, otherwise writes an HTML diff report. Directory mode
    (``-f1_dir``/``-f2_dir``) compares the files sharing a name in both
    directories (top level only), writes a report for each differing pair
    and prints which names are source-only, destination-only, same and
    different. Directory mode is selected whenever ``-f1_dir`` is given.

    Reports go to ``-d``; when ``-d`` is left at its default they go to the
    directory part of the source path.

    Raises:
        SystemExit: Via ``parser.error`` (status 2) when the arguments needed
            by the selected mode are missing.
    """
    parser = ArgumentParser()
    parser.add_argument('-f1', '--file_src', dest='file_src',
                        help='source file (single-file mode)')
    parser.add_argument('-f2', '--file_dest', dest='file_dest',
                        help='destination file (single-file mode)')
    parser.add_argument('-d', '--result_dir', dest='result_dir', default='./',
                        help='directory that receives the HTML reports')
    # Options for comparing every file of two directories.
    parser.add_argument('-f1_dir', '--file_src_dir', dest='file_src_dir',
                        default=None, help='source directory')
    parser.add_argument('-f2_dir', '--file_dest_dir', dest='file_dest_dir',
                        default=None, help='destination directory')
    options = parser.parse_args()

    if not options.file_src_dir:
        # Without these the run would die later with an unrelated TypeError.
        if not options.file_src or not options.file_dest:
            parser.error('-f1/--file_src and -f2/--file_dest are required '
                         'unless -f1_dir/--file_src_dir is given')
        result_dir = _resolve_result_dir(options.result_dir, options.file_src)
        if file_same(options.file_src, options.file_dest):
            print("same")
        else:
            file_compare(file_src=options.file_src,
                         file_dest=options.file_dest,
                         result_dir=result_dir)
    else:
        # os.listdir(None) lists the current directory, so a missing -f2_dir
        # would silently compare against the wrong place.
        if not options.file_dest_dir:
            parser.error('-f2_dir/--file_dest_dir is required together with '
                         '-f1_dir/--file_src_dir')
        result_dir = _resolve_result_dir(options.result_dir,
                                         options.file_src_dir)
        _compare_directories(options.file_src_dir, options.file_dest_dir,
                             result_dir)
if __name__ == '__main__':
    # Run the command-line interface only when executed as a script,
    # not when this module is imported.
    main()