Python 比较两个文件夹内容是否相同(多层级进度条)

以下代码使用 Python 比较两个文件夹的内容是否相同,文件命名为 compareDirectory.py。
该代码的优势在于:
1)通过层次遍历法跳过一些不同步的文件夹从而避免遍历不同步文件夹内的内容来达到剪枝的效果(花了一个晚上);
2)参阅了某 ACM 大佬关于比较版本号的算法,使用过程中的 list.pop(0) 和结尾处的 not any 进行加速运算(花了十分钟左右吧);
3)报告相对路径、增删情况等(花了一分钟左右吧);
4)通过 SHA256 哈希值比较文件内容:与逐字节对比相比,虽然两者都需要遍历整个文件,但哈希方法减少了 if 的执行次数(哈希值往往更短,且生成哈希与比较哈希的时间之和往往少于逐字节比较的时间),从而节省时间(花了一分钟左右吧);
5)拥有多层级进度报告结构(貌似这个功能花费的时间和前面四点之和一样)。

完整代码如下:

import platform
import os
from sys import stdin, exit
from shutil import copy, copytree, rmtree
import hashlib
# Process exit codes returned by main().
EXIT_SUCCESS = 0
EXIT_FAILURE = 1
# End-of-file sentinel; not referenced by the code visible in this file.
EOF = (-1)
# Entry names that compare() filters out of os.listdir() results.
specialFolders = ["", ".", ".."]
# Width in characters used for progress-bar lines and for blanking them out.
ncols = 100


class ProgressBar:
	"""Minimal one-line console progress indicator.

	The bar is rendered as ``desc: done / total = percent% postfix``, truncated
	to ``ncols`` characters, and redrawn in place by prefixing a carriage
	return. It is drawn once on construction and again on every successful
	update().
	"""
	def __init__(self, total:int, desc:str = "", postfix:str = "", ncols:int = 100) -> None:
		"""Store the configuration and draw the bar at zero progress.

		total   -- number of steps that represents 100 %
		desc    -- label printed before the counters
		postfix -- free text printed after the percentage
		ncols   -- maximum number of characters of the rendered line
		"""
		self.c = 0
		self.total = total
		self.desc = str(desc)
		self.postfix = str(postfix)
		self.ncols = ncols
		self.print()
	def update(self, c:int) -> bool:
		"""Advance the counter by ``c`` steps and redraw.

		Returns True on success and False (without redrawing) when ``c`` is
		not a non-negative int.
		"""
		if isinstance(c, int) and c >= 0:
			self.c += c
			self.print()
			return True
		return False
	def set_postfix(self, postfix:str) -> None:
		"""Replace the trailing text; takes effect on the next redraw."""
		self.postfix = str(postfix)
	def print(self) -> None:
		"""Redraw the bar on the current console line (no trailing newline)."""
		print("\r" + str(self), end = "")
	def __str__(self) -> str:
		"""Return the rendered line, or an empty string if rendering fails."""
		try:
			# The percentage is only meaningful for a positive total.
			if self.c >= 0 and self.total > 0:
				percent = 100 * self.c / self.total
			else:
				percent = float("nan")
			line = "{0}: {1} / {2} = {3:.2f}% {4}".format(self.desc, self.c, self.total, percent, self.postfix)
			return line[:self.ncols]
		except Exception:
			# Rendering must never break the caller's loop, but Ctrl+C and
			# SystemExit are deliberately no longer swallowed here.
			return ""


def clearScreen(fakeClear:int = 120):
	"""Clear the console, or push old output away with blank lines.

	When stdin is an interactive terminal, "cls" is run on Windows and
	"clear" on Linux. In every other situation (another platform, or stdin
	not attached to a terminal) ``fakeClear`` newlines are printed instead;
	120 is used if ``fakeClear`` cannot be turned into a usable line count.
	"""
	if stdin.isatty(): # is at a console
		system = platform.system().lower()
		if system == "windows":
			os.system("cls")
			return
		if system == "linux":
			os.system("clear")
			return
	# Fallback: emulate clearing by scrolling the previous output away.
	try:
		blank = "\n" * int(fakeClear)
	except (TypeError, ValueError, OverflowError, MemoryError):
		blank = "\n" * 120
	print(blank)

def SHA256(fpath:str, isEcho:bool = False) -> str|Exception|None:
	if not os.path.isfile(fpath):
		return None
	try:
		with open(fpath, "rb") as f:
			hash = hashlib.new("SHA256")
			for chunk in iter(lambda: f.read(1 << 20), b""):
				hash.update(chunk)
			return hash.hexdigest()
	except Exception as e:
		if isEcho:
			print("\"{0}\" -> {1}".format(fpath, e))
		return e

def compare(rootDir1:str, rootDir2:str, dir1:str, dir2:str, compareFileContent:bool = True, caseSensitive:bool = True, indent:int = 0, flags:list = None) -> tuple:
	"""Recursively compare one directory level of two trees.

	The entries of ``dir1`` and ``dir2`` are sorted and merged in lock-step.
	Names present on both sides are descended into (two directories),
	hashed with SHA256() (two files, only if ``compareFileContent``) or
	reported as a conflict (file on one side, directory on the other).
	A directory that exists on one side only is reported once and never
	descended into.

	rootDir1, rootDir2 -- tree roots; reported paths are relative to them
	dir1, dir2         -- the pair of directories compared by this call
	compareFileContent -- hash files that exist on both sides
	caseSensitive      -- when False, names are paired ignoring case
	indent             -- recursion depth, shown as the progress-bar label
	flags              -- one-element list shared by the whole recursion;
	                      flags[0] is set to False once Ctrl+C is received

	Returns (addLists, removeLists, conflictLists, exceptionLists,
	differLists): entries only in dir2 (relative to rootDir2), entries only
	in dir1, file/directory conflicts, (path, error) tuples, and files whose
	hashes differ (all relative to rootDir1).
	"""
	if flags is None: # a fresh flag per top-level call, never a shared default list
		flags = [True]
	addLists, removeLists, conflictLists, exceptionLists, differLists = [], [], [], [], []
	results = (addLists, removeLists, conflictLists, exceptionLists, differLists)

	def nameKey(name:str) -> str:
		# Key used for both sorting and pairing so the two always agree.
		return name if caseSensitive else name.lower()

	def listEntries(folder:str) -> list:
		# One directory level without the special entries; the original name
		# is the tie-breaker so the order is deterministic when ignoring case.
		return sorted((item for item in os.listdir(folder) if item not in specialFolders), key = lambda item: (nameKey(item), item))

	def report(step:int) -> None:
		# Refresh the counters shown after the bar and advance it.
		pBar.set_postfix("(a, r, c, e, d) = ({0}, {1}, {2}, {3}, {4})".format(*(len(items) for items in results)))
		pBar.update(step)

	try:
		listDir1, listDir2 = listEntries(dir1), listEntries(dir2)
	except Exception as e:
		exceptionLists.append((os.path.relpath(dir1, rootDir1), e))
		# Blank this line, then move the cursor up one line and erase it
		# (ANSI "\x1b[F" / "\x1b[K") to return to the parent level's bar.
		print("\r" + " " * ncols + "\x1b[F\x1b[K", end = "")
		return results
	count1, count2 = len(listDir1), len(listDir2)
	pBar = ProgressBar(total = count1 + count2, desc = "Layer {0}".format(indent), postfix = "(a, r, c, e, d) = (0, 0, 0, 0, 0)", ncols = ncols)
	pos1, pos2 = 0, 0 # read cursors; avoids the O(n) cost of list.pop(0)
	try:
		while pos1 < count1 and pos2 < count2:
			name1, name2 = listDir1[pos1], listDir2[pos2]
			key1, key2 = nameKey(name1), nameKey(name2)
			if key1 == key2: # same name: check that the kinds (directory or file) agree
				target1, target2 = os.path.join(dir1, name1), os.path.join(dir2, name2)
				if os.path.isdir(target1) and os.path.isdir(target2): # both directories: recurse
					print() # move down one line for the child level's bar
					subResults = compare(rootDir1, rootDir2, target1, target2, compareFileContent = compareFileContent, caseSensitive = caseSensitive, indent = indent + 1, flags = flags)
					for own, sub in zip(results, subResults):
						own.extend(sub)
					if not flags[0]: # the child level was interrupted: unwind this level too
						raise KeyboardInterrupt
				elif os.path.isfile(target1) and os.path.isfile(target2): # both files
					if compareFileContent:
						sha1 = SHA256(target1)
						sha2 = SHA256(target2)
						if isinstance(sha1, str) and isinstance(sha2, str):
							if sha1 != sha2:
								differLists.append(os.path.relpath(target1, rootDir1))
						else: # at least one side could not be hashed
							exceptionLists.append((os.path.relpath(target1, rootDir1), (sha1, sha2)))
				else: # a file on one side and a directory on the other
					conflictLists.append(os.path.relpath(target1, rootDir1))
				pos1 += 1
				pos2 += 1
				report(2)
			elif key1 < key2: # only in dir1: mark as removed
				removeLists.append(os.path.relpath(os.path.join(dir1, name1), rootDir1))
				pos1 += 1
				report(1)
			else: # only in dir2: mark as added
				addLists.append(os.path.relpath(os.path.join(dir2, name2), rootDir2))
				pos2 += 1
				report(1)
		# At most one side still has unmatched entries at this point.
		if pos1 < count1:
			rest = listDir1[pos1:]
			removeLists.extend([os.path.relpath(os.path.join(dir1, item), rootDir1) for item in rest])
			report(len(rest))
		elif pos2 < count2:
			rest = listDir2[pos2:]
			addLists.extend([os.path.relpath(os.path.join(dir2, item), rootDir2) for item in rest])
			report(len(rest))
		print("\r" + " " * ncols + "\x1b[F\x1b[K", end = "") # back up to the parent level's bar
	except KeyboardInterrupt:
		flags[0] = False # tell every enclosing level to stop; partial results are still returned
	return results

def selectOperation(addFlag:bool, removeFlag:bool, differFlag:bool) -> int:
	"""Print the operation menu and read a valid choice from the user.

	addFlag    -- offer options 1 and 2 (items only in the target folder)
	removeFlag -- offer options 3 and 4 (items only in the source folder)
	differFlag -- offer options 5 and 6 (files whose content differs)

	Options 7, 8, 9 and 0 are always offered. Only options shown in the menu
	are accepted; anything else is treated as invalid input, so an operation
	can never be started on an empty list. Options 1-6 modify files and
	therefore need an extra, case-sensitive "Y" confirmation.

	Returns the chosen option as an int in the range 0-9.
	"""
	harmless = ("7", "8", "9", "0") # no confirmation required
	available = set(harmless)
	print("\n可供选择的操作如下(“删除”指直接删除而非移至回收站):")
	if addFlag:
		print("\t1 = 从目标文件夹删除目标文件夹拥有而源文件夹没有的文件(源 → 目标)")
		print("\t2 = 从目标文件夹向源文件夹复制目标文件夹拥有而源文件夹没有的文件(目标 → 源)")
		available |= {"1", "2"}
	if removeFlag:
		print("\t3 = 从源文件夹向目标文件夹复制源文件夹拥有而目标文件夹没有的文件(源 → 目标)")
		print("\t4 = 从源文件夹删除源文件夹拥有而目标文件夹没有的文件(目标 → 源)")
		available |= {"3", "4"}
	if differFlag:
		print("\t5 = 从源文件夹向目标文件夹同步内容不同的文件(源 → 目标)")
		print("\t6 = 从目标文件夹向源文件夹同步内容不同的文件(目标 → 源)")
		available |= {"5", "6"}
	print("\t7 = 保存对比结果")
	print("\t8 = 重新发起检查(原有配置)")
	print("\t9 = 发起新的检查")
	print("\t0 = 退出程序")
	print()
	iRet = input("请选择一项以继续:")
	while True:
		if iRet not in available:
			iRet = input("无效输入,请重试:")
		elif iRet in harmless or input("即将执行操作 {0},为确保不是误触,请输入“Y”(区分大小写)回车以再次确认:".format(iRet)) == "Y":
			return int(iRet)
		else:
			iRet = input("输入取消,请重新输入:")

def doRemove(folder:str, targetList:list) -> bool:
	"""Delete every listed item below ``folder`` permanently.

	folder     -- base directory the relative paths are joined to
	targetList -- relative paths of files or directories to delete

	Directories are removed recursively with rmtree(), everything else with
	os.remove(). A failure is printed and does not stop the remaining
	deletions. An empty list is a no-op that counts as success.

	Returns True when every item was removed, otherwise False.
	"""
	successCnt, totalCnt = 0, 0
	for item in targetList:
		totalCnt += 1
		toRemoveFp = os.path.join(folder, item)
		try:
			if os.path.isdir(toRemoveFp):
				rmtree(toRemoveFp)
			else:
				os.remove(toRemoveFp)
			successCnt += 1
		except Exception as e:
			print("Failed removing \"{0}\". Details are as follows. \n{1}".format(toRemoveFp, e))
	# An empty list used to raise ZeroDivisionError here; report it as 100 %.
	rate = 100 * successCnt / totalCnt if totalCnt else 100.0
	print("删除完成,删除成功率:{0} / {1} = {2}%。".format(successCnt, totalCnt, rate))
	return successCnt == totalCnt

def doCopy(dir1:str, dir2:str, targetList:list) -> bool:
	"""Copy every listed item from ``dir1`` to the same relative path in ``dir2``.

	dir1       -- base directory to copy from
	dir2       -- base directory to copy to
	targetList -- relative paths of files or directories to copy

	Directories are copied with copytree(), everything else with copy().
	A failure is printed and does not stop the remaining copies. An empty
	list is a no-op that counts as success.

	Returns True when every item was copied, otherwise False.
	"""
	successCnt, totalCnt = 0, 0
	for item in targetList:
		totalCnt += 1
		sourceFp = os.path.join(dir1, item)
		targetFp = os.path.join(dir2, item)
		try:
			if os.path.isdir(sourceFp):
				copytree(sourceFp, targetFp)
			else:
				copy(sourceFp, targetFp)
			successCnt += 1
		except Exception as e:
			print("Failed copying \"{0}\" to \"{1}\". Details are as follows. \n{2}".format(sourceFp, targetFp, e))
	# An empty list used to raise ZeroDivisionError here; report it as 100 %.
	rate = 100 * successCnt / totalCnt if totalCnt else 100.0
	print("复制完成,复制成功率:{0} / {1} = {2}%。".format(successCnt, totalCnt, rate))
	return successCnt == totalCnt

def doCompare(dir1:str, dir2:str, compareFileContent:bool = True, caseSensitive:bool = True, state:list = None) -> bool|None:
	"""Run one interactive comparison session for a pair of folders.

	The screen is cleared, both paths are validated, compare() is run, the
	result lists are printed, and the user is then asked repeatedly what to
	do with them via selectOperation().

	dir1               -- source folder
	dir2               -- target folder
	compareFileContent -- forwarded to compare(); also controls whether
	                      differLists is printed and saved
	caseSensitive      -- forwarded to compare() and used for the
	                      "same folder" check
	state              -- one-element list; state[0] is set to False when
	                      the user chooses to quit the program (option 0)

	Returns None when a path is invalid or both paths are the same.
	Otherwise returns True if the last check found no additions, removals,
	conflicts, errors or differences, and False if it found any.
	"""
	if state is None: # a fresh list per call, never a shared default
		state = [True]
	while True: # one iteration per full check; option 8 loops back here
		clearScreen()
		if not os.path.isdir(dir1):
			print("源文件夹不存在:\"{0}\"\n请按回车键返回。".format(dir1))
			input()
			return None
		if not os.path.isdir(dir2):
			print("目标文件夹不存在:\"{0}\"\n请按回车键返回。".format(dir2))
			input()
			return None
		if dir1 == dir2 or not caseSensitive and dir1.lower() == dir2.lower():
			print("源文件夹路径和目标文件夹路径相同,请按回车键返回。")
			input()
			return None
		print("源文件夹:\"{0}\"".format(dir1))
		print("目标文件夹:\"{0}\"".format(dir2))
		print()
		flags = [True] # compare() sets flags[0] to False when interrupted by Ctrl+C
		addLists, removeLists, conflictLists, exceptionLists, differLists = compare(dir1, dir2, dir1, dir2, compareFileContent = compareFileContent, caseSensitive = caseSensitive, flags = flags)
		if not flags[0]:
			print("\nThe process is interrupted by users. ")
		print()
		print("addLists = {0}".format(addLists))
		print("removeLists = {0}".format(removeLists))
		print("conflictLists = {0}".format(conflictLists))
		print("exceptionLists = {0}".format(exceptionLists))
		if compareFileContent:
			print("differLists = {0}".format(differLists))
			print("Totally {0} added, {1} removed, {2} conflicted, {3} erroneous, and {4} different items. ".format(len(addLists), len(removeLists), len(conflictLists), len(exceptionLists), len(differLists)))
		else:
			print("Totally {0} added, {1} removed, {2} conflicted, and {3} erroneous items. ".format(len(addLists), len(removeLists), len(conflictLists), len(exceptionLists)))
		recheck = False
		while not recheck:
			# NOTE: the lists are not refreshed after an operation; option 8
			# re-runs the check to obtain up-to-date results.
			choice = selectOperation(bool(addLists), bool(removeLists), bool(differLists))
			if choice == 1:
				doRemove(dir2, addLists)
			elif choice == 2:
				doCopy(dir2, dir1, addLists)
			elif choice == 3:
				doCopy(dir1, dir2, removeLists)
			elif choice == 4:
				doRemove(dir1, removeLists)
			elif choice == 5:
				doCopy(dir1, dir2, differLists)
			elif choice == 6:
				doCopy(dir2, dir1, differLists)
			elif choice == 7: # save the report to a file chosen by the user
				fpath = input("请输入比对结果保存路径(留空取消):").replace("\"", "")
				if fpath:
					try:
						with open(fpath, "w", encoding = "utf-8") as f:
							f.write("Source = \"{0}\"\n".format(dir1))
							f.write("Target = \"{0}\"\n".format(dir2))
							f.write("addLists = {0}\n".format(addLists))
							f.write("removeLists = {0}\n".format(removeLists))
							f.write("conflictLists = {0}\n".format(conflictLists))
							f.write("exceptionLists = {0}\n".format(exceptionLists))
							if compareFileContent:
								f.write("differLists = {0}\n".format(differLists))
								f.write("Totally {0} added, {1} removed, and {2} different files. \n".format(len(addLists), len(removeLists), len(differLists)))
							else:
								f.write("Totally {0} added and {1} removed files. \n".format(len(addLists), len(removeLists)))
						print("保存成功!")
					except Exception as e:
						print("保存失败,异常信息如下:")
						print(e)
			elif choice == 8: # re-check with the same settings
				recheck = True
			elif choice in (9, 0): # 9 = start a new check, 0 = quit the program
				if choice == 0:
					state[0] = False
				return not any([addLists, removeLists, conflictLists, exceptionLists, differLists])

def main() -> int:
	"""Prompt for two folders and comparison options until the user quits.

	Each round asks for the source path, the target path, whether file
	content should be compared and whether names are case sensitive, then
	hands over to doCompare(). The loop ends once doCompare() clears
	state[0].

	Returns EXIT_SUCCESS when the last doCompare() result was truthy,
	otherwise EXIT_FAILURE.
	"""
	def askPath(prompt):
		# Double quotes are stripped so quoted paths can be pasted as-is.
		return input(prompt).replace("\"", "")

	def askYes(prompt):
		# "1", "y" and "Y" mean yes; any other answer means no.
		return input(prompt).upper() in ("1", "Y")

	state = [True]
	while state[0]:
		sourcePath = askPath("请输入源文件夹路径:")
		targetPath = askPath("请输入目标文件夹路径:")
		wantContent = askYes("请选择是否需要比较文件内容(输入“Y”表示“是”):")
		wantCase = askYes("请选择大小写是否敏感(输入“Y”表示“是”):")
		bRet = doCompare(sourcePath, targetPath, compareFileContent = wantContent, caseSensitive = wantCase, state = state)
		clearScreen()
	if bRet:
		return EXIT_SUCCESS
	return EXIT_FAILURE



# Start the interactive tool only when this file is executed as a script,
# and use the value returned by main() as the process exit code.
if __name__ == "__main__":
	exit(main())

PS:某些输出处貌似有点中英混搭,请多多谅解。

最后,附上一些运行截图:

多层级进度条显示
中断运行与后续操作

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
### 回答1: 可以使用Python文件读取和比较函数来比较两个文件内容是否相同。具体步骤如下: 1. 使用open函数打开两个文件,分别读取文件内容。 2. 使用read函数读取文件内容,并将其存储到变量中。 3. 使用比较运算符(==)比较两个文件内容是否相同。 4. 如果相同,返回True,否则返回False。 示例代码如下: ``` def compare_files(file1, file2): with open(file1, 'r') as f1, open(file2, 'r') as f2: content1 = f1.read() content2 = f2.read() if content1 == content2: return True else: return False ``` 其中,file1和file2分别为要比较两个文件的路径。使用该函数即可比较两个文件内容是否相同。 ### 回答2: Python比较两个文件内容是否相同有很多方法,下面介绍几种比较常用的方法: 方法1: 使用filecmp模块 filecmp模块是Python中用于比较文件和目录的模块,可以用它来比较两个文件内容是否相同。 示例代码: ```python import filecmp result = filecmp.cmp('file1.txt', 'file2.txt') if result: print("两个文件内容相同") else: print("两个文件内容不同") ``` 方法2: 使用hashlib模块 使用hashlib模块可以计算文件的哈希值,然后比较两个文件的哈希值是否相同。 示例代码: ```python import hashlib def md5(fname): hash_md5 = hashlib.md5() with open(fname, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): hash_md5.update(chunk) return hash_md5.hexdigest() file1_md5 = md5('file1.txt') file2_md5 = md5('file2.txt') if file1_md5 == file2_md5: print("两个文件内容相同") else: print("两个文件内容不同") ``` 方法3: 使用difflib模块 使用difflib模块可以比较两个文件的差异,如果两个文件内容相同,差异就会很小。 示例代码: ```python import difflib with open('file1.txt') as file1: with open('file2.txt') as file2: diff = difflib.ndiff(file1.readlines(), file2.readlines()) for line in diff: if line.startswith('-'): print("文件内容不同") break else: print("文件内容相同") ``` 以上就是比较两个文件内容是否相同的几种方法。每种方法都有自己的优缺点,可以根据实际需要选择合适的方法。 ### 回答3: Python比较两个文件内容是否相同可以使用文件读取和字符串比较的方法来实现。 1.方法一:使用文件读取和字符串比较 首先,可以使用with open方法分别打开两个文件,读取文件内容,并保存为两个字符串变量。接着,使用字符串比较方法(如==)比较两个字符串是否相同,判断文件内容是否一致。代码示例如下: ```python with open('file1.txt', 'r') as f1, open('file2.txt', 'r') as f2: str1 = f1.read() str2 = f2.read() if str1 == str2: print('文件内容相同') else: print('文件内容不同') ``` 2.方法二:使用hashlib比较文件内容 除了使用字符串比较方法,还可以使用hashlib模块实现对文件内容的hash值比较,判断文件内容是否相同。具体实现步骤如下: (1)使用with open方法打开文件,读取二进制文件内容,并进行hash算法计算,将结果保存为字符串变量; (2)比较两个文件的hash值是否相同,判断文件内容是否一致。代码示例如下: ```python import hashlib def compare_file(filepath1, filepath2): with open(filepath1, 
'rb') as f1, open(filepath2, 'rb') as f2: hash1 = hashlib.md5(f1.read()).hexdigest() hash2 = hashlib.md5(f2.read()).hexdigest() if hash1 == hash2: print('文件内容相同') else: print('文件内容不同') ``` 需要注意的是,使用hashlib比较文件内容需要注意文件的大小问题。如果文件过大,代码执行的时间可能会很长,因为hash算法需要将整个文件读取并进行计算。因此,在比较文件内容前,可以先比较文件大小是否相同,如果大小不同,则直接判断文件内容不同。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值