需求背景
最近有一个新的需求:需要将数据挪动到另外一个盘下面并做 md5 校验,并统计每个目录下的文件数量。
这个源码是用 os.walk 实现的,基于 os.listdir 的实现请参考另一篇博客。下面是源码:
# encoding: utf-8
import os
import sys
import shutil
import hashlib
import win32api
import time
import datetime
from multiprocessing import Process
def Usage():
    """Print the command-line synopsis and terminate with exit status 4."""
    script_name = os.path.basename(sys.argv[0])
    print("Usage:\n\t%s src_dir dst_dir" % script_name)
    sys.exit(4)
def checksrcdir(src_dir):
    """Switch the working directory to *src_dir* and return it.

    Prints a message and exits with status 3 when *src_dir* does not exist
    or is not a directory.

    :param src_dir: source directory that will be traversed and copied
    :return: the current working directory after the switch
    """
    if not os.path.exists(src_dir):
        print("%s 不存在" % src_dir)
        sys.exit(3)
    if not os.path.isdir(src_dir):
        # A regular file passes os.path.exists() but os.chdir() would raise
        # NotADirectoryError; report it the same way as a missing path.
        print("%s 不是目录" % src_dir)
        sys.exit(3)
    os.chdir(src_dir)  # traversal and copying run from inside the source dir
    return os.getcwd()
def checkpythonVersion():
    """Exit with status 5 unless the interpreter's major version is 3."""
    major = sys.version_info[0]
    if major == 3:
        return
    print("此脚本给予Python3版本编写,请使用Python3运行")
    sys.exit(5)
def checkdstdir(dst_dir):
    """Make sure the destination directory exists and return its real path.

    :param dst_dir: destination directory; created (with parents) if missing
    :return: ``os.path.realpath`` of the destination directory
    :raises FileExistsError: if *dst_dir* exists but is not a directory
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    # and still fails loudly when the path is occupied by a regular file.
    os.makedirs(dst_dir, exist_ok=True)
    return os.path.realpath(dst_dir)
def computmd5(filename, readsize):
    """Return the upper-case hexadecimal MD5 digest of the whole file.

    The file is read in chunks so that its entire content is hashed without
    loading it into memory at once.

    :param filename: path of the file to hash
    :param readsize: number of bytes to read per chunk; ``None``, 0 or a
        negative value selects a 1 MiB default chunk size
    :return: upper-case hexadecimal MD5 digest of the file content
    """
    chunk_size = readsize if readsize and readsize > 0 else 1024 * 1024
    hs = hashlib.md5()
    # "with" guarantees the handle is closed even if a read fails.
    with open(filename, mode='rb') as bigfile:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF),
        # so bytes beyond the first chunk are included in the digest.
        for chunk in iter(lambda: bigfile.read(chunk_size), b''):
            hs.update(chunk)
    return hs.hexdigest().upper()
def checkfilesize(filename):
    """Return the size of a file as reported by ``os.path.getsize``.

    :param filename: name or path of the file to measure
    :return: file size
    """
    return os.path.getsize(filename)
def write_log_file(filename, messages):
    """Append *messages* to the UTF-8 log file *filename*.

    The file is created if it does not exist.

    :param filename: log file name or path
    :param messages: log content; either a single string or an iterable of
        strings (written back to back, no separators added)
    :return: None
    """
    # The "with" block closes the file; no explicit close() is needed.
    with open(filename, mode='a', encoding='utf-8') as rg:
        if isinstance(messages, str):
            # writelines() on a str would write it one character at a time.
            rg.write(messages)
        else:
            rg.writelines(messages)
def truncatefile(filename):
    """Empty the file *filename*, creating it if it does not exist.

    :param filename: file to clear
    :return: None
    """
    # Opening in "w" mode already truncates the file to zero length, and the
    # "with" block closes it, so no truncate()/close() calls are required.
    with open(filename, mode='w', encoding='utf-8'):
        pass
def copyfile(srcfile, dstfile):
    """Copy *srcfile* to *dstfile* by delegating to ``shutil.copy2``.

    :param srcfile: path of the file to copy
    :param dstfile: destination path handed to ``shutil.copy2``
    """
    shutil.copy2(srcfile, dstfile)
def countnumber(dirs, countfile):
    """Append the number of entries in directory *dirs* to *countfile*.

    Every name returned by ``os.listdir`` is counted, so sub-directories are
    included in the total alongside regular files.

    :param dirs: directory whose entries are counted
    :param countfile: log file that receives the one-line summary
    """
    # NOTE: an earlier attempt to leave hidden files out of the count (via
    # win32api.GetFileAttributes) was disabled because it raised an error
    # on directory entries.
    count = len(os.listdir(dirs))
    msg = "%s 目录下共有 %d 个文件\n" % (dirs, count)
    write_log_file(filename=countfile, messages=msg)
# Hidden bit in the attribute mask returned by win32api.GetFileAttributes.
_FILE_ATTRIBUTE_HIDDEN = 0x2
# Files larger than this many bytes are hashed with readsize set to this value.
_LARGE_FILE_BYTES = 104857600
_COPIED_LOG = "C:\\Users\\UserName\\PycharmProjects\\dataencrypt\\copied.log"
_SRC_COUNT_LOG = "C:\\Users\\UserName\\PycharmProjects\\dataencrypt\\source_dir_files_number.log"
_DST_COUNT_LOG = "C:\\Users\\UserName\\PycharmProjects\\dataencrypt\\dist_dir_files_number.log"


def _is_hidden(path):
    """Return True when the HIDDEN attribute bit is set on *path*."""
    attrs = win32api.GetFileAttributes(path)
    # Test the bit instead of comparing with one exact mask (the old code
    # compared with 38), so hidden files with other flag combinations match.
    return attrs != -1 and bool(attrs & _FILE_ATTRIBUTE_HIDDEN)


def _copy_and_verify(src_path, dst_path, readsize):
    """Copy one file and return ``(source_md5, destination_md5)``."""
    src_md5 = computmd5(filename=src_path, readsize=readsize)
    copyfile(srcfile=src_path, dstfile=dst_path)
    dst_md5 = computmd5(filename=dst_path, readsize=readsize)
    return src_md5, dst_md5


def scandir(dirs, dst=None):
    """Copy the tree under *dirs* into *dst*, verifying each file by MD5.

    For every directory visited by ``os.walk``:

    * the matching destination directory is created if needed;
    * each non-hidden file is copied, the MD5 of source and copy are
      compared, and the outcome (success or mismatch) is appended to the
      copied.log file;
    * the destination sub-directories are created;
    * for every directory except the top one, the entry counts of the source
      and destination directory are logged. The top-level directories are
      left to the caller, which logs them itself.

    :param dirs: source root directory to walk
    :param dst: destination root directory; when ``None`` the module-level
        ``dst_dir`` variable is used, which keeps ``scandir(src)`` working
    :return: None
    """
    if dst is None:
        dst = dst_dir  # module-level value set by the script entry point

    for root, dirictorys, files in os.walk(dirs):
        # Path of the visited directory relative to the walked root; using
        # relpath on the parameter avoids str.replace() on a global, which
        # could also rewrite a repeated fragment in the middle of a path.
        rel_root = os.path.relpath(root, dirs)
        at_top = rel_root == os.curdir
        dst_root = dst if at_top else os.path.join(dst, rel_root)
        os.makedirs(dst_root, exist_ok=True)

        for file in files:
            src_full_file_path = os.path.join(root, file)
            if _is_hidden(src_full_file_path):
                continue  # hidden files are excluded from the copy
            dst_full_file_path = os.path.join(dst_root, file)

            is_large = checkfilesize(filename=src_full_file_path) > _LARGE_FILE_BYTES
            readsize = _LARGE_FILE_BYTES if is_large else -1
            srcmd5_value, dstmd5_value = _copy_and_verify(
                src_full_file_path, dst_full_file_path, readsize)

            if dstmd5_value != srcmd5_value:
                # Previously a mismatch was dropped silently; record it.
                msg = "源文件: %s 复制到 %s 后md5校验失败!\n\t源md5为 %s\n\t终md5为 %s\n" % (
                    src_full_file_path, dst_full_file_path, srcmd5_value, dstmd5_value)
            elif is_large:
                msg = "源文件: %s 已复制到 %s 这个路径下!\n\t源md5为 %s\n\t终md5为 %s\n" % (
                    src_full_file_path, dst_full_file_path, srcmd5_value, dstmd5_value)
            else:
                ss = file if at_top else os.path.join(rel_root, file)
                msg = "源文件: %s 已复制到 %s 这个位置下!\n\t源md5为 %s\n\t终md5为 %s\n" % (
                    ss, dst_full_file_path, srcmd5_value, dstmd5_value)
            write_log_file(filename=_COPIED_LOG, messages=msg)

        # Create the sub-directories now so the destination count below sees
        # them, just as os.listdir() on the source does.
        for dirictory in dirictorys:
            os.makedirs(os.path.join(dst_root, dirictory), exist_ok=True)

        # Count only after this directory's files have been copied; counting
        # right after mkdir would log a still-empty destination directory.
        if not at_top:
            countnumber(dirs=root, countfile=_SRC_COUNT_LOG)
            countnumber(dirs=dst_root, countfile=_DST_COUNT_LOG)
if __name__ == '__main__':
    # Script entry point: validate the directories, reset the log files,
    # log the top-level entry counts and copy the tree.
    print(time.strftime("%Y-%m-%d_%H:%M:%S"))

    # Raw strings: '\V', '\g' and '\D' are invalid escape sequences in
    # ordinary string literals. The resulting values are unchanged.
    src_dir = r'F:\VirtualMachines\glasterfs'
    dst_dir = r'F:\Downloads'
    # Optional override: "script.py src_dir dst_dir". With no arguments the
    # defaults above are used; any other argument count prints the usage.
    if len(sys.argv) == 3:
        src_dir, dst_dir = sys.argv[1], sys.argv[2]
    elif len(sys.argv) != 1:
        Usage()

    checkpythonVersion()
    checksrcdir(src_dir=src_dir)
    checkdstdir(dst_dir=dst_dir)

    log_dir = "C:\\Users\\UserName\\PycharmProjects\\dataencrypt\\"
    copied_log = log_dir + "copied.log"
    exists_log = log_dir + "exists.log"
    dst_count_log = log_dir + "dist_dir_files_number.log"
    src_count_log = log_dir + "source_dir_files_number.log"

    # Start every run with empty log files.
    for log_path in (copied_log, exists_log, dst_count_log, src_count_log):
        truncatefile(filename=log_path)

    countnumber(dirs=src_dir, countfile=src_count_log)
    # NOTE: running scandir in a multiprocessing.Process failed in testing
    # with an undeclared-variable error, so the copy runs in this process.
    # Presumably the child never defines src_dir/dst_dir because they are
    # assigned under this __main__ guard - TODO confirm.
    scandir(src_dir)
    countnumber(dirs=dst_dir, countfile=dst_count_log)
    print(time.strftime("%Y-%m-%d_%H:%M:%S"))
/* 多进程复制(multiprocessing.Process)在测试过程中会因为变量没有声明而报错,目前还没有调通。*/