1 #-*- coding: utf-8 -*-
2 #@author: Tele
3 #@Time : 2019/04/02 下午 3:09
# Copy a directory tree using multiple processes; inside each process, large
# files are additionally copied with multiple threads.
# When the processes come from a pool, the progress message queue must be
# created with multiprocessing.Manager().Queue() (a plain Queue cannot be
# passed to pool workers).
6
7 importtime8 importre9 importos10 importshutil11 importmultiprocessing12 importmath13 from concurrent.futures importThreadPoolExecutor, wait14
# Threshold for a "large" file: 209715200 bytes (200 MB). Files at or below
# this size are streamed directly; larger files are split into slices and
# copied by the thread pool below.
MAX_SINGLE_FILE_SIZE = 209715200
# Lock guarding the shared file handles during threaded slice copies — the
# seek/read/write sequence in copy_file_thread must not interleave.
# NOTE(review): this is a multiprocessing.Lock used by threads within one
# worker process; a threading.Lock would presumably suffice — confirm.
mutex = multiprocessing.Lock()
# Per-process thread pool used to copy slices of large files.
executor = ThreadPoolExecutor(max_workers=3)
20
def walk_file(file):
    """Collect the names of every file under *file*, plus the names of any
    empty directories (so those count as copy targets too).

    Only the length of the returned list is used by callers, so basenames
    rather than full paths are sufficient.
    """
    collected = []
    for root, dirs, files in os.walk(file):
        # Every regular file is one copy target.
        collected.extend(files)
        # An empty directory is also a target: it must be recreated.
        collected.extend(
            d for d in dirs if not os.listdir(os.path.join(root, d))
        )
    return collected
35
def get_file_count(dir):
    """Return how many copy targets (files plus empty dirs) live under *dir*."""
    targets = walk_file(dir)
    return len(targets)
40
# Copy one file or one directory tree. Runs inside a pool worker process.
def copy(src, target, queue):
    # Number of items this call will copy; reported to the parent through
    # *queue* so the parent's progress loop can keep a running total.
    target_number = 1
    # Read size in bytes for the plain (small-file) copy path.
    buffer = 1024
    if os.path.isdir(src):
        # Directory: copy every file underneath and recreate empty dirs.
        target_number = get_file_count(src)
        for root, dirs, files in os.walk(src):
            for f in files:
                # Destination = drive letter of *target* + the source path
                # with its own drive stripped.
                # NOTE(review): this keeps only the drive of *target*, so it
                # assumes src and target differ solely by drive letter (true
                # for the hard-coded f:/ -> e:/ paths in main) — confirm
                # before reusing with arbitrary destinations.
                drive = os.path.splitdrive(target)[0]
                target = drive + os.path.splitdrive(os.path.join(root, f))[1]
                copy_single_file(buffer, os.path.join(root, f), target)
            # Empty directories: create them so the tree shape is preserved.
            for d in dirs:
                drive = os.path.splitdrive(target)[0]
                target = drive + os.path.splitdrive(os.path.join(root, d))[1]
                # Create the directory level (and any missing parents).
                if not os.path.exists(target):
                    os.makedirs(target)
    else:
        # Single file: copy it directly to *target*.
        copy_single_file(buffer, src, target)
    # Report the number of items this task copied to the progress queue.
    queue.put(target_number)
65
# Copy a single file.
def copy_single_file(buffer, src, target):
    """Copy *src* to *target*, creating parent directories as needed.

    Files up to MAX_SINGLE_FILE_SIZE are streamed in *buffer*-byte chunks;
    larger files are split into 50 MB slices that are copied concurrently by
    the module-level thread pool (see copy_file_thread).

    :param buffer: read size in bytes for the streaming path
    :param src: source file path
    :param target: destination file path
    """
    file_size = os.path.getsize(src)
    # Make sure the destination's parent directory exists.
    parent_path = os.path.split(target)[0]
    if not os.path.exists(parent_path):
        os.makedirs(parent_path)
    rs = open(src, "rb")
    ws = open(target, "wb")
    try:
        if file_size <= MAX_SINGLE_FILE_SIZE:
            # Small file: plain streaming copy.
            while True:
                content = rs.read(buffer)
                ws.write(content)
                if len(content) == 0:
                    break
            ws.flush()
        else:
            # Large file: one thread-pool task per 50 MB slice; all tasks
            # share the same two file handles.
            PER_THREAD_SIZE = 52428800
            chunk_count = math.ceil(file_size / PER_THREAD_SIZE)
            task_list = list()
            for i in range(chunk_count):
                # BUGFIX: slice offsets are exact multiples of the slice
                # size. The original used `i * PER_THREAD_SIZE + i`, which
                # shifted every slice by i bytes and left one-byte holes in
                # the destination file.
                start = i * PER_THREAD_SIZE
                # BUGFIX: the last slice is whatever remains after *start*.
                # The original used `file_size % PER_THREAD_SIZE`, which is 0
                # when file_size is an exact multiple of the slice size, so
                # the final slice was never copied.
                byte_size = min(PER_THREAD_SIZE, file_size - start)
                t = executor.submit(copy_file_thread, start, byte_size, rs, ws)
                task_list.append(t)
            wait(task_list)
    finally:
        # Close both handles even if a read/write raised.
        rs.close()
        ws.close()
104
# Threaded slice copy.
def copy_file_thread(start, byte_size, rs, ws):
    """Copy *byte_size* bytes at offset *start* from handle *rs* to *ws*.

    *rs*/*ws* are shared by every slice task of one file, so the whole
    seek-read-write sequence runs under the module lock — another task must
    not move the shared file position mid-copy.
    """
    buffer = 1024
    # BUGFIX/robustness: `with` guarantees the lock is released even if a
    # read/write raises; the original bare acquire()/release() pair would
    # leave the lock held and deadlock every remaining slice task.
    with mutex:
        rs.seek(start)
        ws.seek(start)
        count = 0
        while True:
            if count + buffer <= byte_size:
                # A full buffer still fits inside this slice.
                content = rs.read(buffer)
                count += len(content)
                write(content, ws)
            else:
                # Final partial read: the slice remainder modulo the buffer
                # (0 when byte_size is an exact multiple of the buffer).
                content = rs.read(byte_size % buffer)
                count += len(content)
                write(content, ws)
                break
127
def write(content, ws):
    """Write *content* to handle *ws* and flush straight away, so slice
    writes from concurrent tasks reach the file promptly."""
    ws.write(content)
    ws.flush()
132
def copy_dir(src, desc):
    """Copy directory *src* to *desc* with a pool of 3 worker processes,
    printing overall progress to stdout.

    NOTE(review): *desc* is removed first if it already exists, and the
    function assumes src/desc differ only by drive letter (see the target
    computation below) — confirm before reusing with arbitrary paths.
    """
    # Total items to copy (files + empty dirs) — progress denominator.
    total_number = get_file_count(src)
    # Strip any trailing "/" so the joins below don't double the separator.
    src = check_separator(src)
    desc = check_separator(desc)
    # Immediate children of src; each becomes one pool task.
    file_dir_list = [src + "/" + i for i in os.listdir(src)]
    # Start from a clean destination.
    if os.path.exists(desc):
        shutil.rmtree(desc)
    # Process pool.
    pool = multiprocessing.Pool(3)
    # Progress queue: pool workers require a Manager queue — a plain
    # multiprocessing.Queue cannot be passed into apply_async.
    queue = multiprocessing.Manager().Queue()
    # One process (task) per top-level file/directory.
    for f_name in file_dir_list:
        # Destination = drive of *desc* + the source path minus its drive.
        target = os.path.splitdrive(desc)[0] + "/" + os.path.splitdrive(f_name)[1]
        # Make sure the target's parent directory exists.
        parent_path = os.path.split(target)[0]
        if not os.path.exists(parent_path):
            os.makedirs(parent_path)
        pool.apply_async(copy, args=(f_name, target, queue))
    start = time.time()
    pool.close()
    # No pool.join(): the progress loop below exits once every expected item
    # has been reported, which serves the same purpose.
    count = 0
    while True:
        count += queue.get()
        # "\r" rewrites the same console line each time; "%%" prints a
        # literal percent sign.
        print("\r当前进度为 %.2f %%" % (count * 100 / total_number), end="")
        if count >= total_number:
            break
    executor.shutdown()
    end = time.time()
    print()
    print("耗时-----", (end - start), "s")
179
# Find every index at which a given character occurs in a string.
def index_list(c, s):
    """Return the list of indices where the literal text *c* occurs in *s*.

    BUGFIX: *c* is escaped before the regex search. The original passed it
    to re.finditer verbatim, so metacharacters such as "." or "$" were
    interpreted as patterns and matched the wrong positions.
    """
    return [m.start() for m in re.finditer(re.escape(c), s)]
184
# Check whether the path ends with "/" and strip it if so.
def check_separator(path):
    """Return *path* without its trailing "/" (if it has one).

    BUGFIX/robustness: the original used path.rindex("/"), which raises
    ValueError when the path contains no "/" at all; endswith() handles
    every input and is equivalent for paths that do contain one.
    """
    if path.endswith("/"):
        return path[:-1]
    return path
191
def main():
    # Entry point: mirror the FTP directory from drive f: to drive e:.
    # NOTE(review): both paths are hard-coded and Windows-specific.
    copy_dir("f:/ftp_mypc/", "e:/ftp_mypc/")


if __name__ == '__main__':
    main()