IO密集型任务适合使用异步;CPU密集型任务适合使用同步。本示例采用同步方式,下面给出生产者(下载)和消费者(处理)两个程序。
# -*- coding: utf-8 -*-
"""
======================
@author:YuanYihan
@time:2020/5/6:19:37
@email:1226317595@qq.com
@phone:18192015917
======================
"""
import os
from random import randrange
import redis
import time
"""
文件夹下载:0,1,2,3,4.。。
当文件夹1下载完成,rename,1变成1_down
当文件夹全部下载完成,文件夹下生成SUCCESS的空文件
注意:当DOWNING存在时,下载失败(防止二次下载),x属性代表排他锁
"""
def input_file():
    """Producer using a file-based lock.

    Protocol (see module docstring):
    - Exclusively creates an empty ``DOWNING`` marker file (mode ``"x"``);
      if it already exists ``FileExistsError`` is raised, which prevents a
      duplicate/concurrent download run.
    - Downloads folders 0..9; once folder ``i`` is fully written it is
      renamed to ``<i>_down`` so the consumer can safely pick it up.
    - When all folders are done, ``DOWNING`` is renamed to ``SUCCESS`` as
      the "everything downloaded" flag.
    """
    files_path = "F:\\python\\python\\AccountMicroEffectJudgement\\python_files_lock\\data\\"
    # Exclusive-create the DOWNING marker: acts as the write lock.
    # files_path already ends with a separator, so don't add another one.
    with open(files_path + "DOWNING", "x", encoding="utf-8", newline='\n') as nf:
        nf.write("")
    print("文件夹开始下载")
    for i in range(10):
        print(f'文件夹发{i}正在下载')
        base_dir = files_path + str(i)
        os.mkdir(base_dir)
        # Each folder gets a random number (2..6) of small files.
        for j in range(randrange(2, 7)):
            with open(base_dir + "\\" + str(j), "w+", encoding="utf-8", newline='\n') as nf:
                nf.write(str(randrange(2, 14)))
        time.sleep(1)  # simulate per-folder download time
        print(f"文件夹发{i}下载完成")
        # Atomic rename marks the folder as fully downloaded.
        os.rename(base_dir, base_dir + "_down")
    # All folders done: flip the lock file into the SUCCESS flag.
    os.rename(files_path + "DOWNING", files_path + "SUCCESS")
    print("文件夹全部下载完了")
def input_file_redis():
    """Producer using a redis list as the work queue.

    - ``path`` list: queue of finished folder paths (``lpush`` here,
      ``brpop`` on the consumer side).
    - ``hash1[k1]``: status flag — ``DOWNING`` while producing,
      ``SUCCESS`` when everything is written.
    """
    pool = redis.ConnectionPool(host='192.168.1.128', port=6379, db=2, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    # ltrim to an empty range clears any leftover queue from a previous run.
    r.ltrim("path", 1, 0)
    files_path = "F:\\python\\python\\AccountMicroEffectJudgement\\python_files_lock\\data\\"
    # files_path="/data2/test/data/"
    r.hset("hash1", "k1", "DOWNING")
    print("文件夹开始下载")
    for i in range(100):
        print(f"文件夹发{i}正在下载")
        base_dir = files_path + str(i)
        os.mkdir(base_dir)
        # Each folder gets a random number (10..19) of small files.
        for j in range(randrange(10, 20)):
            with open(base_dir + "/" + str(j), "w+", encoding="utf-8", newline='\n') as nf:
                nf.write(str(randrange(2, 14)))
        # time.sleep(3)
        # Push only fully written folders, so consumers never see a
        # half-downloaded folder.
        r.lpush("path", base_dir)
        print(f"文件夹发{i}下载完成")
    # Flip the status flag so the consumer can exit once the queue drains.
    r.hset("hash1", "k1", "SUCCESS")
    print("文件夹全部下载完了")
def input_file_redis_pubsub():
    """Producer using redis pub/sub.

    Publishes each finished folder path on channel ``mes``, then a final
    ``SUCCESS_<count>`` on ``mes_status`` so subscribers know the total.
    Note: pub/sub is fire-and-forget — a consumer must already be
    subscribed, otherwise the messages are lost.
    """
    # NOTE(review): this pool uses db=4 while the consumer connects to db=2;
    # redis pub/sub channels are server-wide (not per-database), so the
    # mismatch is harmless for messaging.
    pool = redis.ConnectionPool(host='192.168.1.128', port=6379, db=4, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    files_path = "F:\\python\\python\\AccountMicroEffectJudgement\\python_files_lock\\data\\"
    # files_path="/data2/test/data/"
    print("文件夹开始下载")
    num = 0  # total folders published, announced at the end
    for i in range(100):
        num = num + 1
        print(f"文件夹发{i}正在下载")
        base_dir = files_path + str(i)
        os.mkdir(base_dir)
        for j in range(randrange(10, 20)):
            with open(base_dir + "/" + str(j), "w+", encoding="utf-8", newline='\n') as nf:
                nf.write(str(randrange(2, 14)))
        # time.sleep(3)
        # Publish only after the folder is fully written.
        r.publish("mes", base_dir)
        print(f"文件夹发{i}下载完成")
    # Announce completion together with the total message count.
    r.publish("mes_status", "SUCCESS_" + str(num))
    print("文件夹全部下载完了")
def main():
    """Producer entry point — enable exactly one synchronization strategy."""
    # input_file()          # file-lock based
    # input_file_redis()    # redis list as a queue
    input_file_redis_pubsub()  # redis pub/sub messaging
if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
"""
======================
@author:YuanYihan
@time:2020/5/6:20:00
@email:1226317595@qq.com
@phone:18192015917
======================
"""
import time
import os
def ouput_file(files_path="F:\\python\\python\\AccountMicroEffectJudgement\\python_files_lock\\data\\"):
    """Consumer for the file-lock protocol.

    Folders arrive named ``<i>_down``; once folder ``<i>`` is processed it
    is renamed ``<i>_down_done``.  When the producer's ``SUCCESS`` flag
    exists and no ``_down`` folder remains, ``SUCCESS`` is renamed to
    ``DONE`` and the loop exits.

    files_path: data directory; defaults to the original hard-coded path
    for backward compatibility.
    """
    def zhe_shi_ye_wu(path_dir, file_name):
        # Business step (~3s): concatenate every file of the folder into a
        # "result" file, then mark the folder done via atomic rename.
        time.sleep(3)
        # NOTE: the original wrapped these in single-argument os.path.join
        # calls, which are no-ops; plain formatting is equivalent.
        files = '%s/%s' % (path_dir, file_name)
        if os.path.isdir(files):
            entries = os.listdir(files)
            with open('%s/%s' % (files, "result"), "w+", encoding="utf-8", newline='\n') as nf:
                for entry in entries:
                    child = '%s/%s' % (files, entry)
                    if os.path.isfile(child):
                        with open(child, "r", encoding="utf-8", newline="\n") as of:
                            nf.write(of.read())
            os.rename(files, '%s_%s' % (files, "done"))

    while True:
        path_dir = os.listdir(files_path)
        success = [name for name in path_dir if name.endswith("SUCCESS")]
        down_to_process = [name for name in path_dir if name.endswith("_down")]
        for file in down_to_process:
            print(f"文件夹{file},开始处理")
            zhe_shi_ye_wu(files_path, file)  # heavy business step (~3s)
            print(f"文件夹{file},处理结束")
        if len(down_to_process) == 0 and len(success) == 1:
            # Producer finished and everything processed: flip flag and stop.
            os.rename('%s/%s' % (files_path, "SUCCESS"), '%s/%s' % (files_path, "DONE"))
            print("全部文件处理结束")
            break
        if not down_to_process:
            # Nothing to do yet: sleep briefly instead of busy-spinning
            # os.listdir at full CPU (original looped with no pause).
            time.sleep(1)
def ouput_file_redis():
    """Consumer for the redis-list queue.

    Drains folder paths from the ``path`` list; exits once the producer's
    status flag (``hash1[k1]``) reads ``SUCCESS`` while the queue stays
    empty.  Every processed folder bumps the shared ``count`` counter,
    which is printed and reset on exit.
    """
    import redis
    import shutil

    def _process_one(parent_dir, folder, client):
        # Business step: remove the finished folder and count it.
        # time.sleep(3)
        target = os.path.join('%s/%s' % (parent_dir, folder))
        shutil.rmtree(target)
        client.incr("count", 1)

    conn_pool = redis.ConnectionPool(host='192.168.1.128', port=6379, db=2, decode_responses=True)
    client = redis.Redis(connection_pool=conn_pool)
    while True:
        popped = client.brpop("path", timeout=5)
        if popped is None:
            # Queue empty: finished for real, or just waiting for more work?
            if client.hget("hash1", "k1") == "SUCCESS":
                client.hset("hash1", "k1", "DONE")
                print(f"处理队列为空,标志为下载完成,全部文件处理结束")
                break
            print("队列为空,暂时同步阻塞,消费等待===")
        else:
            parent_dir, folder = os.path.split(popped[1])
            print(f"文件夹{folder},开始处理")
            _process_one(parent_dir, folder, client)  # heavy business step
            print(f"文件夹{folder},处理结束")
    print(client.get("count"))
    client.set("count", 0)
def ouput_file_redis_pubsub():
    # Consumer for the pub/sub producer: subscribes to "mes" (one message
    # per finished folder path) and "mes_status" (final "SUCCESS_<total>").
    import redis
    import shutil
    def zhe_shi_ye_wu_redis(path_dir, file_name, r):
        # Business step: delete the processed folder.
        # time.sleep(1)
        files = os.path.join('%s/%s' % (path_dir, file_name))
        shutil.rmtree(files)
    # NOTE(review): producer publishes via db=4 while this pool uses db=2;
    # redis pub/sub channels are server-wide (not per-database), so the
    # messages still arrive.
    pool = redis.ConnectionPool(host='192.168.1.128', port=6379, db=2, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    ps = r.pubsub()
    ps.subscribe('mes', 'mes_status')  # must subscribe before the producer publishes, or messages are lost
    end = False    # True once the SUCCESS_<n> status message has arrived
    all_num = 0    # total folder count announced by the producer
    num = 0        # folders processed so far by this consumer
    for item in ps.listen():  # blocks, yielding each published message
        if item['type'] == 'message':
            chan = item['channel']
            data = item['data']
            if (chan == "mes_status") and (data.startswith("SUCCESS")):
                print(chan, data)
                end = True
                # Payload format: "SUCCESS_<total>"
                lst = data.split("_")
                all_num = int(lst[1])
            if (chan == "mes"):
                num = num + 1
                files_path, file = os.path.split(data)
                print(f"文件夹{file},开始处理")
                zhe_shi_ye_wu_redis(files_path, file, r)  # heavy business step
                print(f"文件夹{file},处理结束")
            # Exit only when the total is known AND every folder is handled.
            if end and num == all_num:
                print(f"处理队列为空,标志为下载完成,全部文件处理结束")
                break
def main():
    """Consumer entry point — keep the choice in sync with the producer."""
    # ouput_file()          # file-lock based
    # ouput_file_redis()    # redis list queue
    ouput_file_redis_pubsub()  # redis pub/sub messaging
if __name__ == '__main__':
    main()
下面再给出支持多生产者、多消费者的多进程版本(基于redis的list队列加数值信号量),同样包含生产者和消费者两个程序。
# -*- coding: utf-8 -*-
import os
from random import randrange
import redis
import time
"""
基于redis的list做队列和num做信号量(信号量做门闩)的多进程同步队列代码(便于多进程下载和多进程处理)
"""
def input_file_redis():
    """Producer using a redis list queue plus a numeric writer semaphore.

    ``write_semaphore_lock`` counts active writers (see ``init_write_lock``):
    it is incremented when this producer registers and decremented when it
    finishes.  Consumers treat the value ``"0"`` as "all writers done".
    """
    pool = redis.ConnectionPool(host='192.168.1.128', port=6379, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    key = "write_semaphore_lock"
    print("当前key-V:{}".format(r.get(key)))
    init_write_lock(key, r)  # transactionally register this writer (thread-safe semaphore)
    # NOTE(review): this tag is re-read after the transaction, so with
    # concurrent writers it may reflect other increments — it is only used
    # to keep folder names distinct per writer.
    ll = r.get(key)
    print("当前key-V:{}".format(ll))
    print(str(int(time.time())))
    files_path = "F:\\python\\python\\AccountMicroEffectJudgement\\python_files_lock\\data\\"
    # files_path="/home/yuanyihan/redis-demo/data/"
    print("文件夹开始下载")
    for i in range(100):
        print(f"文件夹发{i}正在下载")
        # Prefix with the writer tag so concurrent producers don't collide.
        base_dir = files_path + ll + "_" + str(i)
        os.mkdir(base_dir)
        for j in range(randrange(10, 20)):
            with open(base_dir + "/" + str(j), "w+", encoding="utf-8", newline='\n') as nf:
                nf.write(str(randrange(2, 14)))
        time.sleep(1)  # pace the simulated download
        r.lpush("path", base_dir)  # push only fully written folders
        print(f"文件夹发{i}下载完成")
    # Done: release this writer's slot in the semaphore.
    r.decr(key)
    print("文件夹全部下载完了")
def init_write_lock(key, r):
    # Transactionally initialize/acquire the writer semaphore stored at
    # `key`, supporting multiple producers and multiple consumers.
    # Returns 0 once the WATCH/MULTI/EXEC transaction commits.
    from redis import WatchError
    with r.pipeline() as pipe:
        while True:
            try:
                # WATCH the key: if another client modifies it before EXEC,
                # execute() raises WatchError and we retry the whole CAS loop.
                pipe.watch(key)
                clear_flag = False
                add_flag = False
                now_p = pipe.get(key)
                print(now_p)
                if now_p == "-1" or now_p is None:
                    # Key missing, or everything fully processed (-1):
                    # reset state and initialize for a fresh write.
                    print("not exist or -1,init the sys to write a new one")
                    clear_flag = True
                elif now_p != "0":
                    # Initialized and non-zero (>0 means writers are active):
                    # register one more writer.
                    print("exist ,add one")
                    add_flag = True
                else:
                    # Currently 0: writing finished but processing may not
                    # be; a new writer is still allowed to join.
                    print("0: not allow")
                    add_flag = True  # readers perform their own second check
                pipe.multi()
                if clear_flag:
                    pipe.ltrim("path", 1, 0)  # clear the entire work queue
                    pipe.set(key, 1)
                if add_flag:
                    pipe.incr(key)
                pipe.execute()
                return 0
            except WatchError:
                # Key changed under us: retry the optimistic transaction.
                continue
            finally:
                pipe.reset()  # drop WATCH/buffer state before retry or exit
def main():
    """Multi-process producer entry point (redis list + semaphore)."""
    input_file_redis()
if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
import time
import os
"""
基于redis的list做队列和num做信号量(信号量做门闩)的多进程同步队列代码(便于多进程下载和多进程处理)
"""
def ouput_file_redis():
    """Consumer for the semaphore-based multi-process queue.

    Pops folder paths from the ``path`` list; exits when the queue is empty
    and the writer semaphore reads ``"0"`` (all producers released it), or
    after too many consecutive empty polls (stall safeguard).
    """
    import redis
    import shutil
    num = 0        # folders processed by this process
    delaytime = 0  # consecutive empty brpop polls (each waits up to 2s)

    def zhe_shi_ye_wu_redis(path_dir, file_name, r):
        # Business step (~1s): remove the processed folder.
        time.sleep(1)
        files = os.path.join('%s/%s' % (path_dir, file_name))
        shutil.rmtree(files)

    pool = redis.ConnectionPool(host='192.168.1.128', port=6379, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    key = "write_semaphore_lock"
    while 1:
        # Blocking pop: returns immediately when data exists, otherwise
        # waits up to 2 seconds (the original comment claiming 5s was stale).
        result = r.brpop("path", timeout=2)
        if result is not None:
            files_path, file = os.path.split(result[1])
            print(f"文件夹{file},开始处理")
            num = num + 1
            delaytime = 0  # real work arrived: reset the stall counter
            zhe_shi_ye_wu_redis(files_path, file, r)  # heavy business step
            print(f"文件夹{file},处理结束")
        else:
            if r.get(key) == "0":
                # Queue drained and every writer released the semaphore.
                print(f"处理队列为空,标志为下载完成,全部文件处理结束")
                break
            else:
                delaytime = delaytime + 1
                if delaytime > 10:
                    # >10 empty polls x 2s timeout ~= 20s of silence
                    # (original message incorrectly said 10s).
                    print("队列为空,虽然写入没有完成,但是本进程已经很久(约20s)没有收到信息了,请排错===")
                    break
                print("队列为空,暂时同步阻塞,消费等待===")
    print("当前进程处理:{}条数据".format(num))
def main():
    """Multi-process consumer entry point (redis list + semaphore)."""
    ouput_file_redis()
if __name__ == '__main__':
    main()