实验背景
前端发送若干个任务交给后端做批量处理,处理逻辑采用生产者—消费者模型实现:前端充当生产者,负责将任务交给后端;后端充当消费者,负责处理任务。前端发送过来的任务首先缓存在任务队列中,后台启动一个守护线程监控该队列,一旦队列中有任务,就取出并交给后端处理。
实验过程中,使用gunicorn来启动Flask服务。缓存任务的队列随gunicorn主进程(即Flask工作进程的父进程)一同创建,从而避免Reload过程中频繁杀死、创建子进程造成数据丢失的问题。在服务退出时,检查缓存队列中是否仍有未处理的数据,若有则将其保存到本地文件,待下次启动时重新载入队列。
项目结构
.
|-- bin
|   |-- gunicorn_conf.py
|   |-- run.py
|-- log
|   `-- taskid.json
`-- stor
    |-- consumer.py
    |-- __init__.py
    |-- producer.py
    `-- webapi.py
webapi.py
from flask import Flask, request
app = Flask(__name__)
from consumer import distribute_task
@app.route('/demo', methods=['GET'])
def put_task():
    """Handle GET /demo: queue the requested tasks and echo them back.

    Reads every ``tasks`` value from the request (converted with ``int``),
    hands the resulting list to ``distribute_task`` and returns the list
    rendered as text.
    """
    tasks = request.values.getlist('tasks', type=int)
    print('tasks: {}'.format(tasks))
    distribute_task(tasks)
    return str(tasks)
producer.py
# consumer.py defines no ``task_production``; the enqueue entry point it
# exposes is the module-level ``consumer`` object, so import that instead
# (the old import raised ImportError as soon as this module was loaded).
from .consumer import consumer


def distribute_task(tasks):
    """Hand each task to the shared consumer queue.

    Args:
        tasks: iterable of task items; each one is passed to
            ``consumer.put_queue`` in iteration order.
    """
    for task in tasks:
        consumer.put_queue(task)
consumer.py
import time
import threading
import os
from multiprocessing import Queue, Lock
class Consumer():
    """Background consumer that drains tasks from a queue.

    ``task_monitor`` starts a daemon thread running ``task_consumer``,
    which takes at most one task from the queue per polling cycle until
    ``task_stop`` is called.
    """

    def __init__(self, queue):
        """Remember the queue to consume from; no thread is started yet.

        Args:
            queue: queue object providing ``put``, ``get`` and ``qsize``.
        """
        self.thr = None     # polling thread, created by task_monitor()
        self.flag = True    # loop condition of task_consumer(); False stops it
        self.queue = queue
        self.lock = Lock()  # not used by any method of this class

    def task_consumer(self):
        """Poll the queue until ``flag`` becomes false.

        Every cycle logs the parent/own process ids and the queue size,
        takes one task if the queue reports any, then sleeps five seconds.
        """
        while self.flag:
            print('task consumer ...')
            print('consumer os ppid: ' + str(os.getppid()))
            print('consumer os pid: ' + str(os.getpid()))
            print('q size: ' + str(self.queue.qsize()))
            if 0 < self.queue.qsize():
                x = self.queue.get()
                print('x: ' + str(x))
            time.sleep(5)
        print('i am return ...')

    def task_monitor(self):
        """Start ``task_consumer`` in a daemon thread.

        Returns:
            The started ``threading.Thread`` (also stored in ``self.thr``).
        """
        self.thr = threading.Thread(target=self.task_consumer)
        # Use the ``daemon`` attribute; Thread.setDaemon() is deprecated.
        self.thr.daemon = True
        self.thr.start()
        return self.thr

    def get_queue(self):
        """Return the queue this consumer reads from."""
        return self.queue

    def put_queue(self, task):
        """Log the calling process ids and append ``task`` to the queue."""
        print('production os ppid: ' + str(os.getppid()))
        print('production os pid: ' + str(os.getpid()))
        print('put x: ' + str(task))
        self.queue.put(task)

    def task_stop(self):
        """Ask the polling loop to finish and wait for its thread.

        Safe to call even when ``task_monitor`` was never invoked; in that
        case there is no thread to join.
        """
        self.flag = False
        print('stop os ppid: ' + str(os.getppid()))
        print('stop os pid: ' + str(os.getpid()))
        if self.thr is not None:
            self.thr.join()
# Log which process is importing this module (and thus creating the queue).
print('queue os ppid: {}'.format(os.getppid()))
print('queue os pid: {}'.format(os.getpid()))

# Module-level queue and the Consumer wrapping it, created once at import.
queue = Queue()
consumer = Consumer(queue)
def distribute_task(tasks):
    """Enqueue every item of ``tasks`` on the module-level consumer.

    Args:
        tasks: iterable of task items, queued in iteration order.
    """
    enqueue = consumer.put_queue
    for item in tasks:
        enqueue(item)
gunicorn_conf.py
import sys
import os
import pdb
app_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(app_root)
from stor.consumer import consumer
from multiprocessing import Queue
# gunicorn setting: restart workers when application code changes.
reload = True

# Parent of the directory that holds this configuration file.
root_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
# Directory and file in which unprocessed tasks are kept between runs.
cache_path = os.path.join(root_path, 'log')
cache_name = os.path.join(cache_path, 'taskid.json')
def on_starting(server):
    """gunicorn server hook: reload cached tasks and start the consumer.

    If ``cache_name`` exists, every non-blank line is put back on the
    consumer queue and the file is deleted. The consumer's polling thread
    is then started regardless of whether a cache file was found.

    Restored tasks are strings, because the cache stores them as text.

    Args:
        server: object passed in by gunicorn; not used here.
    """
    print('starting ...')
    if os.path.exists(cache_name):
        with open(cache_name, 'r') as f:
            for line in f:
                # Drop the line terminator written by on_exit. Keeping it
                # would add one more newline per save/restore cycle and
                # bring blank lines back as empty tasks.
                task = line.strip()
                if not task:
                    continue
                print('get task: ' + str(task))
                consumer.put_queue(task)
        # Delete only after the file has been closed.
        os.remove(cache_name)
    consumer.task_monitor()
def on_reload(server):
    """gunicorn server hook for reloads; only logs that it was called.

    Args:
        server: object passed in by gunicorn; not used here.
    """
    message = 'reload ...'
    print(message)
def child_exit(server, worker):
    """gunicorn server hook for worker exit; only logs that it was called.

    Args:
        server: object passed in by gunicorn; not used here.
        worker: the worker concerned; not used here.
    """
    message = 'child exit ...'
    print(message)
def on_exit(server):
    """gunicorn server hook: stop the consumer and persist leftover tasks.

    Stops the polling thread, then appends every task still in the queue
    to ``cache_name`` (one per line) so ``on_starting`` can restore them.

    Args:
        server: object passed in by gunicorn; not used here.
    """
    consumer.task_stop()
    pending = consumer.get_queue()
    with open(cache_name, 'a') as cache_file:
        while pending.qsize() > 0:
            text = str(pending.get())
            print('save task: ' + text)
            cache_file.write(text + '\n')
    print('exit ...')
实验步骤
# 启动flask
gunicorn -w 1 run:app -c gunicorn_conf.py
queue os ppid: 23262
queue os pid: 24350 # 与flask的进程一致
[2020-08-09 15:35:46 +0000] [24350] [INFO] Starting gunicorn 19.10.0
starting ...
task consumer ...
consumer os ppid: 23262
consumer os pid: 24350
q size: 0
[2020-08-09 15:35:46 +0000] [24350] [INFO] Listening at: http://127.0.0.1:8000 (24350)
[2020-08-09 15:35:46 +0000] [24350] [INFO] Using worker: sync
[2020-08-09 15:35:46 +0000] [24356] [INFO] Booting worker with pid: 24356
/root/flask/myproject2
webapi
task consumer ...
consumer os ppid: 23262
consumer os pid: 24350
q size: 0
# 发送任务信息
$ curl 127.0.0.1:8000/demo?tasks=3\&tasks=4
# 接收到任务信息
tasks: [3, 4]
production os ppid: 24350
production os pid: 24356
put x: 3
production os ppid: 24350
production os pid: 24356
put x: 4
task consumer ...
consumer os ppid: 23262
consumer os pid: 24350
q size: 2
x: 3
# 退出服务,此时队列中有编号为4的任务没有处理,需要保存下来
^C[2020-08-09 15:35:58 +0000] [24350] [INFO] Handling signal: int
[2020-08-09 15:35:59 +0000] [24356] [INFO] Worker exiting (pid: 24356)
child exit ...
[2020-08-09 15:35:59 +0000] [24350] [INFO] Shutting down: Master
stop os ppid: 23262
stop os pid: 24350
i am return ...
save task: 4
exit ...
# 查看保存到本地文件中的未处理任务
cat ../log/taskid.json
4
# 重启服务,完成后续处理
gunicorn -w 1 run:app -c gunicorn_conf.py
queue os ppid: 23262
queue os pid: 24821
[2020-08-09 15:40:12 +0000] [24821] [INFO] Starting gunicorn 19.10.0
starting ...
get task: 4
production os ppid: 23262
production os pid: 24821
put x: 4
task consumer ...
consumer os ppid: 23262
consumer os pid: 24821
q size: 1
x: 4
[2020-08-09 15:40:12 +0000] [24821] [INFO] Listening at: http://127.0.0.1:8000 (24821)
[2020-08-09 15:40:12 +0000] [24821] [INFO] Using worker: sync
[2020-08-09 15:40:12 +0000] [24828] [INFO] Booting worker with pid: 24828
/root/flask/myproject2
webapi