问题描述
- 现在有多个大任务,比如A、B、C,每个大任务又分多个小任务,比如task1,task2,task3
- 小任务都是耗时任务,并且需要分成多个阶段按顺序异步完成
- 每个子任务执行过程中需要记录其状态,比如:A task1执行中,A task1完成,A task1失败
- 如果某个小任务执行失败,可以方便地进行重试
- 每个大任务开始执行时,需要在数据库中记录执行状态,比如执行到了哪一个子任务
方案设计
- 使用RabbitMQ作为消息队列中间件,用一个队列传递消息,从而实现异步
- 某个小任务执行完成后再给队列发送下一个任务的消息,来控制子任务的执行顺序
- 发送的第一个消息中,包含了起始任务,比如设为task2则从task2开始,从而方便任务的重试
- 在每个task中,通过SQLAlchemy在MySQL中记录对应大任务的状态
代码实现
mq.py
import pika
import json
def send(routing_key, message):
    """Publish ``message`` as a persistent JSON message to a durable queue.

    A new blocking connection to the RabbitMQ broker on ``localhost`` is
    opened for every call and closed again before returning, including when
    declaring the queue or publishing fails.

    Args:
        routing_key: Name of the queue. The queue is declared as durable and
            the name is used as routing key on the default exchange.
        message: JSON-serializable object to send.

    Raises:
        TypeError: If ``message`` cannot be serialized to JSON.
    """
    # Serialize first so an unserializable message never opens a connection.
    body = json.dumps(message)
    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=routing_key, durable=True)
        channel.basic_publish(
            exchange='',
            routing_key=routing_key,
            body=body,
            properties=pika.BasicProperties(
                delivery_mode=2,  # 2 marks the message as persistent
            ),
        )
    finally:
        # Release the connection even if declaring or publishing raised.
        if connection.is_open:
            connection.close()
def receive(routing_key, callback):
    """Consume messages from a durable queue until interrupted.

    Opens a blocking connection to the RabbitMQ broker on ``localhost``,
    declares the queue as durable and blocks in the consume loop. CTRL+C
    stops the loop; the connection is closed on every exit path.

    Args:
        routing_key: Name of the queue to consume from.
        callback: Called for every delivery as
            ``callback(channel, method, properties, body)``. No automatic
            acknowledgement is requested here, so the callback has to
            acknowledge the message itself.
    """
    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=routing_key, durable=True)
        channel.basic_consume(queue=routing_key, on_message_callback=callback)
        print('Waiting for tasks. To exit press CTRL+C')
        try:
            channel.start_consuming()
        except KeyboardInterrupt:
            # Cancel the consumer cleanly before the connection goes away.
            channel.stop_consuming()
            print('Stopped consuming')
    finally:
        # Also reached when the callback or the broker raises, so the
        # connection is never leaked.
        if connection.is_open:
            connection.close()
db.py
from sqlalchemy import create_engine, Column, Integer, String, DateTime, func
from sqlalchemy.orm import sessionmaker, declarative_base
from datetime import datetime
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
# Engine for the MySQL database "test" on localhost, using the PyMySQL driver.
# NOTE(review): the credentials are hard-coded in the URL; consider loading
# them from configuration instead.
engine = create_engine('mysql+pymysql://root:123456@localhost/test')
# Session factory bound to the engine; call Session() to open a session.
Session = sessionmaker(bind=engine)
class Task(Base):
    """ORM model for one row of the ``task`` table.

    A row stores the name of a top-level task together with the status
    string that was recorded for it most recently.
    """

    __tablename__ = 'task'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Name of the top-level task; rows are looked up by this value.
    name = Column(String(255))
    # Most recently recorded status text; no default is configured.
    status = Column(String(255))
    # Filled by calling datetime.utcnow when no value is supplied.
    created_at = Column(DateTime, default=datetime.utcnow)
# Update the status of a task
def update_status(task_name, status):
    """Store ``status`` on the task row whose name is ``task_name``.

    The first row matching the name is updated and the change is committed
    in a session of its own.

    Args:
        task_name: Value of the ``name`` column of the task to update.
        status: New status text.

    Raises:
        LookupError: If no task with that name exists.
    """
    with Session() as session:
        task = session.query(Task).filter_by(name=task_name).first()
        if task is None:
            # Fail with a clear message instead of the AttributeError that
            # assigning to None would produce.
            raise LookupError(f"No task named {task_name!r} exists")
        task.status = status
        session.commit()
if __name__ == '__main__':
    # Rebuild the schema from scratch. WARNING: dropping the tables first
    # deletes all existing rows.
    metadata = Base.metadata
    metadata.drop_all(engine)
    metadata.create_all(engine)
worker.py
import json
from time import sleep
from db import update_status
from mq import send, receive
# Name of the queue this worker consumes from and publishes follow-up
# sub-task messages to.
ROUTING_KEY = 'queue1'
# Sub-task execution functions
def task1(task_name):
    """Run sub-task 1 of ``task_name`` and enqueue sub-task 2 on success.

    The status of the top-level task is set to "task1 started" before the
    work and to "task1 completed" afterwards. If anything in between fails
    (including sending the follow-up message), the cause is printed and the
    status is set to "task1 failed".

    Args:
        task_name: Name of the top-level task this sub-task belongs to.
    """
    try:
        update_status(task_name, "task1 started")
        print("Executing Task1")
        sleep(0.5)  # simulates the time-consuming work
        update_status(task_name, "task1 completed")
        # Hand over to the next stage by queueing its message.
        message = {
            'task_name': task_name,
            'sub_task_name': "task2",
        }
        send(routing_key=ROUTING_KEY, message=message)
    except Exception as e:
        # Report the cause instead of silently swallowing it; the failed
        # status alone does not say what went wrong.
        print(f"Task1 failed for {task_name}: {e!r}")
        update_status(task_name, "task1 failed")
def task2(task_name):
    """Run sub-task 2 of ``task_name`` and enqueue sub-task 3 on success.

    The status of the top-level task is set to "task2 started" before the
    work and to "task2 completed" afterwards. If anything in between fails
    (including sending the follow-up message), the cause is printed and the
    status is set to "task2 failed".

    Args:
        task_name: Name of the top-level task this sub-task belongs to.
    """
    try:
        update_status(task_name, "task2 started")
        print("Executing Task2")
        sleep(0.5)  # simulates the time-consuming work
        # To simulate a failure and try out the retry path, raise here,
        # e.g. with: 1/0
        update_status(task_name, "task2 completed")
        # Hand over to the next stage by queueing its message.
        message = {
            'task_name': task_name,
            'sub_task_name': "task3",
        }
        send(routing_key=ROUTING_KEY, message=message)
    except Exception as e:
        # Report the cause instead of silently swallowing it; the failed
        # status alone does not say what went wrong.
        print(f"Task2 failed for {task_name}: {e!r}")
        update_status(task_name, "task2 failed")
def task3(task_name):
    """Run sub-task 3, the last stage, of ``task_name``.

    The status of the top-level task is set to "task3 started" before the
    work and to "task3 completed" afterwards. No follow-up message is sent.
    If anything fails, the cause is printed and the status is set to
    "task3 failed".

    Args:
        task_name: Name of the top-level task this sub-task belongs to.
    """
    try:
        update_status(task_name, "task3 started")
        print("Executing Task3")
        sleep(0.5)  # simulates the time-consuming work
        update_status(task_name, "task3 completed")
        # All sub-tasks are done; there is nothing further to enqueue.
    except Exception as e:
        # Report the cause instead of silently swallowing it; the failed
        # status alone does not say what went wrong.
        print(f"Task3 failed for {task_name}: {e!r}")
        update_status(task_name, "task3 failed")
# Consumer callback
def callback(ch, method, properties, body):
    """Dispatch one queue message to the matching sub-task function.

    The body is expected to be a JSON object with the keys ``task_name``
    and ``sub_task_name``. Messages that cannot be parsed, lack one of the
    keys, or name an unknown sub-task are reported and acknowledged, because
    redelivering them could never succeed.

    Args:
        ch: Channel the message was delivered on; used to acknowledge it.
        method: Delivery metadata; provides ``delivery_tag``.
        properties: Message properties (unused).
        body: Raw message body as bytes.
    """
    try:
        task_message = json.loads(body.decode())
        task_name = task_message['task_name']
        sub_task_name = task_message['sub_task_name']
    except (ValueError, KeyError, TypeError) as e:
        # ValueError covers invalid UTF-8 and invalid JSON; KeyError and
        # TypeError cover a missing key or a non-object payload.
        print(f"Discarding malformed message {body!r}: {e!r}")
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    print(f"Received task: {task_message}")
    if sub_task_name == 'task1':
        task1(task_name)
    elif sub_task_name == 'task2':
        task2(task_name)
    elif sub_task_name == 'task3':
        task3(task_name)
    else:
        print(f"Invalid sub task: {sub_task_name}")
    # Acknowledge only after the handler returned. If it raised, the message
    # stays unacknowledged.
    ch.basic_ack(delivery_tag=method.delivery_tag)
if __name__ == '__main__':
    # Start the worker: consume ROUTING_KEY and handle each message with
    # callback.
    receive(ROUTING_KEY, callback)
main.py
from db import Session, Task, update_status
from mq import send
# Name of the queue the initial sub-task messages are sent to; worker.py
# consumes from a queue with the same name.
ROUTING_KEY = 'queue1'
def add(tasks, sub_task_name="task1"):
    """Register top-level tasks and queue their first sub-task message.

    For every task name, the matching row is created if it does not exist
    yet, its status is set to "<sub_task_name> started", and a message
    naming the sub-task to start with is sent to the queue.

    Args:
        tasks: Iterable of top-level task names.
        sub_task_name: Sub-task to start from. Defaults to "task1"; pass a
            later stage (e.g. "task2") to retry from the stage that failed.
    """
    for task_name in tasks:  # top-level task
        message = {
            'task_name': task_name,
            # Start point of the sub-task chain.
            'sub_task_name': sub_task_name,
        }
        with Session() as session:
            task_obj = session.query(Task).filter_by(name=task_name).first()
            if task_obj is None:
                task_obj = Task(name=task_name)
                session.add(task_obj)
            # New and existing rows get the same initial status, written in
            # this session rather than a nested one.
            task_obj.status = f"{sub_task_name} started"
            session.commit()
        # Send only after the row is committed, so the row exists by the
        # time the message is consumed.
        send(routing_key=ROUTING_KEY, message=message)
        print(f"Sent message: {message}")
if __name__ == '__main__':
    # Queue the three example top-level tasks.
    add(['A', 'B', 'C'])
运行
运行db.py,创建表
运行worker.py
C:\Python311\python.exe C:\Users\bill\PycharmProjects\test\worker.py
Waiting for tasks. To exit press CTRL+C
运行main.py
C:\Python311\python.exe C:\Users\bill\PycharmProjects\test\main.py
Sent message: {'task_name': 'A', 'sub_task_name': 'task1'}
Sent message: {'task_name': 'B', 'sub_task_name': 'task1'}
Sent message: {'task_name': 'C', 'sub_task_name': 'task1'}
C:\Python311\python.exe C:\Users\bill\PycharmProjects\test\worker.py
Waiting for tasks. To exit press CTRL+C
Received task: {'task_name': 'A', 'sub_task_name': 'task1'}
Executing Task1
Received task: {'task_name': 'B', 'sub_task_name': 'task1'}
Executing Task1
Received task: {'task_name': 'C', 'sub_task_name': 'task1'}
Executing Task1
Received task: {'task_name': 'A', 'sub_task_name': 'task2'}
Executing Task2
Received task: {'task_name': 'B', 'sub_task_name': 'task2'}
Executing Task2
Received task: {'task_name': 'C', 'sub_task_name': 'task2'}
Executing Task2
Received task: {'task_name': 'A', 'sub_task_name': 'task3'}
Executing Task3
Received task: {'task_name': 'B', 'sub_task_name': 'task3'}
Executing Task3
Received task: {'task_name': 'C', 'sub_task_name': 'task3'}
Executing Task3
数据库task表