python - 消费者生产者模型应用示例1

最新推荐文章于 2021-07-25 09:48:49 发布

fighting--sky

最新推荐文章于 2021-07-25 09:48:49 发布

阅读量381

点赞数

CC 4.0 BY-SA版权

分类专栏： python

本文链接：https://blog.csdn.net/u012720518/article/details/107410192

python 专栏收录该内容

13 篇文章

订阅专栏

本文介绍了一个分布式存储实验，涉及socket网络收发模型、生产者消费者模型和celery分布式任务调度。实验通过客户端发起存储请求，服务端批量存储数据，同时处理数字1、2互斥存储逻辑。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

实验要求

由若干个客户端发起存储数据的请求，例如 '[1,2,3,4]'、'[5,7,8]'、'[1]'、'[2]'，服务端负责接收请求并将其批量存储在文件中，并且每次存储的信息不能多于5条。此外，数字1和2互斥，不可以存储在同一个文件当中。

实验中用到了3种模型，socket网络收发模型，生产者消费者模型，celery的分布式任务调度中间件。

先给出完成实验的文件结构:

tree
.
|-- client.py				     # socket模型中的客户端，负责发送存储任务消息
|-- proj
|   |-- config.py                # celery配置文件
|   |-- __init__.py 
|   |-- middle.py                # 生产者消费者模型，为生产者存储任务，消费者消费任务（并完成互斥逻辑）
|   |-- tasks.py				 # 底层存储信息，负责文件的真正存储工作
`-- server.py					 # socket模型中的服务端，负责监听客户端请求，并将任务信息发送给生产者消费者模型。

Socket网络收发模型

socket是基于C/S架构的，socket网络编程通常需要编写client端和server端。模型架构如下:

socket模型架构

这里使用socket用来做前端与后端收发信息的模拟，使用最基本的socket demo。在服务端利用多线程实现对不同客户端请求的监听。

服务端 server.py:

# !/usr/bin/env python
# -*- coding:utf-8 -*-

import time
import threading
import socket

from proj.middle import producer, consumer

def middle_func(indexs):
    """Hand the indexes received from a client to the producer side of the queue model."""
    producer(indexs)

def server(conn, address):
    """Serve one client connection until it sends 'exit' or disconnects.

    Every message is parsed as a Python literal that must be a list (or
    tuple) of indexes. Valid messages are passed to ``middle_func`` and
    acknowledged; anything else is answered with an ``invalid data`` reply
    so the client, which waits for one reply per message, never blocks.

    Args:
        conn: Connected socket for this client; closed before returning.
        address: Peer address, used only in the log line.
    """
    # Local import: keeps this handler self-contained without touching the
    # module's import block.
    import ast

    try:
        while True:
            res = conn.recv(1024).decode()
            # An empty payload means the peer closed the connection; without
            # this check the loop would keep failing on empty input forever.
            if not res or res == 'exit':
                break
            print('client: ' + str(address) + 'data: ' + str(res))
            # SECURITY: this text comes straight from the network. It used to
            # be passed to eval(), which lets any client run arbitrary code on
            # the server. literal_eval() only accepts plain Python literals.
            try:
                data = ast.literal_eval(res)
            except (ValueError, SyntaxError, TypeError):
                data = None
            if not isinstance(data, (list, tuple)):
                conn.sendall(('invalid data: ' + res).encode())
                continue
            middle_func(data)
            conn.sendall(str('has been save info data: ' + str(data)).encode())
    finally:
        # Release the socket however the loop ends (exit, disconnect, error).
        conn.close()

def listen(soc):
    """Accept clients forever, giving every connection its own handler thread.

    Args:
        soc: Listening socket to accept connections from.
    """
    while True:
        # accept() blocks until the next client connects.
        client_conn, client_addr = soc.accept()
        handler = threading.Thread(target=server, args=(client_conn, client_addr))
        handler.start()

if __name__ == '__main__':
    # Start the consumer side of the producer/consumer model. The returned
    # thread is not joined: listen() below never returns normally, so the
    # former trailing join() could never be reached.
    consumer()

    ip_port = ('127.0.0.1', 9999)
    soc = socket.socket()
    try:
        soc.bind(ip_port)
        soc.listen(5)
        listen(soc)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit without a traceback.
        pass
    finally:
        # Always release the listening socket, including on bind/listen errors.
        soc.close()

客户端 client.py:

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import socket

def client(soc):
    """Interactive loop: send each non-empty input line and print the reply.

    Typing ``exit`` forwards that word to the server, prints ``finish!`` and
    ends the loop without waiting for a reply. The socket is closed on the
    way out.

    Args:
        soc: Socket that is already connected to the server.
    """
    running = True
    while running:
        message = input("input data: ").strip()
        if message:
            soc.sendall(message.encode())
            if message == "exit":
                print("finish!")
                running = False
            else:
                # One reply is expected for every non-exit message.
                print(soc.recv(1024).decode())

    soc.close()

if __name__ == '__main__':
    # Connect to the local server and hand the socket to the interactive loop.
    soc = socket.socket()
    soc.connect(('127.0.0.1', 9999))
    client(soc)

生产者消费者模型

实际场景中，前端发送的请求会比较频繁，而后端要求处理速度不能过快，需要尽可能慢地处理，这导致生产者与消费者的处理速度相差较大。由此，需要一个缓冲队列来存储所有的任务信息。

在这个实验中，生产者是前端发送的任务请求，多线程方式将任务存储到缓冲队列中。消费者是启动的一个守护线程，负责慢获取缓冲队列中的任务并投放到底层模型中做真正的存储。

其中，对于数字1、2互斥处理的逻辑是在消费者模型中实现的。消费者，是启动了一个守护线程，循环监控缓冲队列任务并负责对缓冲队列的任务进行处理。

生产者消费者模型

middle.py

# !/usr/bin/env python
# -*- coding:utf-8 -*-

from proj.tasks import distribution
from celery import group
from Queue import Queue as queue

import time
import threading

# Shared buffer queue: func_put() adds task indexes to it and the consumer
# loop in func_get() drains it.
q = queue()

# Decorator that runs the wrapped function in a new background thread.
# It used to be called ``async``, which is a reserved keyword since
# Python 3.7 and made this module fail to parse there.
def run_async(f):
    """Return a wrapper that starts ``f`` in its own thread on every call.

    The wrapper does not wait for ``f`` to finish. It returns the started
    ``threading.Thread`` so a caller may join it; existing callers that
    ignore the return value are unaffected.
    """
    from functools import wraps

    @wraps(f)
    def wrapper(*args, **kwargs):
        thr = threading.Thread(target=f, args=args, kwargs=kwargs)
        thr.start()
        return thr
    return wrapper


# Keep the old name reachable for Python 2 code that imports ``async`` from
# this module. It is assigned through globals() because the bare identifier
# cannot be written on Python 3.7+.
globals()['async'] = run_async


# Hands one batch to the storage layer without blocking the caller.
@run_async
def task(indexs):
    """Run ``distribution(indexs)`` in a background thread."""
    distribution(indexs)

# Split items into consecutive batches so that no batch holds both 1 and 2.
def _split_exclusive(items):
    """Return ``items`` cut into ordered batches where 1 and 2 never share a batch.

    A new batch is started whenever the next item is 1 while the current
    batch already contains 2, or the other way round. No empty batch is
    ever produced.
    """
    batches = []
    current = []
    for item in items:
        conflict = (item == 1 and 2 in current) or (item == 2 and 1 in current)
        if conflict:
            batches.append(current)
            current = []
        current.append(item)
    if current:
        batches.append(current)
    return batches


# Consumer loop: drains the shared queue and forwards the items in batches.
def func_get():
    """Drain the shared queue forever and dispatch mutually exclusive batches.

    Each cycle empties the queue, splits the items so that 1 and 2 never end
    up in the same batch, and passes every batch to ``task``, sleeping 60
    seconds after each dispatch. When the queue was empty the loop only
    sleeps. The previous ``0 <= len(l)`` check was always true, so an empty
    batch was dispatched in a new thread on every idle cycle.
    """
    while True:
        pending = []
        # This loop is the only reader of the queue, so get() cannot block
        # once qsize() has reported pending items.
        while q.qsize() != 0:
            pending.append(q.get())
            q.task_done()

        for batch in _split_exclusive(pending):
            task(batch)
            # One batch per minute keeps the storage side deliberately slow.
            time.sleep(60)

        if not pending:
            # Nothing queued: wait as long as a dispatch cycle would have.
            time.sleep(60)

                
# Store task information in the shared queue.
def func_put(indexs):
    """Put every element of ``indexs`` on the shared queue, one entry per element."""
    for item in indexs:
        q.put(item)

# Consumer: a daemon thread started together with the server, so it goes
# away when the server process exits. The thread handle is returned to the
# caller.
def consumer():
    """Start ``func_get`` in a daemon thread and return that thread."""
    thr = threading.Thread(target=func_get)
    # The ``daemon`` attribute replaces the deprecated setDaemon() call and
    # is also available on Python 2.6+.
    thr.daemon = True
    thr.start()
    return thr
    
# Producer: stores the given tasks in the buffer queue.
def producer(indexs):
    """Enqueue ``indexs`` by delegating to ``func_put``."""
    func_put(indexs)

celery的group模型

celery模块的逻辑：接受消费者发送过来的任务请求。实验中模拟大部分存储任务将会失败，单次成功存储的概率为20%，每个存储任务失败后最多重试3次（与代码中的 max_retries = 3 一致）。对于由消费者发送过来的请求，每5个存储任务被打包为一个group，做异步执行操作。

根据上面的实验需求，实现中为celery原生的task绑定了一个task实现类。这个task实现类继承的是celery提供的基类Task，并重写了其中的三个方法：on_success、on_failure、on_retry。
celery网络模型
tasks.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
from .config import app

import os
import time
import random
import fcntl
import json
from datetime import datetime, timedelta

from celery.utils.log import get_task_logger
from celery import Task, group

# Module logger obtained from Celery's get_task_logger, named after this module.
logger = get_task_logger(__name__)

# Failure simulator used by the storage task.
def func(x):
    """Raise for the values that stand for a failed store; otherwise do nothing.

    Args:
        x: Value to check; 1, 2, 3 and 4 are treated as failures.

    Raises:
        RuntimeError: If ``x`` is 1, 2, 3 or 4. The former bare ``raise`` had
            no active exception to re-raise and only failed by accident,
            with an unrelated error message.
    """
    if x in (1, 2, 3, 4):
        raise RuntimeError('simulated storage failure (x=%s)' % (x,))

# Append one record to the log file of the current minute.
def write_file(x):
    """Append ``x`` as one JSON line to ``./test_log_<YYYYmmddHHMM>.log``.

    The file name carries the current local time down to the minute, so
    records written within the same minute land in the same file. An
    exclusive ``flock`` is taken on the open file before writing.

    Args:
        x: JSON-serialisable value to store.
    """
    stamp = datetime.now().strftime('%Y%m%d%H%M')
    path = os.path.join('./', 'test_log_%s.log' % stamp)

    with open(path, 'a') as fh:
        fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
        fh.write(json.dumps(x, ensure_ascii = False) + '\n')

# Task base class overriding on_success, on_failure and on_retry.
class MyTask(Task):
    """Celery task base that reports the outcome of each task call.

    In every handler ``args[0]`` is the sequence of positional arguments the
    task was called with (for ``save`` a single value, logged as ``[1]``).
    It is converted to a list and wrapped in a 1-tuple before formatting:
    ``'%s' % seq`` would unpack a tuple of task arguments, changing the
    output or raising TypeError depending on how the arguments arrived.
    """

    def on_success(self, retval, task_id, *args, **kwargs):
        """Print and persist a success record for the finished task."""
        _info = 'save %s success' % (list(args[0]),)
        print(_info)
        write_file(_info)

    def on_failure(self, exc, task_id, *args, **kwargs):
        """Print and persist a failure record once the task has finally failed."""
        _info = 'save %s failed, roll back' % (list(args[0]),)
        print(_info)
        write_file(_info)

    def on_retry(self, exc, task_id, *args, **kwargs):
        """Print a notice that the task is being retried (nothing is persisted)."""
        _info = 'retry %s' % (list(args[0]),)
        print(_info)

# Bound task (receives the task instance as ``self``) built on the MyTask base.
@app.task(bind=True, base=MyTask)
def save(self, s):
    """Simulate storing ``s``: most attempts fail and are handed back for retry.

    Args:
        s: Value to store; it is not used by the simulation itself.
    """
    try:
        # func() raises for 1-4, so about one attempt in five gets through.
        func(random.randint(1, 5))
    except Exception as err:
        # Ask Celery to run the task again after 1 second, at most 3 retries.
        raise self.retry(exc=err, countdown=1, max_retries=3)

def distribution(indexs):
    """Submit ``indexs`` to Celery in groups of at most five ``save`` tasks.

    Each group is dispatched and then polled once per second until it
    reports ready; only then is the next group sent.

    Args:
        indexs: Sequence of values, one ``save`` task is created per value.
    """
    batch_size = 5
    for offset in range(0, len(indexs), batch_size):
        chunk = indexs[offset:offset + batch_size]
        signatures = [save.s(item) for item in chunk]

        pending = group(signatures)()

        while not pending.ready():
            time.sleep(1)

实验结果

# 1. 启动celery的服务端
$ celery -A proj.tasks worker --concurrency=1 --loglevel=info

# 2. 启动socket的服务端
$ python server.py

# 3. 启动socket的客户端
$ python client.py
input data: '[1,2,3,4,5]'
has been save info data: [1, 2, 3, 4, 5]
input data: 'exit'
finish!

# 查看任务的存储结果
$ cat test_log_202007171833.log
"save [1] failed, roll back"

$ cat test_log_202007171834.log
"save [2] success"
"save [3] failed, roll back"
"save [4] failed, roll back"
"save [5] success"

附录

config.py

from __future__ import absolute_import, unicode_literals

from celery import Celery

# Celery application named 'proj'. The same local Redis server is given as
# broker and as result backend (database 0), and ``include`` names the task
# module proj.tasks.
app = Celery('proj',
             broker='redis://127.0.0.1:6379',
             backend='redis://127.0.0.1:6379/0',
             include=['proj.tasks'])

app.conf.update(
    # NOTE(review): presumably the lifetime of stored results in seconds;
    # confirm against the Celery configuration reference.
    result_expires = 3600,

    #task_routes = {'proj.tasks.add': {'queue': 'hipri'}}
)

# Start the Celery application when this module is executed directly.
if __name__ == '__main__':
    app.start()