Python 生产者消费者模式+队列

多线程

生产者

def produce():
    """Load the source table and enqueue one work item per province.

    Reads every row of ``table_name`` into a DataFrame, splits it on the
    ``update_province`` column and puts each ``(name, group)`` pair on the
    module-level queue ``q``.
    """
    # A table name cannot be passed as a bound query parameter, so it is
    # interpolated into the statement.
    # NOTE(review): assumes ``table_name`` is a trusted constant - confirm.
    sql = """SELECT * FROM %s"""

    conn, cursor = mysql_conn()
    try:
        df = pd.read_sql(sql % table_name, conn)
    finally:
        # The result is fully materialised in ``df``, so the database
        # resources can be released here instead of being leaked.
        # NOTE(review): assumes mysql_conn() returns a connection owned by
        # the caller (not a shared one) - confirm.
        cursor.close()
        conn.close()

    for name, groups in df.groupby("update_province"):
        # Single pre-formatted string: prints the same text on Python 2 and 3.
        print("name %s %s %s" % (name, len(groups), groups.shape))
        q.put((name, groups), block=True)
    print("q.qsize() %s" % q.qsize())

消费者

def customer():
    """Consume ``(name, group)`` items from ``q`` until the queue is empty.

    For every item the group is passed through ``ipv6_update_city`` (only
    when the province name is truthy), the ``minip``/``maxip`` columns are
    converted with ``inet_ntoa6`` and the rows are appended, ``$``-separated
    and preceded by a header line, to ``city_update_<unix-seconds>.csv``.
    """
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    header = "$".join(
        ["id", "minip", "maxip", "update_province", "city", "update_city", "update_city_cidr"]) + "\n"

    while True:
        # A separate empty() check followed by a blocking get() is racy with
        # several workers: another thread can take the last item in between
        # and this one would then block forever. A non-blocking get is atomic.
        try:
            deal = q.get_nowait()
        except Empty:
            break

        try:
            name, groups = deal
            print("cur group start %s" % (name,))
            df = ipv6_update_city(groups) if name else groups
            df["minip"] = df["minip"].map(inet_ntoa6)
            df["maxip"] = df["maxip"].map(inet_ntoa6)
            data = ["$".join(['%s' % cell for cell in row]) + "\n" for row in df.values]
            del df  # drop the frame before the file write to lower peak memory
            # The file name has one-second resolution, so groups finished
            # within the same second are appended to the same file.
            with open("city_update_" + str(int(time.time())) + ".csv", "a+") as f:
                f.write(header)
                f.writelines(data)
            print("cur group end %s %s" % (name, q.qsize()))
            time.sleep(5)
        finally:
            # Always acknowledge the item; otherwise a failure while
            # processing would leave q.join() waiting forever.
            q.task_done()

入口函数

def func(worker_count=4):
    """Fill the queue, start the consumer threads and wait for completion.

    Args:
        worker_count: number of consumer threads to start (default 4).
    """
    # The queue is filled completely first because the consumers stop as soon
    # as they find it empty.
    produce()
    for _ in range(worker_count):
        Thread(target=customer).start()
    # Blocks until every queued item has been acknowledged with task_done().
    q.join()

多进程

# -*- coding: utf-8 -*-


import codecs
import time
from multiprocessing import Process, Manager

import pandas as pd


class ProducerProcess(Process):
    """Producer process that puts a single item on the shared task queue."""

    def __init__(self, group, task_queue):
        """Remember the item to publish and the queue to publish it on."""
        super(ProducerProcess, self).__init__()
        self.task_queue = task_queue
        self.group = group

    def run(self):
        """Enqueue the stored item, then sleep for one second."""
        target_queue = self.task_queue
        target_queue.put(self.group)
        time.sleep(1)


class ConsumerProcess(Process):
    """Consumer process that handles queued items until the queue stays empty.

    Args:
        task_queue: queue shared with the producers.
        idle_timeout: seconds to wait for a new item before treating the
            queue as drained and exiting.
    """

    def __init__(self, task_queue, idle_timeout=3):
        Process.__init__(self)
        self.task_queue = task_queue
        self.idle_timeout = idle_timeout

    def run(self):
        """Fetch and process items until none arrives within ``idle_timeout``."""
        try:
            from queue import Empty  # Python 3
        except ImportError:
            from Queue import Empty  # Python 2

        while True:
            # Checking empty() and then calling a blocking get() is racy:
            # another consumer may take the last item in between, leaving this
            # one blocked forever. It also makes a consumer quit at once when
            # it starts before any producer has put an item. A get() with a
            # timeout covers both cases.
            try:
                data = self.task_queue.get(timeout=self.idle_timeout)
            except Empty:
                break
            process_data(data)
            time.sleep(2)  # stands in for the time spent processing an item


def process_data(data, path="test.csv"):
    """Append the rows of ``data`` to a comma-separated UTF-8 text file.

    A column ``C`` holding column ``A`` with ``"hello"`` appended is added to
    ``data`` in place; every row is then written as one comma-joined line.

    Args:
        data: DataFrame with a string column ``A``.
        path: output file, opened in append mode (default ``"test.csv"``).
    """
    data["C"] = data["A"] + "hello"
    # Format every cell explicitly: str.join() raises TypeError as soon as a
    # row contains a non-string value such as a number.
    lines = [",".join(["%s" % cell for cell in row]) + "\n" for row in data.values.tolist()]

    with codecs.open(path, 'a+', 'utf8') as wfp:
        wfp.writelines(lines)


def func():
    """Run the multi-process producer/consumer demo.

    Builds a small DataFrame, starts one producer per group of column ``B``
    to enqueue that group, then starts two consumers that drain the queue.
    """
    # Shared task queue, served by a manager process.
    manager = Manager()
    try:
        task_queue = manager.Queue()

        # Sample DataFrame, grouped on column B below.
        df = pd.DataFrame({'A': [1, 2, 3, 4, 5],
                           'B': ['a', 'b', 'a', 'b', 'a']})
        df = df.astype({"A": "string"})

        producer_processes = [ProducerProcess(group, task_queue) for _, group in df.groupby('B')]
        consumer_processes = [ConsumerProcess(task_queue) for _ in range(2)]

        # Let the producers finish before any consumer starts: consumers stop
        # when they find the queue empty, so one started too early would exit
        # without processing anything.
        for process in producer_processes:
            process.start()
        for process in producer_processes:
            process.join()

        # Reported once the queue is filled; before the producers run the
        # size is always 0.
        print("task_queue %s" % task_queue.qsize())

        for process in consumer_processes:
            process.start()
        for process in consumer_processes:
            process.join()
    finally:
        # Stop the manager's server process instead of leaving it to
        # interpreter exit.
        manager.shutdown()


# Run the demo only when this file is executed as a script. The guard also
# keeps child processes from re-running func() when multiprocessing imports
# this module under the "spawn" start method.
if __name__ == '__main__':
    func()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值