稍微研究一下就能看懂的进程池设计

最新推荐文章于 2024-08-17 21:19:20 发布

IT闷油瓶

最新推荐文章于 2024-08-17 21:19:20 发布

阅读量241

点赞数 1

文章标签： c++ linux 后端

本文链接：https://blog.csdn.net/wjzzzlwanghb/article/details/129305666

版权

文章介绍了池的概念，以进程池为例，展示了如何通过预先创建一组进程来提高响应速度和效率。进程池通过减少新进程的创建成本，实现了以空间换取时间的目标。文中给出了一个基于匿名管道的进程池实现，包括进程池类、工作进程、调度器、任务和通信机制等组件，展示了如何通过管道在父进程与子进程间传递任务并执行。

摘要由CSDN通过智能技术生成

池 (Pool)

池 (Pool) 的概念被广泛地应用于软件开发领域，可以明显地提高应用的响应速度、改善效率、提高资源利用率。那么什么是池呢？很形象的例子就是长工 vs 短工。应用预先豢养一批空闲的“工人”，“任务”来临时可以及时响应，避免了临时雇佣工人的消耗。此处的“工人”概念可以具体为进程（进程池）、线程（线程池）、连接（连接池）、内存（内存池）等。我们在本文中重点关注进程池这一具体的模型。

进程池

在多进程模型下，网络服务器以向操作系统申请资源为手段、以进程为载体对外提供服务。因此，进程池是服务器后台提供服务或网络功能的一种常见模型，其主要目标是避免在请求到来时才让操作系统临时创建进程，从而降低对外提供服务的成本。进程池的本质：以空间换取时间。

基于匿名管道的进程池示例:

src/Main.cc 程序入口

#include "Task.hh"
#include "Executor.hh"
#include <iostream>
#include <string.h>
#include <stdio.h>

/**
 * Strips leading and trailing blanks (spaces and tabs) from a string in place.
 * Other whitespace such as '\n' is left untouched.
 * @param &s: the string to trim; it is modified directly
 * @return:   a reference to the same (now trimmed) string
 */
std::string& trim(std::string& s) {
    // The only characters treated as blank.
    static const char* const blanks = " \t";

    if (!s.empty()) {
        // Drop everything before the first non-blank character. For an
        // all-blank string the search yields npos, which erases the whole string.
        const std::string::size_type first_kept = s.find_first_not_of(blanks);
        s.erase(0, first_kept);

        // Drop everything after the last non-blank character. If nothing is
        // left, npos + 1 wraps to 0 and the erase is a no-op on the empty string.
        const std::string::size_type last_kept = s.find_last_not_of(blanks);
        s.erase(last_kept + 1);
    }

    return s;
}

int main() {
    // 定义进程池，一共 3 个工作进程
    Executor executor(3);

    while (true) {
        // 打印提示符
        printf("#> ");
        // 读取一行
        std::string line;
        if (std::getline(std::cin, line) == nullptr) {
            // 用户输入了 EOF，结束程序
            printf("\n");
            break;
        }
        // 裁剪字符串
        trim(line);
        // 获取字符长度
        size_t size = line.size();
        if (size == 0) {
            // 空行跳过
            continue;
        }
        // 创建任务
        Task task(10, 20, line.c_str(), size);

        // 将任务提交给进程池去执行
        executor.execute(task);
    }
    return 0;
}

src/Executor.hh 进程池类

#pragma once

#include "Scheduler.hh"
#include "Task.hh"
#include "Worker.hh"
#include "Process.hh"
#include "SendEndpoint.hh"
#include <vector>
#include <unistd.h>
#include <assert.h>

/**
 * Process-pool executor: launches the worker processes and forwards each
 * submitted task to the worker chosen by the scheduler.
 */
class Executor {
public:
    /**
     * Starts the pool.
     * @param core: number of worker processes to launch
     */
    Executor(size_t core);

    /**
     * Submits a task to the pool.
     * @param task: the task to hand to a worker
     */
    void execute(Task& task);

private:
    // Chooses which sending endpoint (and therefore which worker) gets a task.
    Scheduler m_scheduler;
    // One entry per worker process that was launched.
    std::vector<Process> m_processes;
};


/**
 * Creates `core` workers. For each one a pipe is created, its sending
 * endpoint is registered with the scheduler, and a child process is forked
 * to run the worker loop on that pipe.
 */
Executor::Executor(size_t core) {
    for (size_t index = 0; index != core; ++index) {
        // One anonymous pipe per worker.
        int fds[2];
        const int rc = pipe(fds);
        assert(rc != -1);

        // The scheduler stores its own copy of the sending endpoint.
        SendEndpoint sender(fds);
        m_scheduler.append(sender);

        // Wrap the worker logic in a process object and remember it.
        Worker job(fds);
        Process child(job);
        m_processes.push_back(child);

        // Fork the worker process.
        child.start();
    }

    // Initialise the sending endpoints only after every worker has been
    // forked: SendEndpoint::construct() closes the read end of each pipe in
    // this process, so running it before the fork would hand the child an
    // already-closed read descriptor.
    m_scheduler.construct();
}

/**
 * Sends the task through the next sending endpoint chosen by the scheduler;
 * the worker attached to that endpoint receives it.
 */
void Executor::execute(Task& task) {
    m_scheduler.next().put(task);
}

src/Process.hh 进程类

#pragma once

#include "Worker.hh"
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <assert.h>
#include <stdlib.h>


/**
 * A child process that runs a Worker.
 */
class Process {
public:
    /* Stores a copy of the worker logic the child will run. */
    Process(Worker worker)
        : worker(worker) {}

    /* Forks the child process and runs the worker inside it. */
    void start();

private:
    /* The logic executed inside the child process. */
    Worker worker;
};
/**
 * Forks a child process that runs the worker logic. The parent returns
 * immediately; the child never returns from this function.
 */
void Process::start() {
    /* Remember who the parent is so the child can detect an early parent exit. */
    const pid_t parent_pid = getpid();

    /* Fork the child process. */
    pid_t pid = fork();
    assert(pid != -1);

    if (pid == 0) {
        // Child process.
        // Ask the kernel to send SIGKILL to this child when the parent dies,
        // so that workers do not outlive the pool.
        int r = prctl(PR_SET_PDEATHSIG, SIGKILL);
        assert(r != -1);
        (void)r;

        // The parent may already have exited between fork() and prctl(); in
        // that case no signal will ever arrive, so stop right here.
        if (getppid() != parent_pid) {
            _exit(0);
        }

        // Run the worker logic.
        worker.run();

        // Whatever the worker did, the child must not continue into the
        // parent's code. _exit() is used instead of exit() so the child does
        // not run the parent's atexit handlers or flush stdio buffers that
        // were copied from the parent by fork().
        _exit(0);
    }
}

src/Worker.hh 进程池工作逻辑

#pragma once

#include "ReceiverEndpoint.hh"
#include "Task.hh"

/**
 * The logic executed by a worker (child) process.
 */
class Worker {
public:
    /* Stores both descriptors of the pipe this worker will read from. */
    Worker(int pipe_fd[2]) {
        for (int i = 0; i < 2; ++i) {
            m_pipe_fd[i] = pipe_fd[i];
        }
    }

    // The worker's main routine.
    void run();

private:
    // Pipe descriptors used to build the receiving endpoint.
    int m_pipe_fd[2];
};

// 注意，这个方法是运行在各个子进程中的
void Worker::run() {
    // 构造接收端
    ReceiverEndpoint endpoint(m_pipe_fd);
    // 初始化接收端
    endpoint.construct();

    // 工作进程不断接收任务并执行
    while (true) {
        // 从接收端接收任务
        Task task = endpoint.take();

        // 执行任务
        task.run();
    }

    // 销毁接后端
    endpoint.destruct();
}

src/Scheduler.hh 调度器类（Round Robin 策略）

#pragma once

#include "SendEndpoint.hh"
#include <vector>

/**
 * 调度器对象
 */
class Scheduler {
public:
    /* 构造方法 */
    Scheduler(): m_current_index(0), m_endpoints() {}
    /* 析构方法中，需要将所有发送端全部销毁 */
    ~Scheduler() {
        for (auto it = m_endpoints.begin(); it != m_endpoints.end(); ++it) {
            it->destruct();
        }
    }

public:
    // 保存发送端
    void append(SendEndpoint endpoint);

    // 进行构造
    void construct();

    // 获取下一个要使用的发送端（对应的接收端的子进程来处理任务）
    SendEndpoint& next();

private:
    // 当前应该由哪个发送端处理
    size_t m_current_index;
    // 保存所有的发送端
    std::vector<SendEndpoint> m_endpoints;
};


// Adds a copy of the given sending endpoint to the end of the list.
void Scheduler::append(SendEndpoint endpoint) {
    m_endpoints.emplace_back(endpoint);
}

void Scheduler::construct() {
    // 依次将所有发送都初始化
    for (auto it = m_endpoints.begin(); it != m_endpoints.end(); ++it) {
        it->construct();
    }
}

// Round-robin scheduling: every endpoint gets one turn before any repeats.
// Returns the endpoint at the current cursor and advances the cursor,
// wrapping back to 0 after the last endpoint.
SendEndpoint& Scheduler::next() {
    const size_t picked = m_current_index;
    const size_t following = picked + 1;

    // Move the cursor on, wrapping around once it runs past the last endpoint.
    m_current_index = (following == m_endpoints.size()) ? 0 : following;

    return m_endpoints[picked];
}

src/SendEndpoint.hh 发送端

#pragma once

#include "Task.hh"
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <vector>


/**
 * Sending endpoint.
 * Each sending endpoint is paired with one receiving endpoint: the parent
 * process holds the sending side and uses it to send tasks, the child
 * process holds the receiving side and uses it to receive them.
 * Communication goes through an anonymous pipe.
 */
class SendEndpoint {
public:
    // Stores both pipe descriptors. The write descriptor is only selected by
    // construct(); until then m_fd holds -1 so that a premature destruct()
    // cannot close an unrelated, indeterminate descriptor.
    SendEndpoint(int pipe_fd[2]) : m_fd(-1) {
        m_pipe_fd[0] = pipe_fd[0];
        m_pipe_fd[1] = pipe_fd[1];
    }

    ~SendEndpoint() {}

public:
    // Initialise: close the unused read end and select the write end.
    void construct();

    // Tear down: close the write end.
    void destruct();

public:
    // Sends a task through this endpoint.
    void put(Task& task);

private:
    // Both descriptors of the pipe.
    int m_pipe_fd[2];
    // Descriptor used for writing; -1 until construct() has run.
    int m_fd;
};


// Selects the pipe's write end for sending and closes the read end, which
// the sending side does not use.
void SendEndpoint::construct() {
    m_fd = m_pipe_fd[1];
    ::close(m_pipe_fd[0]);
}

// Closes the write descriptor selected by construct().
void SendEndpoint::destruct() {
    ::close(m_fd);
}

// Serialises the task and writes it to the pipe; the child process holding
// the matching receiving endpoint reads it from the other end.
void SendEndpoint::put(Task& task) {
    // Number of bytes the serialised task occupies.
    const size_t size = task.size();

    // Heap buffer for the serialised bytes. A variable-length array is not
    // standard C++, and the size depends on user input, so it would be an
    // unbounded stack allocation.
    std::vector<uint8_t> buffer(size);

    // Serialise the task into the buffer.
    task.serialize(buffer.data(), size);

    // Send the message with a single write, which must transfer every byte.
    ssize_t r = write(m_fd, buffer.data(), size);
    assert(r != -1);
    assert((size_t)r == size);
    (void)r;
}

src/ReceiverEndpoint.hh 接收端

#pragma once

#include "Task.hh"
#include <unistd.h>
#include <string.h>
#include <assert.h>


/**
 * Receiving endpoint.
 * Each receiving endpoint is paired with one sending endpoint: the parent
 * process holds the sending side and uses it to send tasks, the child
 * process holds the receiving side and uses it to receive them.
 * Communication goes through an anonymous pipe.
 */
class ReceiverEndpoint {
public:
    /* Stores both pipe descriptors. The read descriptor is only selected by
     * construct(); until then m_fd holds -1 so that a premature destruct()
     * cannot close an unrelated, indeterminate descriptor. */
    ReceiverEndpoint(int pipe_fd[2]) : m_fd(-1) {
        m_pipe_fd[0] = pipe_fd[0];
        m_pipe_fd[1] = pipe_fd[1];
    }

    ~ReceiverEndpoint() {}

public:
    /* Initialise: close the unused write end and select the read end. */
    void construct();

    /* Tear down: close the read end. */
    void destruct();

public:
    /* Receives one task from the channel. */
    Task take();

private:
    // Both descriptors of the pipe.
    int m_pipe_fd[2];
    // Descriptor used for reading; -1 until construct() has run.
    int m_fd;

    // Size of the receive buffer in bytes.
    static const size_t capacity = 4096;
    // Receive buffer.
    uint8_t m_buffer[capacity];
};


// Selects the pipe's read end for receiving and closes the write end, which
// the receiving side does not use.
void ReceiverEndpoint::construct() {
    m_fd = m_pipe_fd[0];
    ::close(m_pipe_fd[1]);
}

// Closes the read descriptor selected by construct().
void ReceiverEndpoint::destruct() {
    ::close(m_fd);
}

// Reads from fd until `want` bytes have been stored in dst or the stream
// ends. Returns the number of bytes actually stored.
static size_t read_exact(int fd, uint8_t* dst, size_t want) {
    size_t done = 0;
    while (done < want) {
        ssize_t n = read(fd, dst + done, want - done);
        assert(n != -1);
        if (n <= 0) {
            // End of stream (or an error when assertions are disabled).
            break;
        }
        done += (size_t)n;
    }
    return done;
}

// Receives exactly one task from the pipe.
//
// A pipe is a byte stream: when several tasks are already queued, one large
// read() returns all of them at once, and deserialising only the first would
// silently drop the rest. The message is therefore read in two steps that
// follow the wire format
//   [4-byte a] [4-byte b] [4-byte string length] [string bytes]
// first the fixed 12-byte header, then exactly as many string bytes as the
// header announces. Any following message stays in the pipe for the next call.
Task ReceiverEndpoint::take() {
    // Size of the fixed part of a message: three 4-byte fields.
    const size_t header_size = 12;

    // Clear the buffer to 0x0.
    memset(m_buffer, 0x0, capacity);

    // Step 1: the fixed-size header.
    size_t got = read_exact(m_fd, m_buffer, header_size);
    assert(got == header_size);

    // The string length is the third header field.
    uint32_t text_size = 0;
    memcpy(&text_size, m_buffer + 8, sizeof(text_size));
    // The whole message must fit into the receive buffer.
    assert(text_size <= capacity - header_size);

    // Step 2: exactly the announced number of string bytes.
    got += read_exact(m_fd, m_buffer + header_size, text_size);
    assert(got == header_size + text_size);

    // Turn the received bytes into a task object.
    return Task::deserialize(m_buffer, got);
}

src/Task.hh 任务示例

#pragma once

#include "Log.hh"
#include <string>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>


// Sample task: adds a and b, sleeps 5s to simulate a slow job, then logs the
// result together with the text supplied by the submitter.
class Task {
public:
    // Builds a task from the two operands and the first s bytes of p.
    Task(uint32_t a, uint32_t b, const char* p, size_t s): m_a(a), m_b(b), m_bangyidage(p, s) {}

    ~Task() {}

public:
    // Rebuilds a task object from a serialised buffer.
    static Task deserialize(const uint8_t* buffer, size_t size);

    // Number of bytes the serialised task occupies.
    size_t size() const;

    // Serialises the task into buffer, which holds `capacity` bytes.
    void serialize(uint8_t* buffer, size_t capacity) const;

public:
    // Executes the task.
    void run() const;

private:
    // Size of the fixed part of the wire format: three 4-byte fields.
    static const size_t header_size = 12;

    // Parameters of the task.
    const uint32_t m_a;
    const uint32_t m_b;
    const std::string m_bangyidage;
};

// Wire format: [4-byte a] [4-byte b] [4-byte string length] [string bytes]


// Called by the child process: turns a message received from the receiving
// endpoint back into a task object.
// `size` is the number of valid bytes in `buffer`; it must cover the header
// and the string length announced in the header.
Task Task::deserialize(const uint8_t* buffer, size_t size) {
    // Validate before reading: the fixed header must be present in full.
    assert(size >= header_size);

    // Copy the header fields out with memcpy rather than through a uint32_t
    // pointer: the byte buffer is not guaranteed to be 4-byte aligned.
    uint32_t header[3];
    memcpy(header, buffer, header_size);

    // Length of the string that follows the header.
    const size_t s_size = header[2];
    // Written as a subtraction so a huge length cannot wrap the comparison.
    assert(size - header_size >= s_size);

    // Build the task object: a, b, and the string bytes after the header.
    return Task(header[0], header[1], reinterpret_cast<const char*>(buffer + header_size), s_size);
}

// Required length: a + b + string length field + string bytes.
size_t Task::size() const {
    return header_size + m_bangyidage.size();
}

// Called by the parent process: turns the task into a message that the
// sending endpoint sends to the child process.
void Task::serialize(uint8_t* buffer, size_t capacity) const {
    assert(capacity >= size());

    // The length field is 4 bytes wide; the string must fit into it.
    const size_t text_size = m_bangyidage.size();
    const uint32_t text_size32 = static_cast<uint32_t>(text_size);
    assert(text_size32 == text_size);

    // Store a, b and the string length. memcpy is used because the byte
    // buffer is not guaranteed to be 4-byte aligned.
    const uint32_t header[3] = { m_a, m_b, text_size32 };
    memcpy(buffer, header, header_size);

    // Store the string bytes right after the header.
    memcpy(buffer + header_size, m_bangyidage.data(), text_size);
}

// Simulated task: logs a start message, adds a and b, sleeps 5 seconds to
// mimic slow work, then logs the result.
void Task::run() const {
    // The submitter's text has no length limit, so the messages are built in
    // std::string; formatting them into a fixed 200-byte array would overflow
    // the stack for long input. c_str() keeps the original "%s" semantics
    // (the text ends at the first NUL byte).
    const char* who = m_bangyidage.c_str();

    std::string message = "准备开始执行任务，感谢榜一大哥(";
    message += who;
    message += ")\n";
    Log::info(message.c_str());

    uint32_t result = m_a + m_b;
    sleep(5);

    message = "计算完毕 ";
    message += std::to_string(m_a);
    message += " + ";
    message += std::to_string(m_b);
    message += " = ";
    message += std::to_string(result);
    message += "，再次感谢榜一大哥(";
    message += who;
    message += ")\n";
    Log::info(message.c_str());
}

src/Log.hh 日志类

#pragma once

#include <unistd.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>


/**
 * Logging helper. Messages are written to a file named after the calling
 * process's pid, so output from different processes does not get mixed up
 * on the console.
 */
class Log {
public:
    /* Appends one message to the calling process's log file. */
    static void info(const char* message);
};


/**
 * Appends `message` to the file "<pid>.out", prefixed with the current local
 * time and the pid of the calling process.
 */
void Log::info(const char *message) {
    /* The log file is named after the current process id. */
    const pid_t self = getpid();
    char path[100];
    sprintf(path, "%d.out", self);

    /* Open the file for appending. */
    FILE *out = fopen(path, "a+");
    assert(out != nullptr);

    /* Format the current local time as year-month-day hour:minute:second. */
    time_t stamp = time(nullptr);
    char when[100];
    strftime(when, sizeof(when), "%Y-%m-%d %H:%M:%S", localtime(&stamp));

    /* Write the message together with the timestamp and the pid. */
    fprintf(out, "%s: (%d) %s", when, self, message);

    /* Make sure the content reaches the file. */
    int rc = fflush(out);
    assert(rc != EOF);

    /* Close the file. */
    rc = fclose(out);
    assert(rc != EOF);
}

Makefile

# Builds the process-pool demo.
# The program is compiled from src/Main.cc alone, but that file pulls in the
# src/*.hh headers, so they are listed as prerequisites: editing a header
# triggers a rebuild. Recipe lines must start with a TAB character.
.PHONY: all clean

all: cppool

cppool: src/Main.cc $(wildcard src/*.hh)
	g++ -std=c++11 -Wall -Werror -g -O0 src/Main.cc -o cppool

clean:
	rm -rf cppool

运行结果

[root@VM-centos cppool]# ./cppool 
#> hello
#> it's ok
#> nothing
#> it's a process pool
#> 
[root@VM-52-199-centos cppool]# ls *.out
9926.out  9927.out  9928.out
[root@VM-centos cppool]# cat *.out
2022-11-04 10:56:22: (9926) 准备开始执行任务，感谢榜一大哥(hello)
2022-11-04 10:56:27: (9926) 计算完毕 10 + 20 = 30，再次感谢榜一大哥(hello)
2022-11-04 10:56:40: (9926) 准备开始执行任务，感谢榜一大哥(it's a process pool)
2022-11-04 10:56:45: (9926) 计算完毕 10 + 20 = 30，再次感谢榜一大哥(it's a process pool)
2022-11-04 10:56:24: (9927) 准备开始执行任务，感谢榜一大哥(it's ok)
2022-11-04 10:56:29: (9927) 计算完毕 10 + 20 = 30，再次感谢榜一大哥(it's ok)
2022-11-04 10:56:27: (9928) 准备开始执行任务，感谢榜一大哥(nothing)
2022-11-04 10:56:32: (9928) 计算完毕 10 + 20 = 30，再次感谢榜一大哥(nothing)
[root@VM-centos cppool]#