Linux_进程池 - 技术栈

进程池是什么？

进程池是一种创建和管理进程的技术，它可以有效地控制同时运行的进程数量，从而提高程序的效率和性能。

**进程池的核心思想：**预先创建一定数量的进程，并将这些进程放入一个池中。当有新的任务到来时，进程池会分配一个空闲的进程来处理这个任务。任务完成后，进程不会关闭，而是返回池中等待下一个任务。这种方式避免了频繁创建和销毁进程的开销，同时也限制了同时运行的进程数量，防止操作系统过载。

进程池的模拟实现

准备工作

master需要管理所有的管道，那么该如何管理呢？答案是先描述、再组织！master创建管道之前，需要先知道要创建多少个管道。下面代码中的process-num代表要创建的管道个数。

bash 复制代码

// Print a one-line usage hint; called when the command-line arguments are wrong.
// proc: the program name as typed by the user (argv[0]).
void Usage(std::string proc)
{
    const std::string hint = "Usage: " + proc + " process-num";
    std::cout << hint << std::endl;
}
int main(int argc, char *argv[])
{
    // Exactly one argument is expected after the program name: process-num.
    const bool has_expected_args = (argc == 2);
    if (has_expected_args)
    {
        return 0;
    }

    // Wrong argument count: show the hint and report failure.
    Usage(argv[0]);
    return 1;
}

初始化进程池

根据进程池的基本模型，拿到要创建的管道个数之后，接下来就需要逐个创建管道，再创建对应的子进程；子进程执行完相应的工作之后就会退出，不会再继续往下运行。

bash 复制代码

#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <functional>

using work_t = std::function<void()>;

// Describe first: one Channel records what the master knows about one worker,
// namely the write end of the pipe leading to it and the worker's pid.
class Channel
{
public:
    // wfd: write end of the pipe; who: pid of the child at the other end.
    Channel(int wfd, pid_t who)
        : _wfd(wfd),
          _name("Channel-" + std::to_string(wfd) + "-" + std::to_string(who)),
          _who(who)
    {
    }

    // Label of the form "Channel-<wfd>-<pid>".
    // const so that it can also be called through a const Channel.
    std::string Name() const { return _name; }

    // No user-declared destructor: this class only stores the descriptor number
    // and never closes it, and leaving the destructor implicit keeps the
    // compiler-generated move operations available (cheaper vector growth).

private:
    int _wfd;          // write end of this channel's pipe
    std::string _name; // label used when printing the channel
    pid_t _who;        // pid of the child process
};

// Placeholder job run by each child process: it only sleeps for ten seconds
// and then returns.
void Worker()
{
    const unsigned int idle_seconds = 10;
    sleep(idle_seconds);
}

// Print a one-line usage hint; called when the command-line arguments are wrong.
// proc: the program name as typed by the user (argv[0]).
void Usage(std::string proc)
{
    const std::string hint = "Usage: " + proc + " process-num";
    std::cout << hint << std::endl;
}

// work_t work为回调方法
int InitProcessPool(const int &processnum, std::vector<Channel> &channels, work_t work)
{
    // 2.通过for循环，创建所需的进程数量；
    for (int i = 0; i < processnum; i++)
    {
        // 1. 先创建管道
        int pipefd[2] = {0};
        // pipe函数: 将父进程(写端)和子进程(读端)通过管道连接通信
        int n = pipe(pipefd);
        if (n < 0)    return 2;
        // 2. 再创建子进程
        pid_t id = fork();
        if (id < 0)    return 3;
        // 3.建立信道
        if (id == 0)
        {
            // 子进程
            ::close(pipefd[1]); // read
            // dup2函数将子进程的读端fd覆盖到了标准输入的fd上，这样会改变子进程的输入源，
            // 即进程池的输出会直接输入到子进程中的输入源中
            dup2(pipefd[0], 0); // 每次读数据直接从读端读入，不用再从标准输入中读
            
            // 执行相应的命令 
            Worker();
            ::exit(0);    // 退出
        }
        // 父进程
        ::close(pipefd[0]); // write
        // 将管道的有关信息保存到channels中
        Channel ch(pipefd[1], id);
        channels.push_back(ch);
    }
    return 0;
}
void DebugPrint(std::vector<Channel> &channels)
{
    for (auto &e : channels){ std::cout << e.Name() << std::endl; }
}
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        Usage(argv[0]);
        return 1;
    }

    // num is the number of pipes (child processes) to create. std::stoi throws
    // on non-numeric or out-of-range text, so report that as a usage error
    // instead of letting the exception terminate the program.
    int num = 0;
    try
    {
        num = std::stoi(argv[1]);
    }
    catch (const std::exception &)
    {
        Usage(argv[0]);
        return 1;
    }
    if (num <= 0)
    {
        Usage(argv[0]);
        return 1;
    }

    // Organise: keep every channel in one vector.
    std::vector<Channel> channels;
    // Initialise the process pool and stop if a pipe or a child could not be created.
    const int rc = InitProcessPool(num, channels, Worker);
    if (rc != 0)
    {
        std::cerr << "InitProcessPool failed with code " << rc << std::endl;
        return rc;
    }

    DebugPrint(channels);
    sleep(100);
    return 0;
}

当要创建5个进程时：

派发任务

什么是任务？其实只需知道一个任务的任务码就可以派发该任务，这里的任务码就是一个int类型的数字。怎么派发任务？我们不能让某一个进程一直执行任务，而其他进程什么也不干，这里引入一个新的名词：负载均衡。实现负载均衡常见的方式有三种：1.轮询；2.随机；3.按历史任务数分配。

bash 复制代码

using task_t = std::function<void()>;

static int number = 0;
class TaskManger
{
public:
    TaskManger()
    {
        // 种一颗随机数种子
        srand(time(nullptr));

        InsertTask(DownLoad);    // 下载任务
        InsertTask(Log);         // 日志任务
        InsertTask(Sql);         // 数据库任务
    }
    void InsertTask(task_t t){
        tasks[number++] = t;    // 将任务插入到tasks里
    }
    int SelectTask(){
        return rand() % number; //随机选择任务
    }
    void Excute(int number)
    {
        // 若找不到指定任务直接返回
        if(tasks.find(number) == tasks.end())   return ;
        tasks[number]();    // 执行指定任务
    }
    ~TaskManger(){}
private:
    std::unordered_map<int, task_t> tasks;
};

bash 复制代码

// 派发任务
void DispatchTask(std::vector<Channel> &channels)
{
    int who = 0;
    // 2.派发任务（以轮询方式）
    while (true)
    {
        // a.选择一个任务，一个整数
        int task = tm.SelectTask();
        // b.选择一个子进程channel
        Channel &cur = channels[who++];
        who %= channels.size(); // 轮询子进程

        std::cout << "#####################" << std::endl;
        std::cout << "send " << task << " to" << cur.Name() << std::endl;
        std::cout << "#####################" << std::endl;

        // c.派发任务
        cur.Send(task);
        sleep(1);
    }
}

执行结果如下：

退出进程池

退出的时候，只需要将所有Channel对象中的_wfd关闭即可：写端关闭后，子进程的read会返回0（读到文件结尾），于是跳出循环（break），子进程随之退出；之后再从Channel类中获取子进程的pid，用waitpid回收已退出的子进程。

bash 复制代码

// Shut the pool down: close every channel's write end, then reap every child.
void CleanProcessPool(std::vector<Channel> &channels)
{
    // Close all write ends first.
    for (auto &e : channels)
    {
        e.Close();
    }
    // Then wait for each child by pid so that none is left as a zombie.
    for (auto &e : channels)
    {
        pid_t rid = waitpid(e.Id(), nullptr, 0);
        if (rid < 0)
        {
            // Report instead of silently dropping the failure.
            std::cerr << "waitpid failed for child " << e.Id() << std::endl;
        }
    }
}

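这里有一个小bug：fork出的子进程会继承父进程此前创建的所有管道写端，因此前面那些管道的写端并不只被父进程持有。如果按顺序“关闭一个写端就立刻waitpid对应的子进程”，该子进程读不到文件结尾、不会退出，父进程就会一直阻塞。这个问题在完整代码中解决。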

完整代码

bash 复制代码

// Task模块
#pragma once
#include <iostream>
#include <unordered_map>
#include <functional>
#include <ctime>
#include <sys/types.h>
#include <unistd.h>

using task_t = std::function<void()>;

// Demo job: announce the download task together with the pid of the process running it.
void DownLoad()
{
    const pid_t self = getpid();
    std::cout << "我是下载任务..., pid:" << self << std::endl;
}
// Demo job: announce the logging task together with the pid of the process running it.
void Log()
{
    const pid_t self = getpid();
    std::cout << "我是日志任务..., pid:" << self << std::endl;
}
// Demo job: announce the database sync task together with the pid of the process running it.
void Sql()
{
    const pid_t self = getpid();
    std::cout << "我是数据库同步任务..., pid:" << self << std::endl;
}

static int number = 0;
class TaskManger
{
public:
    TaskManger()
    {
        // 种一颗随机数种子
        srand(time(nullptr));

        InsertTask(DownLoad);
        InsertTask(Log);
        InsertTask(Sql);
    }
    void InsertTask(task_t t){ tasks[number++] = t;}
    int SelectTask()
    {
        return rand() % number; //随机选择任务
    }
    void Excute(int number)
    {
        if(tasks.find(number) == tasks.end())   return ;
        tasks[number]();    // 执行指定任务
    }
    ~TaskManger()
    {}
private:
    std::unordered_map<int, task_t> tasks;
};

// Global task registry object used by the code that includes this header.
// NOTE(review): this is a non-inline variable defined in a header, so every
// translation unit that includes the header gets its own definition — confirm
// the header is included by a single .cpp only. The name also hides
// `struct tm` from <ctime> at this scope.
TaskManger tm;

bash 复制代码

// ProcessPool模块
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cerrno>
#include <functional>
#include <iostream>
#include <string>
#include <vector>
#include "Task.hpp"

using work_t = std::function<void()>;

// Describe first: one Channel records what the master knows about one worker,
// namely the write end of the pipe leading to it and the worker's pid.
class Channel
{
public:
    // wfd: write end of the pipe; who: pid of the child at the other end.
    Channel(int wfd, pid_t who)
        : _wfd(wfd),
          _name("Channel-" + std::to_string(wfd) + "-" + std::to_string(who)),
          _who(who)
    {
    }

    // Label of the form "Channel-<wfd>-<pid>".
    std::string Name() const { return _name; }

    // Send one task code to the worker as the raw bytes of an int.
    // A failed or short write is reported on stderr instead of being ignored.
    void Send(int cmd)
    {
        const ssize_t written = ::write(_wfd, &cmd, sizeof(cmd));
        if (written != static_cast<ssize_t>(sizeof(cmd)))
        {
            std::cerr << _name << ": failed to send task " << cmd << std::endl;
        }
    }

    // Close the write end. Safe to call more than once: the descriptor is
    // forgotten after the first call, so a number the kernel may have reused
    // for something else is never closed by accident.
    void Close()
    {
        if (_wfd < 0)
            return;
        ::close(_wfd);
        _wfd = -1;
    }

    // Pid of the child process served by this channel.
    pid_t Id() const { return _who; }

    // No user-declared destructor: copies of a Channel share one descriptor
    // number, so closing stays explicit via Close(); leaving the destructor
    // implicit also keeps the compiler-generated move operations available.

private:
    int _wfd;          // write end of this channel's pipe, -1 once closed
    std::string _name; // label used when printing the channel
    pid_t _who;        // pid of the child process
};

// Print a one-line usage hint; called when the command-line arguments are wrong.
// proc: the program name as typed by the user (argv[0]).
void Usage(std::string proc)
{
    const std::string hint = "Usage: " + proc + " process-num";
    std::cout << hint << std::endl;
}
// Job loop run inside each child process. Task codes are read from fd 0 as
// the raw bytes of an int and executed one by one. The loop ends when read()
// reports end of file, or when reading fails in a way that cannot be retried.
void Worker()
{
    while (true)
    {
        int cmd = 0;
        const ssize_t n = ::read(0, &cmd, sizeof(cmd));
        if (n == static_cast<ssize_t>(sizeof(cmd)))
        {
            // A complete task code arrived: run the matching job.
            tm.Excute(cmd);
        }
        else if (n == 0)
        {
            // End of file: every write end of the pipe has been closed.
            std::cout << "pid: " << getpid() << " quit..." << std::endl;
            break;
        }
        else if (n < 0 && errno == EINTR)
        {
            // Interrupted by a signal before any data was read: try again.
            continue;
        }
        else
        {
            // Read error or truncated task code. Looping again would spin
            // forever on a persistent error, so stop this worker instead.
            std::cerr << "pid: " << getpid() << " read failed, quit..." << std::endl;
            break;
        }
    }
}

// work_t work为回调方法
int InitProcessPool(const int &processnum, std::vector<Channel> &channels, work_t work)
{
    // 2.创建指定个数进程
    for (int i = 0; i < processnum; i++)
    {
        // 1. 先创建管道
        int pipefd[2] = {0};
        int n = pipe(pipefd);
        if (n < 0) return 2;
        // 2. 再创建子进程
        pid_t id = fork();
        if (id < 0) return 3;
        // 3.建立信道
        if (id == 0)
        {
            // 子进程
            // 关闭历史wfd(解决bug_vison3)
            for(auto& e : channels) // 解决后续的bug
                e.Close();

            ::close(pipefd[1]); // read
            dup2(pipefd[0], 0); // 每次读数据直接从读端读入，不用再从标准输入中读
            Worker();
            ::exit(0);
        }
        // 父进程
        ::close(pipefd[0]); // write
        Channel ch(pipefd[1], id);
        channels.push_back(ch);
    }
    return 0;
}

void DispatchTask(std::vector<Channel> &channels)
{
    int who = 0;
    // 2.派发任务（以轮询方式）
    int cnt = 3;
    while (cnt--)
    {
        // a.选择一个任务，一个整数
        int task = tm.SelectTask();
        // b.选择一个子进程channel
        Channel &cur = channels[who++];
        who %= channels.size(); // 轮询子进程

        std::cout << "#####################" << std::endl;
        std::cout << "send " << task << " to" << cur.Name() << "，任务还剩：" << cnt << std::endl;
        std::cout << "#####################" << std::endl;

        // c.派发任务
        cur.Send(task);
        sleep(1);
    }
}
// Shut the pool down: for each channel, close its write end and then reap the
// child behind it.
//
// How this differs from the earlier variants of this function:
//   version 1 - close every write end first, then waitpid() every child;
//   version 2 - walk the channels from last to first, closing one write end
//               and reaping its child at each step (the workaround used while
//               children still held inherited write ends);
//   version 3 - (this one) walk the channels in order. It relies on
//               InitProcessPool making each child close the write ends it
//               inherited, so closing a channel here closes the last writer of
//               that pipe and the child's read() returns 0.
void CleanProcessPool(std::vector<Channel> &channels)
{
    for (auto &e : channels)
    {
        e.Close();

        // Reap the child; retry when waitpid() is interrupted by a signal.
        pid_t rid = -1;
        do
        {
            rid = waitpid(e.Id(), nullptr, 0);
        } while (rid < 0 && errno == EINTR);

        if (rid > 0)
        {
            std::cout << "child " << rid << " wait...success" << std::endl;
        }
        else
        {
            // Report instead of silently dropping the failure.
            std::cerr << "waitpid failed for child " << e.Id() << std::endl;
        }
    }
}
void DebugPrint(std::vector<Channel> &channels)
{
    for (auto &e : channels) { std::cout << e.Name() << std::endl;}
}
// This process is the master: it builds the pool, dispatches tasks to the
// workers and finally shuts the pool down.
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        Usage(argv[0]);
        return 1;
    }

    // num is the number of pipes (child processes) to create. std::stoi throws
    // on non-numeric or out-of-range text, so report that as a usage error
    // instead of letting the exception terminate the program.
    int num = 0;
    try
    {
        num = std::stoi(argv[1]);
    }
    catch (const std::exception &)
    {
        Usage(argv[0]);
        return 1;
    }
    // A pool needs at least one worker; dispatching to none is meaningless.
    if (num <= 0)
    {
        Usage(argv[0]);
        return 1;
    }

    // Organise: keep every channel in one vector.
    std::vector<Channel> channels;

    // Initialise the process pool.
    const int rc = InitProcessPool(num, channels, Worker);
    if (rc != 0)
    {
        std::cerr << "InitProcessPool failed with code " << rc << std::endl;
        // Shut down whatever workers were created before the failure.
        CleanProcessPool(channels);
        return rc;
    }

    // Dispatch tasks.
    DispatchTask(channels);

    // Shut the pool down.
    CleanProcessPool(channels);

    return 0;
}