【Linux系统与网络编程】17：IO多路复用

hello world 999

已于 2023-04-03 10:15:35 修改

阅读量408

点赞数

分类专栏：系统与网络编程文章标签： linux 网络 java

于 2023-03-30 11:14:23 首次发布

本文链接：https://blog.csdn.net/weixin_49167174/article/details/129853766

版权

系统与网络编程专栏收录该内容

17 篇文章 1 订阅

订阅专栏

IO多路复用

OVERVIEW

IO多路复用

一、线程池

线程池：在IO并发较高的时候一般会选择使用线程池技术，降低线程的创建与销毁的开销。
线程池数量：根据CPU核心的数量创建对应的线程数量，线程数量应该小于等于CPU核心数量。
- CPU密集型：作业主要是进行计算等，高消耗CPU资源（线程数一定不能大于核心数）
- IO密集型1：作业主要是申请IO，然后将IO请求交给内核处理（通道技术、IO控制器负责拿数据），而不是CPU进行处理。线程有可能处于睡眠状态，可以适当的将线程池中线程的数量增高。
- IO密集型2：如果使用IO多路复用或异步IO（即不会在IO上进行等待线程不会睡眠），此时就与CPU密集型一样。

1.线程池再实现

补充：死锁是指两个或两个以上的进程/线程因为进程推进顺序不当、或者资源数量受限，而导致的互相等待的情况。

利用队列、锁、条件变量实现模拟线程池的调度过程：

构建一个循环任务队列（等候区）：加锁保护、互斥访问、任务队列push与pop操作
创建多个线程，从任务队列中抢任务去执行 while (1) { pop(); do(); }
条件变量与惊群效应的处理。

//thread_pool.h
#ifndef _THREAD_POOL_H
#define _THREAD_POOL_H

#include "head.h"

/* Fixed-capacity circular queue of opaque task pointers shared between threads. */
struct task_queue {
	int head;               /* index of the next slot to pop */
	int tail;               /* index of the next slot to push */
	int size;               /* capacity of the queue, in slots */
	int count;              /* number of tasks currently stored */
	void **data;            /* slot array holding the queued task pointers */
	pthread_mutex_t mutex;  /* held by push/pop while they touch the queue */
	pthread_cond_t cond;    /* condition variable (not a semaphore): signalled by push, waited on by pop */
};

void task_queue_init(struct task_queue *taskQueue, int size);//initialise an empty queue with room for `size` tasks
void task_queue_push(struct task_queue *taskQueue, void *data);//enqueue one task pointer
void *task_queue_pop(struct task_queue *taskQueue);//dequeue one task pointer

#endif

//thread_pool.c
#include "head.h"
#include "thread_pool.h"

/*
 * Initialise an empty circular task queue.
 *
 * taskQueue: queue object to set up; its storage is provided by the caller.
 * size:      requested capacity in task slots.
 *
 * If size is not positive, or the slot array cannot be allocated, the queue
 * is left with capacity 0. task_queue_push() then treats it as full instead
 * of writing through a NULL slot array.
 */
void task_queue_init(struct task_queue *taskQueue, int size) {
	taskQueue->count = taskQueue->head = taskQueue->tail = 0;
	taskQueue->size = 0;
	taskQueue->data = NULL;
	if (size > 0) {
		taskQueue->data = calloc((size_t)size, sizeof *taskQueue->data);
		/* only advertise the capacity once the slots really exist */
		if (taskQueue->data != NULL) taskQueue->size = size;
	}
	pthread_mutex_init(&taskQueue->mutex, NULL);
	pthread_cond_init(&taskQueue->cond, NULL);
}

/*
 * Append one task pointer to the circular queue.
 *
 * taskQueue: queue to push into.
 * data:      opaque task pointer stored as-is.
 *
 * When the queue already holds `size` tasks the pointer is NOT stored; the
 * function only logs the condition and returns.
 */
void task_queue_push(struct task_queue *taskQueue, void *data) {
	/* every access to the shared queue state happens with the mutex held */
	pthread_mutex_lock(&taskQueue->mutex);
	if (taskQueue->count != taskQueue->size) {
		taskQueue->data[taskQueue->tail] = data;
		DBG(GREEN"<push> : data is pushed!\n"NONE);
		taskQueue->tail += 1;
		taskQueue->count += 1;
		/* circular queue: wrap the write index back to slot 0 */
		if (taskQueue->tail == taskQueue->size) {
			DBG(YELLOW"<push> : taskQueue tail reach end!\n"NONE);
			taskQueue->tail = 0;
		}
		/* wake a thread waiting on the condition variable in task_queue_pop */
		pthread_cond_signal(&taskQueue->cond);
	} else {
		DBG(YELLOW"<push> : taskQueue is full\n"NONE);
	}
	pthread_mutex_unlock(&taskQueue->mutex);
}

/*
 * Remove and return the oldest task pointer in the queue.
 *
 * Blocks on the queue's condition variable while the queue is empty, so the
 * caller never has to poll: a short polling interval would burn CPU and a
 * long one would hurt responsiveness.
 */
void *task_queue_pop(struct task_queue *taskQueue) {
	void *task;

	pthread_mutex_lock(&taskQueue->mutex);
	/*
	 * Re-check the predicate after every wake-up instead of testing it once:
	 * another woken thread may already have taken the task (thundering herd).
	 */
	for (;;) {
		if (taskQueue->count != 0) break;
		/* releases the mutex while waiting and re-acquires it before returning */
		pthread_cond_wait(&taskQueue->cond, &taskQueue->mutex);
	}
	task = taskQueue->data[taskQueue->head];
	DBG(RED"<pop> : data is poped!\n"NONE);
	taskQueue->count -= 1;
	taskQueue->head += 1;
	/* circular queue: wrap the read index back to slot 0 */
	if (taskQueue->head == taskQueue->size) {
		DBG(YELLOW"<pop> : taskQueue head reach end!\n"NONE);
		taskQueue->head = 0;
	}
	pthread_mutex_unlock(&taskQueue->mutex);
	return task;
}

//1.thread_pool_main.c
#include "./common/head.h"
#include "thread_pool.h"

/* Number of worker threads created by main. */
#define INS 2
/* Task-queue capacity and number of 1024-byte line buffers used by main. */
#define SIZE 1000

/* Report msg through perror() and terminate the process with EXIT_FAILURE. */
#define handle_error(msg) \
	do { perror(msg); exit(EXIT_FAILURE); } while (0)

/*
 * Worker thread entry point.
 *
 * arg: pointer to the shared struct task_queue.
 *
 * Detaches itself, then loops forever popping task pointers from the queue
 * and printing each one as a C string. Never returns.
 */
void *thread_run(void *arg) {
	struct task_queue *queue = arg;

	/* threads are joinable by default; detaching makes this one non-joinable */
	pthread_detach(pthread_self());
	for (;;) {
		char *line = task_queue_pop(queue);
		printf("%s", line);
	}
}

/*
 * Thread-pool demo: INS worker threads print lines that the main thread
 * reads from ./1.thread_pool_main.c and pushes into a shared task queue.
 * The file is re-read in an endless loop, so the program never exits normally.
 *
 * NOTE(review): a buff slot is reused after SIZE lines, and task_queue_push()
 * drops the pointer when the queue is full while ind still advances. A worker
 * can therefore still be printing a slot that fgets() is overwriting; fixing
 * that needs ownership of each line to be handed over to the worker.
 */
int main() {
	FILE *fp;
	/* static storage: about 1 MB of line buffers is kept off the stack */
	static char buff[SIZE][1024];
	//1. create and initialise the task queue
	struct task_queue *taskQueue = malloc(sizeof *taskQueue);
	if (taskQueue == NULL) handle_error("malloc");
	task_queue_init(taskQueue, SIZE);
	//2. start INS worker threads that consume the queue
	pthread_t tid[INS];
	for (int i = 0; i < INS; ++i) {
		int rc = pthread_create(&tid[i], NULL, thread_run, taskQueue);
		if (rc != 0) {
			/* pthread_create returns the error number instead of setting errno */
			fprintf(stderr, "pthread_create: error %d\n", rc);
			exit(EXIT_FAILURE);
		}
	}
	//3. read the file line by line and push every line into the queue
	while (1) {
		if ((fp = fopen("./1.thread_pool_main.c", "r")) == NULL) handle_error("fopen");
		int ind = 0;
		while (fgets(buff[ind], 1024, fp) != NULL) {
			task_queue_push(taskQueue, buff[ind]);
			if (++ind == SIZE) ind = 0;
			//usleep(20000);
		}
		fclose(fp);
	}
	return 0;
}

运行结果，文件的输出是没有顺序的输出，

在这里插入图片描述

问题分析：由于线程具有异步性、失去了封闭性、且具有竞争干扰共享性。push的速度特别快，而pop弹出有一个标准IO的系统调用输出过程（比较消耗时间），导致输出的每一个线程执行进度不是按照预期顺序进行的。如果让push的while循环稍微usleep(20000)一会，则输出的结果则能够趋向于正常结果（但仍然不是最优解决方案）。

加上usleep之后输出的结果趋于正常：

在这里插入图片描述

二、IO多路复用

1.select

在程序启动前5秒内，用户输入一个指令（用户与服务器之间建立连接）

如果用户不输入的话，5秒之后程序会进入默认模式/缺省模式。
如果用户输入了数据，则程序需要立即响应。

实现方式1：多线程，创建一个线程进行用户数据的接收，另外再创建一个线程进行sleep睡眠等待，接收数据的线程来通知睡眠线程。

实现方式2：IO多路复用select（IO感知），

int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);

#include "./common/head.h"

/*
 * select() demo: wait up to 5 seconds for input on file descriptor 0.
 *
 * If input arrives in time, an integer is read into flag; on timeout flag
 * keeps its default of 0. A select() failure is reported as an error and is
 * no longer mistaken for a timeout.
 */
int main() {
	int flag = 0;
	fd_set rfds;
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };

	FD_ZERO(&rfds);
	FD_SET(0, &rfds);
	/* nfds is the highest watched descriptor plus one, i.e. 0 + 1 */
	int n = select(1, &rfds, NULL, NULL, &tv);
	if (n < 0) {
		perror("select");
		return 1;
	}
	if (n == 0) {
		printf(RED"time out!\n"NONE);
	} else {
		printf("input found!\n");
		/* keep the default when the input is not a valid integer */
		if (scanf("%d", &flag) != 1) {
			fprintf(stderr, "invalid input, flag keeps its default\n");
		}
	}
	printf("flag = %d\n", flag);
	return 0;
}

在这里插入图片描述

总结：select特点

文件描述符受限1024，随着文件描述符增多，其效率会呈现线性降低。
select中会有频繁的数据拷贝（用户态与内核态之间上下文切换，数据拷贝）
需要遍历文件描述符集合来确定谁就绪（时间花销很大），且在内核中也需要依次遍历。

注：select的出现具有划时代的意义，其将原先并发的机制由多线程、多进程的形式改为单个进程就可以实现并发管理。

2.poll

/* poll/ppoll prototypes: both take an array of nfds pollfd entries. */
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *tmo_p, const sigset_t *sigmask);
/* One entry per monitored descriptor: events is the request, revents the result. */
struct pollfd {
    int   fd;         /* file descriptor */
    short events;     /* requested events */
    short revents;    /* returned events */
};

总结：poll特点

文件描述符不受限，因为其底层使用的是链表
poll中也会有频繁的数据拷贝，
其效率会呈现线性降低。
poll中引入了event的概念。

3.epoll

The  epoll  API  performs  a  similar task to poll(2): monitoring multiple file descriptors to see if I/O is possible on any of them.  The epoll API can be used either as an edge-triggered or a level-triggered interface and scales well to large numbers of watched file descriptors.  The following system calls are provided to create and manage an epoll instance:

edge-triggered：边缘触发（边沿触发），适合高并发的场景，只触发一次，只有文件描述符从不可读变为可读的时候才会被触发
level-triggered：水平触发（条件触发），侧重于安全与稳定性，只要条件满足（例如缓冲区中仍有数据可读）就会一直触发

Level-triggered and edge-triggered:
  1. The file descriptor that represents the read side of a pipe (rfd) is registered on the epoll instance.
  2. A pipe writer writes 2 kB of data on the write side of the pipe.
  3. A call to epoll_wait(2) is done that will return rfd as a ready file descriptor.
  4. The pipe reader reads 1 kB of data from rfd.
  5. A call to epoll_wait(2) is done.

edge-triggered:
If the rfd file descriptor has been added to the  epoll  interface  using  the  EPOLLET  (edge-triggered)  flag,  the  call  to epoll_wait(2)  done  in  step 5 will probably hang despite the available data still present in the file input buffer; meanwhile the remote peer might be expecting a response based on the data it already sent.  The reason for this  is  that  edge-triggered mode  delivers  events only when changes occur on the monitored file descriptor.  So, in step 5 the caller might end up waiting for some data that is already present inside the input buffer.  In the above example, an event on rfd will be generated because of  the  write  done in 2 and the event is consumed in 3.  Since the read operation done in 4 does not consume the whole buffer data, the call to epoll_wait(2) done in step 5 might block indefinitely.

epoll_create(2)  creates  a  new  epoll instance and returns a file descriptor referring to that instance.  (The more recent epoll_create1(2) extends the functionality of epoll_create(2).)
Interest in particular file descriptors is then registered via epoll_ctl(2).  The set of file descriptors  currently  registered on an epoll instance is sometimes called an epoll set.
epoll_wait(2) waits for I/O events, blocking the calling thread if no events are currently available.

epoll是一种多路转接IO，相比select和poll在管理大量描述符的时候优势很明显。epoll的流程：创建epoll描述符 –> 添加事件 –> wait;

epoll的三个主要函数：epoll_create、epoll_ctl、epoll_wait

int epoll_create(int size)：创建一个文件用于存储epoll的集合，返回文件描述符
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)：将文件描述符进行注册，
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)：阻塞等待事件的发生，

 int epoll_create (int size);
 //size: originally a hint for how much space to reserve for the returned list; the article notes it no longer has a real effect because the results are kept in a doubly linked list
 //NOTE(review): epoll_create(2) states that size is ignored since Linux 2.6.8 but must still be greater than zero - confirm against the man page
 //return: the file descriptor of the newly created epoll instance

extern int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event);
//epfd: file descriptor of the epoll instance
//op: EPOLL_CTL_ADD adds a watched descriptor, EPOLL_CTL_DEL removes one, EPOLL_CTL_MOD modifies one
//fd: the file descriptor of interest
//event: filled in by the caller with the events it cares about

/* User data attached to a registered descriptor; a union, so only one member is meaningful at a time. */
typedef union epoll_data {
    void *ptr; //pointer left for the caller's own use
    int fd;	//usually set to the file descriptor being registered
    uint32_t u32; //rarely used
    uint64_t u64;
} epoll_data_t;

/* Event record: passed to epoll_ctl when registering, and used for the array that epoll_wait fills. */
struct epoll_event {
    uint32_t events;	/* Epoll events */
    epoll_data_t data;	/* User data variable */
};

extern int epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout);
//parameters: epoll file descriptor, array that receives the returned events, maximum length of that array, timeout
//timeout: -1 blocks until an event arrives, 0 returns immediately whether or not there are events, greater than 0 waits up to that timeout
//return: less than zero on error, greater than 0 is the number of events that occurred
//NOTE(review): epoll_wait(2) also returns 0 when the timeout expires with no events - confirm against the man page

4.mmap

mmap映射的是磁盘中的文件，而共享内存是将内存映射到用户的进程空间中，

原型：void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);

addr：映射的地址空间（如果为空则内核将会自动选择映射的位置）
length：映射的长度
prot：对映射内容的保护方式
flags：标记位
fd：文件描述符
offset：偏移量

mmap()  creates a new mapping in the virtual address space of the calling process.  The starting address for the new mapping is specified in addr.  The length argument specifies the length of the mapping (which must be greater than 0).
    
The contents of a file mapping (as opposed to an anonymous mapping; see MAP_ANONYMOUS  below),  are  initialized  using  length bytes starting at offset offset in the file (or other object) referred to by the file descriptor fd.  offset must be a multiple of the page size as returned by sysconf(_SC_PAGE_SIZE).

文件不在内存中，所有对文件的操作都是由系统调用执行的，而系统调用必须进入内核中，等待将文件读完后由内核数据拷贝出。
而mmap将文件的一部分内容映射到了内存中，如果需要读取文件，则不需要使用系统调用，不需要内核数据的拷贝。

在这里插入图片描述

hello world 999

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【Linux系统与网络编程】17：IO多路复用

push的速度特别快，而pop弹出有一个标准IO的系统调用输出过程（比较消耗时间），导致输出的每一个线程执行进度不是按照预期顺序进行的。：多线程，创建一个线程进行用户数据的接收，另外再创建一个线程进行sleep睡眠等待，接收数据的线程来通知睡眠线程。注：select的出现具有跨时代的意义，其将原先并发的机制由多线程、多进程的形式改为单个进程就可以实现并发管理。补充：死锁是指两个或两个以上的进程/线程因为进程推进顺序不当、或者资源数量受限，而导致的互相等待的情况。运行结果，文件的输出是没有顺序的输出，
复制链接

扫一扫