linux-0.12源码分析——缓冲区等待队列（栈）sleep_on+wake_up分析

最新推荐文章于 2021-05-10 08:13:42 发布

Icoding_F2014

最新推荐文章于 2021-05-10 08:13:42 发布

阅读量691

点赞数 3

分类专栏： Advanced OS Advanced OS 操作系统高级教程文章标签：缓冲区等待队列

本文链接：https://blog.csdn.net/jmh1996/article/details/90139485

版权

Advanced OS 同时被 3 个专栏收录

18 篇文章 4 订阅

订阅专栏

操作系统高级教程

12 篇文章 4 订阅

订阅专栏

Advanced OS

2 篇文章 0 订阅

订阅专栏

今天，我们来看一个有趣的问题：当有多个进程去读或写同一个文件，并因为这个文件对应的同一个缓冲区而被阻塞时，linux-0.12是如何把这些进程阻塞起来的，又是如何把被阻塞的进程一一唤醒的。

从一个奇怪的地方说起

我们来看buffer-head的结构
include\linux\fs.h

/*
 * Header describing one cached disk block.
 * NOTE(review): the roles given for the four link fields at the end are
 * inferred from their names only -- confirm against the buffer cache code.
 */
struct buffer_head {
	char * b_data;			/* pointer to data block (1024 bytes) */ // points at the actual block contents
	unsigned long b_blocknr;	/* block number */
	unsigned short b_dev;		/* device (0 = free) */
	unsigned char b_uptodate;	/* non-zero when the cached data is valid and can be used directly */
	unsigned char b_dirt;		/* 0-clean,1-dirty */
	unsigned char b_count;		/* users using this block */ // number of processes using this buffer
	unsigned char b_lock;		/* 0 - ok, 1 -locked */
	struct task_struct * b_wait;	/* task sleeping on this buffer; sleep_on() stores the most recent sleeper here */
	struct buffer_head * b_prev;	/* presumably previous buffer on a lookup chain -- TODO confirm */
	struct buffer_head * b_next;	/* presumably next buffer on a lookup chain -- TODO confirm */
	struct buffer_head * b_prev_free;	/* presumably previous buffer on the free list -- TODO confirm */
	struct buffer_head * b_next_free;	/* presumably next buffer on the free list -- TODO confirm */
};

我们可以看到的是，buffer_head结构体里面的确有一个字段来标识什么进程被它阻塞了：buffer_head.b_wait.这个字段是一个task_struct *类型的变量。

当这个buffer只阻塞一个进程时，buffer_head.b_wait可以直接指向这个进程的task_struct。
但是，如果一个buffer把多个进程都阻塞了怎么办呢？？？？

多个进程去操作同一个文件是相当常见的场景，linux是不可能连这个都不支持。
接下来，我们来看linux-0.12是如何巧妙地利用进程上下文以及内核的全局视野，通过一番神仙操作，只借助b_wait就实现了一个进程等待栈。我们会看到，缓冲区在组织多个被阻塞的进程时，其实是把它们以栈的形式组织的，而并非像标题写的那样以队列的形式。

构造一个小case

假设有三个进程A,B,C，这三个进程不存在父子关系，然后他们操作同一个文件。

进程A:从hello.txt读100个字节

#include <fcntl.h>	/* open(), O_RDWR */
#include <stdio.h>	/* perror() */
#include <unistd.h>	/* read(), close() */

char buf[1024];

/*
 * Process A: read the first 100 bytes of /home/hello.txt, then spin briefly
 * (presumably so the process is still around while B and C are started).
 *
 * Returns 0 on success, 1 if the file cannot be opened or read.
 */
int main(void)
{
	volatile int i;	/* volatile so the compiler keeps the empty delay loop */
	int fd = open("/home/hello.txt", O_RDWR, 0);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, buf, 100) < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	for (i = 0; i < 1000000; i++)
		;
	close(fd);
	return 0;
}

进程B:从hello.txt 读前400个字节

#include <fcntl.h>	/* open(), O_RDWR */
#include <stdio.h>	/* perror() */
#include <unistd.h>	/* read(), close() */

char buf[1024];

/*
 * Process B: read the first 400 bytes of /home/hello.txt, then spin briefly
 * (presumably so the process is still around while the others run).
 *
 * Returns 0 on success, 1 if the file cannot be opened or read.
 */
int main(void)
{
	volatile int i;	/* volatile so the compiler keeps the empty delay loop */
	int fd = open("/home/hello.txt", O_RDWR, 0);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, buf, 400) < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	for (i = 0; i < 1000000; i++)
		;
	close(fd);
	return 0;
}

进程C，写hello.txt文件

#include <fcntl.h>	/* open(), O_RDWR */
#include <stdio.h>	/* perror() */
#include <string.h>	/* strlen() */
#include <unistd.h>	/* write(), close() */

char buf[1024]="Hello,world";

/*
 * Process C: write the string held in buf to the start of /home/hello.txt,
 * then spin briefly (presumably so the process is still around while the
 * others run).
 *
 * Returns 0 on success, 1 if the file cannot be opened or written.
 */
int main(void)
{
	volatile int i;	/* volatile so the compiler keeps the empty delay loop */
	int fd = open("/home/hello.txt", O_RDWR, 0);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, buf, strlen(buf)) < 0) {
		perror("write");
		close(fd);
		return 1;
	}
	for (i = 0; i < 1000000; i++)
		;
	close(fd);
	return 0;
}

然后依次开启三个shell,依次运行进程A,进程B,进程C。他们三个是并发的运行的。

一个个的分析

进程A

进程A，最先被运行起来，该进程首先调用open()函数，open()函数最终会调用sys_open来打开hello.txt文件，返回文件的文件描述符。

接着，进程A执行read(fd,buf,100)函数。该函数最终会调用sys_read函数。

/*
 * read() system call entry: validate the descriptor and count, then hand the
 * request to the reader that matches the inode type (pipe, character device,
 * block device, or directory/regular file).
 *
 * Returns the value produced by the selected reader, 0 when there is nothing
 * to read, -EIO for a pipe not opened for reading, or -EINVAL for a bad
 * descriptor/count or an unsupported inode type.
 */
int sys_read(unsigned int fd,char * buf,int count)
{
	struct file * file;
	struct m_inode * inode;

	if (fd >= NR_OPEN)
		return -EINVAL;
	if (count < 0)
		return -EINVAL;
	file = current->filp[fd];
	if (!file)
		return -EINVAL;
	if (count == 0)
		return 0;

	verify_area(buf, count);
	inode = file->f_inode;

	if (inode->i_pipe) {
		/* bit 0 of f_mode must be set for the pipe to be read */
		if (file->f_mode & 1)
			return read_pipe(inode, buf, count);
		return -EIO;
	}
	if (S_ISCHR(inode->i_mode))
		return rw_char(READ, inode->i_zone[0], buf, count, &file->f_pos);
	if (S_ISBLK(inode->i_mode))
		return block_read(inode->i_zone[0], &file->f_pos, buf, count);

	if (!(S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode))) {
		printk("(Read)inode->i_mode=%06o\n\r",inode->i_mode);
		return -EINVAL;
	}

	/* Directory or regular file: never read past the end of the file. */
	if (count+file->f_pos > inode->i_size)
		count = inode->i_size - file->f_pos;
	if (count <= 0)
		return 0;
	return file_read(inode, file, buf, count);
}

然后sys_read在执行完所有的检查后就会调用file_read函数。

int file_read(struct m_inode * inode, struct file * filp, char * buf, int count)
{
	int left,chars,nr;
	struct buffer_head * bh;

	if ((left=count)<=0)
		return 0;
	while (left) {
		if ((nr = bmap(inode,(filp->f_pos)/BLOCK_SIZE))) {
			if (!(bh=bread(inode->i_dev,nr)))
				break;
		} else
			bh = NULL;
		nr = filp->f_pos % BLOCK_SIZE;
		chars = MIN( BLOCK_SIZE-nr , left );
		filp->f_pos += chars;
		left -= chars;
		if (bh) {
			char * p = nr + bh->b_data;
			while (chars-->0)
				put_fs_byte(*(p++),buf++);
			brelse(bh);
		} else {
			while (chars-->0)
				put_fs_byte(0,buf++);
		}
	}
	inode->i_atime = CURRENT_TIME;
	return (count-left)?(count-left):-ERROR;
}

在file_read函数里，bmap()计算出需要读硬盘上的哪个逻辑块，得到逻辑块号nr，并把这个逻辑块号作为参数传给bread(inode->i_dev,nr)函数。bread即block read（注意它不是sys_read中对块设备文件调用的block_read()函数），功能是从块设备中读取设备号为inode->i_dev、块号为nr的逻辑块。

/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 *
 * Article author's notes (translated; they describe the cases getblk() has
 * to deal with and are not verified here):
 *   1. a buffer for the block already exists -- either up to date or not;
 *   2. no such buffer, but a free one is available;
 *   3. no free buffer is available.
 *   Open question from the author: are device/block number used to keep a
 *   buffer alive as long as possible, rather than plain first-in-first-out?
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head * bh = getblk(dev, block);

	if (!bh)
		panic("bread: getblk returned NULL\n");

	if (!bh->b_uptodate) {
		/* Cached copy is not valid yet: ask the driver to read it and wait. */
		ll_rw_block(READ, bh);
		wait_on_buffer(bh);
		if (!bh->b_uptodate) {
			/* Still not valid after the read completed: give up. */
			brelse(bh);
			return NULL;
		}
	}
	/* Buffer contents match the disk and can be used directly. */
	return bh;
}

bread函数首先调用getblk获取一个缓冲块，然后查看b_uptodate是不是1，其实就是查看这个块的数据是不是可用的。当然，这是我们第一次读hello.txt，数据是不可用的。于是就会接着调用ll_rw_block(READ,bh)，在里面下发读磁盘的命令，其中会把bh给lock住，防止其他进程也拿着这个buffer去读文件。

/*
 * Submit a read/write request for one buffer to the block device it
 * belongs to.
 *
 * The major number taken from bh->b_dev must be below NR_BLK_DEV and the
 * device must have a request function registered in blk_dev[]; otherwise a
 * message is printed and nothing is queued.
 */
void ll_rw_block(int rw, struct buffer_head * bh)
{
	unsigned int major = MAJOR(bh->b_dev);

	if (major < NR_BLK_DEV && blk_dev[major].request_fn) {
		make_request(major, rw, bh);
		return;
	}
	printk("Trying to read nonexistent block-device\n\r");
}
static void make_request(int major,int rw, struct buffer_head * bh)
{
	struct request * req;
	int rw_ahead;

/* WRITEA/READA is special case - it is not really needed, so if the */
/* buffer is locked, we just forget about it, else it's a normal read */
	if ((rw_ahead = (rw == READA || rw == WRITEA))) {
		if (bh->b_lock)
			return;
		if (rw == READA)
			rw = READ;
		else
			rw = WRITE;
	}
	if (rw!=READ && rw!=WRITE)
		panic("Bad block dev command, must be R/W/RA/WA");
	lock_buffer(bh);//加锁
	if ((rw == WRITE && !bh->b_dirt) || (rw == READ && bh->b_uptodate)) {
		unlock_buffer(bh);
		return;
	}
repeat:
		···

之后，执行wait_on_buffer(bh);

/*
 * Sleep until the buffer is no longer locked.
 *
 * Interrupts are disabled around the test-and-sleep so that an interrupt
 * cannot arrive between checking b_lock and going to sleep.
 *
 * Article author's note: the interrupt flag is effectively per task. EFLAGS
 * (including IF) is kept in each task's TSS and restored on a task switch,
 * so the cli() here only stops *this* task from taking interrupts; another
 * task that is switched in may run with interrupts enabled.
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	/* while, not if: after every wake-up b_lock has to be checked again */
	while (bh->b_lock)
		sleep_on(&bh->b_wait);
	sti();
}

在wait_on_buffer里面，它首先会关中断，注意只是把 进程自己的中断给关闭了！！。这是一个很重要的思想，即，中断的开关是与进程相关的。本进程中断关闭了，并不意味着整个系统就没法响应中断了，当通过进程调度切换到其他进程时，如果另外一个进程开启了中断，那么系统又可以接收中断了。

关闭中断后，使用while来检查bh->b_lock，如果bh->b_lock为真，意味着有人在同步这个块，于是调用sleep_on（&bh->b_wait）函数。

/*
 * Put the current task to sleep on the wait pointer *p.
 *
 * The task previously stored in *p is remembered in the local `tmp` (each
 * sleeping task keeps its own copy) and *p is overwritten with the current
 * task, so successive sleepers form an implicit last-in-first-out chain.
 * When this task runs again after schedule(), it marks the remembered
 * earlier sleeper runnable.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;	/* whoever was already waiting here (may be NULL) */
	*p = current;	/* the wait pointer now names this task */
	current->state = TASK_UNINTERRUPTIBLE;// uninterruptible sleep: per the article, not eligible to run until some other context sets state back to 0
	schedule();
	if (tmp)
		tmp->state=0;	/* running again: pass the wake-up on to the earlier sleeper */
}

注意，传入的是一个两重的指针：&bh->b_wait。
sleep_on 首先判断&bh->b_wait是不是为NULL。显然，虽然此时bh->b_wait为NULL，但是这个变量所在的地址&bh->b_wait不为NULL。

接着判断是不是要sleep_on 进程0。

接下来，神仙操作来了！！！！

    tmp = *p;
	*p = current;
	current->state = TASK_UNINTERRUPTIBLE;// uninterruptible sleep: per the article, not eligible to run until some other context sets state back to 0

p为&bh->b_wait，那么 *p 就是bh->b_wait了。对于进程A来说，此时bh->b_wait为NULL，于是tmp就是NULL。
然后*p=current，其实等价于bh->b_wait=current：把当前进程的task_struct指针赋给bh->b_wait，表示进程A正在wait这个buffer。
current->state=TASK_UNINTERRUPTIBLE，把进程A的状态设置为不可中断等待状态。处于这个状态意味着进程A无法再被运行，即使它收到了信号也不行，只有当其他进程把它的状态改成就绪态以后，它才可能被再次调度。

注意！对于进程A来说，tmp为NULL，bh->b_wait为进程A的task_struct指针。
注意！对于进程A来说，tmp为NULL，bh->b_wait为进程A的task_struct指针。
注意！对于进程A来说，tmp为NULL，bh->b_wait为进程A的task_struct指针。
接下来，调用schedule()函数，进程A主动交出控制权，主动进行进程调度。

进程 B

然后就是进程B的运行了。
B与A很相似，到bread()函数内的wait_on_buffer()的sleep_on()前都是差不多的。而且因为A、B要读同样的块，于是它们使用的buffer也是同一个，这个过程主要由getblk()函数的逻辑来体现。

但是在sleep_on()里面就开始有很大不同。再看看sleep_on的实现：

/*
 * Sleep until the buffer is no longer locked.
 *
 * Interrupts are disabled around the test-and-sleep so that an interrupt
 * cannot arrive between checking b_lock and going to sleep.
 *
 * Article author's note: the interrupt flag is effectively per task. EFLAGS
 * (including IF) is kept in each task's TSS and restored on a task switch,
 * so the cli() here only stops *this* task from taking interrupts; another
 * task that is switched in may run with interrupts enabled.
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	/* while, not if: after every wake-up b_lock has to be checked again */
	while (bh->b_lock)
		sleep_on(&bh->b_wait);
	sti();
}
/*
 * Put the current task to sleep on the wait pointer *p.
 *
 * The task previously stored in *p is remembered in the local `tmp` (each
 * sleeping task keeps its own copy) and *p is overwritten with the current
 * task, so successive sleepers form an implicit last-in-first-out chain.
 * When this task runs again after schedule(), it marks the remembered
 * earlier sleeper runnable.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;	/* whoever was already waiting here (may be NULL) */
	*p = current;	/* the wait pointer now names this task */
	current->state = TASK_UNINTERRUPTIBLE;// uninterruptible sleep: per the article, not eligible to run until some other context sets state back to 0
	schedule();
	if (tmp)
		tmp->state=0;	/* running again: pass the wake-up on to the earlier sleeper */
}

同样的，sleep_on的参数是&bh->b_wait。
对于进程B来说，tmp是个局部变量，它的值跟进程A执行到这里时的tmp值是不同的。
tmp=*p，等价于tmp=bh->b_wait。然而，buffer_head[]是内核全局共享的，进程A已经把这个块的b_wait设置成了进程A的task_struct指针。
于是对于进程B来说，tmp就是进程A的task_struct指针。
然后*p=current，此时bh->b_wait就改成了进程B的task_struct指针了。
最后调用schedule()把自己给挂起来。

因此！对于进程B来说，tmp为进程A的task_struct指针，bh->b_wait为进程B的task_struct指针。

进程C

进程C最后也会运行到sleep_on函数。同样的分析，最后可以知道：
对于进程C来说，tmp为进程B的task_struct指针，bh->b_wait为进程C的task_struct指针。

其他

进程A、B、C都因为hello.txt的那个逻辑块而阻塞了。于是系统就转去执行其他进程，同时磁盘把数据更新到缓冲区。
最后，数据读完后，磁盘会向正在运行其他进程的内核发出磁盘中断。在磁盘中断的处理过程中，内核了解到数据已经准备好。

/*
 * Finish the request at the head of the device queue (CURRENT) and advance
 * to the next one.
 *
 * uptodate: 0 = the transfer failed, 1 = the buffer now holds valid data.
 *
 * If the request has a buffer attached, its b_uptodate flag is updated and
 * the buffer is unlocked (which wakes the task recorded in its b_wait).
 * On failure an error is logged; the block number is only printed when a
 * buffer is attached, because CURRENT->bh may be NULL.
 */
static inline void end_request(int uptodate)
{
	DEVICE_OFF(CURRENT->dev);
	if (CURRENT->bh) {
		CURRENT->bh->b_uptodate = uptodate;
		unlock_buffer(CURRENT->bh);
	}
	if (!uptodate) {
		printk(DEVICE_NAME " I/O error\n\r");
		if (CURRENT->bh)
			printk("dev %04x, block %d\n\r",CURRENT->dev,
				CURRENT->bh->b_blocknr);
		else
			printk("dev %04x\n\r",CURRENT->dev);
	}
	/* Author's note: `waiting` is empty in the scenario discussed, so this is a no-op there. */
	wake_up(&CURRENT->waiting);
	/* Wake tasks waiting for a free request slot (a separate wait list from the per-buffer b_wait). */
	wake_up(&wait_for_request);
	CURRENT->dev = -1;
	CURRENT = CURRENT->next;	/* move on to the next request */
}

再看unlock_buffer（CURRENT->bh）函数：

/*
 * Clear the lock on a buffer and wake the task recorded in its b_wait
 * pointer. Unlocking a buffer that is not locked is reported, but the
 * unlock and wake-up are still carried out.
 */
static inline void unlock_buffer(struct buffer_head * bh)
{
	if (bh->b_lock == 0) {
		printk(DEVICE_NAME ": free buffer being unlocked\n");
	}
	bh->b_lock = 0;
	wake_up(&bh->b_wait);
}

注意里面的wake_up函数：


/*
 * Make the task stored in the wait pointer *p runnable (state 0) and clear
 * the pointer. Does nothing when p or *p is NULL.
 *
 * Only the single task currently stored in *p is touched here.
 */
void wake_up(struct task_struct **p)
{
	struct task_struct *sleeper;

	if (!p)
		return;
	sleeper = *p;
	if (!sleeper)
		return;
	sleeper->state = 0;
	*p = NULL;
}

对于hello.txt对应的缓冲块来说，此时它的bh->b_wait指向进程C。
于是进程C的状态被改成了就绪态，进程C就有了被调度的权利了。

回到进程C

最后，调度到进程C,从进程C被阻塞的地方接着执行。

/*
 * Put the current task to sleep on the wait pointer *p.
 *
 * The task previously stored in *p is remembered in the local `tmp` (each
 * sleeping task keeps its own copy) and *p is overwritten with the current
 * task, so successive sleepers form an implicit last-in-first-out chain.
 * When this task runs again after schedule(), it marks the remembered
 * earlier sleeper runnable.
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;	/* whoever was already waiting here (may be NULL) */
	*p = current;	/* the wait pointer now names this task */
	current->state = TASK_UNINTERRUPTIBLE;// uninterruptible sleep: per the article, not eligible to run until some other context sets state back to 0
	schedule();
	if (tmp)
		tmp->state=0;	/* running again: pass the wake-up on to the earlier sleeper */
}