get_inode(dev,numb)
/*===========================================================================*
* get_inode *
*===========================================================================*/
PUBLIC struct inode *get_inode(dev, numb)
dev_t dev;			/* device the inode lives on */
int numb;			/* inode number (ANSI: may not be unshort) */
{
/* Hand back a pointer to an inode table slot for inode 'numb' on 'dev'.
 * If a slot in use already holds that inode, its use count is incremented
 * and that slot is returned.  Otherwise a free slot is claimed and, unless
 * 'dev' is NO_DEV, filled in through rw_inode().  When no slot is free,
 * err_code is set to ENFILE and NIL_INODE is returned.
 */
  register struct inode *slot, *free_slot;

  /* One pass over the table: look for a match and note a free slot. */
  free_slot = NIL_INODE;
  for (slot = &inode[0]; slot < &inode[NR_INODES]; slot++) {
      if (slot->i_count <= 0) {
          /* Unused slot; the last one seen is the one claimed below. */
          free_slot = slot;
          continue;
      }
      /* Only slots that are in use are compared against (dev, numb). */
      if (slot->i_dev == dev && slot->i_num == numb) {
          slot->i_count++;
          return(slot);
      }
  }

  /* The inode is not in the table, so a free slot is required. */
  if (free_slot == NIL_INODE) {
      err_code = ENFILE;		/* inode table completely full */
      return(NIL_INODE);
  }

  /* Claim the free slot and fill it in. */
  free_slot->i_dev = dev;
  free_slot->i_num = numb;
  free_slot->i_count = 1;
  if (dev != NO_DEV) rw_inode(free_slot, READING);	/* fetch it from disk */
  free_slot->i_update = 0;	/* no time fields are pending an update */
  return(free_slot);
}
-----------------------------------------------------------------------------------------------------------------------------
/*===========================================================================*
* rw_inode *
*===========================================================================*/
PUBLIC void rw_inode(rip, rw_flag)
register struct inode *rip;	/* inode table entry to transfer */
int rw_flag;			/* READING or WRITING */
{
/* Copy one inode between its inode table entry and the disk block that
 * holds it.  The entry's super block pointer is (re)set as a side effect,
 * and the entry is marked CLEAN once the copy has been made.
 */
  register struct buf *blk;
  register struct super_block *sb;
  block_t first_inode_block, inode_block;

  /* Locate and fetch the block containing this inode. */
  sb = get_super(rip->i_dev);
  rip->i_sp = sb;		/* inode must contain super block pointer */
  /* Inode blocks follow the two bit maps; the extra 2 presumably skips the
   * boot block and the super block (not visible here -- confirm).
   */
  first_inode_block = sb->s_imap_blocks + sb->s_zmap_blocks + 2;
  inode_block = (block_t) (rip->i_num - 1)/sb->s_inodes_per_block
						+ first_inode_block;
  blk = get_block(rip->i_dev, inode_block, NORMAL);

  /* When writing, bring the times up to date first and mark the block dirty,
   * except on a read-only file system.
   */
  if (rw_flag == WRITING) {
      if (rip->i_update) update_times(rip);
      if (sb->s_rd_only == FALSE) blk->b_dirt = DIRTY;
  }

  /* Do the copy with the routine matching the on-disk layout.  Inode numbers
   * start at 1, hence the "- 1" when indexing within the block.  If the last
   * argument is FALSE, the bytes are swapped.
   */
  if (sb->s_version == V1) {
      old_icopy(rip, blk->b_v1_ino + (rip->i_num - 1) % V1_INODES_PER_BLOCK,
						rw_flag, sb->s_native);
  } else {
      new_icopy(rip, blk->b_v2_ino + (rip->i_num - 1) % V2_INODES_PER_BLOCK,
						rw_flag, sb->s_native);
  }

  put_block(blk, INODE_BLOCK);
  rip->i_dirt = CLEAN;
}
/*===========================================================================*
* old_icopy *
*===========================================================================*/
PRIVATE void old_icopy(rip, dip, direction, norm)
register struct inode *rip;	/* in-core inode */
register d1_inode *dip;		/* V1 on-disk inode */
int direction;			/* READING (from disk) or WRITING (to disk) */
int norm;			/* TRUE = do not swap bytes; FALSE = swap */
{
/* Translate between the in-core inode and the V1 disk layout, so that the
 * inode table looks the same whatever disk structure the inode came from.
 * The V1 layout carries a single time stamp: on reading it is copied into
 * all three in-core times, and on writing only i_mtime is stored.
 */
  int z;

  if (direction != READING) {
      /* In-core table -> V1 disk inode. */
      dip->d1_mode = conv2(norm, (int) rip->i_mode);
      dip->d1_uid = conv2(norm, (int) rip->i_uid );
      dip->d1_size = conv4(norm, rip->i_size);
      dip->d1_mtime = conv4(norm, rip->i_mtime);
      dip->d1_nlinks = (nlink_t) rip->i_nlinks;	/* 1 char */
      dip->d1_gid = (gid_t) rip->i_gid;		/* 1 char */
      for (z = 0; z < V1_NR_TZONES; z++)
          dip->d1_zone[z] = conv2(norm, (int) rip->i_zone[z]);
      return;
  }

  /* V1 disk inode -> in-core table, swapping bytes if need be. */
  rip->i_mode = conv2(norm, (int) dip->d1_mode);
  rip->i_uid = conv2(norm, (int) dip->d1_uid );
  rip->i_size = conv4(norm, dip->d1_size);
  rip->i_mtime = conv4(norm, dip->d1_mtime);
  rip->i_ctime = rip->i_mtime;	/* the one disk time serves for all three */
  rip->i_atime = rip->i_mtime;
  rip->i_nlinks = (nlink_t) dip->d1_nlinks;	/* 1 char */
  rip->i_gid = (gid_t) dip->d1_gid;		/* 1 char */
  rip->i_ndzones = V1_NR_DZONES;
  rip->i_nindirs = V1_INDIRECTS;
  for (z = 0; z < V1_NR_TZONES; z++)
      rip->i_zone[z] = conv2(norm, (int) dip->d1_zone[z]);
}
/*===========================================================================*
* new_icopy *
*===========================================================================*/
PRIVATE void new_icopy(rip, dip, direction, norm)
register struct inode *rip;	/* in-core inode */
register d2_inode *dip;		/* V2 on-disk inode */
int direction;			/* READING (from disk) or WRITING (to disk) */
int norm;			/* TRUE = do not swap bytes; FALSE = swap */
{
/* Translate between the in-core inode and the V2 disk layout.  Unlike the
 * V1 routine, all three time stamps are transferred in both directions.
 */
  int z;

  if (direction != READING) {
      /* In-core table -> V2 disk inode. */
      dip->d2_mode = conv2(norm,rip->i_mode);
      dip->d2_uid = conv2(norm,rip->i_uid );
      dip->d2_nlinks = conv2(norm,rip->i_nlinks);
      dip->d2_gid = conv2(norm,rip->i_gid );
      dip->d2_size = conv4(norm,rip->i_size);
      dip->d2_atime = conv4(norm,rip->i_atime);
      dip->d2_ctime = conv4(norm,rip->i_ctime);
      dip->d2_mtime = conv4(norm,rip->i_mtime);
      for (z = 0; z < V2_NR_TZONES; z++)
          dip->d2_zone[z] = conv4(norm, (long) rip->i_zone[z]);
      return;
  }

  /* V2 disk inode -> in-core table, swapping bytes if need be. */
  rip->i_mode = conv2(norm,dip->d2_mode);
  rip->i_uid = conv2(norm,dip->d2_uid );
  rip->i_nlinks = conv2(norm,(int) dip->d2_nlinks);
  rip->i_gid = conv2(norm,(int) dip->d2_gid );
  rip->i_size = conv4(norm,dip->d2_size);
  rip->i_atime = conv4(norm,dip->d2_atime);
  rip->i_ctime = conv4(norm,dip->d2_ctime);
  rip->i_mtime = conv4(norm,dip->d2_mtime);
  rip->i_ndzones = V2_NR_DZONES;
  rip->i_nindirs = V2_INDIRECTS;
  for (z = 0; z < V2_NR_TZONES; z++)
      rip->i_zone[z] = conv4(norm, (long) dip->d2_zone[z]);
}
get_inode先查询内存中的inode_table,选择i_count大于0的(即正在被使用的)inode,看dev和numb是否匹配。若匹配则找到,inode的count域加一,返回指针。若遍历完都未找到匹配,则numb节点不在内存。使用刚才遍历过程记录的空slot,调用rw_inode读取的inode放入此slot中。刚调入内存的inode在rw_inode函数最后设置为CLEAN。其他访问inode的操作可能修改为DIRTY。
注:dirty标志仅代表inode存储在磁盘上部分是dirty的还是clean的。inode仅在内存中的部分与dirty标志并不相互影响。
疑问:若给定一个参数numb,其代表的inode并未使用alloc_inode分配,结果会怎样?
put_inode(rip)
/*===========================================================================*
* put_inode *
*===========================================================================*/
PUBLIC void put_inode(rip)
register struct inode *rip;	/* pointer to inode to be released */
{
/* The caller is no longer using this inode.  Its use count is decremented;
 * when the count reaches zero nobody is using the table entry any more, so:
 *   - if the inode has no links left, its disk blocks are returned with
 *     truncate(), its mode is set to I_NOT_ALLOC and its bit in the inode
 *     bit map is released with free_inode();
 *   - otherwise, if it is a pipe, it is truncated;
 *   - the pipe flag is cleared and, if the entry is DIRTY, it is written
 *     back with rw_inode().
 * A NIL_INODE argument is accepted and ignored.
 */
  if (rip == NIL_INODE) return;	/* checking here is easier than in caller */

  if (--rip->i_count == 0) {	/* i_count == 0 means no one is using it now */
      /* Test the whole link count.  Masking it with BYTE would look only at
       * the low 8 bits, so a count that is a multiple of 256 would be taken
       * for zero and a still-linked inode would be freed.  For an 8-bit
       * count the two tests are equivalent.
       */
      if (rip->i_nlinks == 0) {
          /* i_nlinks == 0 means free the inode. */
          truncate(rip);		/* return all the disk blocks */
          rip->i_mode = I_NOT_ALLOC;	/* clear I_TYPE field */
          rip->i_dirt = DIRTY;	/* so the cleared mode is written below */
          free_inode(rip->i_dev, rip->i_num);
      } else {
          if (rip->i_pipe == I_PIPE) truncate(rip);
      }
      rip->i_pipe = NO_PIPE;	/* should always be cleared */
      if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING);
  }
}
参数为内存inodetable中的项的指针。put函数对inode的icount域减一。若为0,则现在没有程序在使用此内存inode,可以从内存inode中删除。进一步判断ilink是否为0,若是,则说明现在此inode可以free掉,且调用truncate函数将此inode中占用的数据块全部设为空闲。将内存inodetable中此项设为未分配,调用free_inode在位示图中标记此inode可用。
这里要注意对pipe文件的处理,当一个进程释放管道文件的时候应该将其inode删除。因为为一个进程保留管道文件是没有意义的。
最后,将inode的pipe属性修改为NO_PIPE 。如果inode被修改了,调用rw_inode写回。(当icount减一等于0时,不管ilink是否为0,都调用rw_inode写回数据)
单独看这一个过程的话,可能会觉得只有在i_nlinks为0的时候才设置dirty标志,但是这时候设置好像又没什么意义了,反正这个inode是没人使用了。但是当i_count为0,i_nlinks不等于0时,并没有设置dirty标志。接着执行下面的写回脏inode会不会导致刚才对于i_count的修改没有写回到磁盘。(分析错误,i_count只存在于内存中,根本就不在磁盘中存储。i_count代表当前使用这个inode的进程的数目。)
alloc_inode & free_inode调用位示图操作,修改位示图相关位的值。
/*===========================================================================*
* alloc_inode *
*===========================================================================*/
PUBLIC struct inode *alloc_inode(dev, bits)
dev_t dev; /* device on which to allocate the inode */
mode_t bits; /* mode of the inode */
{
/* Allocate a free inode on 'dev', and return a pointer to it. */
register struct inode *rip;
register struct super_block *sp;
int major, minor, inumb;
bit_t b;
sp = get_super(dev); /* get pointer to super_block */
if (sp->s_rd_only) { /* can't allocate an inode on a read only device. */
err_code = EROFS;
return(NIL_INODE);
}
/* Acquire an inode from the bit map. */
b = alloc_bit(sp, IMAP, sp->s_isearch);
if (b == NO_BIT) {
err_code = ENFILE;
major = (int) (sp->s_dev >> MAJOR) & BYTE;
minor = (int) (sp->s_dev >> MINOR) & BYTE;
printf("Out of i-nodes on %sdevice %d/%d\n",
sp->s_dev == ROOT_DEV ? "root " : "", major, minor);
return(NIL_INODE);
}
sp->s_isearch = b; /* next time start here */
inumb = (int) b; /* be careful not to pass unshort as param */
/* Try to acquire a slot in the inode table. */
if ((rip = get_inode(NO_DEV, inumb)) == NIL_INODE) {
/* No inode table slots available. Free the inode just allocated. */
free_bit(sp, IMAP, b);
} else {
/* An inode slot is available. Put the inode just allocated into it. */
rip->i_mode = bits; /* set up RWX bits */
rip->i_nlinks = (nlink_t) 0; /* initial no links */
rip->i_uid = fp->fp_effuid; /* file's uid is owner's */
rip->i_gid = fp->fp_effgid; /* ditto group id */
rip->i_dev = dev; /* mark which device it is on */
rip->i_ndzones = sp->s_ndzones; /* number of direct zones */
rip->i_nindirs = sp->s_nindirs; /* number of indirect zones per blk*/
rip->i_sp = sp; /* pointer to super block */
/* Fields not cleared already are cleared in wipe_inode(). They have
* been put there because truncate() needs to clear the same fields if
* the file happens to be open while being truncated. It saves space
* not to repeat the code twice.
*/
wipe_inode(rip);
}
return(rip);
}
/*===========================================================================*
* wipe_inode *
*===========================================================================*/
PUBLIC void wipe_inode(rip)
register struct inode *rip;	/* the inode to be erased */
{
/* Reset the fields of an inode that describe its contents: the size becomes
 * zero, every zone number becomes NO_ZONE, all three times are flagged for
 * a later update, and the inode is marked DIRTY.  Called from alloc_inode()
 * for a new inode and from truncate() for an existing one.
 */
  register int z;

  for (z = 0; z < V2_NR_TZONES; z++) {
      rip->i_zone[z] = NO_ZONE;
  }
  rip->i_size = 0;
  rip->i_dirt = DIRTY;
  rip->i_update = ATIME | CTIME | MTIME;	/* update all times later */
}
alloc_inode分配一个inode,mode为参数中给出的bits。与数据块的分配不同,并不需要指定在哪个块号附近分配。这里都是从超级块中的sp->s_isearch(第一个可用inode号)开始。先调用alloc_bit从inode位示图中分配一个bit。然后还要在inodetable中为此inode找到位置。调用get_inode(NO_DEV, inumb),若没有空闲位置,则将刚才分配到的bit也free掉。
若get_inode成功从inode_table中获得一个位置rip,则对这个rip指向的inode进行初始化,比如inode_number,mode,uid,gid,dev等。初始的link数目为0 。还有部分初始化放在wipe_inode中进行。因为别处也要用到这部分初始化,因此放在一个函数里,减少代码量。
注意,这里分配的inode仅仅是在位示图中分配了一个位,然后在内存inode_table初始化了这个inode。真正磁盘inode存储区域内并没有这个inode具体信息(当然属于这个inode的存储区域是有的)。所以在分配了inode时在wipe_inode函数中将内存inode_table中对应项设置为dirty。具体写到磁盘上要put_inode依据dirty标志调用rw_inode来完成。
/*===========================================================================*
* free_inode *
*===========================================================================*/
PUBLIC void free_inode(dev, inumb)
dev_t dev;			/* on which device is the inode */
ino_t inumb;			/* number of inode to be freed */
{
/* Give inode 'inumb' of device 'dev' back to the pool of unallocated inodes
 * by releasing its bit in the inode bit map.  Numbers outside the range
 * 1..s_ninodes are silently ignored.
 */
  register struct super_block *sb;
  bit_t bit;

  sb = get_super(dev);		/* locate the appropriate super block */
  if (inumb > 0 && inumb <= sb->s_ninodes) {
      bit = inumb;
      free_bit(sb, IMAP, bit);
      /* Keep the search start at or below the lowest bit just freed. */
      if (bit < sb->s_isearch) sb->s_isearch = bit;
  }
}
/*===========================================================================*
* update_times *
*===========================================================================*/
PUBLIC void update_times(rip)
register struct inode *rip;	/* inode whose pending times are filled in */
{
/* Fill in the time fields that were earlier marked for update in i_update.
 * System calls only set the ATIME, CTIME and MTIME bits, because obtaining
 * the time requires a message to the clock task, which is expensive; the
 * times are actually filled in here.  Nothing is done for an inode on a
 * read-only file system, and in that case i_update is left as it is.
 */
  time_t now;

  if (rip->i_sp->s_rd_only) return;	/* no updates on read-only systems */

  now = clock_time();
  if (rip->i_update & MTIME) rip->i_mtime = now;
  if (rip->i_update & CTIME) rip->i_ctime = now;
  if (rip->i_update & ATIME) rip->i_atime = now;
  rip->i_update = 0;		/* they are all up-to-date now */
}
free_inode 直接调用free_bit在位示图中指示inode为可用。
注意:在alloc_inode和free_inode中调用alloc_bit和free_bit的时候,参数直接给的是inode号。
在inode位示图中直接根据inode号定位所在的位。这里的inode从0开始计数。但是在rw_inode中,给出了inode_number,定位此inode具体所在的磁盘位置(磁盘块号,块内偏移地址)。这时候并不能直接使用给出的inode号,因为具体存储inode的时候是从1号inode开始的。0号为保留,作为错误返回值。因此,计算具体存储位置的时候,并不能直接使用numb,这里是要减一,其他实现可能还会有不同的处理。
下面是一些解释:
Usually, inode 0 is reserved because a return value of 0 usually signals an error. Multiple methods in the Linux kernel -- especially in the VFS layer shared by all file systems -- return an ino_t, e.g. find_inode_number.
There are more reserved inode numbers. For example in ext2 :
#define EXT2_BAD_INO 1 /* Bad blocks inode */
#define EXT2_ROOT_INO 2 /* Root inode */
#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
and ext3 has:
#define EXT3_BAD_INO 1 /* Bad blocks inode */
#define EXT3_ROOT_INO 2 /* Root inode */
#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
#define EXT3_JOURNAL_INO 8 /* Journal inode */
Other filesystems use ino 1 as the root inode number. In general, a file system is free to choose its inode numbers and its reserved ino values (with the exception of 0).
最后关于update_times:
inode中i_update定义为char类型。主要使用到一个char8位数据中的三位,用来标识三个时间是否已经是最新的。为0表示是最新的,否则根据具体位确定更新哪个时间域。具体比较还需要三个宏ATIME CTIME MTIME值。
#define ATIME 002 /* set if atime field needs updating */
#define CTIME 004 /* set if ctime field needs updating */
#define MTIME 010 /* set if mtime field needs updating */
猜测,这几个值是8进制,以0开头表示8进制。
转换成二进制则是10 100 1000 分别在一个字节的第2 3 4位为1.
/*===========================================================================*
* dup_inode *
*===========================================================================*/
PUBLIC void dup_inode(ip)
struct inode *ip;		/* inode that gains one more user */
{
/* Record one more user of an inode whose table slot is already known.
 * Unlike get_inode(), no table search is needed: only the use count of
 * the given slot is incremented.
 */
  ip->i_count += 1;
}