为了方便查阅,所有的源码均给出完整内容。
Fast File System (FFS)
下面是FFS中超级块的数据结构定义:
/*
* Super block for an FFS file system.
*/
struct fs {
int32_t fs_firstfield; /* historic file system linked list, */
int32_t fs_unused_1; /* used for incore super blocks */
ufs_daddr_t fs_sblkno; /* addr of super-block in filesys */
ufs_daddr_t fs_cblkno; /* offset of cyl-block in filesys */
ufs_daddr_t fs_iblkno; /* offset of inode-blocks in filesys */
ufs_daddr_t fs_dblkno; /* offset of first data after cg */
int32_t fs_cgoffset; /* cylinder group offset in cylinder */
int32_t fs_cgmask; /* used to calc mod fs_ntrak */
time_t fs_time; /* last time written */
int32_t fs_size; /* number of blocks in fs */
int32_t fs_dsize; /* number of data blocks in fs */
int32_t fs_ncg; /* number of cylinder groups */
int32_t fs_bsize; /* size of basic blocks in fs */
int32_t fs_fsize; /* size of frag blocks in fs */
int32_t fs_frag; /* number of frags in a block in fs */
/* these are configuration parameters */
int32_t fs_minfree; /* minimum percentage of free blocks */
int32_t fs_rotdelay; /* num of ms for optimal next block */
int32_t fs_rps; /* disk revolutions per second */
/* these fields can be computed from the others */
int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */
int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */
int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */
int32_t fs_fshift; /* ``numfrags'' calc number of frags */
/* these are configuration parameters */
int32_t fs_maxcontig; /* max number of contiguous blks */
int32_t fs_maxbpg; /* max number of blks per cyl group */
/* these fields can be computed from the others */
int32_t fs_fragshift; /* block to frag shift */
int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
int32_t fs_sbsize; /* actual size of super block */
int32_t fs_csmask; /* csum block offset */
int32_t fs_csshift; /* csum block number */
int32_t fs_nindir; /* value of NINDIR */
int32_t fs_inopb; /* value of INOPB */
int32_t fs_nspf; /* value of NSPF */
/* yet another configuration parameter */
int32_t fs_optim; /* optimization preference, see below */
/* these fields are derived from the hardware */
int32_t fs_npsect; /* # sectors/track including spares */
int32_t fs_interleave; /* hardware sector interleave */
int32_t fs_trackskew; /* sector 0 skew, per track */
int32_t fs_headswitch; /* head switch time, usec */
int32_t fs_trkseek; /* track-to-track seek, usec */
/* sizes determined by number of cylinder groups and their sizes */
ufs_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
int32_t fs_cssize; /* size of cyl grp summary area */
int32_t fs_cgsize; /* cylinder group size */
/* these fields are derived from the hardware */
int32_t fs_ntrak; /* tracks per cylinder */
int32_t fs_nsect; /* sectors per track */
int32_t fs_spc; /* sectors per cylinder */
/* this comes from the disk driver partitioning */
int32_t fs_ncyl; /* cylinders in file system */
/* these fields can be computed from the others */
int32_t fs_cpg; /* cylinders per group */
int32_t fs_ipg; /* inodes per group */
int32_t fs_fpg; /* blocks per group * fs_frag */
/* this data must be re-computed after crashes */
struct csum fs_cstotal; /* cylinder summary information */
/* these fields are cleared at mount time */
int8_t fs_fmod; /* super block modified flag */
int8_t fs_clean; /* file system is clean flag */
int8_t fs_ronly; /* mounted read-only flag */
int8_t fs_flags; /* currently unused flag */
u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
/* these fields retain the current block allocation info */
int32_t fs_cgrotor; /* last cg searched */
struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
int32_t *fs_maxcluster; /* max cluster in each cyl group */
int32_t fs_cpc; /* cyl per cycle in postbl */
int16_t fs_opostbl[16][8]; /* old rotation block list head */
int32_t fs_sparecon[50]; /* reserved for future constants */
int32_t fs_contigsumsize; /* size of cluster summary array */
int32_t fs_maxsymlinklen; /* max length of an internal symlink */
int32_t fs_inodefmt; /* format of on-disk inodes */
u_int64_t fs_maxfilesize; /* maximum representable file size */
int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */
int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */
int32_t fs_state; /* validate fs_clean field */
int32_t fs_postblformat; /* format of positional layout tables */
int32_t fs_nrpos; /* number of rotational positions */
int32_t fs_postbloff; /* (u_int16) rotation block list head */
int32_t fs_rotbloff; /* (u_int8) blocks for each rotation */
int32_t fs_magic; /* magic number */
u_int8_t fs_space[1]; /* list of blocks for each rotation */
/* actually longer */
};
先说下一些先决知识,FFS将磁盘分为若干个柱面组(Cylinder Group),所有的块(block) 都可以划分为大小相等的 段(fragment),而每个文件只有最后一个块才能被分段,前面的所有块都是完整的。
在申请空闲块时,只有对齐的位置才能分配,也就是不能将跨越两个块的段组合成一个块。同时,已被分段的块,不能重新合并为块,必须另外分配空闲块,然后将内容复制过去。
负责管理块和段分配的函数为ffs_balloc()
,下面为源码:
/*
* Balloc defines the structure of file system storage
* by allocating the physical blocks on a device given
* the inode and the logical block number in a file.
*/
ffs_balloc(ip, lbn, size, cred, bpp, flags)
register struct inode *ip; //该文件的inode指针
register ufs_daddr_t lbn; //逻辑块号
int size; //申请的数据大小
struct ucred *cred; //用户相关信息
struct buf **bpp; //内核中的缓冲区,用于执行I/O
int flags;
{
register struct fs *fs;
register ufs_daddr_t nb;
struct buf *bp, *nbp;
struct vnode *vp = ITOV(ip); //根据inode得到vnode
struct indir indirs[NIADDR + 2];
ufs_daddr_t newb, *bap, pref;
int deallocated, osize, nsize, num, i, error;
ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
*bpp = NULL;
if (lbn < 0)
return (EFBIG);
fs = ip->i_fs;
/*
* If the next write will extend the file into a new block,
* and the file is currently composed of a fragment
* this fragment has to be extended to be a full block.
*/
nb = lblkno(fs, ip->i_size); //该文件的完整块数目
if (nb < NDADDR && nb < lbn) { //块数小于正在写的块的块号
osize = blksize(fs, ip, nb); //不构成完整块的最后部分的大小(要么为块大小,要么为段大小的倍数)
if (osize < fs->fs_bsize && osize > 0) { //最后一个块为分段块
error = ffs_realloccg(ip, nb,
ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
osize, (int)fs->fs_bsize, cred, &bp); //将段重新升级为块
if (error)
return (error);
ip->i_size = (nb + 1) * fs->fs_bsize;
vnode_pager_setsize(vp, (u_long)ip->i_size);
ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (flags & B_SYNC)
bwrite(bp);
else
bawrite(bp);
}
}
/*
* The first NDADDR blocks are direct blocks
*/
if (lbn < NDADDR) {
nb = ip->i_db[lbn];
if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
brelse(bp);
return (error);
}
*bpp = bp;
return (0);
}
if (nb != 0) {
/*
* Consider need to reallocate a fragment.
*/
osize = fragroundup(fs, blkoff(fs, ip->i_size));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
error = bread(vp, lbn, osize, NOCRED, &bp);
if (error) {
brelse(bp);
return (error);
}
} else {
error = ffs_realloccg(ip, lbn,
ffs_blkpref(ip, lbn, (int)lbn,
&ip->i_db[0]), osize, nsize, cred, &bp);
if (error)
return (error);
}
} else {
if (ip->i_size < (lbn + 1) * fs->fs_bsize)
nsize = fragroundup(fs, size);
else
nsize = fs->fs_bsize;
error = ffs_alloc(ip, lbn,
ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
nsize, cred, &newb);
if (error)
return (error);
bp = getblk(vp, lbn, nsize, 0, 0);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & B_CLRBUF)
clrbuf(bp);
}
ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
*bpp = bp;
return (0);
}
/*
* Determine the number of levels of indirection.
*/
pref = 0;
if (error = ufs_getlbns(vp, lbn, indirs, &num))
return(error);
#ifdef DIAGNOSTIC
if (num < 1)
panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
#endif
/*
* Fetch the first indirect block allocating if necessary.
*/
--num;
nb = ip->i_ib[indirs[0].in_off];
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
cred, &newb))
return (error);
nb = newb;
*allocblk++ = nb;
bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
bp->b_blkno = fsbtodb(fs, nb);
clrbuf(bp);
/*
* Write synchronously so that indirect blocks
* never point at garbage.
*/
if (error = bwrite(bp))
goto fail;
allocib = &ip->i_ib[indirs[0].in_off];
*allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* Fetch through the indirect blocks, allocating as necessary.
*/
for (i = 1;;) {
error = bread(vp,
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
if (error) {
brelse(bp);
goto fail;
}
bap = (ufs_daddr_t *)bp->b_data;
nb = bap[indirs[i].in_off];
if (i == num)
break;
i += 1;
if (nb != 0) {
brelse(bp);
continue;
}
if (pref == 0)
pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
if (error =
ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
brelse(bp);
goto fail;
}
nb = newb;
*allocblk++ = nb;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
clrbuf(nbp);
/*
* Write synchronously so that indirect blocks
* never point at garbage.
*/
if (error = bwrite(nbp)) {
brelse(bp);
goto fail;
}
bap[indirs[i - 1].in_off] = nb;
/*
* If required, write synchronously, otherwise use
* delayed write.
*/
if (flags & B_SYNC) {
bwrite(bp);
} else {
bdwrite(bp);
}
}
/*
* Get the data block, allocating if necessary.
*/
if (nb == 0) {
pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
if (error = ffs_alloc(ip,
lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
brelse(bp);
goto fail;
}
nb = newb;
*allocblk++ = nb;
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & B_CLRBUF)
clrbuf(nbp);
bap[indirs[i].in_off] = nb;
/*
* If required, write synchronously, otherwise use
* delayed write.
*/
if (flags & B_SYNC) {
bwrite(bp);
} else {
bdwrite(bp);
}
*bpp = nbp;
return (0);
}
brelse(bp);
if (flags & B_CLRBUF) {
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
if (error) {
brelse(nbp);
goto fail;
}
} else {
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
}
*bpp = nbp;
return (0);
fail:
/*
* If we have failed part way through block allocation, we
* have to deallocate any indirect blocks that we have allocated.
*/
for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
ffs_blkfree(ip, *blkp, fs->fs_bsize);
deallocated += fs->fs_bsize;
}
if (allocib != NULL)
*allocib = 0;
if (deallocated) {
#ifdef QUOTA
/*
* Restore user's disk quota because allocation failed.
*/
(void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
#endif
ip->i_blocks -= btodb(deallocated);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
return (error);
}