postgresql src/backend/storage/file/fd.c 源代码解读 (jung)

/*-------------------------------------------------------------------------
 *
 * fd.c
 *	  Virtual file descriptor code.
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/file/fd.c
 *
 * NOTES:
 *
 * This code manages a cache of 'virtual' file descriptors (VFDs).
 * The server opens many file descriptors for a variety of reasons,
 * including base tables, scratch files (e.g., sort and hash spool
 * files), and random calls to C library routines like system(3); it
 * is quite easy to exceed system limits on the number of open files a
 * single process can have.  (This is around 256 on many modern
 * operating systems, but can be as low as 32 on others.)
 *
 * VFDs are managed as an LRU pool, with actual OS file descriptors
 * being opened and closed as needed.  Obviously, if a routine is
 * opened using these interfaces, all subsequent operations must also
 * be through these interfaces (the File type is not a real file
 * descriptor).
 *
 * For this scheme to work, most (if not all) routines throughout the
 * server should use these interfaces instead of calling the C library
 * routines (e.g., open(2) and fopen(3)) themselves.  Otherwise, we
 * may find ourselves short of real file descriptors anyway.
 *
 * This file used to contain a bunch of stuff to support RAID levels 0
 * (jbod), 1 (duplex) and 5 (xor parity).  That stuff is all gone
 * because the parallel query processing code that called it is all
 * gone.  If you really need it you could get it from the original
 * POSTGRES source.
 *-------------------------------------------------------------------------
 */


Fd.c 虚拟文件描述符代码 

        这段代码管理的是一个虚拟文件描述符(VFD)的缓存。服务器会因为各种原因打开许多文件描述符,包括基本表、临时文件(例如排序和哈希使用的假脱机文件),以及对system(3)之类C库函数的零散调用;因此单个进程打开的文件数很容易超过系统的限制。(在很多现代操作系统上这个限制大约是256个,有些系统上可能低至32个。)

        VFD以LRU池的方式进行管理,真正的操作系统文件描述符会根据需要被打开和关闭。很显然,如果一个文件是通过这些接口打开的,那么对它的所有后续操作也必须通过这些接口进行(File类型并不是真正的文件描述符)。

        基于这种工作机制,服务器中大多数程序应该使用这些接口而不是调用C语言库中的程序。否则我们也许会发现缺少实际描述符。 

        这个文件过去包含一堆的东西来支持RAID级别 0,1,5。现在并行查询处理代码已经没有了,因此相关的东西也就没有了。如果你确实需要它,就去postgresql原始的代码中获取。




/*
 * Private Routines
 *
 * Delete		- delete a file from the Lru ring
 * LruDelete	   	- remove a file from the Lru ring and close its FD
 * Insert		- put a file at the front of the Lru ring
 * LruInsert	  	- put a file at the front of the Lru ring and open it
 * ReleaseLruFile  	- Release an fd by closing the last entry in the Lru ring
 * AllocateVfd	   	- grab a free (or new) file record (from VfdArray)
 * FreeVfd		- free a file record
 *
 * The Least Recently Used ring is a doubly linked list that begins and
 * ends on element zero.  Element zero is special -- it doesn't represent
 * a file and its "fd" field always == VFD_CLOSED.	Element zero is just an
 * anchor that shows us the beginning/end of the ring.
 * Only VFD elements that are currently really open (have an FD assigned) are
 * in the Lru ring.  Elements that are "virtually" open can be recognized
 * by having a non-null fileName field.
 */

1、VFD插入到LRU 中:


Vfd数据结构

/*
 * One entry of the VfdCache array, describing a single virtual file
 * descriptor.  Entry 0 of the array does not represent a file; it serves
 * as the anchor of the LRU ring and holds the head of the free list.
 */
typedef struct vfd
{
	int	fd;				/* current FD, or VFD_CLOSED if none */
	unsigned short fdstate;		/* bitflags for VFD's state */
	ResourceOwner resowner;		/* owner, for automatic cleanup */
	File		nextFree;		/* link to next free VFD, if in freelist */
	File		lruMoreRecently;	/* doubly linked recency-of-use list:
									 * neighbor used more recently (0 = anchor) */
	File		lruLessRecently;	/* neighbor used less recently (0 = anchor) */
	off_t		seekPos;		/* current logical file position */
	char	   *fileName;		/* name of file, or NULL for unused VFD */
	/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
	int			fileFlags;		/* open(2) flags for (re)opening the file */
	int			fileMode;		/* mode to pass to open(2) */
} Vfd;



        所有的系统文件描述符都封装在Vfd中进行管理,Vfd的第一个成员fd保存的就是实际的文件描述符。InitFileAccess最初只为VfdCache分配一个元素(0号元素);当第一次需要分配Vfd而空闲链表为空时,AllocateVfd会把VfdCache数组扩充到32个元素,把新增的1至31号元素清零并将其fd字段置为VFD_CLOSED,再通过成员nextFree把它们链接成FreeList,链表头保存在VfdCache[0].nextFree中。

        当需要打开一个文件的时候,就取出FreeList的链表头元素,然后将该文件的文件描述符、文件名以及相关的标志信息填充到Vfd中。PostgreSQL将所有当前真正打开(已分配操作系统fd)的文件的Vfd通过lruMoreRecently、lruLessRecently链接成一个双向环形链表,0号元素是这个环的头尾锚点。


根据文件名打开一个文件,分配一个Vfd并初始化该Vfd。

这里用到了strdup:它用malloc分配一段内存空间,并把参数字符串的内容复制进去。这段空间同样需要使用free进行释放,否则会造成内存泄漏。


/*
 * PathNameOpenFile - open a file in an arbitrary directory
 *
 * Allocates a VFD for the file, opens the file with BasicOpenFile(), and
 * links the VFD at the front of the LRU ring.
 *
 * fileName:  path of the file to open.  A malloc'd copy is stored in the
 *			  VFD, so the caller keeps ownership of its own string.
 * fileFlags: open(2) flags for this open.  O_CREAT, O_TRUNC and O_EXCL are
 *			  masked out of the copy saved in the VFD for later re-opens.
 * fileMode:  mode to pass to open(2).
 *
 * Returns the new File on success.  Returns -1 if the open fails; errno as
 * left by the failed open is preserved across the cleanup.  Reports ERROR
 * if the file name cannot be copied.
 *
 * NB: if the passed pathname is relative (which it usually is),
 * it will be interpreted relative to the process' working directory
 * (which should always be $PGDATA when this code is running).
 */
File
PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
{
	char	   *fnamecopy;
	File		file;
	Vfd		   *vfdP;

	DO_DB(elog(LOG, "PathNameOpenFile: %s %x %o",
			   fileName, fileFlags, fileMode));

	/*
	 * We need a malloc'd copy of the file name; fail cleanly if no room.
	 */
	fnamecopy = strdup(fileName);
	if (fnamecopy == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Grab a VFD slot; "file" is its index in VfdCache. */
	file = AllocateVfd();
	vfdP = &VfdCache[file];

	/*
	 * While the count of open descriptors has reached max_safe_fds, release
	 * entries from the LRU ring.  Give up when nothing more can be released
	 * and let the open below report any failure.
	 */
	while (nfile + numAllocatedDescs >= max_safe_fds)
	{
		if (!ReleaseLruFile())
			break;
	}

	vfdP->fd = BasicOpenFile(fileName, fileFlags, fileMode);

	if (vfdP->fd < 0)
	{
		/*
		 * Open failed: give back the VFD and the name copy.  Save errno
		 * around the cleanup so the caller sees the reason for the failed
		 * open rather than whatever the cleanup calls leave behind.
		 */
		int			save_errno = errno;

		FreeVfd(file);
		free(fnamecopy);
		errno = save_errno;
		return -1;
	}
	++nfile;					/* one more real descriptor in use */
	DO_DB(elog(LOG, "PathNameOpenFile: success %d",
			   vfdP->fd));

	/* Link the VFD at the front of the LRU ring. */
	Insert(file);

	vfdP->fileName = fnamecopy;
	/* Saved flags are adjusted to be OK for re-opening file */
	vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
	vfdP->fileMode = fileMode;
	vfdP->seekPos = 0;
	vfdP->fdstate = 0x0;
	vfdP->resowner = NULL;

	return file;
}


void
InitFileAccess(void)
{
	Assert(SizeVfdCache == 0);	/* call me only once */

	/* initialize cache header entry */
	VfdCache = (Vfd *) malloc(sizeof(Vfd));
	if (VfdCache == NULL)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	MemSet((char *) &(VfdCache[0]), 0, sizeof(Vfd));//初始化内存空间为0
	VfdCache->fd = VFD_CLOSED;//初始化fd为VFD_CLOSED

	SizeVfdCache = 1;//VfdCache的大小为1

	/* register proc-exit hook to ensure temp files are dropped at exit */
	on_proc_exit(AtProcExit_Files, 0);
}

InitFileAccess 主要功能是初始化VfdCache,分配一个Vfd的内存空间,并将其中所有的内存内容设置为0,VfdCache[0].fd设置为VFD_CLOSED。该Vfd不会分配给任何文件,主要是用做LRU池的访问头部。

虚拟文件描述符(File)就是VfdCache数组的下标:0号元素保留作头结点,不分配给任何文件,因此实际分配给文件的编号从1开始。数组第一次扩充时增长到32个元素,此后每当空闲链表用尽,数组容量就扩大为原来的两倍。




/* A File is an index into VfdCache, not a real OS file descriptor. */
typedef int File;


/*
 * AllocateVfd - grab a free (or new) file record from VfdCache
 *
 * The free list is threaded through the nextFree fields, with its head in
 * VfdCache[0].nextFree; a head value of 0 means the list is empty.  When it
 * is empty the array is enlarged (doubled, but to no fewer than 32 entries)
 * and the new entries are zeroed, marked VFD_CLOSED and chained into the
 * free list.
 *
 * Returns the index of the entry removed from the free list.  Reports ERROR
 * if the array cannot be enlarged; VfdCache is left untouched in that case.
 */
static File
AllocateVfd(void)
{
	Index		i;
	File		file;

	/* Cast so the argument matches %lu whatever the width of the size type */
	DO_DB(elog(LOG, "AllocateVfd. Size %lu", (unsigned long) SizeVfdCache));

	Assert(SizeVfdCache > 0);	/* InitFileAccess not called? */

	if (VfdCache[0].nextFree == 0)
	{
		/*
		 * The free list is empty so it is time to increase the size of the
		 * array.  We choose to double it each time this happens. However,
		 * there's not much point in starting *real* small.
		 */
		Size		newCacheSize = SizeVfdCache * 2;
		Vfd		   *newVfdCache;

		if (newCacheSize < 32)
			newCacheSize = 32;

		/*
		 * Be careful not to clobber VfdCache ptr if realloc fails.
		 */
		newVfdCache = (Vfd *) realloc(VfdCache, sizeof(Vfd) * newCacheSize);
		if (newVfdCache == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
		VfdCache = newVfdCache;

		/*
		 * Initialize the new entries and link them into the free list.
		 * Each new entry points at its successor; the last one is then
		 * given 0 to terminate the list, and the list head is pointed at
		 * the first new entry.
		 */
		for (i = SizeVfdCache; i < newCacheSize; i++)
		{
			MemSet((char *) &(VfdCache[i]), 0, sizeof(Vfd));
			VfdCache[i].nextFree = i + 1;
			VfdCache[i].fd = VFD_CLOSED;
		}
		VfdCache[newCacheSize - 1].nextFree = 0;
		VfdCache[0].nextFree = SizeVfdCache;

		/*
		 * Record the new size
		 */
		SizeVfdCache = newCacheSize;
	}

	/* Pop the first entry off the free list. */
	file = VfdCache[0].nextFree;

	VfdCache[0].nextFree = VfdCache[file].nextFree;

	return file;
}


/*
 * Insert - put a file at the front of the Lru ring
 *
 * Only the ring links are updated.  Entry 0 is the ring's anchor, so the
 * new entry is linked in between the anchor and whatever entry the anchor's
 * lruLessRecently link pointed to before.
 */
static void
Insert(File file)
{
	Vfd		   *entry;
	File		oldFront;

	/* Index 0 is the anchor and must never be inserted itself. */
	Assert(file != 0);

	DO_DB(elog(LOG, "Insert %d (%s)", file, VfdCache[file].fileName));
	DO_DB(_dump_lru());

	entry = &VfdCache[file];
	oldFront = VfdCache[0].lruLessRecently;

	/* The new entry sits between the anchor and the previous front. */
	entry->lruMoreRecently = 0;
	entry->lruLessRecently = oldFront;

	/* Repoint both neighbors at the new entry. */
	VfdCache[0].lruLessRecently = file;
	VfdCache[oldFront].lruMoreRecently = file;

	DO_DB(_dump_lru());
}

/*
 * LruInsert - put a file at the front of the Lru ring and open it
 *
 * If the VFD has no real descriptor at the moment, the file is re-opened
 * with the name, flags and mode saved in the VFD, and the descriptor is
 * repositioned to the saved seekPos.  The VFD is then linked at the front
 * of the LRU ring.
 *
 * returns 0 on success, -1 on re-open failure (with errno set).  Failure to
 * restore the seek position is treated the same way: the descriptor is
 * closed again, the VFD is left not-open, and -1 is returned with errno
 * from the failed lseek().
 */
static int
LruInsert(File file)
{
	Vfd		   *vfdP;

	Assert(file != 0);

	DO_DB(elog(LOG, "LruInsert %d (%s)", file, VfdCache[file].fileName));

	vfdP = &VfdCache[file];

	if (FileIsNotOpen(file))
	{
		/* Free up descriptors until we are below the limit, if we can. */
		while (nfile + numAllocatedDescs >= max_safe_fds)
		{
			if (!ReleaseLruFile())
				break;
		}

		/*
		 * The open could still fail for lack of file descriptors, eg due to
		 * overall system file table being full.  So, be prepared to release
		 * another FD if necessary...
		 */
		vfdP->fd = BasicOpenFile(vfdP->fileName, vfdP->fileFlags, vfdP->fileMode);
		if (vfdP->fd < 0)
		{
			DO_DB(elog(LOG, "RE_OPEN FAILED: %d", errno));
			return vfdP->fd;
		}
		else
		{
			DO_DB(elog(LOG, "RE_OPEN SUCCESS"));
			++nfile;
		}

		/* seek to the right position */
		if (vfdP->seekPos != (off_t) 0)
		{
			if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) == (off_t) -1)
			{
				/*
				 * We could not restore the file position.  Handing back a
				 * descriptor positioned somewhere else would make later
				 * I/O hit the wrong offset, so undo the open and report
				 * this like an open failure.  errno is saved because the
				 * cleanup calls may overwrite it.
				 */
				int			save_errno = errno;

				elog(LOG, "could not seek file \"%s\" after re-opening: %m",
					 vfdP->fileName);
				(void) close(vfdP->fd);
				vfdP->fd = VFD_CLOSED;
				--nfile;
				errno = save_errno;
				return -1;
			}
		}
	}

	/*
	 * put it at the head of the Lru ring
	 */

	Insert(file);

	return 0;
}





  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值