在创建连接套接字之前,数据库主进程Postmaster需要保证同一个数据目录下当前只有1个Postmaster在运行。如何保证呢?这是通过函数CreateLockFile来完成的:该函数在PGDATA数据库目录中创建锁文件postmaster.pid。每次postmaster启动时,都会在PGDATA目录中创建这个文件(创建文件时设置了标志位O_CREAT | O_EXCL,若文件已经存在则创建失败)。
在数据库成功启动时,会创建这个锁文件;在数据库停止工作时,会删除这个锁文件。下面看一下启动后该锁文件的内容:
(1)文件postmaster.pid
[wln@localhost linux]$ pg_ctl -V
pg_ctl (PostgreSQL) 9.3beta2
[wln@localhost linux]$ cat /home/wln/postgres9.3/data/postmaster.pid
28179
/home/wln/postgres9.3/data
1411423233
5432
/tmp
localhost
5432001 4947977
[wln@localhost linux]$ pg_ctl stop -m f
waiting for server to shut down....... done
server stopped
[wln@localhost linux]$ pg_ctl start
server starting
[wln@localhost linux]$ cat /home/wln/postgres9.3/data/postmaster.pid
16304 --对应进程 PID
/home/wln/postgres9.3/data --对应PGDATA
1411461916
5432 --对应PGPORT
/tmp
localhost --对应PGHOST,本地执行默认localhost
5432001 4980745
[wln@localhost linux]$ ps ux | grep 16304 | grep -v grep
wln 16304 1.2 0.9 152576 10360 pts/3 S 16:45 0:00 /home/wln/postgres9.3/install/bin/postgres
postmaster.pid文件内容具体如下:
/*
 * As of 9.1, the contents of the data-directory lock file are:
 *
 * line #
 *      1   postmaster PID (or negative of a standalone backend's PID)
 *      2   data directory path
 *      3   postmaster start timestamp (time_t representation)
 *      4   port number
 *      5   first Unix socket directory path (empty if none)
 *      6   first listen_address (IP address or "*"; empty if no TCP port)
 *      7   shared memory key (not present on Windows)
 *
 * Lines 6 and up are added via AddToDataDirLockFile() after initial file
 * creation.
 *
 * The socket lock file, if used, has the same contents as lines 1-5.
 *
 * Each macro below is the 1-based line number of the corresponding entry.
 */
#define LOCK_FILE_LINE_PID          1
#define LOCK_FILE_LINE_DATA_DIR     2
#define LOCK_FILE_LINE_START_TIME   3
#define LOCK_FILE_LINE_PORT         4
#define LOCK_FILE_LINE_SOCKET_DIR   5
#define LOCK_FILE_LINE_LISTEN_ADDR  6
#define LOCK_FILE_LINE_SHMEM_KEY    7
(2)/tmp下内容
[wln@localhost linux]$ cat /tmp/.s.PGSQL.5432.lock
16304
/home/wln/postgres9.3/data
1411461916
5432
/tmp
[wln@localhost tmp]$ ll .s.PGSQL.5432
srwxrwxrwx 1 wln wln 0 09-23 16:45 .s.PGSQL.5432
[wln@localhost tmp]$ cat .s.PGSQL.5432
cat: .s.PGSQL.5432: 没有那个设备或地址
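--不知怎么报这个错误,该文件内容长度为0,那为什么要存在这个文件呢?不是已经有个.s.PGSQL.5432.lock 文件了?
--说明:.s.PGSQL.5432 并不是普通文件,而是Unix域套接字(ll输出中权限位开头的s即表示socket),客户端通过connect()连接它与服务器通信。它本身不保存任何内容,所以长度为0;cat使用open()打开套接字文件会失败并返回ENXIO,即"没有那个设备或地址"。而.s.PGSQL.5432.lock是普通文件,记录了该套接字属于哪个进程(内容与postmaster.pid的前5行相同),用来防止其他实例误用或误删这个套接字文件。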
(3)函数CreateLockFile
<span style="font-size:14px;">/*
* Create a lockfile.
*
* filename is the path name of the lockfile to create.
* amPostmaster is used to determine how to encode the output PID.
* socketDir is the Unix socket directory path to include (possibly empty).
* isDDLock and refName are used to determine what error message to produce.
*/
static void
CreateLockFile(const char *filename, bool amPostmaster,
const char *socketDir,
bool isDDLock, const char *refName)
{
int fd;
char buffer[MAXPGPATH * 2 + 256];
int ntries;
int len;
int encoded_pid;
pid_t other_pid;
pid_t my_pid,
my_p_pid,
my_gp_pid;
const char *envvar;
/*
* If the PID in the lockfile is our own PID or our parent's or
* grandparent's PID, then the file must be stale (probably left over from
* a previous system boot cycle). We need to check this because of the
* likelihood that a reboot will assign exactly the same PID as we had in
* the previous reboot, or one that's only one or two counts larger and
* hence the lockfile's PID now refers to an ancestor shell process. We
* allow pg_ctl to pass down its parent shell PID (our grandparent PID)
* via the environment variable PG_GRANDPARENT_PID; this is so that
* launching the postmaster via pg_ctl can be just as reliable as
* launching it directly. There is no provision for detecting
* further-removed ancestor processes, but if the init script is written
* carefully then all but the immediate parent shell will be root-owned
* processes and so the kill test will fail with EPERM. Note that we
* cannot get a false negative this way, because an existing postmaster
* would surely never launch a competing postmaster or pg_ctl process
* directly.
*/
my_pid = getpid();
#ifndef WIN32
my_p_pid = getppid();
#else
/*
* Windows hasn't got getppid(), but doesn't need it since it's not using
* real kill() either...
*/
my_p_pid = 0;
#endif
envvar = getenv("PG_GRANDPARENT_PID");
if (envvar)
my_gp_pid = atoi(envvar);
else
my_gp_pid = 0;
/*
* We need a loop here because of race conditions. But don't loop forever
* (for example, a non-writable $PGDATA directory might cause a failure
* that won't go away). 100 tries seems like plenty.
*/
for (ntries = 0;; ntries++)
{
/*
* Try to create the lock file --- O_EXCL makes this atomic.
*
* Think not to make the file protection weaker than 0600. See
* comments below.
*/
fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
if (fd >= 0)
break; /* Success; exit the retry loop */
/*
* Couldn't create the pid file. Probably it already exists.
*/
if ((errno != EEXIST && errno != EACCES) || ntries > 100)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not create lock file \"%s\": %m",
filename)));
/*
* Read the file to get the old owner's PID. Note race condition
* here: file might have been deleted since we tried to create it.
*/
fd = open(filename, O_RDONLY, 0600);
if (fd < 0)
{
if (errno == ENOENT)
continue; /* race condition; try again */
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open lock file \"%s\": %m",
filename)));
}
if ((len = read(fd, buffer, sizeof(buffer) - 1)) < 0)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not read lock file \"%s\": %m",
filename)));
close(fd);
if (len == 0)
{
ereport(FATAL,
(errcode(ERRCODE_LOCK_FILE_EXISTS),
errmsg("lock file \"%s\" is empty", filename),
errhint("Either another server is starting, or the lock file is the remnant of a previous server startup crash.")));
}
buffer[len] = '\0';
encoded_pid = atoi(buffer);
/* if pid < 0, the pid is for postgres, not postmaster */
other_pid = (pid_t) (encoded_pid < 0 ? -encoded_pid : encoded_pid);
if (other_pid <= 0)
elog(FATAL, "bogus data in lock file \"%s\": \"%s\"",
filename, buffer);
/*
* Check to see if the other process still exists
*
* Per discussion above, my_pid, my_p_pid, and my_gp_pid can be
* ignored as false matches.
*
* Normally kill() will fail with ESRCH if the given PID doesn't
* exist.
*
* We can treat the EPERM-error case as okay because that error
* implies that the existing process has a different userid than we
* do, which means it cannot be a competing postmaster. A postmaster
* cannot successfully attach to a data directory owned by a userid
* other than its own. (This is now checked directly in
* checkDataDir(), but has been true for a long time because of the
* restriction that the data directory isn't group- or
* world-accessible.) Also, since we create the lockfiles mode 600,
* we'd have failed above if the lockfile belonged to another userid
* --- which means that whatever process kill() is reporting about
* isn't the one that made the lockfile. (NOTE: this last
* consideration is the only one that keeps us from blowing away a
* Unix socket file belonging to an instance of Postgres being run by
* someone else, at least on machines where /tmp hasn't got a
* stickybit.)
*/
if (other_pid != my_pid && other_pid != my_p_pid &&
other_pid != my_gp_pid)
{
if (kill(other_pid, 0) == 0 ||
(errno != ESRCH && errno != EPERM))
{
/* lockfile belongs to a live process */
ereport(FATAL,
(errcode(ERRCODE_LOCK_FILE_EXISTS),
errmsg("lock file \"%s\" already exists",
filename),
isDDLock ?
(encoded_pid < 0 ?
errhint("Is another postgres (PID %d) running in data directory \"%s\"?",
(int) other_pid, refName) :
errhint("Is another postmaster (PID %d) running in data directory \"%s\"?",
(int) other_pid, refName)) :
(encoded_pid < 0 ?
errhint("Is another postgres (PID %d) using socket file \"%s\"?",
(int) other_pid, refName) :
errhint("Is another postmaster (PID %d) using socket file \"%s\"?",
(int) other_pid, refName))));
}
}
/*
* No, the creating process did not exist. However, it could be that
* the postmaster crashed (or more likely was kill -9'd by a clueless
* admin) but has left orphan backends behind. Check for this by
* looking to see if there is an associated shmem segment that is
* still in use.
*
* Note: because postmaster.pid is written in multiple steps, we might
* not find the shmem ID values in it; we can't treat that as an
* error.
*/
if (isDDLock)
{
char *ptr = buffer;
unsigned long id1,
id2;
int lineno;
for (lineno = 1; lineno < LOCK_FILE_LINE_SHMEM_KEY; lineno++)
{
if ((ptr = strchr(ptr, '\n')) == NULL)
break;
ptr++;
}
if (ptr != NULL &&
sscanf(ptr, "%lu %lu", &id1, &id2) == 2)
{
if (PGSharedMemoryIsInUse(id1, id2))
ereport(FATAL,
(errcode(ERRCODE_LOCK_FILE_EXISTS),
errmsg("pre-existing shared memory block "
"(key %lu, ID %lu) is still in use",
id1, id2),
errhint("If you're sure there are no old "
"server processes still running, remove "
"the shared memory block "
"or just delete the file \"%s\".",
filename)));
}
}
/*
* Looks like nobody's home. Unlink the file and try again to create
* it. Need a loop because of possible race condition against other
* would-be creators.
*/
if (unlink(filename) < 0)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not remove old lock file \"%s\": %m",
filename),
errhint("The file seems accidentally left over, but "
"it could not be removed. Please remove the file "
"by hand and try again.")));
}
/*
* Successfully created the file, now fill it. See comment in miscadmin.h
* about the contents. Note that we write the same first five lines into
* both datadir and socket lockfiles; although more stuff may get added to
* the datadir lockfile later.
*/
snprintf(buffer, sizeof(buffer), "%d\n%s\n%ld\n%d\n%s\n",
amPostmaster ? (int) my_pid : -((int) my_pid),
DataDir,
(long) MyStartTime,
PostPortNumber,
socketDir);
/*
* In a standalone backend, the next line (LOCK_FILE_LINE_LISTEN_ADDR)
* will never receive data, so fill it in as empty now.
*/
if (isDDLock && !amPostmaster)
strlcat(buffer, "\n", sizeof(buffer));
errno = 0;
if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
{
int save_errno = errno;
close(fd);
unlink(filename);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not write lock file \"%s\": %m", filename)));
}
if (pg_fsync(fd) != 0)
{
int save_errno = errno;
close(fd);
unlink(filename);
errno = save_errno;
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not write lock file \"%s\": %m", filename)));
}
if (close(fd) != 0)
{
int save_errno = errno;
unlink(filename);
errno = save_errno;
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not write lock file \"%s\": %m", filename)));
}
/*
* Arrange to unlink the lock file(s) at proc_exit. If this is the first
* one, set up the on_proc_exit function to do it; then add this lock file
* to the list of files to unlink.
*/
if (lock_files == NIL)
on_proc_exit(UnlinkLockFiles, 0);
lock_files = lappend(lock_files, pstrdup(filename));
}</span>
(4)learn CreateLockFile
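open(pathname, O_RDWR | O_CREAT | O_EXCL, 0666); 同时指定O_CREAT和O_EXCL时,若文件已经存在则打开失败,返回-1,并将errno置为EEXIST(CreateLockFile中实际使用的权限是0600)。下面的小程序演示open失败后如何通过errno查看错误原因: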
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
/*
 * Demonstrates how a failed open() reports its cause through errno.
 *
 * Tries to open /dev/dsp2 for writing; on failure prints the numeric errno
 * value followed by the message strerror() gives for it.  Always returns 0.
 *
 * errno is provided by <errno.h> and is not redeclared here.
 */
int main(void)
{
    int fd;

    if ((fd = open("/dev/dsp2", O_WRONLY)) < 0)
    {
        /* Capture errno at once: the printf() calls below may modify it. */
        int saved_errno = errno;

        printf("errno=%d\n", saved_errno);
        printf("Mesg:%s\n", strerror(saved_errno));
    }

    /* Returning from main ends the program like exit(0), without <stdlib.h>. */
    return 0;
}
[wln@localhost linux]$ ./erron2
errno=2
Mesg:No such file or directory
相关函数 raise,signal
表头文件 #include<sys/types.h>
#include<signal.h>
定义函数 int kill(pid_t pid,int sig);
函数说明
kill()可以用来送参数sig指定的信号给参数pid指定的进程。参数
pid有几种情况:
pid>0 将信号传给进程识别码为pid 的进程。
pid=0 将信号传给和目前进程相同进程组的所有进程
pid=-1 将信号广播传送给系统内所有的进程
pid<-1 将信号传给进程组识别码为pid绝对值的所有进程
参数sig代表的信号编号可参考附录D。若sig为0,则不发送任何信号,只做错误检查,可用来检测pid指定的进程是否存在(CreateLockFile中的kill(other_pid, 0)即是此用法)
返回值 执行成功则返回0,如果有错误则返回-1。
错误代码 EINVAL 参数sig 不合法
ESRCH 参数pid 所指定的进程或进程组不存在
EPERM 权限不够无法传送信号给指定进程