LVM锁机制分析

LVM支持的锁类型

  1. 无锁(No Locking) - 不使用锁,此状态下不能并行执行命令,或者由使用者处理并行问题。
  2. 本地锁(Local Locking) - 当使用本地flock锁,只能在单机环境下使用;当use_lvmlockd=1时,同时使用lvmlockd提供的锁,可用于集群环境。
  3. 外部锁(External Locking) - 使用外部共享库提供的锁功能,可以在配置文件中设置库的名称,共享库只需按要求导出约定的接口函数(locking_init、lock_resource、reset_locking、locking_end,以及可选的query_resource)即可。
  4. 集群锁(Clustered Locking) - 使用内建的集群锁,必须配合clvmd来使用,同时必须关闭lvmetad缓存功能。
  5. 只读锁(Read-Only Locking) - 只读模式,禁止元数据修改。
  6. 哑巴锁(Dummy Locking) - 只读模式,被不需要锁的工具使用。

配置文件:

$ cat /etc/lvm/lvm.conf
...

	# Configuration option global/locking_type.
	# Type of locking to use.
	# 
	# Accepted values:
	#   0
	#     Turns off locking. Warning: this risks metadata corruption if
	#     commands run concurrently.
	#   1
	#     LVM uses local file-based locking, the standard mode.
	#   2
	#     LVM uses the external shared library locking_library.
	#   3
	#     LVM uses built-in clustered locking with clvmd.
	#     This is incompatible with lvmetad. If use_lvmetad is enabled,
	#     LVM prints a warning and disables lvmetad use.
	#   4
	#     LVM uses read-only locking which forbids any operations that
	#     might change metadata.
	#   5
	#     Offers dummy locking for tools that do not need any locks.
	#     You should not need to set this directly; the tools will select
	#     when to use it instead of the configured locking_type.
	#     Do not use lvmetad or the kernel device-mapper driver with this
	#     locking type. It is used by the --readonly option that offers
	#     read-only access to Volume Group metadata that cannot be locked
	#     safely because it belongs to an inaccessible domain and might be
	#     in use, for example a virtual machine image or a disk that is
	#     shared by a clustered machine.
	# 
	locking_type = 1

...

上层抽象接口

在LVM中,所有与锁相关的源码都集中在lib/locking目录中,它向其他所有LVM命令和守护进程提供锁操作的抽象接口,调用者不必关心底层到底使用的是哪种锁。

执行范围

  • 全部
  • 本地
  • 远程
$ cat lib/locking/locking.h
...

/*
 * Node selector strings: every node, the local node only, or remote
 * nodes only.  Defined here only if NODE_ALL is not already defined.
 */
#ifndef NODE_ALL
#  define NODE_ALL     "*"
#  define NODE_LOCAL   "."
#  define NODE_REMOTE  "^"
#endif

...

锁定类型

  • 并发读(LCK_READ,对应DLM的CR模式)
  • 保护读(LCK_PREAD,对应DLM的PR模式)
  • 保护写(LCK_WRITE,对应DLM的PW模式)
  • 独占(LCK_EXCL,对应DLM的EX模式)
...

/*
 * Lock type - these numbers are the same as VMS and the IBM DLM
 *
 * The type occupies the low three bits of a lock flags word; extract it
 * with (flags & LCK_TYPE_MASK).  The word in parentheses names the LV
 * operation that the type stands for when the lock scope is LCK_LV.
 */
#define LCK_TYPE_MASK	0x00000007U

#define LCK_NULL	0x00000000U	/* LCK$_NLMODE (Deactivate) */
#define LCK_READ	0x00000001U	/* LCK$_CRMODE (Activate) */
					/* LCK$_CWMODE */
#define LCK_PREAD       0x00000003U	/* LCK$_PRMODE */
#define LCK_WRITE	0x00000004U	/* LCK$_PWMODE (Suspend) */
#define LCK_EXCL	0x00000005U	/* LCK$_EXMODE (Exclusive) */
#define LCK_UNLOCK      0x00000006U	/* This is ours (Resume) */

...

锁定范围

  • 卷组(或独立的物理卷)
  • 逻辑卷
  • 激活状态
...

/*
 * Lock scope
 *
 * Selects the kind of object a request targets; extract it with
 * (flags & LCK_SCOPE_MASK).  LCK_VG is the all-zero value, so a flags
 * word with neither scope bit set is a VG request.
 */
#define LCK_SCOPE_MASK	0x00001008U
#define LCK_VG		0x00000000U	/* Volume Group */
#define LCK_LV		0x00000008U	/* Logical Volume */
#define LCK_ACTIVATION  0x00001000U	/* Activation */

...

锁定标志

...

/*
 * Lock flags - these numbers are the same as DLM
 */
#define LCKF_NOQUEUE	0x00000001U	/* LKF$_NOQUEUE */
#define LCKF_CONVERT	0x00000004U	/* LKF$_CONVERT */
...

/*
 * Lock bits.
 * Bottom 8 bits except LCK_LOCAL form args[0] in cluster comms.
 *
 * These bits are OR-ed into a flags word next to the lock type and the
 * lock scope.
 */
#define LCK_NONBLOCK	0x00000010U	/* Don't block waiting for lock? */
#define LCK_HOLD	0x00000020U	/* Hold lock when lock_vol returns? */
#define LCK_CLUSTER_VG	0x00000080U	/* VG is clustered */

#define LCK_LOCAL	0x00000040U	/* Don't propagate to other nodes */
#define LCK_REMOTE	0x00000800U	/* Propagate to remote nodes only */
#define LCK_CACHE	0x00000100U	/* Operation on cache only using P_ lock */
#define LCK_ORIGIN_ONLY	0x00000200U	/* Operation should bypass any snapshots */
#define LCK_REVERT	0x00000400U	/* Revert any incomplete change */

...

/*
 * Additional lock bits for cluster communication via args[1]
 *
 * All eight values fit in a single byte.
 */
#define LCK_PARTIAL_MODE        	0x01	/* Partial activation? */
#define LCK_MIRROR_NOSYNC_MODE		0x02	/* Mirrors don't require sync */
#define LCK_DMEVENTD_MONITOR_MODE	0x04	/* Register with dmeventd */

/* Not yet used. */
#define LCK_CONVERT_MODE		0x08	/* Convert existing lock */

#define LCK_TEST_MODE			0x10    /* Test mode: No activation */
#define LCK_ORIGIN_ONLY_MODE		0x20	/* Same meaning as the LCK_ORIGIN_ONLY lock bit */
#define LCK_DMEVENTD_MONITOR_IGNORE     0x40	/* Whether to ignore dmeventd */
#define LCK_REVERT_MODE			0x80	/* Remove inactive tables */

/*
 * Special cases of VG locks.
 *
 * These reserved names are passed as 'vol' to lock_vol() in place of a
 * real volume group name.
 */
#define VG_ORPHANS	"#orphans"	/* Locks all orphan PVs */
#define VG_GLOBAL	"#global"	/* Global lock; also wipes the internal cache */
#define VG_SYNC_NAMES	"#sync_names"	/* Wait until /dev is up-to-date (e.g. with udev) */

...

组合操作

  • 激活、去激活
  • 卷组读、写、解锁、丢弃缓存、备份、同步
  • 逻辑卷独占、挂起、恢复、激活、去激活
...

/*
 * Common combinations
 *
 * Each name below is a ready-made flags word: a lock scope OR-ed with a
 * lock type and any extra lock bits.
 */
#define LCK_NONE		(LCK_VG | LCK_NULL)

/* Take / drop the activation-scope lock. */
#define LCK_ACTIVATE_LOCK	(LCK_ACTIVATION | LCK_WRITE | LCK_HOLD)
#define LCK_ACTIVATE_UNLOCK	(LCK_ACTIVATION | LCK_UNLOCK)

/* VG-scope lock, unlock and cache invalidation. */
#define LCK_VG_READ		(LCK_VG | LCK_READ | LCK_HOLD)
#define LCK_VG_WRITE		(LCK_VG | LCK_WRITE | LCK_HOLD)
#define LCK_VG_UNLOCK		(LCK_VG | LCK_UNLOCK)
#define LCK_VG_DROP_CACHE	(LCK_VG | LCK_WRITE | LCK_CACHE)

/* FIXME: LCK_HOLD abused here */
#define LCK_VG_COMMIT		(LCK_VG | LCK_WRITE | LCK_CACHE | LCK_HOLD)
#define LCK_VG_REVERT		(LCK_VG | LCK_READ  | LCK_CACHE | LCK_HOLD)

#define LCK_VG_BACKUP		(LCK_VG | LCK_CACHE)

#define LCK_VG_SYNC		(LCK_NONE | LCK_CACHE)
#define LCK_VG_SYNC_LOCAL	(LCK_NONE | LCK_CACHE | LCK_LOCAL)

/* LV-scope requests: the lock type selects the LV operation to perform. */
#define LCK_LV_EXCLUSIVE	(LCK_LV | LCK_EXCL)
#define LCK_LV_SUSPEND		(LCK_LV | LCK_WRITE)
#define LCK_LV_RESUME		(LCK_LV | LCK_UNLOCK)
#define LCK_LV_ACTIVATE		(LCK_LV | LCK_READ)
#define LCK_LV_DEACTIVATE	(LCK_LV | LCK_NULL)

...

操作函数

  • 初始化、退出、复位和查询:
$ cat lib/locking/locking.h
...

/*
 * Lifecycle and state queries of the locking layer.
 *
 * init_locking() selects and initialises a locking type; a negative
 * 'type' means "read it from the configuration".  It returns 1 when a
 * locking type has been installed and 0 otherwise.
 */
int init_locking(int type, struct cmd_context *cmd, int suppress_messages);
void fin_locking(void);
void reset_locking(void);
int vg_write_lock_held(void);
int locking_is_clustered(void);
int locking_supports_remote_queries(void);

...
  • 锁定和解锁:
...

/*
 * Single entry point for every lock request.
 *
 * 'flags' is a lock type, a lock scope and optional lock bits OR-ed
 * together; the scope decides how 'vol' is interpreted:
 *
 * LCK_VG:
 *   Lock/unlock on-disk volume group data.
 *   Use VG_ORPHANS to lock all orphan PVs.
 *   Use VG_GLOBAL as a global lock and to wipe the internal cache.
 *   char *vol holds volume group name.
 *   Set LCK_CACHE flag when manipulating 'vol' metadata in the internal cache.
 *   (Like commit, revert or invalidate metadata.)
 *   If more than one lock needs to be held simultaneously, they must be
 *   acquired in alphabetical order of 'vol' (to avoid deadlocks), with
 *   VG_ORPHANS last.
 *
 *   Use VG_SYNC_NAMES to ensure /dev is up-to-date for example, with udev,
 *   by waiting for any asynchronous events issued to have completed.
 *
 * LCK_LV:
 *   Lock/unlock an individual logical volume
 *   char *vol holds lvid
 */
int lock_vol(struct cmd_context *cmd, const char *vol, uint32_t flags, const struct logical_volume *lv);

...

/*
 * Issue an LV-scope lock request keyed by the LV's lvid string, with
 * LCK_LV_CLUSTERED(lv) added to the caller's flags.
 * Evaluates to 0, without calling lock_vol(), when find_replicator_vgs(lv)
 * returns 0; otherwise evaluates to the result of lock_vol().
 *
 * Every argument is parenthesised so that an expression argument (for
 * example a conditional) cannot re-associate with the '|' in the expansion.
 */
#define lock_lv_vol(cmd, lv, flags)	\
	(find_replicator_vgs((lv)) ? \
		lock_vol((cmd), (lv)->lvid.s, (flags) | LCK_LV_CLUSTERED((lv)), (lv)) : \
		0)

...

/*
 * Place temporary exclusive 'activation' lock around an LV locking operation
 * to serialise it.
 *
 * Evaluates to 0 if the activation lock cannot be taken; otherwise to the
 * result of lock_lv_vol().  The activation lock is dropped again before
 * the value is produced, whatever lock_lv_vol() returned.
 * Written as a GNU statement expression so that it yields a value.
 */
#define lock_lv_vol_serially(cmd, lv, flags) \
({ \
	int rr = 0; \
\
	if (lock_activation((cmd), (lv))) { \
		rr = lock_lv_vol((cmd), (lv), (flags)); \
		unlock_activation((cmd), (lv)); \
	} \
	rr; \
})

/*
 * Release the VG lock named 'vol'.
 * Before unlocking: if 'vg' is non-NULL, lvmetad_vg_update_finish(vg) is
 * called, and if 'vol' is a real VG name, sync_dev_names(cmd) is called.
 * A failing step is only recorded with 'stack'; the remaining steps still
 * run and the macro yields no value.
 */
#define unlock_vg(cmd, vg, vol)	\
	do { \
		if (vg && !lvmetad_vg_update_finish(vg)) \
			stack; \
		if (is_real_vg(vol) && !sync_dev_names(cmd)) \
			stack; \
		if (!lock_vol(cmd, vol, LCK_VG_UNLOCK, NULL)) \
			stack;	\
	} while (0)
/* unlock_vg() followed by release_vg(vg). */
#define unlock_and_release_vg(cmd, vg, vol) \
	do { \
		unlock_vg(cmd, vg, vol); \
		release_vg(vg); \
	} while (0)

...
  • 挂起和恢复
...

/*
 * Resume an LV (LCK_LV_RESUME), then drop the activation lock.
 * Evaluates to the result of the resume request; the result of
 * unlock_activation() is discarded.
 */
#define resume_lv(cmd, lv)	\
({ \
	int rr = lock_lv_vol((cmd), (lv), LCK_LV_RESUME); \
	unlock_activation((cmd), (lv)); \
	rr; \
})
/* Resume with LCK_ORIGIN_ONLY; does not touch the activation lock. */
#define resume_lv_origin(cmd, lv)	lock_lv_vol(cmd, lv, LCK_LV_RESUME | LCK_ORIGIN_ONLY)
/* Same sequence as resume_lv(), with LCK_REVERT added to the request. */
#define revert_lv(cmd, lv)	\
({ \
	int rr = lock_lv_vol((cmd), (lv), LCK_LV_RESUME | LCK_REVERT); \
\
	unlock_activation((cmd), (lv)); \
	rr; \
})
/*
 * Take the activation lock, then suspend the LV.  Evaluates to 0 without
 * suspending if the activation lock cannot be taken.  The activation lock
 * is not released here.
 */
#define suspend_lv(cmd, lv)	\
	(lock_activation((cmd), (lv)) ? lock_lv_vol((cmd), (lv), LCK_LV_SUSPEND | LCK_HOLD) : 0)
/* Suspend with LCK_ORIGIN_ONLY; does not touch the activation lock. */
#define suspend_lv_origin(cmd, lv)	lock_lv_vol(cmd, lv, LCK_LV_SUSPEND | LCK_HOLD | LCK_ORIGIN_ONLY)

...
  • 激活和去激活:
...

/*
 * Activation locks are wrapped around activation commands that have to
 * be processed atomically one-at-a-time.
 * If a VG WRITE lock is held, an activation lock is redundant.
 *
 * FIXME Test and support this for thin and cache types.
 * FIXME Add cluster support.
 *
 * lv_supports_activation_locking(lv) is true for an LV that is neither in
 * a clustered VG nor of a thin or cache type.
 * lock_activation() and unlock_activation() evaluate to 1 without calling
 * lock_vol() when a VG write lock is held and the LV supports activation
 * locking; otherwise they evaluate to the result of lock_vol() on the
 * LV's lvid string.
 */
#define lv_supports_activation_locking(lv) (!vg_is_clustered((lv)->vg) && !lv_is_thin_type(lv) && !lv_is_cache_type(lv))
#define lock_activation(cmd, lv)	(vg_write_lock_held() && lv_supports_activation_locking(lv) ? 1 : lock_vol(cmd, (lv)->lvid.s, LCK_ACTIVATE_LOCK, lv))
#define unlock_activation(cmd, lv)	(vg_write_lock_held() && lv_supports_activation_locking(lv) ? 1 : lock_vol(cmd, (lv)->lvid.s, LCK_ACTIVATE_UNLOCK, lv))

...

/*
 * Activation and deactivation requests.  All but activate_lv_excl_remote()
 * go through lock_lv_vol_serially() and are therefore wrapped in the
 * activation lock.
 */
#define deactivate_lv(cmd, lv)	lock_lv_vol_serially(cmd, lv, LCK_LV_DEACTIVATE)

#define activate_lv(cmd, lv)	lock_lv_vol_serially(cmd, lv, LCK_LV_ACTIVATE | LCK_HOLD)
/* Exclusive activation with LCK_LOCAL set. */
#define activate_lv_excl_local(cmd, lv)	\
				lock_lv_vol_serially(cmd, lv, LCK_LV_EXCLUSIVE | LCK_HOLD | LCK_LOCAL)
/* Exclusive activation with LCK_REMOTE set; calls lock_lv_vol() directly. */
#define activate_lv_excl_remote(cmd, lv)	\
				lock_lv_vol(cmd, lv, LCK_LV_EXCLUSIVE | LCK_HOLD | LCK_REMOTE)

/* Forward declaration: the prototype below only needs a pointer to it. */
struct logical_volume;
int activate_lv_excl(struct cmd_context *cmd, struct logical_volume *lv);

/* Shared activation / deactivation with LCK_LOCAL set. */
#define activate_lv_local(cmd, lv)	\
	lock_lv_vol_serially(cmd, lv, LCK_LV_ACTIVATE | LCK_HOLD | LCK_LOCAL)
#define deactivate_lv_local(cmd, lv)	\
	lock_lv_vol_serially(cmd, lv, LCK_LV_DEACTIVATE | LCK_LOCAL)

...
  • 缓存操作
...

/*
 * Cached-metadata operations.  Each one is a lock_vol() call on the VG
 * name with one of the LCK_CACHE flag combinations and no LV.
 */
#define drop_cached_metadata(vg)	\
	lock_vol((vg)->cmd, (vg)->name, LCK_VG_DROP_CACHE, NULL)
#define remote_commit_cached_metadata(vg)	\
	lock_vol((vg)->cmd, (vg)->name, LCK_VG_COMMIT, NULL)
#define remote_revert_cached_metadata(vg)	\
	lock_vol((vg)->cmd, (vg)->name, LCK_VG_REVERT, NULL)
#define remote_backup_metadata(vg)	\
	lock_vol((vg)->cmd, (vg)->name, LCK_VG_BACKUP, NULL)

...

下层抽象接口

LVM对下层的具体实现使用统一的接口,每种新增的锁类型只需实现其接口函数即可。

接口抽象数据结构

$ cat lib/locking/locking_types.h
...

/*
 * Callback that carries out one lock request on 'resource'.  'flags' holds
 * the lock type, scope and lock bits; 'lv' is the logical volume concerned,
 * if any.
 */
typedef int (*lock_resource_fn) (struct cmd_context * cmd, const char *resource,
				 uint32_t flags, const struct logical_volume *lv);
/* Callback that reports through '*mode' how 'resource' is locked on 'node'. */
typedef int (*query_resource_fn) (const char *resource, const char *node, int *mode);

/* Callbacks that shut down and reset a locking implementation. */
typedef void (*fin_lock_fn) (void);
typedef void (*reset_lock_fn) (void);

/* Capability bits stored in locking_type.flags. */
#define LCK_PRE_MEMLOCK			0x00000001	/* Is memlock() needed before calls? */
#define LCK_CLUSTERED			0x00000002
#define LCK_SUPPORTS_REMOTE_QUERIES	0x00000004

/*
 * One locking implementation: its capability flags and its callbacks.
 * Each init_*_locking() function fills one of these in.
 */
struct locking_type {
	uint32_t flags;
	lock_resource_fn lock_resource;
	query_resource_fn query_resource;

	reset_lock_fn reset_locking;
	fin_lock_fn fin_locking;
};

...

锁初始化函数

目前已实现的6种锁类型的初始化函数:

...

/*
 * Locking types
 *
 * One initialiser per locking implementation; each fills in the supplied
 * struct locking_type.  init_locking() picks one from the locking_type
 * value: 0 no locking, 1 file, 2 external, 3 cluster, 4 read-only, 5 dummy.
 * The void initialisers cannot fail; the others return 1 on success and
 * 0 on failure.
 */
void init_no_locking(struct locking_type *locking, struct cmd_context *cmd,
		     int suppress_messages);

void init_dummy_locking(struct locking_type *locking, struct cmd_context *cmd,
			int suppress_messages);

int init_readonly_locking(struct locking_type *locking, struct cmd_context *cmd,
			  int suppress_messages);

int init_file_locking(struct locking_type *locking, struct cmd_context *cmd,
		      int suppress_messages);

int init_external_locking(struct locking_type *locking, struct cmd_context *cmd,
			  int suppress_messages);

int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd,
			 int suppress_messages);

...

锁初始化:

$ cat lib/locking/locking.c
...

/*
 * Select a locking type and install its callbacks in _locking.
 *
 * type: locking type; if < 0, then read config tree value
 * cmd: command context used for the configuration lookups
 * suppress_messages: non-zero quietens failure messages; forced to 1 when
 *	LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES is set in the environment
 *
 * Returns 1 once a locking type has been installed, 0 otherwise.
 *
 * If external (2) or clustered (3) locking cannot be initialised and the
 * fallback_to_local_locking setting is enabled, file-based locking is
 * tried.  If that also fails but ignorelockingfailure() is set, read-only
 * locking is installed instead.
 */
int init_locking(int type, struct cmd_context *cmd, int suppress_messages)
{
	if (getenv("LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES"))
		suppress_messages = 1;

	if (type < 0)
		type = find_config_tree_int(cmd, global_locking_type_CFG, NULL);

	_blocking_supported = find_config_tree_bool(cmd, global_wait_for_locks_CFG, NULL);

	switch (type) {
	case 0:
		init_no_locking(&_locking, cmd, suppress_messages);
		log_warn_suppress(suppress_messages,
			"WARNING: Locking disabled. Be careful! "
			"This could corrupt your metadata.");
		return 1;

	case 1:
		log_very_verbose("%sFile-based locking selected.",
				 _blocking_supported ? "" : "Non-blocking ");

		if (!init_file_locking(&_locking, cmd, suppress_messages)) {
			log_error_suppress(suppress_messages,
					   "File-based locking initialisation failed.");
			break;
		}
		return 1;

#ifdef HAVE_LIBDL
	case 2:
		if (!is_static()) {
			log_very_verbose("External locking selected.");
			if (init_external_locking(&_locking, cmd, suppress_messages))
				return 1;
		}
		if (!find_config_tree_bool(cmd, global_fallback_to_clustered_locking_CFG, NULL)) {
			log_error_suppress(suppress_messages, "External locking initialisation failed.");
			break;
		}
#  ifdef CLUSTER_LOCKING_INTERNAL
		log_very_verbose("Falling back to internal clustered locking.");
		/* Fall through */
#  else
		/*
		 * Built-in cluster locking is not compiled in, so there is
		 * nothing to fall back to.  Leave the switch explicitly instead
		 * of dropping into the read-only case below, so that the
		 * local-locking fallback after the switch still gets its turn.
		 */
		log_error_suppress(suppress_messages, "External locking initialisation failed.");
		break;
#  endif
#endif

#ifdef CLUSTER_LOCKING_INTERNAL
	case 3:
		log_very_verbose("Cluster locking selected.");
		if (!init_cluster_locking(&_locking, cmd, suppress_messages)) {
			log_error_suppress(suppress_messages,
					   "Internal cluster locking initialisation failed.");
			break;
		}
		return 1;
#endif

	case 4:
		log_verbose("Read-only locking selected. "
			    "Only read operations permitted.");
		if (!init_readonly_locking(&_locking, cmd, suppress_messages))
			break;
		return 1;

	case 5:
		init_dummy_locking(&_locking, cmd, suppress_messages);
		log_verbose("Locking disabled for read-only access.");
		return 1;

	default:
		log_error("Unknown locking type requested.");
		return 0;
	}

	/* Only reached when the selected type failed to initialise. */
	if ((type == 2 || type == 3) &&
	    find_config_tree_bool(cmd, global_fallback_to_local_locking_CFG, NULL)) {
		log_warn_suppress(suppress_messages, "WARNING: Falling back to local file-based locking.");
		log_warn_suppress(suppress_messages,
				  "Volume Groups with the clustered attribute will "
				  "be inaccessible.");
		if (init_file_locking(&_locking, cmd, suppress_messages))
			return 1;
		else
			log_error_suppress(suppress_messages,
					   "File-based locking initialisation failed.");
	}

	if (!ignorelockingfailure())
		return 0;

	/* Last resort: carry on with read-only locking. */
	log_verbose("Locking disabled - only read operations permitted.");
	init_readonly_locking(&_locking, cmd, suppress_messages);

	return 1;
}

...

锁功能具体实现

外部锁

$ cat lib/locking/external_locking.c
...

/* Handle of the loaded external locking library; NULL while none is loaded. */
static void *_locking_lib = NULL;
/*
 * Entry points resolved from the library by init_external_locking():
 * "reset_locking", "locking_end", "lock_resource", "locking_init" and the
 * optional "query_resource".
 */
static void (*_reset_fn) (void) = NULL;
static void (*_end_fn) (void) = NULL;
static int (*_lock_fn) (struct cmd_context * cmd, const char *resource,
			uint32_t flags) = NULL;
static int (*_init_fn) (int type, struct dm_config_tree * cft,
			uint32_t *flags) = NULL;
static int (*_lock_query_fn) (const char *resource, int *mode) = NULL;

/*
 * lock_resource callback for external locking.
 *
 * Returns 0 when no library lock function is available.  A VG_SYNC_NAMES
 * request is answered locally with fs_unlock() and reported as success;
 * every other request is forwarded to the library and its result returned.
 * 'lv' is not used.
 */
static int _lock_resource(struct cmd_context *cmd, const char *resource,
			  uint32_t flags, const struct logical_volume *lv __attribute__((unused)))
{
	if (!_lock_fn)
		return 0;

	if (strcmp(resource, VG_SYNC_NAMES) != 0)
		return _lock_fn(cmd, resource, flags);

	/* Hide this lock request from external locking */
	fs_unlock();

	return 1;
}

static void _fin_external_locking(void)
{
	if (_end_fn)
		_end_fn();

	dlclose(_locking_lib);

	_locking_lib = NULL;
	_init_fn = NULL;
	_end_fn = NULL;
	_lock_fn = NULL;
	_reset_fn = NULL;
}

/*
 * reset_locking callback for external locking: forward to the library's
 * reset function when one has been resolved.
 */
static void _reset_external_locking(void)
{
	if (!_reset_fn)
		return;

	_reset_fn();
}

/*
 * Set up external locking: load the shared library named by the
 * locking_library setting and resolve its entry points.
 *
 * Returns 1 if a library is already loaded; 0 if no library name is
 * configured, the library cannot be loaded, or a required symbol
 * ("locking_init", "lock_resource", "reset_locking", "locking_end") is
 * missing; otherwise the result of the library's locking_init().
 * A missing "query_resource" only produces a warning.
 */
int init_external_locking(struct locking_type *locking, struct cmd_context *cmd,
			  int suppress_messages)
{
	const char *libname;

	if (_locking_lib) {
		log_error_suppress(suppress_messages, "External locking already initialised");
		return 1;
	}

	locking->lock_resource = _lock_resource;
	locking->fin_locking = _fin_external_locking;
	locking->reset_locking = _reset_external_locking;
	locking->flags = 0;

	if (!(libname = find_config_tree_str(cmd, global_locking_library_CFG, NULL)))
		return_0;

	if (!(_locking_lib = load_shared_library(cmd, libname, "locking", 1)))
		return_0;

	/* Get the functions we need */
	if (!(_init_fn = dlsym(_locking_lib, "locking_init")) ||
	    !(_lock_fn = dlsym(_locking_lib, "lock_resource")) ||
	    !(_reset_fn = dlsym(_locking_lib, "reset_locking")) ||
	    !(_end_fn = dlsym(_locking_lib, "locking_end"))) {
		log_error_suppress(suppress_messages, "Shared library %s does "
				   "not contain locking functions", libname);
		dlclose(_locking_lib);
		_locking_lib = NULL;
		/*
		 * Symbols resolved before the failing lookup point into the
		 * library just unloaded.  The callbacks installed above test
		 * these pointers, so clear them.
		 */
		_init_fn = NULL;
		_lock_fn = NULL;
		_reset_fn = NULL;
		_end_fn = NULL;
		return 0;
	}

	if (!(_lock_query_fn = dlsym(_locking_lib, "query_resource")))
		log_warn_suppress(suppress_messages, "WARNING: %s: _query_resource() "
				  "missing: Using inferior activation method.", libname);

	log_verbose("Loaded external locking library %s", libname);
	/*
	 * NOTE(review): if locking_init() fails the library stays loaded and
	 * _locking_lib stays set, so a later call reports "already
	 * initialised" and returns 1 - confirm whether that is intended.
	 */
	return _init_fn(2, cmd->cft, &locking->flags);
}

...

无锁、只读锁、哑巴锁

$ cat lib/locking/no_locking.c
...

/*
 * No locking
 *
 * There is no state to tear down or reset, so both callbacks are
 * deliberately empty.
 */

static void _no_fin_locking(void)
{
}

static void _no_reset_locking(void)
{
}

/*
 * lock_resource callback used when locking is disabled.
 *
 * No lock is taken.  Activation-scope requests succeed immediately.
 * VG-scope requests succeed after calling fs_unlock() for VG_SYNC_NAMES.
 * LV-scope requests perform the LV operation that the lock type stands
 * for and return its result; a lock type with no operation succeeds.
 * Any other scope is logged as an error and fails.
 */
static int _no_lock_resource(struct cmd_context *cmd, const char *resource,
			     uint32_t flags, const struct logical_volume *lv)
{
	const uint32_t scope = flags & LCK_SCOPE_MASK;
	const int origin_only = (flags & LCK_ORIGIN_ONLY) ? 1 : 0;
	const int revert = (flags & LCK_REVERT) ? 1 : 0;
	int exclusive = 0;

	if (scope == LCK_ACTIVATION)
		return 1;

	if (scope == LCK_VG) {
		if (!strcmp(resource, VG_SYNC_NAMES))
			fs_unlock();
		return 1;
	}

	if (scope != LCK_LV) {
		log_error("Unrecognised lock scope: %d", scope);
		return 0;
	}

	switch (flags & LCK_TYPE_MASK) {
	case LCK_NULL:
		return lv_deactivate(cmd, resource, lv_committed(lv));
	case LCK_UNLOCK:
		return lv_resume_if_active(cmd, resource, origin_only, 0,
					   revert, lv_committed(lv));
	case LCK_WRITE:
		return lv_suspend_if_active(cmd, resource, origin_only, 0,
					    lv_committed(lv), lv);
	case LCK_EXCL:
		exclusive = 1;
		/* Fall through: same call as LCK_READ, but exclusive */
	case LCK_READ:
		return lv_activate_with_filter(cmd, resource, exclusive,
					       (lv->status & LV_NOSCAN) ? 1 : 0,
					       (lv->status & LV_TEMPORARY) ? 1 : 0,
					       lv_committed(lv));
	default:
		return 1;
	}
}

/*
 * query_resource callback used when locking is disabled.
 * Logs that the lock is treated as not held and returns 1.  Neither 'node'
 * nor '*mode' is touched, so '*mode' keeps the value the caller gave it.
 */
static int _no_query_resource(const char *resource, const char *node, int *mode)
{
	log_very_verbose("Locking is disabled: Treating lock %s as not held.",
			 resource);
	return 1;
}

/*
 * lock_resource callback for read-only (and dummy) locking.
 *
 * Refuses a VG-scope write lock unless it carries LCK_CACHE or names
 * VG_GLOBAL; everything else is handed to _no_lock_resource().
 * Returns 0 for a refused request, otherwise the result of
 * _no_lock_resource().
 */
static int _readonly_lock_resource(struct cmd_context *cmd,
				   const char *resource,
				   uint32_t flags, const struct logical_volume *lv)
{
	const int vg_write = (flags & LCK_TYPE_MASK) == LCK_WRITE &&
			     (flags & LCK_SCOPE_MASK) == LCK_VG;

	if (vg_write && !(flags & LCK_CACHE) && strcmp(resource, VG_GLOBAL) != 0) {
		log_error("Read-only locking type set. "
			  "Write locks are prohibited.");
		return 0;
	}

	return _no_lock_resource(cmd, resource, flags, lv);
}

/*
 * Install the "no locking" callbacks.
 * The resulting locking type sets LCK_CLUSTERED in its flags.
 * 'cmd' and 'suppress_messages' are not used.
 */
void init_no_locking(struct locking_type *locking, struct cmd_context *cmd __attribute__((unused)),
		    int suppress_messages)
{
	locking->flags = LCK_CLUSTERED;

	locking->fin_locking = _no_fin_locking;
	locking->reset_locking = _no_reset_locking;
	locking->query_resource = _no_query_resource;
	locking->lock_resource = _no_lock_resource;
}

/*
 * Install the read-only locking callbacks: lock requests go through
 * _readonly_lock_resource(), the remaining callbacks are the "no locking"
 * ones.  No capability flags are set.
 * Always returns 1.  'cmd' and 'suppress_messages' are not used.
 */
int init_readonly_locking(struct locking_type *locking, struct cmd_context *cmd __attribute__((unused)),
			  int suppress_messages)
{
	locking->flags = 0;

	locking->fin_locking = _no_fin_locking;
	locking->reset_locking = _no_reset_locking;
	locking->query_resource = _no_query_resource;
	locking->lock_resource = _readonly_lock_resource;

	return 1;
}

/*
 * Install the dummy locking callbacks: the same callbacks as read-only
 * locking, but with LCK_CLUSTERED set in the flags.
 * 'cmd' and 'suppress_messages' are not used.
 */
void init_dummy_locking(struct locking_type *locking, struct cmd_context *cmd __attribute__((unused)),
		    int suppress_messages)
{
	locking->flags = LCK_CLUSTERED;

	locking->fin_locking = _no_fin_locking;
	locking->reset_locking = _no_reset_locking;
	locking->query_resource = _no_query_resource;
	locking->lock_resource = _readonly_lock_resource;
}

...

本地锁

可以看出本地锁其实是使用系统提供的flock函数实现的。

$ cat lib/locking/file_locking.c
...

/* Directory that holds the lock files; filled in by init_file_locking(). */
static char _lock_dir[PATH_MAX];

/*
 * fin_locking callback for file locking: calls release_flocks(1).
 * NOTE(review): the meaning of the argument is defined by release_flocks(),
 * which is not visible here.
 */
static void _fin_file_locking(void)
{
	release_flocks(1);
}

/* reset_locking callback for file locking: calls release_flocks(0). */
static void _reset_file_locking(void)
{
	release_flocks(0);
}

/*
 * lock_resource callback for local file-based locking.
 *
 * Activation and VG scopes map the request onto a lock file below
 * _lock_dir ("A_", "P_" or "V_" prefix) and pass it to lock_file().
 * LV scope takes no file lock: the lock type selects the LV operation
 * to perform (resume, deactivate, activate, suspend).
 *
 * Returns 1 on success and 0 on failure or for an unrecognised scope.
 */
static int _file_lock_resource(struct cmd_context *cmd, const char *resource,
			       uint32_t flags, const struct logical_volume *lv)
{
	char lockfile[PATH_MAX];
	unsigned origin_only = (flags & LCK_ORIGIN_ONLY) ? 1 : 0;
	unsigned revert = (flags & LCK_REVERT) ? 1 : 0;

	switch (flags & LCK_SCOPE_MASK) {
	case LCK_ACTIVATION:
		/* The file name is built from 'resource' without its first character */
		if (dm_snprintf(lockfile, sizeof(lockfile),
				"%s/A_%s", _lock_dir, resource + 1) < 0) {
			log_error("Too long locking filename %s/A_%s.", _lock_dir, resource + 1);
			return 0;
		}

		if (!lock_file(lockfile, flags))
			return_0;
		break;
	case LCK_VG:
		if (!strcmp(resource, VG_SYNC_NAMES)) {
			fs_unlock();
		} else if (strcmp(resource, VG_GLOBAL))
			/* Skip cache refresh for VG_GLOBAL - the caller handles it */
			lvmcache_drop_metadata(resource, 0);

		/* LCK_CACHE does not require a real lock */
		if (flags & LCK_CACHE)
			break;

		/*
		 * Orphan and global names get a "P_" file named without their
		 * first character; any other VG gets "V_" plus its full name.
		 */
		if (is_orphan_vg(resource) || is_global_vg(resource)) {
			if (dm_snprintf(lockfile, sizeof(lockfile),
					"%s/P_%s", _lock_dir, resource + 1) < 0) {
				log_error("Too long locking filename %s/P_%s.",
					  _lock_dir, resource + 1);
				return 0;
			}
		} else
			if (dm_snprintf(lockfile, sizeof(lockfile),
					"%s/V_%s", _lock_dir, resource) < 0) {
				log_error("Too long locking filename %s/V_%s.",
					  _lock_dir, resource);
				return 0;
			}

		if (!lock_file(lockfile, flags))
			return_0;
		break;
	case LCK_LV:
		/* No file lock here: the lock type selects an LV operation */
		switch (flags & LCK_TYPE_MASK) {
		case LCK_UNLOCK:
			log_very_verbose("Unlocking LV %s%s%s", resource, origin_only ? " without snapshots" : "", revert ? " (reverting)" : "");
			if (!lv_resume_if_active(cmd, resource, origin_only, 0, revert, lv_committed(lv)))
				return 0;
			break;
		case LCK_NULL:
			log_very_verbose("Locking LV %s (NL)", resource);
			if (!lv_deactivate(cmd, resource, lv_committed(lv)))
				return 0;
			break;
		case LCK_READ:
			log_very_verbose("Locking LV %s (R)", resource);
			if (!lv_activate_with_filter(cmd, resource, 0, (lv->status & LV_NOSCAN) ? 1 : 0,
						     (lv->status & LV_TEMPORARY) ? 1 : 0, lv_committed(lv)))
				return 0;
			break;
		case LCK_PREAD:
			/* Logged only; no operation is performed */
			log_very_verbose("Locking LV %s (PR) - ignored", resource);
			break;
		case LCK_WRITE:
			log_very_verbose("Locking LV %s (W)%s", resource, origin_only ? " without snapshots" : "");
			if (!lv_suspend_if_active(cmd, resource, origin_only, 0, lv_committed(lv), lv))
				return 0;
			break;
		case LCK_EXCL:
			log_very_verbose("Locking LV %s (EX)", resource);
			if (!lv_activate_with_filter(cmd, resource, 1, (lv->status & LV_NOSCAN) ? 1 : 0,
						     (lv->status & LV_TEMPORARY) ? 1 : 0, lv_committed(lv)))
				return 0;
			break;
		default:
			break;
		}
		break;
	default:
		log_error("Unrecognised lock scope: %d",
			  flags & LCK_SCOPE_MASK);
		return 0;
	}

	return 1;
}

/*
 * Set up local file-based locking.
 *
 * Installs the file-locking callbacks, copies the configured locking
 * directory into _lock_dir and creates that directory.
 *
 * Returns 0 if the directory path does not fit into _lock_dir, if the
 * directory cannot be created, or if the access check fails with EROFS;
 * otherwise returns 1.  'suppress_messages' is not used.
 */
int init_file_locking(struct locking_type *locking, struct cmd_context *cmd,
		      int suppress_messages)
{
	const char *locking_dir;
	int created;

	init_flock(cmd);

	locking->flags = 0;
	locking->lock_resource = _file_lock_resource;
	locking->fin_locking = _fin_file_locking;
	locking->reset_locking = _reset_file_locking;

	/* Get lockfile directory from config file */
	locking_dir = find_config_tree_str(cmd, global_locking_dir_CFG, NULL);
	if (!dm_strncpy(_lock_dir, locking_dir, sizeof(_lock_dir))) {
		log_error("Path for locking_dir %s is invalid.", locking_dir);
		return 0;
	}

	/* Create the directory between setting and clearing the SELinux context */
	(void) dm_prepare_selinux_context(_lock_dir, S_IFDIR);
	created = dm_create_dir(_lock_dir);
	(void) dm_prepare_selinux_context(NULL, 0);

	if (!created)
		return 0;

	/* Trap a read-only file system; any other access() failure is tolerated */
	if (access(_lock_dir, R_OK | W_OK | X_OK) != -1)
		return 1;

	return (errno == EROFS) ? 0 : 1;
}

...

集群锁

可以看出,这里其实只是使用套接字与本地clvmd守护进程进行通信而已,具体的锁的实现由clvmd来完成。

$ cat lib/locking/cluster_locking.c
...

/*
 * One reply received from clvmd.
 * NOTE(review): presumably one entry per responding node - confirm against
 * the code that fills it in, which is not visible here.
 */
typedef struct lvm_response {
	char node[255];
	char *response;
	int status;
	int len;
} lvm_response_t;

/*
 * This gets stuck at the start of memory we allocate so we
 * can sanity-check it at deallocation time
 */
#define LVM_SIGNATURE 0x434C564D

/*
 * NOTE: the LVMD uses the socket FD as the client ID, this means
 * that any client that calls fork() will inherit the context of
 * its parent.
 */
/* Socket connected to the local clvmd; -1 while not connected. */
static int _clvmd_sock = -1;

/* FIXME Install SIGPIPE handler? */

/* Open connection to the Cluster Manager daemon */
/*
 * Connect to the local clvmd over its UNIX-domain stream socket.
 *
 * Returns the connected socket descriptor, or -1 on failure.  When
 * connect() fails the socket is closed again and errno is restored to the
 * value connect() left behind.
 */
static int _open_local_sock(int suppress_messages)
{
	struct sockaddr_un sockaddr = { .sun_family = AF_UNIX };
	int fd;
	int saved_errno;

	if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) {
		log_error("%s: clvmd socket name too long.", CLVMD_SOCKNAME);
		return -1;
	}

	/* Open local socket */
	fd = socket(PF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		log_error_suppress(suppress_messages, "Local socket "
				   "creation failed: %s", strerror(errno));
		return -1;
	}

	if (!connect(fd, (struct sockaddr *) &sockaddr, sizeof(sockaddr)))
		return fd;

	/* Logging and close() may overwrite errno; keep connect()'s value */
	saved_errno = errno;

	log_error_suppress(suppress_messages, "connect() failed "
			   "on local socket: %s", strerror(errno));
	if (close(fd))
		stack;

	errno = saved_errno;

	return -1;
}

...

/*
 * Set up built-in cluster locking.
 *
 * Installs the cluster-locking callbacks, sets the LCK_PRE_MEMLOCK,
 * LCK_CLUSTERED and LCK_SUPPORTS_REMOTE_QUERIES flags, and opens the
 * connection to the local clvmd.
 *
 * Returns 1 when the connection is open, 0 when it could not be opened.
 * 'cmd' is not used.
 */
int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd,
			 int suppress_messages)
{
	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED | LCK_SUPPORTS_REMOTE_QUERIES;

	locking->lock_resource = _lock_resource;
	locking->query_resource = _query_resource;
	locking->reset_locking = _reset_locking;
	locking->fin_locking = _locking_end;

	_clvmd_sock = _open_local_sock(suppress_messages);

	return (_clvmd_sock == -1) ? 0 : 1;
}

...

转载于:https://my.oschina.net/LastRitter/blog/1535852

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值