Mutex lock(互斥锁)是在多线程编程中最常用的锁,它的基本原理是sleep waiting,当锁有冲突时,等锁的一方会挂起线程进入睡眠状态,等待持锁方释放锁时发出的信号到来,再唤醒挂起的线程。
Spin lock(自旋锁)的基本原理是busy waiting,当锁有冲突时,不会挂起线程,而是通过CPU不停地轮询锁状态,一旦持锁的一方离开临界区,它就可以马上尝试获取锁。
那在实际使用时该如何选择该用哪种锁呢?小编特意编写了如下代码来验证不同情况下两种锁的不同表现:
/*
* mutex_spin_lock_test.c
*
* Created on: Dec 10, 2022
* Author: Xiong Zhen
*/
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <semaphore.h>
#include <time.h>
#include <assert.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
/* Signature shared by pthread_mutex_lock/unlock and pthread_spin_lock/unlock
 * so one struct can drive either lock type through function pointers. */
typedef int (*lockfun)(volatile void *);

/*
 * Shared state for one benchmark run. A single instance is shared by all
 * worker threads; the counters are only updated while the lock is held.
 */
struct arg {
	uint64_t total_cost_ns;        /* summed time spent inside the critical section */
	uint64_t avg_cost_per_req_ns;  /* total_cost_ns / total_requests */
	uint64_t total_requests;       /* number of completed critical sections */
	uint64_t total_test_time_ns;   /* wall-clock duration of the whole run */
	uint64_t requests_per_seconds; /* throughput (QPS) */
	double avg_cpu_usage;          /* average process CPU usage in percent */
	volatile void *lock_obj;       /* points at a pthread mutex or spinlock */
	lockfun lock;                  /* acquire function for lock_obj */
	lockfun unlock;                /* release function for lock_obj */
	int repeat_cnt;                /* busy-loop iterations per critical section */
	/* volatile: written by the controlling thread, polled by the workers.
	 * Strictly this is still a data race under C11 — atomic_bool would be
	 * the fully correct type — but volatile prevents the read from being
	 * hoisted out of the worker loop. */
	volatile bool stop;
};
/*
 * Worker thread body: repeatedly acquire the lock, burn repeat_cnt loop
 * iterations to simulate work, account the time spent inside the critical
 * section, and release the lock — until the controller sets arg->stop.
 */
void *lock_thread(void *p) {
	struct arg *arg = p;
	struct timespec start;
	struct timespec now;
	volatile int i; /* volatile so the work loop is not optimized away */

	/* NOTE(review): arg->stop is written by another thread without
	 * synchronization; under C11 this is a data race — atomic_bool
	 * would be the strictly correct type. */
	while (!arg->stop) {
		arg->lock(arg->lock_obj);
		clock_gettime(CLOCK_MONOTONIC, &start);
		for (i = 0; i < arg->repeat_cnt; i++) {}
		arg->total_requests++;
		clock_gettime(CLOCK_MONOTONIC, &now);
		/* Widen tv_sec before scaling: time_t may be 32-bit, and
		 * tv_sec * 1e9 would overflow without the cast. */
		arg->total_cost_ns +=
			((uint64_t)now.tv_sec * 1000000000u + (uint64_t)now.tv_nsec)
			- ((uint64_t)start.tv_sec * 1000000000u + (uint64_t)start.tv_nsec);
		arg->unlock(arg->lock_obj);
	}
	return NULL;
}
int run_cmd_to_double(char *cmd, double *result)
{
int rc = 0;
FILE *fp = NULL;
char buf[1024];
if ((fp = popen(cmd, "re")) != NULL) {
if (result != NULL) {
fgets(buf, sizeof(buf), fp);
*result = atof(buf);
}
pclose(fp);
} else {
rc = -1;
}
return rc;
}
/*
 * Sample this process's CPU usage (%) by shelling out to top(1).
 *
 * NOTE(review): the pipeline greps for "mutex", i.e. it assumes the
 * executable's name contains "mutex" (mutex_spin_lock_test) — rename the
 * binary and the grep silently matches nothing. It also assumes awk
 * column $10 is the %CPU field of top's process row on this system
 * (CentOS 7) — TODO confirm on other distros, where the column layout
 * of top may differ.
 */
double get_current_process_cpu_usage() {
pid_t pid = getpid();
char cmd[1024];
double ret;
int rc = 0;
/* -p pid restricts top to this process; grep drops the header lines. */
sprintf(cmd, "top -n 1 -p %d | grep mutex | awk '{print $10}'", pid);
rc = run_cmd_to_double(cmd, &ret);
/* Abort the benchmark if sampling fails rather than report garbage. */
assert(rc == 0);
return ret;
}
/*
 * Run the benchmark with tnum worker threads for ~10 seconds, sampling the
 * process CPU usage once per second, then stop the workers and fill in the
 * derived statistics (avg cost per request, QPS) in *arg.
 */
void lock_test(struct arg *arg, int tnum) {
	pthread_t pid[tnum];
	int i;
	int started = 0;
	struct timespec now;
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < tnum; i++) {
		/* The original ignored pthread_create failures and would then
		 * join uninitialized thread handles. Only join what started. */
		if (pthread_create(&pid[i], NULL, lock_thread, arg) != 0)
			break;
		started++;
	}
	/* 10 one-second samples define the measurement window. */
	for (i = 0; i < 10; i++) {
		sleep(1);
		arg->avg_cpu_usage += get_current_process_cpu_usage();
	}
	arg->avg_cpu_usage /= 10;
	arg->stop = true;
	for (i = 0; i < started; i++)
		pthread_join(pid[i], NULL);
	clock_gettime(CLOCK_MONOTONIC, &now);
	/* Widen tv_sec before scaling to avoid overflow on 32-bit time_t. */
	arg->total_test_time_ns +=
		((uint64_t)now.tv_sec * 1000000000u + (uint64_t)now.tv_nsec)
		- ((uint64_t)start.tv_sec * 1000000000u + (uint64_t)start.tv_nsec);
	/* Guard the division: no completed request means no average to report. */
	if (arg->total_requests > 0)
		arg->avg_cost_per_req_ns = arg->total_cost_ns / arg->total_requests;
	arg->requests_per_seconds =
		arg->total_requests / (((double)arg->total_test_time_ns) / 1000000000);
}
/* Prepare *arg for a mutex-based run: clear all statistics, initialize the
 * caller-owned mutex, and install the pthread mutex entry points. */
void init_arg_mutex(struct arg *arg, pthread_mutex_t *mlock, int repeat) {
	*arg = (struct arg){0};
	pthread_mutex_init(mlock, NULL);
	arg->repeat_cnt = repeat;
	arg->lock_obj = mlock;
	arg->lock = (lockfun)pthread_mutex_lock;
	arg->unlock = (lockfun)pthread_mutex_unlock;
}
/* Prepare *arg for a spinlock-based run: clear all statistics, initialize
 * the caller-owned spinlock, and install the pthread spin entry points. */
void init_arg_spin(struct arg *arg, pthread_spinlock_t *slock, int repeat) {
	*arg = (struct arg){0};
	pthread_spin_init(slock, 0); /* 0 == PTHREAD_PROCESS_PRIVATE on Linux */
	arg->repeat_cnt = repeat;
	arg->lock_obj = slock;
	arg->lock = (lockfun)pthread_spin_lock;
	arg->unlock = (lockfun)pthread_spin_unlock;
}
int main() {
struct arg mutex_arg;
struct arg spin_arg;
pthread_mutex_t mlock;
pthread_spinlock_t slock;
int i, power;
printf("%12s %8s | ", "LOCK_TYPE", "TNUM");
for(i = 0; i < 4; i++) {
printf("%8s %8s %8s %8s | ", "LOOP_CNT", "TIME(ns)",
"CPU(%)", "QPS");
}
printf("\n");
power= 1;
printf("%12s %8d | ", "MUTEX", 2);
for(i = 0; i < 4; i++) {
init_arg_mutex(&mutex_arg, &mlock, 10*power);
lock_test(&mutex_arg, 2);
printf("%8d %8lu %8.2f %8lu | ", mutex_arg.repeat_cnt, mutex_arg.avg_cost_per_req_ns,
mutex_arg.avg_cpu_usage, mutex_arg.requests_per_seconds);
power *= 100;
}
printf("\n");
power= 1;
printf("%12s %8d | ", "SPIN", 2);
for(i = 0; i < 4; i++) {
init_arg_spin(&spin_arg, &slock, 10*power);
power *= 100;
lock_test(&spin_arg, 2);
printf("%8d %8lu %8.2f %8lu | ", spin_arg.repeat_cnt, spin_arg.avg_cost_per_req_ns,
spin_arg.avg_cpu_usage, spin_arg.requests_per_seconds);
}
printf("\n");
power= 1;
printf("%12s %8d | ", "MUTEX", 4);
for(i = 0; i < 4; i++) {
init_arg_mutex(&mutex_arg, &mlock, 10*power);
lock_test(&mutex_arg, 4);
printf("%8d %8lu %8.2f %8lu | ", mutex_arg.repeat_cnt, mutex_arg.avg_cost_per_req_ns,
mutex_arg.avg_cpu_usage, mutex_arg.requests_per_seconds);
power *= 100;
}
printf("\n");
power= 1;
printf("%12s %8d | ", "SPIN", 4);
for(i = 0; i < 4; i++) {
init_arg_spin(&spin_arg, &slock, 10*power);
power *= 100;
lock_test(&spin_arg, 4);
printf("%8d %8lu %8.2f %8lu | ", spin_arg.repeat_cnt, spin_arg.avg_cost_per_req_ns,
spin_arg.avg_cpu_usage, spin_arg.requests_per_seconds);
}
printf("\n");
return 0;
}
代码在小编的笔记本电脑的Centos7虚拟机中运行的结果如下:
LOCK_TYPE TNUM | LOOP_CNT TIME(ns) CPU(%) QPS | LOOP_CNT TIME(ns) CPU(%) QPS | LOOP_CNT TIME(ns) CPU(%) QPS | LOOP_CNT TIME(ns) CPU(%) QPS |
MUTEX 2 | 10 38 198.12 4737621 | 1000 1304 123.22 638896 | 100000 146943 103.35 6440 | 10000000 14032580 99.38 71 |
SPIN 2 | 10 25 201.34 13857119 | 1000 1213 200.00 789640 | 100000 123677 202.06 8081 | 10000000 14635475 198.75 68 |
MUTEX 4 | 10 32 259.33 7139308 | 1000 1369 148.00 469975 | 100000 145273 101.44 6502 | 10000000 14113660 102.01 70 |
SPIN 4 | 10 26 394.37 11659900 | 1000 1260 395.62 755971 | 100000 128739 400.00 7763 | 10000000 15602026 391.87 64 |
从结果中我们可以看到如下几点:
1,Spin lock不管等锁的时间有多长,它都将CPU吃的满满的;
2,在等锁的时间(TIME项)在1us及以下时,spin lock的QPS明显大大优于mutex lock;
3,随着等锁时间(TIME项)的增大mutex lock的QPS与spin lock的QPS慢慢趋于一致。
结论:
1,在等锁时间为ns级,并且互斥的线程(TNUM项)数为2时,推荐采用spin lock这时它的性能最佳而且与mutex lock的cpu使用率相较并没有太大差别;
2,在互斥线程(TNUM项)大于2时,并且等锁时间在us级,如果系统有额外可以浪费的cpu,那这时可以选用spin lock;
3,其它情况从cpu使用率因素和QPS带来的获益考虑,都推荐使用mutex lock。