实时核多线程性能低问题分析
一、核介绍
核 | 核名 |
---|---|
标准核 | 3.10.0-862.1.ky4.x86_64 |
实时核 | 3.10.0-1127.rt56.1093.ky4.x86_64 |
二、性能测试
使用./Run -c 4
进行测试
指标名 | 标准核分数 | 实时核分数 |
---|---|---|
Dhrystone 2 using register variables | 13375.7 | 12690.9 |
Double-Precision Whetstone | 2474.2 | 2499.2 |
Execl Throughput | 2192.9 | 1341.7 |
File Copy 1024 bufsize 2000 maxblocks | 2764.3 | 118.5 |
File Copy 256 bufsize 500 maxblocks | 1784.8 | 69.8 |
File Copy 4096 bufsize 8000 maxblocks | 5749.1 | 318.2 |
Pipe Throughput | 1898.1 | 1451.4 |
Pipe-based Context Switching | 1275.2 | 631.1 |
Process Creation | 2504.0 | 1050.2 |
Shell Scripts (1 concurrent) | 3721.4 | 1790.7 |
Shell Scripts (8 concurrent) | 3584.0 | 1182.1 |
System Call Overhead | 1323.1 | 862.5 |
System Benchmarks Index Score | 2787.6 | 873.2 |
从上述结果可以看出,File Copy 1024 bufsize 2000 maxblocks
,File Copy 256 bufsize 500 maxblocks
,File Copy 4096 bufsize 8000 maxblocks
的性能相差非常明显。
指标名 | 普通核分数 | 实时核分数 |
---|---|---|
File Copy 1024 bufsize 2000 maxblocks | 2764.3 | 118.5 |
File Copy 256 bufsize 500 maxblocks | 1784.8 | 69.8 |
File Copy 4096 bufsize 8000 maxblocks | 5749.1 | 318.2 |
三、性能分析
3.1 分析 file copy性能
使用./Run -c 3 fstime
进行File Copy 1024 bufsize 2000 maxblocks
测试。
fstime进程普通核大部分时间CPU占用率接近100%,状态大部分时间为R状态;
fstime进程实时核大部分时间CPU最大占用率为60%左右,状态在D与R之间变换。
状态名 | 进程状态 | 宏定义 |
---|---|---|
运行状态 | R | TASK_RUNNING |
睡眠状态 | S | TASK_INTERRUPTIBLE |
不可中断的睡眠状态 | D | TASK_UNINTERRUPTIBLE |
在UnixBench,UnixBench2,UnixBench3,UnixBench4四个目录下,开四个终端同时运行./Run -c fstime,将四个分数累加,结果正常。
当UnixBench,UnixBench2,UnixBench3,UnixBench4四个目录中的tmp同时软链接到同一个tmp目录时,再进行同样的测试并将分数累加,总分将变得很低。
故可能是多个进程同时操作同一个文件所致:read,write中对文件加锁时,未获得锁的进程状态被置为TASK_UNINTERRUPTIBLE。
3.2 分析 TASK_UNINTERRUPTIBLE 在实时核中的处理
由此可见,实时核将进程状态置为了TASK_UNINTERRUPTIBLE,下载一个实时核的patch,patch-3.10.105-rt120.patch
文件与当前版本实时核相近。对补丁的修改内容进行分析,查看添加TASK_UNINTERRUPTIBLE的地方。从下表可以看出,补丁中新增的,将状态置为TASK_UNINTERRUPTIBLE状态的函数只有9个。
从上述多进程文件读写拷贝测试推测,可能是文件读写时对文件的锁操作导致性能变低。下表中与锁机制相关的,只有./kernel/rtmutex.c文件。
文件 | 函数 | 添加的代码内容 |
---|---|---|
./arch/x86/kernel/kvm.c | void kvm_async_pf_task_wait(u32 token) | swait_prepare(&n.wq, &wait, TASK_UNINTERRUPTIBLE); |
./include/linux/wait-simple.h | __swait_event(wq, condition) | swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); |
./include/linux/wait-simple.h | __swait_event_timeout(wq, condition, ret) | swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); |
./kernel/cpu.c | static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | set_current_state(TASK_UNINTERRUPTIBLE); |
./kernel/cpu.c | static int sync_unplug_thread(void *data) | set_current_state(TASK_UNINTERRUPTIBLE); |
./kernel/cpu.c | void cpu_chill(void) | __hrtimer_nanosleep(&tu,NULL,HRTIMER_MODE_REL,CLOCK_MONOTONIC,TASK_UNINTERRUPTIBLE); |
./kernel/rtmutex.c | void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,struct rt_mutex_waiter *waiter,unsigned long flags) | __set_current_state(TASK_UNINTERRUPTIBLE); |
./kernel/rtmutex.c | void __sched rt_mutex_lock(struct rt_mutex *lock) | rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE); |
./kernel/rtmutex.c | int __sched __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); |
四、linux的mutex与spinlock
4.1普通核
spinlock简单调用raw_spinlock,spinlock与raw_spinlock是一样的。在2.6.33之后的版本,内核加入了raw_spin_lock系列,使用方法和spin_lock系列一模一样,只是参数由spinlock_t变为了raw_spinlock_t。而且在内核的主线版本中,spin_lock系列只是简单地调用了raw_spin_lock系列的函数。
4.2 实时核
spinlock不再简单调用raw_spinlock,而是重新实现了spinlock相关内核函数。spinlock的实现是通过struct rt_mutex完成的。
4.2.1 对普通版本的mutex
进行替换
首先添加文件./include/linux/rtmutex.h
,文件中定义struct rt_mutex
结构体,内部是使用raw_spinlock_t锁实现的。
// ./include/linux/rtmutex.h 文件
struct rt_mutex {
raw_spinlock_t wait_lock;
#ifdef __GENKSYMS__
struct plist_head wait_list;
#else
struct rb_root waiters;
struct rb_node *waiters_leftmost;
#endif
struct task_struct *owner;
int save_state;
#ifdef CONFIG_DEBUG_RT_MUTEXES
const char *file;
const char *name;
int line;
void *magic;
#endif
};
.......
而后添加文件./kernel/rtmutex.c
,文件中实现了对struct rt_mutex
操作的相关函数。
// ./kernel/rtmutex.c文件
int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state)
{
might_sleep();
return rt_mutex_fastlock(lock, state, 0, NULL, rt_mutex_slowlock);
}
/**
* rt_mutex_lock - lock a rt_mutex
*
* @lock: the rt_mutex to be locked
*/
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE);
}
.......
添加文件./include/linux/mutex_rt.h
,里面使用struct rt_mutex
替换原始的struct mutex
,及其相关函数。最后修改./include/linux/mutex.h
,对新文件./include/linux/mutex_rt.h
进行引用。
// ./include/linux/mutex_rt.h 文件
#include <linux/rtmutex.h>
/* FIXME: Just for __lockfunc */
#include <linux/spinlock.h>
struct mutex {
struct rt_mutex lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
};
extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
extern void __lockfunc _mutex_lock(struct mutex *lock);
extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
extern int __lockfunc _mutex_trylock(struct mutex *lock);
extern void __lockfunc _mutex_unlock(struct mutex *lock);
#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
#define mutex_lock(l) _mutex_lock(l)
#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
#define mutex_lock_killable(l) _mutex_lock_killable(l)
#define mutex_trylock(l) _mutex_trylock(l)
#define mutex_unlock(l) _mutex_unlock(l)
./include/linux/mutex.h
文件中使用宏定义CONFIG_PREEMPT_RT_FULL判断是否替换mutex
// ./include/linux/mutex.h 文件
#ifdef CONFIG_PREEMPT_RT_FULL
# include <linux/mutex_rt.h>
#else
struct mutex {
/* 1: unlocked, 0: locked, negative: locked, possible waiters */
atomic_t count;
spinlock_t wait_lock;
struct list_head wait_list;
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
struct task_struct *owner;
#endif
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
RH_KABI_REPLACE(void *spin_mlock, /* Spinner MCS lock */
struct optimistic_spin_queue osq) /* Spinner MCS lock */
#endif
#ifdef CONFIG_DEBUG_MUTEXES
const char *name;
void *magic;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
};
.......
#endif
替换mutex完成。
4.2.2 对普通版本的spinlock
进行替换
添加文件./include/linux/spinlock_types_rt.h
,文件中重新定义spinlock_t
// ./include/linux/spinlock_types_rt.h
/*
* PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
*/
typedef struct spinlock {
struct rt_mutex lock;
unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
} spinlock_t;
......
添加文件./include/linux/spinlock_rt.h
,文件中重新定义一套对spinlock_t
操作的函数。
// ./include/linux/spinlock_rt.h
.......
extern void __lockfunc rt_spin_lock(spinlock_t *lock);
extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
extern void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock);
extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
.......
#define spin_lock(lock) \
do { \
migrate_disable(); \
rt_spin_lock(lock); \
} while (0)
#define spin_lock_bh(lock) \
do { \
local_bh_disable(); \
migrate_disable(); \
rt_spin_lock(lock); \
} while (0)
#define spin_lock_irq(lock) spin_lock(lock)
#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
......
修改文件./include/linux/spinlock_types.h
,添加内容包含./include/linux/spinlock_types_rt.h
文件。
// ./include/linux/spinlock_types.h
#include <linux/spinlock_types_raw.h>
#ifndef CONFIG_PREEMPT_RT_FULL
# include <linux/spinlock_types_nort.h>
# include <linux/rwlock_types.h>
#else
# include <linux/rtmutex.h>
# include <linux/spinlock_types_rt.h>
# include <linux/rwlock_types_rt.h>
#endif
修改文件./include/linux/spinlock.h
,文件中包含了./include/linux/spinlock_types.h
,添加新内容包含./include/linux/spinlock_rt.h
文件
// ./include/linux/spinlock.h
#ifdef CONFIG_PREEMPT_RT_FULL
# include <linux/spinlock_rt.h>
#else /* PREEMPT_RT_FULL */
/*
* Map the spin_lock functions to the raw variants for PREEMPT_RT=n
*/
static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
{
return &lock->rlock;
}
#define spin_lock_init(_lock) \
do { \
spinlock_check(_lock); \
raw_spin_lock_init(&(_lock)->rlock); \
} while (0)
static inline void spin_lock(spinlock_t *lock)
{
raw_spin_lock(&lock->rlock);
}
.......
#endif
替换spinlock完成。
4.2.3 实时核下mutex
与spinlock
与普通核的区别
实时核下,持有锁的进程允许在临界区中被抢占,而且申请锁的操作可以导致进程休眠等待,这将导致自旋锁的机制被修改,不再进行忙等待,由原来的整数原子操作变更为类似信号量的睡眠锁(rt_mutex)操作。当没有得到锁的时候,会让出CPU,将进程状态改为TASK_UNINTERRUPTIBLE,进入休眠,而不是占着CPU进行自旋操作。
五、linux文件系统
5.1 file结构体解析
struct file结构体在include/linux/fs.h中定义。该结构体代表一个打开的文件,系统中每个打开的文件在内核空间都有一个关联的 struct file。可见结构体中包含spinlock_t f_lock;
,struct mutex f_pos_lock;
两个锁。
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
* fu_rcuhead for RCU freeing
*/
union {
struct list_head fu_list;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
#define f_dentry f_path.dentry
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
#ifdef __GENKSYMS__
#ifdef CONFIG_SMP
int f_sb_list_cpu;
#endif
#else
#ifdef CONFIG_SMP
int f_sb_list_cpu_deprecated;
#endif
#endif
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifndef __GENKSYMS__
struct mutex f_pos_lock;
#endif
};
5.2 lseek, read,write函数解析
lseek,read,write的实现如下,在./fs/read_write.c中实现。
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
off_t retval;
struct fd f = fdget_pos(fd);
if (!f.file)
return -EBADF;
retval = -EINVAL;
if (whence <= SEEK_MAX) {
loff_t res = vfs_llseek(f.file, offset, whence);
retval = res;
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
fdput_pos(f);
return retval;
}
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_read(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
}
return ret;
}
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_write(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
}
return ret;
}
lseek,read,write中都调用了fdget_pos与fdput_pos,对文件进行加锁解锁。
当文件设置了FMODE_ATOMIC_POS且file_count(file) > 1(即同一个struct file被多处引用)时,fdget_pos中调用了mutex_lock(&file->f_pos_lock);
fdput_pos中相应地调用了mutex_unlock(&f.file->f_pos_lock);
static inline struct fd fdget_pos(int fd)
{
struct fd f = fdget(fd);
struct file *file = f.file;
if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
if (file_count(file) > 1) {
f.flags |= FDPUT_POS_UNLOCK;
mutex_lock(&file->f_pos_lock);
}
}
return f;
}
static inline void fdput_pos(struct fd f)
{
if (f.flags & FDPUT_POS_UNLOCK)
mutex_unlock(&f.file->f_pos_lock);
fdput(f);
}
5.3 实时核锁修改对文件读写的影响
实时核重新定义修改了struct mutex
与spinlock_t
,file结构体中包含了这两种锁,lseek,read,write会对struct mutex
进行加锁解锁,其实内部是对struct rt_mutex
进行加锁解锁。如若得不到锁,将让出CPU,进入D状态,进入休眠。
六、总结
普通核,struct mutex里面使用的是spinlock_t自旋锁(wait_lock),自旋锁不会去将自己的TASK_RUNNING改为TASK_UNINTERRUPTIBLE,会一直占用CPU,不会交出,进入忙等待,所以CPU可以达到99%;
实时核,struct mutex内部是struct rt_mutex,当没有得到锁的时候,会让出CPU,将进程状态改为TASK_UNINTERRUPTIBLE,进入休眠。
文件系统file结构体使用了mutex锁,在lseek,read,write等函数操作文件时都有加锁解锁操作。当多进程进行操作文件时,实时核下,若得不到锁,将让出CPU,进入D状态,进入休眠;普通核下,若得不到锁,将进入忙等待,不会让出CPU。