kernel 到 user 的单向数据流
kernel 侧用一个内核线程模拟数据源:每隔 3 秒产生一次数据,每次 3 个字节
user 侧循环调用 poll,每次的超时时间为 1 秒
user代码
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
#include <errno.h>
/*
 * Userspace test program for /dev/MyDemo.
 *
 * Opens the device read-only, waits 5 seconds, then runs 20 poll() rounds
 * with a 1000 ms timeout each. Whenever the device is reported readable it
 * reads up to sizeof(tmp) bytes and prints each byte as a decimal number.
 *
 * Returns 0 after the loop finishes, or -1 if the device cannot be opened.
 */
int main(int argc, char **argv)
{
	struct pollfd fds[1];
	unsigned char tmp[20];
	ssize_t nread;
	ssize_t j;
	int ret;
	int fd;
	int i;

	(void)argc;
	(void)argv;

	fd = open("/dev/MyDemo", O_RDONLY);
	if (fd < 0) {
		printf("open failed, errno %d\n", errno);
		return -1;
	}
	printf("open successful\n");
	sleep(5);

	fds[0].fd = fd;
	fds[0].events = POLLIN;
	/* poll() leaves revents untouched on failure, so never leave it indeterminate. */
	fds[0].revents = 0;

	i = 20;
	while (i--) {
		printf("before poll!\n");
		ret = poll(fds, 1, 1000); // third argument is the timeout in milliseconds
		printf("after poll!\n");

		if (ret < 0) {
			/* A failed poll() says nothing about revents; do not inspect it. */
			printf("poll failed, errno %d\n", errno);
			if (errno == EINTR)
				continue;
			break;
		}
		if (ret == 0) {
			printf("time out!\n");
			continue;
		}

		/* Test the bit instead of the whole word: other flags may be set too. */
		if (!(fds[0].revents & POLLIN)) {
			printf("not POLLIN!\n");
			continue;
		}

		printf("before read!\n");
		memset(tmp, 0, sizeof(tmp));
		nread = read(fd, tmp, sizeof(tmp));
		if (nread <= 0) {
			printf("read err!\n");
		} else {
			for (j = 0; j < nread; j++) {
				printf("%d ", tmp[j]);
			}
			printf("\n");
		}
		printf("after read!\n");
	}

	ret = close(fd);
	if (ret < 0) {
		printf("close failed!\n");
	} else if (ret == 0) {
		printf("close success!\n");
	} else {
		printf("close invalid!\n");
	}
	return 0;
}
kernel代码
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/kdev_t.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/uaccess.h>
#include <linux/kthread.h>
#include <linux/sched/task.h>
#include <linux/unistd.h>
#include <linux/delay.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/syscalls.h>
// Size in bytes of the ring buffer in struct channel; also the modulus used for both ring positions.
#define DEMO_BUFFER_SIZE 256
// Name passed to the chrdev region, class and device creation calls in demo_init().
#define DEVICE_NAME "MyDemo"
// Number of device numbers reserved and added for this driver.
#define DEVICE_CNT 1
// Uncomment the next line to compile in the bodies of print_debug() and print_user().
// #define PRINT_DEBUG
// Bookkeeping for the character device registered by demo_init().
struct chr_dev{
// Device number obtained from register_chrdev_region()/alloc_chrdev_region().
dev_t devid;
// Embedded cdev, bound to demo_fops by cdev_init().
struct cdev cdev;
// Result of class_create().
struct class *class;
// Result of device_create().
struct device *device;
// Major number; when non-zero on entry to demo_init() it is used to build devid.
int major;
// Minor number; filled in from devid on the dynamic-allocation path.
int minor;
};
// The single device instance managed by this module.
struct chr_dev dump_dev;
// The design is illustrated with a single (read) channel.
// Synchronisation state for that channel.
struct chan_waitqueue {
wait_queue_head_t queue; // wait queue head that can_read() sleeps on and file_poll() registers with
// Open counter: incremented in file_open(), decremented in file_release().
atomic_t in_open;
// Held by file_read() while it copies data out and advances user_read_pos.
struct mutex mutex;
};
// Synchronisation state of the kernel-to-user channel.
static struct chan_waitqueue kern2user_Q;
// Ring buffer carrying bytes from the kernel producer thread to the userspace reader.
// The ring is treated as empty when user_read_pos == kern_write_pos.
struct channel {
unsigned char buffer[DEMO_BUFFER_SIZE]; // backing storage
int user_read_pos; // index of the next byte userspace will read
int kern_write_pos; // index of the next byte the kernel will write
};
// The kernel-to-user channel instance.
static struct channel kern2user_Ch;
// Global stop flag.
// g_stopping == true means the demo is stopping: the producer loop exits and
// a sleeping can_read() returns 0.
// Cleared by demo_starting(), set by demo_stopping().
static bool g_stopping;
// Task handle returned by kthread_run() for demo_generate_data_thread().
static struct task_struct *generate_and_wake;
/*
 * can_read - number of bytes that can currently be read from the channel.
 * @can_sleep: non-zero to block until data arrives or the demo is stopping;
 *             zero to return immediately.
 *
 * Return: the number of readable bytes starting at user_read_pos (for
 * example, 3 means the 3 bytes beginning at user_read_pos are readable).
 * Returns 0 when the ring is empty in non-blocking mode, or when a blocking
 * wait ended because g_stopping was set. If the wait is interrupted, the
 * negative value from wait_event_interruptible() is returned; because the
 * return type is unsigned int it reaches the caller as a very large
 * non-zero value and must be cast back to int to be recognised.
 */
unsigned int can_read(int can_sleep)
{
	struct channel *chan = &kern2user_Ch;
	int ret;

	if (chan->user_read_pos == chan->kern_write_pos) {
		/* Non-blocking mode: report an empty ring straight away. */
		if (!can_sleep)
			return 0;

		/*
		 * Use the public wait_event_interruptible() rather than the
		 * internal __wait_event_interruptible() helper. It returns 0
		 * once the condition holds and a negative value otherwise.
		 */
		ret = wait_event_interruptible(kern2user_Q.queue,
			(chan->user_read_pos != chan->kern_write_pos) || g_stopping);
		/* Interrupted before the condition became true. */
		if (ret)
			return ret;
		/* Woken because the demo is stopping. */
		if (g_stopping)
			return 0;
		/* Otherwise data arrived: fall through to the length computation. */
	}

	/*
	 * Distance from the read position to the write position on the ring.
	 * Equivalent to:
	 *   write >= read ? write - read : write + DEMO_BUFFER_SIZE - read
	 */
	return (chan->kern_write_pos + DEMO_BUFFER_SIZE - chan->user_read_pos) % DEMO_BUFFER_SIZE;
}
/*
 * print_debug - dump a slice of the channel buffer to the kernel log.
 * @start: index of the first byte in kern2user_Ch.buffer to print.
 * @num:   number of bytes requested.
 *
 * Compiled to an empty function unless PRINT_DEBUG is defined. The dump is
 * emitted as one log line by using pr_cont() for the continuation pieces,
 * and the index is clamped to DEMO_BUFFER_SIZE so a bad range can never read
 * past the end of the buffer.
 */
void print_debug(unsigned long start, unsigned long num) {
#ifdef PRINT_DEBUG
	struct channel *chan = &kern2user_Ch;
	unsigned long i;

	pr_info("demo(nums %lu): ", num);
	for (i = start; i < start + num && i < DEMO_BUFFER_SIZE; i++) {
		pr_cont("%d ", chan->buffer[i]);
	}
	pr_cont("\n");
#endif
}
/*
 * print_user - dump bytes from a userspace buffer to the kernel log.
 * @buffer: userspace pointer to the bytes.
 * @num:    number of bytes to print.
 *
 * Compiled to an empty function unless PRINT_DEBUG is defined. A __user
 * pointer must not be dereferenced directly, so each byte is fetched with
 * get_user(); the dump stops at the first byte that cannot be read.
 */
void print_user(char __user *buffer, unsigned long num) {
#ifdef PRINT_DEBUG
	unsigned long i;
	char val;

	pr_info("demo(nums %lu): ", num);
	for (i = 0; i < num; i++) {
		if (get_user(val, buffer + i))
			break;
		pr_cont("%d ", val);
	}
	pr_cont("\n");
#endif
}
/*
 * file_read - read() handler: copy pending ring-buffer bytes to userspace.
 * @file:   open file; O_NONBLOCK in f_flags selects non-blocking behaviour.
 * @buffer: userspace destination.
 * @count:  maximum number of bytes the caller accepts.
 * @ppos:   file position, advanced by the number of bytes copied.
 *
 * Return: the number of bytes copied; 0 when there is nothing to read (empty
 * ring in non-blocking mode, or the demo is stopping); the negative value
 * reported by can_read() when a blocking wait was interrupted; -EFAULT when
 * bytes were requested but none could be copied to @buffer.
 */
static ssize_t file_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
	struct channel *chan = &kern2user_Ch;
	unsigned long failed;
	size_t first, second, copied;
	int read_pos, write_pos;
	int avail;

	/*
	 * Argument: 1 = may sleep (blocking mode), 0 = must not sleep
	 * (non-blocking mode). can_read() returns unsigned int, so a negative
	 * errno from an interrupted wait arrives as a huge value; valid lengths
	 * are below DEMO_BUFFER_SIZE, so casting back to int recovers the sign.
	 */
	avail = (int)can_read((file->f_flags & O_NONBLOCK) ? 0 : 1);
	if (avail < 0)
		return avail;
	if (!avail) {
		pr_info("%s, no data to read\n", __func__);
		return 0;
	}

	mutex_lock(&(kern2user_Q.mutex));

	/*
	 * Take one snapshot of both positions so every computation below agrees,
	 * even if the producer advances kern_write_pos meanwhile. The read
	 * barrier orders the position load before the data loads.
	 */
	read_pos = chan->user_read_pos;
	write_pos = READ_ONCE(chan->kern_write_pos);
	smp_rmb();

	/* Bytes to hand out in this call. */
	count = min(count,
		(size_t)((write_pos + DEMO_BUFFER_SIZE - read_pos) % DEMO_BUFFER_SIZE));
	/* First segment: from read_pos up to, at most, the end of the buffer. */
	first = min(count, (size_t)(DEMO_BUFFER_SIZE - read_pos));
	/* Second segment: whatever wraps around to the start of the buffer. */
	second = count - first;
	pr_info("%s, %zu bytes will copy to user (first segment %zu, second segment %zu)\n",
		__func__, count, first, second);

	print_debug(read_pos, first);
	failed = copy_to_user(buffer, chan->buffer + read_pos, first);
	copied = first - failed;
	if (!failed && second) {
		print_debug(0, second);
		failed = copy_to_user(buffer + first, chan->buffer, second);
		copied += second - failed;
	}
	if (failed)
		pr_err("%s, %lu bytes copy to user failed\n", __func__, failed);
	else
		pr_info("%s, %zu bytes copy to user success\n", __func__, copied);

	/* Consume only what actually reached userspace. */
	*ppos += copied;
	print_user(buffer, copied);
	smp_wmb();
	chan->user_read_pos = (read_pos + copied) % DEMO_BUFFER_SIZE;
	smp_wmb();
	mutex_unlock(&(kern2user_Q.mutex));

	/* A short copy is reported as a short read; a total failure as -EFAULT. */
	if (!copied && failed)
		return -EFAULT;
	return copied;
}
/*
 * file_poll - poll() handler for the channel.
 *
 * Registers the caller on the channel wait queue, then reports the device
 * as readable (POLLIN | POLLRDNORM) when a non-sleeping can_read() returns
 * a non-zero value; otherwise reports no events.
 */
static unsigned file_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &(kern2user_Q.queue), wait);

	/* can_read(0) never sleeps. */
	return can_read(0) ? (POLLIN | POLLRDNORM) : 0;
}
void demo_starting(void) {
g_stopping = false;
wake_up_interruptible(&kern2user_Q.queue);
}
void demo_stopping(void) {
g_stopping = true;
wake_up_interruptible(&kern2user_Q.queue);
}
static int file_open(struct inode *inode, struct file *filp)
{
int ret = 0;
if (atomic_inc_return(&kern2user_Q.in_open) > 1) {
pr_err("%s, chan already opened\n", __func__);
ret = -EBUSY;
goto out_fail;
}
return 0;
out_fail:
smp_mb();
atomic_dec(&kern2user_Q.in_open);
smp_mb();
return ret;
}
static int file_release(struct inode *inode, struct file *filp)
{
atomic_dec(&kern2user_Q.in_open);
return 0;
}
// File operations of the demo device: open/release/read/poll are implemented
// in this file, llseek uses the kernel's noop_llseek, and no write handler is set.
const struct file_operations demo_fops = {
.owner = THIS_MODULE,
.open = file_open,
.release = file_release,
.read = file_read,
.poll = file_poll,
.llseek = noop_llseek,
};
/*
 * demo_generate_data_thread - kernel thread that simulates a data source.
 * @data: unused.
 *
 * Every 3 seconds it appends 3 bytes to the ring buffer (each byte holds the
 * index it was written at), publishes the new write position and wakes the
 * channel wait queue. The loop ends when g_stopping is set or when the
 * thread is asked to stop through the kthread API.
 *
 * Return: always 0.
 */
int demo_generate_data_thread(void *data) {
	struct channel *chan = &kern2user_Ch;
	int pos;
	int i;

	// Simulate a periodic event: advance 3 steps every 3 s.
	while (!g_stopping && !kthread_should_stop()) {
		/* Fill the slots using a private cursor first ... */
		pos = chan->kern_write_pos;
		for (i = 0; i < 3; i++) {
			chan->buffer[pos] = pos;
			pos = (pos + 1) % DEMO_BUFFER_SIZE;
		}
		/*
		 * ... and only then publish the new position, so a reader that
		 * observes it also observes the bytes stored before the barrier.
		 */
		smp_wmb();
		WRITE_ONCE(chan->kern_write_pos, pos);
		pr_info("%s, write buffer, cur kern_write_pos %d\n", __func__, pos);
		smp_wmb();
		pr_info("%s, before wake up\n", __func__);
		wake_up_interruptible(&kern2user_Q.queue);
		pr_info("%s, after wake up\n", __func__);
		msleep(3000);
	}
	return 0;
}
/*
 * demo_init - module entry point.
 *
 * Initialises the channel state, reserves a device number (a fixed one when
 * dump_dev.major is non-zero, otherwise a dynamically allocated one),
 * registers the cdev, creates the class and device, and finally starts the
 * producer thread. Each failure unwinds exactly the steps that succeeded.
 *
 * Return: 0 on success or a negative errno.
 */
static int demo_init(void) {
	int result = 0;

	pr_info("%s ===>\n", __func__);

	/*
	 * Everything the file operations touch must be valid before the device
	 * becomes reachable, so set up the channel state first.
	 */
	init_waitqueue_head(&kern2user_Q.queue);
	atomic_set(&kern2user_Q.in_open, 0);
	mutex_init(&kern2user_Q.mutex);
	kern2user_Ch.kern_write_pos = 0;
	kern2user_Ch.user_read_pos = 0;
	pr_info("%s, memset buffer, size %zu\n", __func__, sizeof(kern2user_Ch.buffer));
	memset(&kern2user_Ch.buffer, 0, sizeof(kern2user_Ch.buffer));
	demo_starting();

	if (dump_dev.major) {
		dump_dev.devid = MKDEV(dump_dev.major, 0);
		result = register_chrdev_region(dump_dev.devid, DEVICE_CNT, DEVICE_NAME);
		if (result < 0)
			goto out;
	} else {
		result = alloc_chrdev_region(&dump_dev.devid, 0, DEVICE_CNT, DEVICE_NAME);
		if (result < 0)
			goto out;
		dump_dev.major = MAJOR(dump_dev.devid);
		dump_dev.minor = MINOR(dump_dev.devid);
	}
	pr_info("%s, major=%d, minor=%d\r\n", __func__, dump_dev.major, dump_dev.minor);

	/* cdev_init() zeroes the structure, so the owner must be set afterwards. */
	cdev_init(&dump_dev.cdev, &demo_fops);
	dump_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&dump_dev.cdev, dump_dev.devid, DEVICE_CNT);
	if (result < 0) {
		pr_err("%s, cdev_add failed\n", __func__);
		goto out_region;
	}

	dump_dev.class = class_create(THIS_MODULE, DEVICE_NAME);
	if (IS_ERR(dump_dev.class)) {
		pr_info("%s, class_create failed\n", __func__);
		result = PTR_ERR(dump_dev.class);
		goto out_cdev;
	}

	dump_dev.device = device_create(dump_dev.class, NULL, dump_dev.devid, NULL, DEVICE_NAME);
	if (IS_ERR(dump_dev.device)) {
		pr_info("%s, device_create failed\n", __func__);
		result = PTR_ERR(dump_dev.device);
		goto out_class;
	}

	/* kthread_run() reports failure with an ERR_PTR, never with NULL. */
	generate_and_wake = kthread_run(demo_generate_data_thread, NULL, "demo_thread");
	if (IS_ERR(generate_and_wake)) {
		result = PTR_ERR(generate_and_wake);
		generate_and_wake = NULL;
		pr_err("%s, thread create failed\n", __func__);
		goto out_device;
	}
	pr_info("%s, thread create success\n", __func__);

	pr_info("%s <===\n", __func__);
	return 0;

	/* Unwind in reverse order of setup. */
out_device:
	device_destroy(dump_dev.class, dump_dev.devid);
out_class:
	class_destroy(dump_dev.class);
out_cdev:
	cdev_del(&dump_dev.cdev);
out_region:
	unregister_chrdev_region(dump_dev.devid, DEVICE_CNT);
out:
	return result;
}
/*
 * demo_exit - module exit point.
 *
 * Stops the producer thread and waits until it has really finished before
 * anything else is torn down; otherwise the thread could still be running
 * module code after the module has been unloaded. Then removes the device,
 * the class, the cdev and the device-number region, in that order.
 */
static void demo_exit(void) {
	struct task_struct *producer = generate_and_wake;

	pr_info("%s ===>\n", __func__);

	if (producer) {
		/*
		 * The thread function returns on its own once g_stopping is set,
		 * so it may already have exited when kthread_stop() runs. Hold a
		 * reference so the task_struct stays valid for kthread_stop().
		 * The reference is taken before the flag is set, while the
		 * thread is still guaranteed to be alive.
		 */
		get_task_struct(producer);
		demo_stopping();
		kthread_stop(producer);
		put_task_struct(producer);
	} else {
		demo_stopping();
	}
	generate_and_wake = NULL;

	device_destroy(dump_dev.class, dump_dev.devid);
	class_destroy(dump_dev.class);
	/* Remove the cdev before giving back the device numbers it occupies. */
	cdev_del(&dump_dev.cdev);
	unregister_chrdev_region(dump_dev.devid, DEVICE_CNT);
	pr_info("%s <===\n", __func__);
	return;
}
module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
源码参考
/*
 * Quoted from the Linux kernel for reference.
 * Expands to ___wait_event() with state TASK_INTERRUPTIBLE, exclusive = 0,
 * an initial return value of 0, and schedule() as the command run on each
 * loop iteration in which the condition is still false.
 */
#define __wait_event_interruptible(wq_head, condition) \
___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, schedule())
/*
 * The below macro ___wait_event() has an explicit shadow of the __ret
 * variable when used from the wait_event_*() macros.
 *
 * This is so that both can use the ___wait_cond_timeout() construct
 * to wrap the condition.
 *
 * The type inconsistency of the wait_event_*() __ret variable is also
 * on purpose; we use long where we can return timeout values and int
 * otherwise.
 *
 * Reading guide (added for this article): each loop iteration calls
 * prepare_to_wait_event() and then tests the condition. If the condition
 * holds, the loop breaks, finish_wait() runs, and the expression evaluates
 * to the initial 'ret'. If the state is interruptible and
 * prepare_to_wait_event() returned non-zero, that value becomes the result
 * and control jumps to __out, skipping finish_wait(). Otherwise 'cmd' is
 * executed and the loop repeats.
 */
#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \
({ \
__label__ __out; \
struct wait_queue_entry __wq_entry; \
long __ret = ret; /* explicit shadow */ \
\
init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \
for (;;) { \
long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
\
if (condition) \
break; \
\
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
goto __out; \
} \
\
cmd; \
} \
finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \
})
// The value returned here is either 0 or negative (-ERESTARTSYS).
// Under wq_head->lock: if a signal is pending for the given state, the entry
// is removed from the queue and -ERESTARTSYS is returned; otherwise the entry
// is queued (at the tail when exclusive, unless it is already queued) and the
// current task state is set to 'state'.
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
unsigned long flags;
long ret = 0;
spin_lock_irqsave(&wq_head->lock, flags);
if (signal_pending_state(state, current)) {
/*
 * Exclusive waiter must not fail if it was selected by wakeup,
 * it should "consume" the condition we were waiting for.
 *
 * The caller will recheck the condition and return success if
 * we were already woken up, we can not miss the event because
 * wakeup locks/unlocks the same wq_head->lock.
 *
 * But we need to ensure that set-condition + wakeup after that
 * can't see us, it should wake up another exclusive waiter if
 * we fail.
 */
list_del_init(&wq_entry->entry);
ret = -ERESTARTSYS;
} else {
if (list_empty(&wq_entry->entry)) {
if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
__add_wait_queue_entry_tail(wq_head, wq_entry);
else
__add_wait_queue(wq_head, wq_entry);
}
set_current_state(state);
}
spin_unlock_irqrestore(&wq_head->lock, flags);
return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
//========================================================================
// Quoted from the Linux kernel: calls __wake_up() with mode TASK_INTERRUPTIBLE, nr_exclusive = 1 and a NULL key.
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
// Quoted from the Linux kernel: forwards to __wake_up_common_lock() with
// wake_flags = 0, passing the mode, nr_exclusive and key through unchanged.
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
总结
该文缺陷如下:
(1) demo_exit 时应先调用 demo_stopping,并同时终止 demo_generate_data_thread 线程。实际尝试用 kthread_stop 时发现无法停止该线程,这个问题留到另一篇文章中研究;本文的方案是让 demo_generate_data_thread 通过检查 g_stopping 标志自行退出
(2) 借鉴其他文章,采用内存屏障的方式进行同步;内存屏障的原理需要在另一篇文章中单独学习
(3) 只实现了 read,write 尚未实现
该文运用如下:
(1) 可将kernel的数据搬到user
学习了一下午,收获不少。poll 的功能很强大,本文给出的只是一个通用方案;epoll 同样依赖驱动提供的 poll 回调来工作