Introduction
goldfish_pipe is a high-speed pipe driver implemented in Android, used mainly in virtualization scenarios. It provides a high-performance channel between the Android guest kernel and QEMU. The underlying mechanism is shared memory; you can think of it as an extremely stripped-down virtio. To use goldfish_pipe you need the dedicated QEMU (the Android emulator) provided by Google.
Usage
The goldfish_pipe driver exposes a character device at /dev/goldfish_pipe. Reading and writing this file is all it takes for high-speed communication between the guest and QEMU. Each opened file descriptor corresponds to its own pipe id, and that pipe id is transparent to user space, which makes the interface very convenient to use.
The logical protocol on top of the pipe is simple: on the first write, send the name of the service you want to connect to, and make sure the terminating '\0' of the string is sent as well. The QEMU shipped with Android implements the following services: TCP, unix, opengles and qemud; if you need something else you can extend it with your own service.
The user-space code is also simple; it looks roughly like this:
fd = open("/dev/qemu_pipe", O_RDWR);
if (fd < 0 && errno == ENOENT)
fd = open("/dev/goldfish_pipe", O_RDWR);
if (fd < 0) {
D("%s: Could not open /dev/qemu_pipe: %s", __FUNCTION__, strerror(errno));
//errno = ENOSYS;
return -1;
}
buffLen = strlen(buff);
ret = TEMP_FAILURE_RETRY(write(fd, "server name", 12));
if (ret != buffLen+1) {
D("%s: Could not connect to %s pipe service: %s", __FUNCTION__, pipeName, strerror(errno));
if (ret == 0) {
errno = ECONNRESET;
} else if (ret > 0) {
errno = EINVAL;
}
return -1;
}
After that the pipe can be used to transfer data. Transferring data over goldfish_pipe is exactly like operating on an ordinary character device; besides the basic read and write operations, goldfish also supports poll.
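For example, a client that does not want to busy-wait can sleep on the descriptor with poll() before reading. A minimal sketch (the helper name and the blocking strategy are mine, not something the driver dictates):

#include <errno.h>
#include <poll.h>
#include <unistd.h>

/* Hypothetical helper: block until the pipe has data, then read it.
 * 'fd' is the descriptor returned by the connect sequence above. */
static ssize_t pipe_read_blocking(int fd, void *buf, size_t len)
{
    struct pollfd pfd = { .fd = fd, .events = POLLIN };

    /* poll() on the pipe fd behaves just like on any other character device */
    if (poll(&pfd, 1, -1) < 0)
        return -1;
    if (pfd.revents & POLLIN)
        return read(fd, buf, len);

    errno = EIO;
    return -1;
}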
Source code
I am using the Android 9 kernel sources here; the goldfish_pipe driver lives in drivers/platform/goldfish/goldfish_pipe.c.
Let's first look at the initialization that runs once the device has been matched:
static int goldfish_pipe_probe(struct platform_device *pdev)
{
    ...
    spin_lock_init(&dev->lock);
    r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    if (r == NULL || resource_size(r) < PAGE_SIZE) {
        dev_err(&pdev->dev, "can't allocate i/o page\n");
        return -EINVAL;
    }
    dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
    if (dev->base == NULL) {
        dev_err(&pdev->dev, "ioremap failed\n");
        return -EINVAL;
    }
    r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
    if (r == NULL) {
        err = -EINVAL;
        goto error;
    }
    dev->irq = r->start;
    writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
    dev->version = readl(dev->base + PIPE_REG_VERSION);
    if (WARN_ON(dev->version < PIPE_CURRENT_DEVICE_VERSION))
        return -EINVAL;
    err = goldfish_pipe_device_init(pdev);
    ...
}
spin_lock_init first initializes a spinlock.
platform_get_resource then looks up the I/O memory resource; the address range was defined on the platform device beforehand. platform_get_resource itself only fetches that resource description, and devm_ioremap is what actually maps the register page so the driver can access it.
A second platform_get_resource call fetches the interrupt resource. Finally, the driver writes PIPE_DRIVER_VERSION into the version register and reads back the device's version to check that the two sides are compatible.
goldfish_pipe_device_init continues with the rest of the initialization:
static int goldfish_pipe_device_init(struct platform_device *pdev)
{
    ...
    page = (char *)__get_free_page(GFP_KERNEL);
    if (!page) {
        kfree(dev->pipes);
        return -ENOMEM;
    }
    dev->buffers = (struct goldfish_pipe_dev_buffers *)page;
    /* Send the buffer addresses to the host */
    {
        u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
        writel((u32)(unsigned long)(paddr >> 32),
               dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
        writel((u32)(unsigned long)paddr,
               dev->base + PIPE_REG_SIGNAL_BUFFER);
        writel((u32)MAX_SIGNALLED_PIPES,
               dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
        paddr = __pa(&dev->buffers->open_command_params);
        writel((u32)(unsigned long)(paddr >> 32),
               dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
        writel((u32)(unsigned long)paddr,
               dev->base + PIPE_REG_OPEN_BUFFER);
    }
    return 0;
}
As you can see, goldfish_pipe_device_init allocates one page as the buffer through which the driver and the device talk to each other, and at the end it hands the physical address of that buffer to the QEMU side on the host via writel for later use. From this point on, all the metadata exchanged between the driver and QEMU travels through this piece of shared memory, which keeps the overhead about as low as it can get. For the meaning of the individual registers behind dev->base, please refer to the source; I won't paste them here.
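For reference, the shared page is interpreted as the structures below (paraphrased from the same goldfish_pipe.c, so treat it as a sketch and check your tree for the exact definitions):

struct open_command_param {
    u64 command_buffer_ptr;   /* guest-physical address of a pipe's command buffer */
    u32 rw_params_max_count;  /* max scatter entries a single command may carry */
};

struct signalled_pipe_buffer {
    u32 id;                   /* pipe the host wants to wake up */
    u32 flags;                /* which events fired (readable/writable/closed) */
};

struct goldfish_pipe_dev_buffers {
    struct open_command_param open_command_params;
    struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES];
};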
Next, the open operation:
static int goldfish_pipe_open(struct inode *inode, struct file *file)
{
    ...
    pipe->command_buffer =
        (struct goldfish_pipe_command *)__get_free_page(GFP_KERNEL);
    id = get_free_pipe_id_locked(dev);
    pipe->command_buffer->id = id;
    dev->buffers->open_command_params.rw_params_max_count =
        MAX_BUFFERS_PER_COMMAND;
    dev->buffers->open_command_params.command_buffer_ptr =
        (u64)(unsigned long)__pa(pipe->command_buffer);
    status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
    ...
}
open mainly allocates one page as the pipe's metadata area (its command buffer) and registers that page in dev->buffers (the driver-level metadata). It then grabs a free pipe id and calls goldfish_cmd_locked with PIPE_CMD_OPEN to announce the new pipe to the device.
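The command buffer page allocated here is interpreted as roughly the following structure (again a paraphrase of the driver source; its rw_params part is what the read/write path below fills in):

struct goldfish_pipe_command {
    s32 cmd;        /* PipeCmdCode, guest -> host */
    s32 id;         /* pipe id, guest -> host */
    s32 status;     /* command result, host -> guest */
    s32 reserved;
    union {
        /* parameters for PIPE_CMD_READ / PIPE_CMD_WRITE */
        struct {
            u32 buffers_count;                   /* number of scatter entries */
            s32 consumed_size;                   /* bytes actually transferred */
            u64 ptrs[MAX_BUFFERS_PER_COMMAND];   /* guest-physical addresses */
            u32 sizes[MAX_BUFFERS_PER_COMMAND];  /* length of each entry */
        } rw_params;
    };
};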
Now the write operation:
static ssize_t goldfish_pipe_read_write(struct file *filp,
    char __user *buffer, size_t bufflen, int is_write)
{
    ...
    address = (unsigned long)buffer;
    address_end = address + bufflen;
    last_page = (address_end - 1) & PAGE_MASK;
    last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
    while (address < address_end) {
        s32 consumed_size;
        int status;
        ret = transfer_max_buffers(pipe, address, address_end, is_write,
                last_page, last_page_size, &consumed_size,
                &status);
        if (ret < 0)
            break;
        if (consumed_size > 0) {
            /* No matter what's the status, we've transferred
             * something.
             */
            count += consumed_size;
            address += consumed_size;
        }
        if (status > 0)
            continue;
        if (status == 0) {
            /* EOF */
            ret = 0;
            break;
        }
    }
    ...
}
As you can see, the write path calls transfer_max_buffers in a loop until all the data has been transferred. It first computes the start and end addresses of the user buffer, because the buffer does not necessarily start or end on a page boundary.
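To make the arithmetic concrete, here is a made-up example (assuming PAGE_SIZE is 4096; none of these numbers come from the driver):

/* A user buffer that starts 0x300 bytes into a page and is 0x2000 bytes long */
unsigned long address        = 0x7f0000001300;                        /* not page aligned */
unsigned long address_end    = address + 0x2000;                      /* 0x7f0000003300 */
unsigned long last_page      = (address_end - 1) & PAGE_MASK;         /* 0x7f0000003000 */
unsigned long last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;  /* 0x300 */

/* The transfer therefore touches three pages:
 *   0x7f0000001300..0x7f0000001fff   0xd00 bytes  (partial head)
 *   0x7f0000002000..0x7f0000002fff   0x1000 bytes (full page)
 *   0x7f0000003000..0x7f00000032ff   0x300 bytes  (partial tail)
 */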
static int transfer_max_buffers(struct goldfish_pipe *pipe,
    unsigned long address, unsigned long address_end, int is_write,
    unsigned long last_page, unsigned int last_page_size,
    s32 *consumed_size, int *status)
{
    static struct page *pages[MAX_BUFFERS_PER_COMMAND];
    unsigned long first_page = address & PAGE_MASK;
    unsigned int iter_last_page_size;
    int pages_count = pin_user_pages(first_page, last_page,
                last_page_size, is_write,
                pages, &iter_last_page_size);
    if (pages_count < 0)
        return pages_count;
    /* Serialize access to the pipe command buffers */
    if (mutex_lock_interruptible(&pipe->lock))
        return -ERESTARTSYS;
    populate_rw_params(pages, pages_count, address, address_end,
        first_page, last_page, iter_last_page_size, is_write,
        pipe->command_buffer);
    /* Transfer the data */
    *status = goldfish_cmd_locked(pipe,
                is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
    *consumed_size = pipe->command_buffer->rw_params.consumed_size;
    release_user_pages(pages, pages_count, is_write, *consumed_size);
    mutex_unlock(&pipe->lock);
    return 0;
}
transfer_max_buffers moves at most MAX_BUFFERS_PER_COMMAND pages of data per call, and the amount of data is not necessarily MAX_BUFFERS_PER_COMMAND * PAGE_SIZE, because the first and the last page may be partial. Before doing anything else, transfer_max_buffers pins the user-space pages so that their physical addresses can be handed to the host:
static int pin_user_pages(unsigned long first_page, unsigned long last_page,
    unsigned int last_page_size, int is_write,
    struct page *pages[MAX_BUFFERS_PER_COMMAND],
    unsigned int *iter_last_page_size)
{
    int ret;
    int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
    if (requested_pages > MAX_BUFFERS_PER_COMMAND) {
        requested_pages = MAX_BUFFERS_PER_COMMAND;
        *iter_last_page_size = PAGE_SIZE;
    } else {
        *iter_last_page_size = last_page_size;
    }
    ret = get_user_pages_fast(
            first_page, requested_pages, !is_write, pages);
    if (ret <= 0)
        return -EFAULT;
    if (ret < requested_pages)
        *iter_last_page_size = PAGE_SIZE;
    return ret;
}
get_user_pages_fast pins the user-space pages in memory and fills the pages[] array with their struct page descriptors; the driver never needs a kernel mapping of the data itself, only the physical addresses of those pages.
populate_rw_params then computes the physical addresses and fills in the pipe's metadata (the rw_params part of the command buffer):
static void populate_rw_params(
    struct page **pages, int pages_count,
    unsigned long address, unsigned long address_end,
    unsigned long first_page, unsigned long last_page,
    unsigned int iter_last_page_size, int is_write,
    struct goldfish_pipe_command *command)
{
    unsigned long xaddr = page_to_phys(pages[0]);
    unsigned long xaddr_prev = xaddr;
    int buffer_idx = 0;
    int i = 1;
    int size_on_page = first_page == last_page
            ? (int)(address_end - address)
            : (PAGE_SIZE - (address & ~PAGE_MASK));
    command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK));
    command->rw_params.sizes[0] = size_on_page;
    for (; i < pages_count; ++i) {
        xaddr = page_to_phys(pages[i]);
        size_on_page = (i == pages_count - 1) ?
            iter_last_page_size : PAGE_SIZE;
        if (xaddr == xaddr_prev + PAGE_SIZE) {
            command->rw_params.sizes[buffer_idx] += size_on_page;
        } else {
            ++buffer_idx;
            command->rw_params.ptrs[buffer_idx] = (u64)xaddr;
            command->rw_params.sizes[buffer_idx] = size_on_page;
        }
        xaddr_prev = xaddr;
    }
    command->rw_params.buffers_count = buffer_idx + 1;
}
The algorithm here is straightforward: convert each pinned page to its physical address, take care of the size and starting offset of the (possibly partial) first and last pages, and merge physically contiguous pages into a single entry. With the metadata filled in, the transfer is ready to be kicked off.
goldfish_cmd_locked is what actually starts the transfer:
static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
{
    pipe->command_buffer->cmd = cmd;
    /* failure by default */
    pipe->command_buffer->status = PIPE_ERROR_INVAL;
    writel(pipe->id, pipe->dev->base + PIPE_REG_CMD);
    return pipe->command_buffer->status;
}
Notice that the writel only sends a pipe id to QEMU; the write to the command register traps out to QEMU, which then performs the device emulation: it looks up the pipe's metadata from the pipe id and reads the buffer contents that the metadata describes. In favourable cases this can even be done with zero copies.
Finally, let's look at what the data actually looks like in memory at this point.
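Continuing the made-up three-page write from the earlier example (the physical addresses are invented as well), the pinned pages and the command buffer end up like this:

/* Suppose the three pinned user pages sit at these physical addresses:
 *   page_to_phys(pages[0]) = 0x40001000   (user data starts 0x300 into it)
 *   page_to_phys(pages[1]) = 0x40002000   (physically contiguous with pages[0])
 *   page_to_phys(pages[2]) = 0x7c000000   (not contiguous)
 *
 * populate_rw_params() merges the two contiguous pages, so the command
 * buffer that QEMU reads through the shared memory contains:
 *   cmd = PIPE_CMD_WRITE;  id = <this pipe's id>;
 *   rw_params.buffers_count = 2;
 *   rw_params.ptrs[0] = 0x40001300;  rw_params.sizes[0] = 0x1d00;  /* 0xd00 + 0x1000 */
 *   rw_params.ptrs[1] = 0x7c000000;  rw_params.sizes[1] = 0x300;
 *
 * QEMU walks ptrs[]/sizes[], copies (or maps) those guest-physical ranges,
 * and writes the number of bytes it accepted back into consumed_size.
 */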
The read path works in the same way, so I won't go through it separately.
opengles
opengles is one application of goldfish_pipe, playing a role similar to virtio. goldfish_pipe tends to perform a bit better than virtio-gpu here, though, simply because it is much more lightweight than virtio.
Related links
https://android.googlesource.com/device/generic/goldfish/
https://android.googlesource.com/kernel/goldfish/