0)写在最前面
本来是研究virtio-gpu双屏,但后面发现其对fb的支持有问题,并且VT切换时卡死,本文记录了对这些问题的分析。
1)基础环境调查
在xorg.conf中配置了:Option "Xinerama" "1"
spice弹出了两个界面,应该是使能了两个connector
但是xrandr中只能看到Virtual-0,说明另一个screen未连接。
实在不行只能通过fb0和fb1的方式写入,但是看起来对fb的支持有问题:
# strace a.out
ioctl(3, FBIOGET_FSCREENINFO, 0x7ffc65153d90) = 0
ioctl(3, FBIOGET_VSCREENINFO, 0x7ffc65153de0) = 0
write(1, "1024x768, 32bpp\n", 161024x768, 32bpp
) = 16
mmap(NULL, 3145728, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = -1 EINVAL (Invalid argument)
dup(2) = 4
fcntl(4, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(4, {st_mode=S_IFCHR|0600, st_rdev=makedev(136, 0), ...}) = 0
write(4, "Error: failed to map framebuffer"..., 68Error: failed to map framebuffer device to memory: Invalid argument
看流程前面ioctl相关获取都是正常的,到了mmap就报EINVAL
2)研究解决mmap问题
drm mmap调用栈:
Sep 16 12:14:56 Linx kernel: [ 2570.217975] [<ffffffffbd52b8d5>] ? dump_stack+0x5c/0x77
Sep 16 12:14:56 Linx kernel: [ 2570.218018] [<ffffffffc06dbfe2>] ? virtio_gpu_mmap+0x12/0x60 [virtio_gpu]
Sep 16 12:14:56 Linx kernel: [ 2570.218026] [<ffffffffbd3bdbb1>] ? mmap_region+0x341/0x590
Sep 16 12:14:56 Linx kernel: [ 2570.218030] [<ffffffffbd3be256>] ? do_mmap+0x456/0x560
Sep 16 12:14:56 Linx kernel: [ 2570.218035] [<ffffffffbd4a4986>] ? security_mmap_file+0x66/0xe0
Sep 16 12:14:56 Linx kernel: [ 2570.218041] [<ffffffffbd3a02ae>] ? vm_mmap_pgoff+0xbe/0x100
Sep 16 12:14:56 Linx kernel: [ 2570.218046] [<ffffffffbd3bc0d1>] ? SyS_mmap_pgoff+0x1b1/0x270
Sep 16 12:14:56 Linx kernel: [ 2570.218051] [<ffffffffbd203b5c>] ? do_syscall_64+0x5c/0x170
Sep 16 12:14:56 Linx kernel: [ 2570.218058] [<ffffffffbd7fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
mmap初始化调用栈:
dump_stack+0x5c/0x77
virtio_gpufb_create+0x49/0x320 [virtio_gpu]
drm_setup_crtcs+0x372/0x990 [drm_kms_helper]
drm_fb_helper_initial_config+0x20f/0x3da [drm_kms_helper]
virtio_gpu_fbdev_init+0xde/0x100 [virtio_gpu]
virtio_gpu_driver_load+0x44c/0x650 [virtio_gpu]
wake_up_atomic_t+0x30/0x30
drm_dev_register+0x9c/0xc0 [drm]
drm_virtio_init+0x60/0x1a0 [virtio_gpu]
vp_finalize_features+0x6a/0x90 [virtio_pci]
virtio_dev_probe+0x141/0x1e0 [virtio]
driver_probe_device+0x21e/0x430
__driver_attach+0xd6/0xe0
driver_probe_device+0x430/0x430
bus_for_each_dev+0x67/0xb0
bus_add_driver+0x40/0x260
driver_register+0x57/0xd0
do_one_initcall+0x4c/0x180
preempt_schedule_common+0x14/0x20
_cond_resched+0x19/0x20
do_init_module+0x5a/0x1f1
load_module+0x24e3/0x28f0
__symbol_put+0x60/0x60
vfs_read+0x114/0x130
security_capable+0x41/0x60
SYSC_finit_module+0x8e/0xe0
entry_SYSCALL_64_fastpath+0x1e/0xad
看不出来,再看fb的mmap调用栈:
fb_mmap+0x45/0x140
mmap_region+0x341/0x590
do_mmap+0x456/0x560
security_mmap_file+0x66/0xe0
vm_mmap_pgoff+0xbe/0x100
SyS_mmap_pgoff+0x1b1/0x270
do_syscall_64+0x5c/0x170
entry_SYSCALL64_slow_path+0x25/0x25
看了下fb_mmap的代码:
static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
struct fb_info *info = file_fb_info(file);
struct fb_ops *fb;
unsigned long mmio_pgoff;
unsigned long start;
u32 len;
dump_stack();
if (!info)
return -ENODEV;
fb = info->fbops;
if (!fb)
return -ENODEV;
mutex_lock(&info->mm_lock);
if (fb->fb_mmap) {
int res;
res = fb->fb_mmap(info, vma);
mutex_unlock(&info->mm_lock);
return res;
}
start = info->fix.smem_start;
len = info->fix.smem_len;
mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
if (vma->vm_pgoff >= mmio_pgoff) {
if (info->var.accel_flags) {
mutex_unlock(&info->mm_lock);
return -EINVAL;
}
vma->vm_pgoff -= mmio_pgoff;
start = info->fix.mmio_start;
len = info->fix.mmio_len;
}
mutex_unlock(&info->mm_lock);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
fb_pgprotect(file, vma, start);
return vm_iomap_memory(vma, start, len);
}
再结合virtio_gpufb_create中的代码,这里并未设置fix.smem_start和fix.smem_len,因此这两个参数会被默认置0:
static int virtio_gpufb_create(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
...
fb = &vfbdev->vgfb.base;
vfbdev->helper.fb = fb;
strcpy(info->fix.id, "virtiodrmfb");
info->flags = FBINFO_DEFAULT;
info->fbops = &virtio_gpufb_ops;
info->pixmap.flags = FB_PIXMAP_SYSTEM;
info->screen_base = obj->vmap;
info->screen_size = obj->gem_base.size;
drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
drm_fb_helper_fill_var(info, &vfbdev->helper,
sizes->fb_width, sizes->fb_height);
info->fix.mmio_start = 0;
info->fix.mmio_len = 0;
...
start = info->fix.mmio_start;
len = info->fix.mmio_len;
return vm_iomap_memory(vma, start, len);
}
以下调用设置了accel_flags,其中的FB_ACCELF_TEXT为1
drm_fb_helper_fill_var
->info->var.accel_flags = FB_ACCELF_TEXT;
可以看出,virtio_gpufb_create中的代码直接导致fb_mmap中的判断条件成立并返回-EINVAL。这里即便把info->var.accel_flags重置为0也没用:即使绕过了该判断,后续的io_remap_pfn_range也迟早会出问题,因为最终传给vm_iomap_memory的start和len都是0。这也是virtio-gpu与qxl的区别所在:qxl的smem_start使用的是"设备地址",尽管对虚拟设备而言该地址并无实际意义。
从上面可以看出,fbmem的通用路径只适配了物理设备显存,但它在前面预留了一个钩子:只要驱动实现了fb_ops->fb_mmap接口,就能把fb映射到驱动自己的(虚拟)内存:
static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
struct fb_info *info = file_fb_info(file);
struct fb_ops *fb;
unsigned long mmio_pgoff;
unsigned long start;
u32 len;
if (!info)
return -ENODEV;
fb = info->fbops;
if (!fb)
return -ENODEV;
mutex_lock(&info->mm_lock);
if (fb->fb_mmap) {
int res;
res = fb->fb_mmap(info, vma);
mutex_unlock(&info->mm_lock);
return res;
}
。。。
}
之后mmap就正常了,但是写入过程中出现了SIGBUS错误,如下:
# ./a.out
The framebuffer device was opened successfully.
1024x768, 32bpp
The framebuffer device was mapped to memory successfully.
总线错误
因为mmap的内存是按需映射的(缺页时才建立映射),也就是在首次写入时触发page fault异常,然后才正式建立页表映射,调用流程如下:
ttm_bo_vm_fault+0x34/0x540
tty_insert_flip_string_fixed_flag+0x85/0xe0
list_del+0x9/0x20
remove_wait_queue+0x20/0x30
n_tty_write+0x2d7/0x470
__wake_up+0x34/0x50
__do_fault+0x84/0x190
handle_mm_fault+0x79d/0x1710
__do_page_fault+0x253/0x510
async_page_fault+0x28/0x30
排查后,在ttm_bo_vm_fault中,异常出现在如下区域,这里的page_offset为18446744073708503140,num pages为768,明显page_offset有问题。
page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff - drm_vma_node_start(&bo->vma_node);
page_last = vma_pages(vma) + vma->vm_pgoff -
drm_vma_node_start(&bo->vma_node);
if (unlikely(page_offset >= bo->num_pages)) {
retval = VM_FAULT_SIGBUS;
goto out_io_unlock;
}
细致排查后page_offset的计算参数获取如下(pages):address:34243474630; vma->vm_start:34243474530; vma->vm_pgoff:0; vma node start:1048576,这里的page_offset的计算出现下溢,所以才得到这么大的page_offset。
对比了下qxl:
address:34332914588; vma->vm_start:34332914268; vma->vm_pgoff:1060031; vma node start:1060031;
qxl中的pgoff和vma node start是相同的,或许是virtio-gpu中没有设置pgoff?
在xserver中会计算fboff:
fPtr->fboff = (unsigned long) fPtr->fix.smem_start & ~PAGE_MASK;
fPtr->fbmem_len = (fPtr->fboff+fPtr->fix.smem_len+~PAGE_MASK) &
PAGE_MASK;
修改驱动中fix项的smem_len,示例如下:
@@ -337,6 +403,9 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper,
info->fbops = &virtio_gpufb_ops;
info->pixmap.flags = FB_PIXMAP_SYSTEM;
+ info->fix.smem_len = obj->gem_base.size;
info->screen_base = obj->vmap;
之后能跑过,然后还是进不去主界面,查看日志出现告警:
Sep 29 18:31:55 Linx kernel: [ 22.227166] Call Trace:
Sep 29 18:31:55 Linx kernel: [ 22.227169] [<ffffffff8212b955>] ? dump_stack+0x5c/0x77
Sep 29 18:31:55 Linx kernel: [ 22.227172] [<ffffffff81e77794>] ? __warn+0xc4/0xe0
Sep 29 18:31:55 Linx kernel: [ 22.227174] [<ffffffffc050b1df>] ? ttm_bo_vm_open+0x6f/0x80 [ttm]
Sep 29 18:31:55 Linx kernel: [ 22.227176] [<ffffffff81e7598a>] ? copy_process.part.33+0xd4a/0x1c50
Sep 29 18:31:55 Linx kernel: [ 22.227177] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227179] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227181] [<ffffffff81e03b5c>] ? do_syscall_64+0x5c/0x170
Sep 29 18:31:55 Linx kernel: [ 22.227183] [<ffffffff823fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
Sep 29 18:31:55 Linx kernel: [ 22.227184] ---[ end trace 6e9f62b113d5170a ]---
Sep 29 18:31:55 Linx pulseaudio[1739]: Disabling timer-based scheduling because running in
这个告警的判断如下,这个暂时先放下:
static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
(void)ttm_bo_reference(bo);
}
现在的问题看起来是因为屏幕不会自动刷新:
图1(左上方区域文字未刷新导致无法显示矩形图像):
图2(左上方区域刷新导致矩形图像显示):
有个办法可以验证这个问题:
1)在startx卡死时通过ssh登上去获取fb数据
$ cat /dev/fb0 > screenap
2)关机切换为VGA显卡再启动后执行
$ cat screenap > /dev/fb0
3)这里只给出部分屏幕截图,能看到主界面的,说明startx卡死时xserver是向fb中写入了数据的
看来virtio-gpu缺乏定时刷新的功能,可以添加定时器对其进行定期更新,示例如下:
static void my_timer_func(unsigned long data)
{
virtio_gpu_dirty_update(svgfb, true, 0, 0, xxx, xxx);
add_timer_xxx();
}
添加后屏幕无法自动刷新的问题解决了,矩形框能正常显示,但startx后仍然无法显示,同时获取/dev/fb0中的数据是正常的,这就说明或许xserver启动后显示设备的内存映射改变了,再回到先前的日志告警和代码:
Sep 29 18:31:55 Linx kernel: [ 22.227166] Call Trace:
Sep 29 18:31:55 Linx kernel: [ 22.227169] [<ffffffff8212b955>] ? dump_stack+0x5c/0x77
Sep 29 18:31:55 Linx kernel: [ 22.227172] [<ffffffff81e77794>] ? __warn+0xc4/0xe0
Sep 29 18:31:55 Linx kernel: [ 22.227174] [<ffffffffc050b1df>] ? ttm_bo_vm_open+0x6f/0x80 [ttm]
Sep 29 18:31:55 Linx kernel: [ 22.227176] [<ffffffff81e7598a>] ? copy_process.part.33+0xd4a/0x1c50
Sep 29 18:31:55 Linx kernel: [ 22.227177] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227179] [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [ 22.227181] [<ffffffff81e03b5c>] ? do_syscall_64+0x5c/0x170
Sep 29 18:31:55 Linx kernel: [ 22.227183] [<ffffffff823fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
Sep 29 18:31:55 Linx kernel: [ 22.227184] ---[ end trace 6e9f62b113d5170a ]---
Sep 29 18:31:55 Linx pulseaudio[1739]: Disabling timer-based scheduling because running in
这个告警的判断如下,但是看起来并没有影响,而fb的内存在probe之后应该也是不会改变的,所以得换个角度思考:
static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
(void)ttm_bo_reference(bo);
}
通过各种打印、延时和猜测最后查到该问题是由于KDSETMODE导致的,在startx过程中会切换VT模式为Graphics模式,主要是为了屏蔽光标和控制台打印的影响:
1574 open("/dev/vc/1", O_RDWR|O_NONBLOCK) = -1 ENOENT (No such file or directory)
1574 open("/dev/tty1", O_RDWR|O_NONBLOCK) = 8
1574 ioctl(8, VT_GETSTATE, 0x7ffd0fee9b40) = 0
1574 ioctl(8, VT_ACTIVATE, 0x1) = 0
1574 ioctl(8, VT_WAITACTIVE, 0x1) = 0
1574 ioctl(8, VIDIOC_RESERVED or VT_GETMODE, 0x7ffd0fee9b50) = 0
1574 rt_sigaction(SIGUSR1, {0x5594241ab5f0, [USR1], SA_RESTORER|SA_RESTART, 0x7fe3d4396040}, {SIG_IGN, [], SA_RESTORER, 0x7fe3d4396040}, 8) = 0
1574 ioctl(8, VIDIOC_ENUM_FMT or VT_SETMODE, 0x7ffd0fee9b50) = 0
1574 ioctl(8, KDSETMODE, 0x1) = 0
这里编写了个复现示例代码段如下:
int vconsole_fd;
vconsole_fd = open("/dev/tty1", O_RDWR);
ioctl( vconsole_fd, KDSETMODE, KD_GRAPHICS);
sleep(3);
ioctl( vconsole_fd, KDSETMODE, KD_TEXT);
close(vconsole_fd);
在该sleep期间向/dev/fb0中写入的所有数据均不会在界面上显示。
排查后发现是virtio_gpu_dirty_update的定时任务没有调用virtio_gpu_cmd_resource_flush更新所致,判断如下:
if (in_atomic() || store)
store_for_later = true;
这里的in_atomic检查确保后续流程不会在原子上下文(例如持有spinlock)中执行,因为后面会调用wait_event,而wait_event可能睡眠,在spinlock临界区内是不允许睡眠的。这里举个例子说明这个问题:
[进程A] 关抢占
[进程A] 获得锁
[进程A] 睡眠调度 ...... 尽管已经关闭了抢占,进程A依然可以通过主动调用schedule()、schedule_timeout()等让出CPU,调度其它进程。
[进程B] 关抢占 ...... 进程A已经关闭抢占,所以这里相当于nop操作
[进程B] 获得锁失败 ...... 进程A已经获得了锁,并且还没有释放
[进程B] 反复尝试获得锁 ...... 由于关闭了抢占,已经没人能够终止这个反复尝试的操作了,所以这里出现了死锁
而init_timer注册的定时器回调运行在softirq原子上下文中,in_atomic()恒为真,因此定时器路径永远不会直接flush virtio cmd。解决办法很简单:在virtio_gpu_dirty_update之后改用可以在进程上下文中执行(支持睡眠/抢占)的延迟任务schedule_delayed_work即可。
结果:
后续测试startx后显示均正常。