有可能是内核从4.4升级到了4.9的缘故,原来可以工作的 VMware 12 不能启动了;还是老样子,问题出在内核模块 vmmon 和 vmnet 上
参考 https://communities.vmware.com/message/2604831
https://communities.vmware.com/thread/536977?start=0&tstart=0
使用脚本 https://bugfixes.promisedev.com/
但出现问题:
Starting VMware services:
Virtual machine monitor failed
Virtual machine communication interface done
VM communication interface socket family done
Blocking file system done
Virtual ethernet failed
VMware Authentication Daemon done
仔细看编译过程,还是 get_user_pages 有问题。猜想有可能是因为 4.9 新内核的头文件(/usr/src/linux-headers-4.9.0-deepin4-common/include/linux/*.h 等)与 VMware 模块源码(/usr/lib/vmware/modules/source/ 下 vmnet-only 或 vmmon-only 中的 *.c 文件)存在版本不匹配的问题
上面的参考并没有解决问题,但 http://www.jianshu.com/p/df30c0c3889b 一文提出了问题所在,就是内核更新后,同一个函数名,参数个数和形式有变化
在文件 /usr/src/linux-headers-4.9.0-deepin4-common/include/linux/mm.h 中(即新内核中)
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
在 vmnet-only/userif.c 中(即 vmware 12 的模块中)
struct page *page = NULL;
int retval;
down_read(&current->mm->mmap_sem);
retval = get_user_pages(current, current->mm, addr,
1, 1, 0, &page, NULL);
可以发现,即使把 get_user_pages 修改为 get_user_pages_remote,仍然存在参数个数不匹配的问题(get_user_pages_remote 只有7个参数,而这里的调用传入了8个参数)
current 是宏,就是 get_current(),将返回 struct task_struct * 指针类型
仔细比对一下 4.4.0 内核中 mm.h 文件中的 get_user_pages,其形式如下:
long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int foll_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking);
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas);
就是原来多一个 __get_user_pages,但没有 get_user_pages_remote,而 get_user_pages 有8个参数。怎么把8个参数中的 int write 和 int force 与7个参数中的 unsigned int gup_flags对应起来是个问题
花了不少时间,终于发现这些函数的实现在4.9内核的gup.c文件中 (参考linux内核源码 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/mm/gup.c?h=linux-4.9.y )
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
NULL, false,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
my god!其实是调用了 __get_user_pages_locked函数:
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
unsigned int flags)
{
.................
if (pages)
flags |= FOLL_GET; -------------->注意这里喽!
....................
ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
vmas, locked);
....................
ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
pages, NULL, NULL);
...............
}
继续看 __get_user_pages函数的第5个参数:
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
{
.............................
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
if (!(gup_flags & FOLL_FORCE))
gup_flags |= FOLL_NUMA;
do {
struct page *page;
unsigned int foll_flags = gup_flags;
.............................
从这里,似乎可以看到 gup_flags 的相应位,和 FOLL_XXX 掩码定义是有关系的,特别关注 FOLL_FORCE 和 FOLL_WRITE,4.9内核的mm.h中
#define FOLL_WRITE 0x01/* check pte is writable */
#define FOLL_TOUCH 0x02/* mark page accessed */
#define FOLL_GET 0x04/* do get_page on page */
#define FOLL_DUMP 0x08/* give error on hole if it would be zero */
#define FOLL_FORCE 0x10/* get_user_pages read/write w/o permission */
#define FOLL_NOWAIT 0x20/* if a disk transfer is needed, start the IO
* and return without waiting upon it */
再去看老的内核中对应的函数
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages, int write,
int force, struct page **pages, struct vm_area_struct **vmas)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
pages, vmas, NULL, false, FOLL_TOUCH);
}
跟踪 __get_user_pages_locked,并关注第5、6个参数 write 和 force
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
int write, int force,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
unsigned int flags)
{
.....................
if (pages)
flags |= FOLL_GET;
if (write)
flags |= FOLL_WRITE; ---------->注意这里喽!!!
if (force)
flags |= FOLL_FORCE;
在老版内核中,write/force信息是叠加到 flags参数的!所以,应该只需要按位将write/force换算到gup_flags就行了
cd /usr/lib/vmware/modules/source
sudo cp vmmon.tar vmmon.tar.bak
sudo cp vmnet.tar vmnet.tar.bak
sudo tar -xvf vmmon.tar
sudo tar -xvf vmnet.tar
需要修补的代码如下:
vmnet-only/userif.c 代码
retval = get_user_pages(current, current->mm, addr,
1, 1, 0, &page, NULL);
修改为
retval = get_user_pages_remote(current, current->mm, addr,
1, 1, &page, NULL);
vmmon-only/linux/hostif.c代码
retval = get_user_pages(current, current->mm, (unsigned long)uvAddr,
numPages, 0, 0, ppages, NULL);
修改为
retval = get_user_pages_remote(current, current->mm, (unsigned long)uvAddr,
numPages, 0, ppages, NULL);
重新打包
sudo rm vmmon.tar
sudo rm vmnet.tar
sudo tar -cvf vmnet.tar vmnet-only
sudo tar -cvf vmmon.tar vmmon-only
结果还是不行,参考 https://sysadmin.compxtreme.ro/vmware-modules-arch-linux-kernel-4-8-13/
vmmon-only/linux/hostif.c 修改
unsigned int anonPages = global_page_state(NR_ANON_PAGES); --------->这句注释掉就可以了
为
unsigned int anonPages = global_page_state(NR_ANON_MAPPED);
vmnet-only/netif.c 修改
dev->trans_start = jiffies;
为
netif_trans_update(dev);
这次老实了,根据 /tmp/vmware-root/目录下的错误日志log文件,手动修改文件并执行对应编译命令,看看会不会出错,终于,成功编译为 .ko
重复前面的步骤,重新打包为tar文件
sudo tar -cvf vmnet.tar vmnet-only
sudo tar -cvf vmmon.tar vmmon-only
yeah!!!!!!!!!!!!