[PATCH-tip v4] sched: Fix NULL user_cpus_ptr check in dup_user_cpus_ptr() - Waiman Long
https://android-review.googlesource.com/c/kernel/common/+/2266724
https://android-review.googlesource.com/c/kernel/common/+/2266744
https://android-review.googlesource.com/c/kernel/common/+/2266745
https://android-review.googlesource.com/c/kernel/common/+/2266804
https://android-review.googlesource.com/c/kernel/common/+/2266784
https://android-review.googlesource.com/c/kernel/common/+/2267468
https://android-review.googlesource.com/c/kernel/common/+/2267664
https://android-review.googlesource.com/c/kernel/common/+/2319170
最后一个patch 分析由来:
It looks like we have found the root cause of the SLUB double-free issue, and can also explain why Qualcomm’s patch works while Google’s patch fails. Please check whether this analysis is correct.
The following analysis is based on a new dump from the customer side, captured after running a large number of 32-bit apps to increase the reproduction rate:
1. From the call stack of task pid 27961 we can see that it calls clone (fork) to create a new task, pid 28051, then tries to wake up task pid 28051 and calls do_set_cpus_allowed on it, which takes its user_mask 0xFFFFFF884FBF9200 and tries to free it. However, 0xFFFFFF884FBF9200 had already been freed just before. The trace shows that this address was freed on core 6, where task migration/6-61 called do_set_cpus_allowed on task pid 27961, whose user_cpus_ptr (user_mask) was 0xffffff884fbf9200.
2. So the root cause is as follows. Task pid 27961 forks (clones) task pid 28051 and calls dup_task_struct -> arch_dup_task_struct(tsk, orig), which copies the data from task pid 27961 to task pid 28051, including p->user_cpus_ptr. So task 27961’s user_cpus_ptr is the same as task 28051’s user_cpus_ptr: both are 0xffffff884fbf9200. Then, on core 6, task migration/6-61 calls do_set_cpus_allowed, which clears task pid 27961’s user_cpus_ptr to NULL and frees the address 0xffffff884fbf9200. At this point 27961’s user_cpus_ptr == NULL and 0xffffff884fbf9200 has been freed. Then, on core 0, rso-inner-27961 continues in dup_task_struct -> dup_user_cpus_ptr(tsk, orig, node), which finds that rso-inner-27961’s user_cpus_ptr == NULL and returns early, so task pid 28051’s user_cpus_ptr is not changed and still holds the freed SLUB object 0xffffff884fbf9200. Finally, when task 27961 tries to wake up task pid 28051, task pid 28051’s user_cpus_ptr 0xffffff884fbf9200 is freed a second time.
Dmesg log: | task pid 27961 call stack: | Trace log: | Core0: (1) Line 936 copies all data from task pid 27961 to task pid 28051. Task pid 27961’s user_cpus_ptr = 28051’s user_cpus_ptr = 0xffffff884fbf9200 ------------------------------------------------------------------> (3) Line 979 finds task pid 27961’s user_cpus_ptr == NULL, so it does not change task pid 28051’s user_cpus_ptr, which therefore still holds a freed SLUB address. --------------------------------------------------------------------> | Core6: (2) Sets task pid 27961’s user_cpus_ptr = NULL and frees SLUB object 0xffffff884fbf9200 <<------------------------------------------ (4) Task pid 27961 sets task pid 28051’s cpus_allowed after it has been created. It then tries to free 0xffffff884fbf9200, which is a double free. |
[56115.188875][T27961] gdsc_regulator(E) debug_regulator(E) proxy_consumer(E) clk_rpmh(E) qcom_rpmh(E) cmd_db(E) qcom_ipc_logging(E) qcom_cpu_vendor_hooks(E) gh_virt_wdt(E) qcom_wdt_core(E) qcom_scm(E) minidump(E) smem(E) qcom_hwspinlock(E) gh_rm_drv(E) gh_dbl(E) gh_msgq(E) gh_arm_drv(E) [56115.188925][T27961] CPU: 0 PID: 27961 Comm: rso-inner Tainted: G S WC OE 5.15.41 #1 [56115.188932][T27961] Hardware name: Qualcomm Technologies, Inc. 8550 socrates (DT) [56115.188937][T27961] pstate: 624000c5 (nZCv daIF +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [56115.188944][T27961] pc : __slab_free+0x230/0x28c [56115.188963][T27961] lr : kfree+0x220/0x2cc [56115.188970][T27961] sp : ffffffc054e9baf0 [56115.188973][T27961] x29: ffffffc054e9baf0 x28: ffffff887253d400 x27: 0000000000000000 [56115.188983][T27961] x26: ffffff884fbf9200 x25: 0000000000000000 x24: ffffff884fbf9200 [56115.188992][T27961] x23: 0000000000000001 x22: ffffff884fbf9200 x21: 0000000000000000 [56115.189001][T27961] x20: ffffff80011d2300 x19: fffffffe213efe40 x18: ffffffc030b97060 [56115.189010][T27961] x17: 0000000000000000 x16: 00000000000000e4 x15: ffffffda69085b44 [56115.189018][T27961] x14: ffffff89e9895900 x13: ffffff89e9895900 x12: 0000000017417185 [56115.189027][T27961] x11: ffffff887253d400 x10: fffffffe204598c0 x9 : 00001fffffffbae6 [56115.189035][T27961] x8 : ffffff8a7889f000 x7 : 6f6c6c615f737570 x6 : 635f7465735f6f64 [56115.189044][T27961] x5 : ffffffda689dec40 x4 : 0000000000000001 x3 : ffffff884fbf9200 [56115.189052][T27961] x2 : ffffff884fbf9200 x1 : fffffffe213efe40 x0 : ffffff80011d2300 [56115.189060][T27961] Call trace: [56115.189063][T27961] __slab_free+0x230/0x28c [56115.189068][T27961] kfree+0x220/0x2cc [56115.189072][T27961] do_set_cpus_allowed+0x74/0xa4 [56115.189083][T27961] select_fallback_rq+0x12c/0x200 [56115.189088][T27961] wake_up_new_task+0x26c/0x304 [56115.189093][T27961] kernel_clone+0x2c0/0x470 [56115.189100][T27961] __arm64_sys_clone+0x5c/0x8c [56115.189104][T27961] 
invoke_syscall+0x60/0x150 [56115.189113][T27961] el0_svc_common.llvm.13030543509303927816+0x98/0x114 [56115.189118][T27961] do_el0_svc_compat+0x20/0x30 [56115.189122][T27961] el0_svc_compat+0x28/0x90 [56115.189132][T27961] el0t_32_sync_handler+0x7c/0xbc [56115.189137][T27961] el0t_32_sync+0x1b8/0x1bc [56115.189149][T27961] Code: aa1403e0 aa1303e1 940002bf 17ffffcc (d4210000) | -013 |set_freepointer(inline) | s = 0xFFFFFF80011D2300 | object = 0xFFFFFF884FBF9200 | fp = 0xFFFFFF884FBF9200 -013 |__slab_free(s = 0xFFFFFF80011D2300, page = 0xFFFFFFFE213EFE40, head = 0xFFFFFF884FBF9200, tail = 0xF | s = 0xFFFFFF80011D2300 | page = 0xFFFFFFFE213EFE40 | head = 0xFFFFFF884FBF9200 | tail = 0xFFFFFF884FBF9200 | cnt = 1 | flags = 0 | n = 0x0 | prior = 0xFFFFFF884FBF9200 -014 |slab_free(inline) -014 |kfree(x = ?) -015 |do_set_cpus_allowed(p = 0xFFFFFF8040DCB800, new_mask = ?) | p = 0xFFFFFF8040DCB800 | new_mask = ? | trace_printk_fmt = 0xFFFFFFDA6A7AB403 | trace_printk_fmt = 0xFFFFFFDA6A7AB403 | ac = (new_mask = 0xFFFFFFDA6B5908F8, user_mask_=_0xFFFFFF884FBF9200, flags = 8) -016 |task_cpu_possible_mask(inline) -016 |select_fallback_rq(cpu = 0, p = 0xFFFFFF8040DCB800) | cpu = 0 | p = 0xFFFFFF8040DCB800 | state = cpuset -017 |select_task_rq(inline) | p = 0xFFFFFF8040DCB800 | cpu = 0 | wake_flags = 4 -017 |wake_up_new_task(p = 0xFFFFFF8040DCB800) | p = 0xFFFFFF8040DCB800 | rf = (flags = 0, cookie = (val = 0), clock_update_flags = 0) | rq = 0x0 -018 |task_unlock(inline) -018 |kernel_clone(args = ?) 
| vfork = (done = 0, wait = (lock = (raw_lock = (val = (counter = 0), locked = 0, pending = 0, locke | nr = 28051 | trace = 0 | p = 0xFFFFFF8040DCB800 | pid = 0xFFFFFF882C4B8200 | clone_flags = 4001536 -019 |__do_sys_clone(inline) | args = (flags = 4001536, pidfd = 0xBAFE91C8, child_tid = 0xBAFE91C8, parent_tid = 0xBAFE91C8, exit -019 |__se_sys_clone(inline) | Line 12470308: migration/6-61 [006] 56114.972937: bprint: do_set_cpus_allowed: do_set_cpus_allowed: p->comm:rso-inner pid:27961, maskp:0 ac.user_mask:ffffff884fbf9200 Line 12470338: rso-inner-27961 [000] 56114.973966: bprint: do_set_cpus_allowed: do_set_cpus_allowed: p->comm:rso-inner pid:28051, maskp:0 ac.user_mask:ffffff884fbf9200 | 914 static struct task_struct *dup_task_struct(struct task_struct *orig, int node) 915 { 916 struct task_struct *tsk; 917 unsigned long *stack; 918 struct vm_struct *stack_vm_area __maybe_unused; 919 int err; 920 921 if (node == NUMA_NO_NODE) 922 node = tsk_fork_get_node(orig); 923 tsk = alloc_task_struct_node(node); 924 if (!tsk) 925 return NULL; 926 927 stack = alloc_thread_stack_node(tsk, node); 928 if (!stack) 929 goto free_tsk; 930 931 if (memcg_charge_kernel_stack(tsk)) 932 goto free_stack; 933 934 stack_vm_area = task_stack_vm_area(tsk); 935 936 err = arch_dup_task_struct(tsk, orig); ---step 1 937 938 /* 939 * arch_dup_task_struct() clobbers the stack-related fields. Make 940 * sure they're properly initialized before using any stack-related 941 * functions again. 942 */ 943 tsk->stack = stack; 944 #ifdef CONFIG_VMAP_STACK 945 tsk->stack_vm_area = stack_vm_area; 946 #endif 947 #ifdef CONFIG_THREAD_INFO_IN_TASK 948 refcount_set(&tsk->stack_refcount, 1); 949 #endif 950 951 if (err) 952 goto free_stack; 953 954 err = scs_prepare(tsk, node); 955 if (err) 956 goto free_stack; 957 958 #ifdef CONFIG_SECCOMP 959 /* 960 * We must handle setting up seccomp filters once we're under 961 * the sighand lock in case orig has changed between now and 962 * then. 
Until then, filter must be NULL to avoid messing up 963 * the usage counts on the error path calling free_task. 964 */ 965 tsk->seccomp.filter = NULL; 966 #endif 967 968 setup_thread_stack(tsk, orig); 969 clear_user_return_notifier(tsk); 970 clear_tsk_need_resched(tsk); 971 set_task_stack_end_magic(tsk); 972 clear_syscall_work_syscall_user_dispatch(tsk); 973 974 #ifdef CONFIG_STACKPROTECTOR 975 tsk->stack_canary = get_random_canary(); 976 #endif 977 if (orig->cpus_ptr == &orig->cpus_mask) 978 tsk->cpus_ptr = &tsk->cpus_mask; 979 dup_user_cpus_ptr(tsk, orig, node); 980 int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { unsigned long flags; if (!src->user_cpus_ptr) return 0; dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); if (!dst->user_cpus_ptr) return -ENOMEM; /* Use pi_lock to protect content of user_cpus_ptr */ raw_spin_lock_irqsave(&src->pi_lock, flags); cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); raw_spin_unlock_irqrestore(&src->pi_lock, flags); return 0; } |