From 79d54b249c176ba4abb9a580951400246dd974b1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 14 Sep 2012 18:03:59 +0200 Subject: [PATCH 01/22] uprobes: Do not leak UTASK_BP_HIT if find_active_uprobe() fails If handle_swbp()->find_active_uprobe() fails we return with utask->state = UTASK_BP_HIT. Change handle_swbp() to reset utask->state at the start. Note that we do this unconditionally, see the next patch(es). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 912ef48d28ab..2c1ff05af6f5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1468,6 +1468,10 @@ static void handle_swbp(struct pt_regs *regs) bp_vaddr = uprobe_get_swbp_addr(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); + utask = current->utask; + if (utask) + utask->state = UTASK_RUNNING; + if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ @@ -1486,7 +1490,6 @@ static void handle_swbp(struct pt_regs *regs) return; } - utask = current->utask; if (!utask) { utask = add_utask(); /* Cannot allocate; re-execute the instruction. */ From 746a9e6ba24af2ccf03279c99d435a1b88ca5d17 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 14 Sep 2012 18:23:51 +0200 Subject: [PATCH 02/22] uprobes: Do not setup ->active_uprobe/state prematurely handle_swbp() sets utask->active_uprobe before handler_chain(), and UTASK_SSTEP before pre_ssout(). This complicates the code for no reason, arch_ hooks or consumer->handler() should not (and can't) use this info. Change handle_swbp() to initialize them after pre_ssout(), and remove the no longer needed cleanup-utask code. Signed-off-by: Oleg Nesterov cked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2c1ff05af6f5..41f048c91425 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1496,22 +1496,19 @@ static void handle_swbp(struct pt_regs *regs) if (!utask) goto cleanup_ret; } - utask->active_uprobe = uprobe; + handler_chain(uprobe, regs); if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs)) goto cleanup_ret; - utask->state = UTASK_SSTEP; if (!pre_ssout(uprobe, regs, bp_vaddr)) { arch_uprobe_enable_step(&uprobe->arch); + utask->active_uprobe = uprobe; + utask->state = UTASK_SSTEP; return; } cleanup_ret: - if (utask) { - utask->active_uprobe = NULL; - utask->state = UTASK_RUNNING; - } if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) /* From 0578a97098dab967ece4b025fe5eb4984c4c86c0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 14 Sep 2012 18:31:23 +0200 Subject: [PATCH 03/22] uprobes: Fix UPROBE_SKIP_SSTEP checks in handle_swbp() If handle_swbp()->add_utask() fails but UPROBE_SKIP_SSTEP is set, cleanup_ret: path do not restart the insn, this is wrong. Remove this check and add the additional label for can_skip_sstep() = T case. Note also that UPROBE_SKIP_SSTEP can be false positive, we simply can not trust it unless arch_uprobe_skip_sstep() was already called. Also, move another UPROBE_SKIP_SSTEP check before can_skip_sstep() into this helper, this looks more clean and understandable. Note: probably we should rename "skip" to "emulate" and I think that "clear UPROBE_SKIP_SSTEP" should be moved to arch_can_skip. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 41f048c91425..d2392968d4e6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1389,10 +1389,11 @@ bool uprobe_deny_signal(void) */ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) { - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) - return true; - - uprobe->flags &= ~UPROBE_SKIP_SSTEP; + if (uprobe->flags & UPROBE_SKIP_SSTEP) { + if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) + return true; + uprobe->flags &= ~UPROBE_SKIP_SSTEP; + } return false; } @@ -1494,12 +1495,12 @@ static void handle_swbp(struct pt_regs *regs) utask = add_utask(); /* Cannot allocate; re-execute the instruction. */ if (!utask) - goto cleanup_ret; + goto restart; } handler_chain(uprobe, regs); - if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs)) - goto cleanup_ret; + if (can_skip_sstep(uprobe, regs)) + goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) { arch_uprobe_enable_step(&uprobe->arch); @@ -1508,15 +1509,13 @@ static void handle_swbp(struct pt_regs *regs) return; } -cleanup_ret: - if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) - - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. - */ - instruction_pointer_set(regs, bp_vaddr); - +restart: + /* + * cannot singlestep; cannot skip instruction; + * re-execute the instruction. + */ + instruction_pointer_set(regs, bp_vaddr); +out: put_uprobe(uprobe); } From 1b08e907211cdc744f54871736005d9f3e7f182c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 14 Sep 2012 18:52:10 +0200 Subject: [PATCH 04/22] uprobes: Kill UTASK_BP_HIT state Kill UTASK_BP_HIT state, it buys nothing but complicates the code. It is only used in uprobe_notify_resume() to decide who should be called, we can check utask->active_uprobe != NULL instead. And this allows us to simplify handle_swbp(), no need to clear utask->state. Likewise we could kill UTASK_SSTEP, but UTASK_BP_HIT is worse and imho should die. The problem is, it creates the special case when task->utask is NULL, we can't distinguish RUNNING and BP_HIT. With this patch utask == NULL always means RUNNING. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 1 - kernel/events/uprobes.c | 29 +++++++++-------------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e6f0331e3d45..18d839da6517 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -59,7 +59,6 @@ struct uprobe_consumer { #ifdef CONFIG_UPROBES enum uprobe_task_state { UTASK_RUNNING, - UTASK_BP_HIT, UTASK_SSTEP, UTASK_SSTEP_ACK, UTASK_SSTEP_TRAPPED, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d2392968d4e6..d3f5381e7482 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1469,10 +1469,6 @@ static void handle_swbp(struct pt_regs *regs) bp_vaddr = uprobe_get_swbp_addr(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); - utask = current->utask; - if (utask) - utask->state = UTASK_RUNNING; - if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ @@ -1491,6 +1487,7 @@ static void handle_swbp(struct pt_regs *regs) return; } + utask = current->utask; if (!utask) { utask = add_utask(); /* Cannot allocate; re-execute the instruction. */ @@ -1547,13 +1544,12 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) } /* - * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag. (and on - * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and - * allows the thread to return from interrupt. + * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and + * allows the thread to return from interrupt. After that handle_swbp() + * sets utask->active_uprobe. * - * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and - * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from - * interrupt. + * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag + * and allows the thread to return from interrupt. * * While returning to userspace, thread notices the TIF_UPROBE flag and calls * uprobe_notify_resume(). @@ -1563,10 +1559,10 @@ void uprobe_notify_resume(struct pt_regs *regs) struct uprobe_task *utask; utask = current->utask; - if (!utask || utask->state == UTASK_BP_HIT) - handle_swbp(regs); - else + if (utask && utask->active_uprobe) handle_singlestep(utask, regs); + else + handle_swbp(regs); } /* @@ -1575,17 +1571,10 @@ void uprobe_notify_resume(struct pt_regs *regs) */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { - struct uprobe_task *utask; - if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags)) return 0; - utask = current->utask; - if (utask) - utask->state = UTASK_BP_HIT; - set_thread_flag(TIF_UPROBE); - return 1; } From db023ea595015058270be6a62fe60a7b6b5c50d7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 14 Sep 2012 19:05:46 +0200 Subject: [PATCH 05/22] uprobes: Move clear_thread_flag(TIF_UPROBE) to uprobe_notify_resume() Move clear_thread_flag(TIF_UPROBE) from do_notify_resume() to uprobe_notify_resume() for !CONFIG_UPROBES case. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/signal.c | 4 +--- kernel/events/uprobes.c | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376e..0041e5a5293b 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -785,10 +785,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - if (thread_info_flags & _TIF_UPROBE) { - clear_thread_flag(TIF_UPROBE); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); - } /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d3f5381e7482..198d732ab901 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1558,6 +1558,8 @@ void uprobe_notify_resume(struct pt_regs *regs) { struct uprobe_task *utask; + clear_thread_flag(TIF_UPROBE); + utask = current->utask; if (utask && utask->active_uprobe) handle_singlestep(utask, regs); From 75ed82ea53bd0d2d8083261123576250f7ba851e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 16 Sep 2012 17:20:06 +0200 Subject: [PATCH 06/22] uprobes: Change write_opcode() to use FOLL_FORCE write_opcode()->get_user_pages() needs FOLL_FORCE to ensure we can read the page even if the probed task did mprotect(PROT_NONE) after uprobe_register(). Without FOLL_WRITE, FOLL_FORCE doesn't have any side effect but allows to read the !VM_READ memory. Otherwiese the subsequent uprobe_unregister()->set_orig_insn() fails and we leak "int3". If that task does mprotect(PROT_READ | EXEC) and execute the probed insn later it will be killed. Note: in fact this is also needed for _register, see the next patch. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 198d732ab901..80e8c7b697b9 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -221,7 +221,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); + ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); if (ret <= 0) return ret; From 78a320542e6cdb2800cd736b2d136e4261d34f43 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 16 Sep 2012 19:07:41 +0200 Subject: [PATCH 07/22] uprobes: Change valid_vma() to demand VM_MAYEXEC rather than VM_EXEC uprobe_register() or uprobe_mmap() requires VM_READ | VM_EXEC, this is not right. An apllication can do mprotect(PROT_EXEC) later and execute this code. Change valid_vma(is_register => true) to check VM_MAYEXEC instead. No need to check VM_MAYREAD, it is always set. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 80e8c7b697b9..a9de40815391 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -106,8 +106,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) if (!is_register) return true; - if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) - == (VM_READ|VM_EXEC)) + if ((vma->vm_flags & (VM_HUGETLB | VM_WRITE | VM_MAYEXEC | VM_SHARED)) + == VM_MAYEXEC) return true; return false; From e40cfce626a5537994058ee9a940dcfdc0f68ef0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 16 Sep 2012 19:31:39 +0200 Subject: [PATCH 08/22] uprobes: Restrict valid_vma(false) to skip VM_SHARED vmas valid_vma(false) ignores ->vm_flags, this is not actually right. We should never try to write into MAP_SHARED mapping, this can confuse an apllication which actually writes to ->vm_file. With this patch valid_vma(false) ignores VM_WRITE only but checks other (immutable) bits checked by valid_vma(true). This can also speedup uprobe_munmap() and uprobe_unregister(). Note: even after this patch _unregister can confuse the probed application if it does mprotect(PROT_WRITE) after _register and installs "int3", but this is hardly possible to avoid and this doesn't differ from gdb case. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a9de40815391..8d182bdecc2e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -100,17 +100,12 @@ struct uprobe { */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { - if (!vma->vm_file) - return false; + vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; - if (!is_register) - return true; + if (is_register) + flags |= VM_WRITE; - if ((vma->vm_flags & (VM_HUGETLB | VM_WRITE | VM_MAYEXEC | VM_SHARED)) - == VM_MAYEXEC) - return true; - - return false; + return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC; } static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) From e97f65a17deafacc360a4cb75ae944897ecea6d7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 19 Sep 2012 16:36:01 +0200 Subject: [PATCH 09/22] uprobes: Kill set_swbp()->is_swbp_at_addr() A separate patch for better documentation. set_swbp()->is_swbp_at_addr() is not needed for correctness, it is harmless to do the unnecessary __replace_page(old_page, new_page) when these 2 pages are identical. And it can not be counted as optimization. mmap/register races are very unlikely, while in the likely case is_swbp_at_addr() adds the extra get_user_pages() even if the caller is uprobe_mmap(current->mm) and returns false. Note also that the semantics/usage of is_swbp_at_addr() in uprobe.c is confusing. set_swbp() uses it to detect the case when this insn was already modified by uprobes, that is why it should always compare the opcode with UPROBE_SWBP_INSN even if the hardware (like powerpc) has other trap insns. It doesn't matter if this breakpoint was in fact installed by gdb or application itself, we are going to "steal" this breakpoint anyway and execute the original insn from vm_file even if it no longer matches the memory. OTOH, handle_swbp()->find_active_uprobe() uses is_swbp_at_addr() to figure out whether we need to send SIGTRAP or not if we can not find uprobe, so in this case it should return true for all trap variants, not only for UPROBE_SWBP_INSN. This patch removes set_swbp()->is_swbp_at_addr(), the next patches will remove it from set_orig_insn() which is similar to set_swbp() in this respect. So the only caller will be handle_swbp() and we can make its semantics clear. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8d182bdecc2e..a4453d1c8199 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -321,17 +321,6 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - int result; - /* - * See the comment near uprobes_hash(). - */ - result = is_swbp_at_addr(mm, vaddr); - if (result == 1) - return 0; - - if (result) - return result; - return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); } From cceb55aab73d2aea8f4d6f7414d2e1b647a3dacb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Sep 2012 21:10:18 +0200 Subject: [PATCH 10/22] uprobes: Introduce copy_opcode(), kill read_opcode() No functional changes, preparations. 1. Extract the kmap-and-memcpy code from read_opcode() into the new trivial helper, copy_opcode(). The next patch will add another user. 2. read_opcode() becomes really trivial, fold it into its single caller, is_swbp_at_addr(). 3. Remove "auprobe" argument from write_opcode(), it is not used since f403072c6. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 63 +++++++++++++---------------------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a4453d1c8199..b6f0f716a884 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -183,19 +183,25 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn) return *insn == UPROBE_SWBP_INSN; } +static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) +{ + void *kaddr = kmap_atomic(page); + memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE); + kunmap_atomic(kaddr); +} + /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction - * supported by that architecture then we need to modify read_opcode / + * supported by that architecture then we need to modify is_swbp_at_addr and * write_opcode accordingly. This would never be a problem for archs that * have fixed length instructions. */ /* * write_opcode - write the opcode at a given virtual address. - * @auprobe: arch breakpointing information. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. @@ -206,8 +212,8 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn) * For mm @mm, write the opcode at @vaddr. * Return 0 (success) or a negative errno. */ -static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, - unsigned long vaddr, uprobe_opcode_t opcode) +static int write_opcode(struct mm_struct *mm, unsigned long vaddr, + uprobe_opcode_t opcode) { struct page *old_page, *new_page; void *vaddr_old, *vaddr_new; @@ -253,40 +259,9 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; } -/** - * read_opcode - read the opcode at a given virtual address. - * @mm: the probed process address space. - * @vaddr: the virtual address to read the opcode. - * @opcode: location to store the read opcode. - * - * Called with mm->mmap_sem held (for read and with a reference to - * mm. - * - * For mm @mm, read the opcode at @vaddr and store it in @opcode. - * Return 0 (success) or a negative errno. - */ -static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode) -{ - struct page *page; - void *vaddr_new; - int ret; - - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); - if (ret <= 0) - return ret; - - vaddr_new = kmap_atomic(page); - vaddr &= ~PAGE_MASK; - memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); - kunmap_atomic(vaddr_new); - - put_page(page); - - return 0; -} - static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) { + struct page *page; uprobe_opcode_t opcode; int result; @@ -300,14 +275,14 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) goto out; } - result = read_opcode(mm, vaddr, &opcode); - if (result) + result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + if (result < 0) return result; -out: - if (is_swbp_insn(&opcode)) - return 1; - return 0; + copy_opcode(page, vaddr, &opcode); + put_page(page); + out: + return is_swbp_insn(&opcode); } /** @@ -321,7 +296,7 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); + return write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } /** @@ -345,7 +320,7 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v if (result != 1) return result; - return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) From ed6f6a50dc5f183c53e7b3b7fed4794bc50d9aa7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Sep 2012 21:30:44 +0200 Subject: [PATCH 11/22] uprobes: Kill set_orig_insn()->is_swbp_at_addr() Unlike set_swbp(), set_orig_insn()->is_swbp_at_addr() makes sense, although it can't prevent all confusions. But the usage of is_swbp_at_addr() is equally confusing, and it adds the extra get_user_pages() we can avoid. This patch removes set_orig_insn()->is_swbp_at_addr() but changes write_opcode() to do the necessary checks before replace_page(). Perhaps it also makes sense to ensure PAGE_MAPPING_ANON in unregister case. find_active_uprobe() becomes the only user of is_swbp_at_addr(), we can change its semantics. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b6f0f716a884..9248ee76b4bb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -190,6 +190,25 @@ static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t kunmap_atomic(kaddr); } +static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) +{ + uprobe_opcode_t old_opcode; + bool is_swbp; + + copy_opcode(page, vaddr, &old_opcode); + is_swbp = is_swbp_insn(&old_opcode); + + if (is_swbp_insn(new_opcode)) { + if (is_swbp) /* register: already installed? */ + return 0; + } else { + if (!is_swbp) /* unregister: was it changed by us? */ + return -EINVAL; + } + + return 1; +} + /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for @@ -226,6 +245,10 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr, if (ret <= 0) return ret; + ret = verify_opcode(old_page, vaddr, &opcode); + if (ret <= 0) + goto put_old; + ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) @@ -311,15 +334,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - int result; - - result = is_swbp_at_addr(mm, vaddr); - if (!result) - return -EINVAL; - - if (result != 1) - return result; - return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } From ec75fba93ef0c00c91545b5e53841a80cffad0c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 23 Sep 2012 21:55:19 +0200 Subject: [PATCH 12/22] uprobes: Simplify is_swbp_at_addr(), remove stale comments After the previous change is_swbp_at_addr() is always called with current->mm. Remove this check and move it close to its single caller. Also, remove the obsolete comment about is_swbp_at_addr() and uprobe_state.count. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 73 ++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 49 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9248ee76b4bb..6136854da6c6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -282,32 +282,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr, return ret; } -static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) -{ - struct page *page; - uprobe_opcode_t opcode; - int result; - - if (current->mm == mm) { - pagefault_disable(); - result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, - sizeof(opcode)); - pagefault_enable(); - - if (likely(result == 0)) - goto out; - } - - result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); - if (result < 0) - return result; - - copy_opcode(page, vaddr, &opcode); - put_page(page); - out: - return is_swbp_insn(&opcode); -} - /** * set_swbp - store breakpoint at a given address. * @auprobe: arch specific probepoint information. @@ -589,29 +563,6 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); } -/* - * How mm->uprobes_state.count gets updated - * uprobe_mmap() increments the count if - * - it successfully adds a breakpoint. - * - it cannot add a breakpoint, but sees that there is a underlying - * breakpoint (via a is_swbp_at_addr()). - * - * uprobe_munmap() decrements the count if - * - it sees a underlying breakpoint, (via is_swbp_at_addr) - * (Subsequent uprobe_unregister wouldnt find the breakpoint - * unless a uprobe_mmap kicks in, since the old vma would be - * dropped just after uprobe_munmap.) - * - * uprobe_register increments the count if: - * - it successfully adds a breakpoint. - * - * uprobe_unregister decrements the count if: - * - it sees a underlying breakpoint and removes successfully. - * (via is_swbp_at_addr) - * (Subsequent uprobe_munmap wouldnt find the breakpoint - * since there is no underlying breakpoint after the - * breakpoint removal.) - */ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) @@ -1389,6 +1340,30 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) clear_bit(MMF_HAS_UPROBES, &mm->flags); } +static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) +{ + struct page *page; + uprobe_opcode_t opcode; + int result; + + pagefault_disable(); + result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, + sizeof(opcode)); + pagefault_enable(); + + if (likely(result == 0)) + goto out; + + result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + if (result < 0) + return result; + + copy_opcode(page, vaddr, &opcode); + put_page(page); + out: + return is_swbp_insn(&opcode); +} + static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; From b64b9c937a533f0bfbfc9f6ac93d3c3e2f97ab02 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 29 Sep 2012 21:31:08 +0200 Subject: [PATCH 13/22] uprobes/x86: Only rep+nop can be emulated correctly __skip_sstep() correctly detects the "nontrivial" nop insns, but since it doesn't update regs->ip we can not really skip "0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0", the probed application is killed by SIGILL'ed handle_swbp(). Remove these additional checks. If we want to implement this correctly we need to know the full insn length to update ->ip. rep* + nop is fine even without updating ->ip. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 9538f00827a9..aafa5557b396 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -651,31 +651,19 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* * Skip these instructions as per the currently known x86 ISA. - * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } + * rep=0x66*; nop=0x90 */ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; for (i = 0; i < MAX_UINSN_BYTES; i++) { - if ((auprobe->insn[i] == 0x66)) + if (auprobe->insn[i] == 0x66) continue; if (auprobe->insn[i] == 0x90) return true; - if (i == (MAX_UINSN_BYTES - 1)) - break; - - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f)) - return true; - - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19)) - return true; - - if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0)) - return true; - break; } return false; From a5f658b71bc622b731961ea3addcf146ed3c599f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Sep 2012 18:21:09 +0200 Subject: [PATCH 14/22] uprobes: Don't return success if alloc_uprobe() fails If alloc_uprobe() fails uprobe_register() should return ENOMEM, not 0. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6136854da6c6..588a5575d64c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -813,7 +813,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * mutex_lock(uprobes_hash(inode)); uprobe = alloc_uprobe(inode, offset); - if (uprobe && !consumer_add(uprobe, uc)) { + if (!uprobe) { + ret = -ENOMEM; + } else if (!consumer_add(uprobe, uc)) { ret = __uprobe_register(uprobe); if (ret) { uprobe->consumers = NULL; From 076a365b3da99b68c5d58e394714d0611f1fa002 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Sep 2012 18:54:53 +0200 Subject: [PATCH 15/22] uprobes: Do not delete uprobe if uprobe_unregister() fails delete_uprobe() must not be called if register_for_each_vma(false) fails to remove all breakpoints, __uprobe_unregister() is correct. The problem is that register_for_each_vma(false) always returns 0 and thus this logic does not work. 1. Change verify_opcode() to return 0 rather than -EINVAL when unregister detects the !is_swbp insn, we can treat this case as success and currently unregister paths ignore the error code anyway. 2. Change remove_breakpoint() to propagate the error code from write_opcode(). 3. Change register_for_each_vma(is_register => false) to remove as much breakpoints as possible but return non-zero if remove_breakpoint() fails at least once. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 588a5575d64c..a1b466d17c17 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -203,7 +203,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t return 0; } else { if (!is_swbp) /* unregister: was it changed by us? */ - return -EINVAL; + return 0; } return 1; @@ -616,15 +616,15 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, return ret; } -static void +static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { /* can happen if uprobe_register() fails */ if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) - return; + return 0; set_bit(MMF_RECALC_UPROBES, &mm->flags); - set_orig_insn(&uprobe->arch, mm, vaddr); + return set_orig_insn(&uprobe->arch, mm, vaddr); } /* @@ -740,7 +740,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) struct mm_struct *mm = info->mm; struct vm_area_struct *vma; - if (err) + if (err && is_register) goto free; down_write(&mm->mmap_sem); @@ -756,7 +756,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) if (is_register) err = install_breakpoint(uprobe, mm, vma, info->vaddr); else - remove_breakpoint(uprobe, mm, info->vaddr); + err |= remove_breakpoint(uprobe, mm, info->vaddr); unlock: up_write(&mm->mmap_sem); From 142b18ddc81439acda4bc4231b291e99fe67d507 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 29 Sep 2012 21:56:57 +0200 Subject: [PATCH 16/22] uprobes: Fix handle_swbp() vs unregister() + register() race Strictly speaking this race was added by me in 56bb4cf6. However I think that this bug is just another indication that we should move copy_insn/uprobe_analyze_insn code from install_breakpoint() to uprobe_register(), there are a lot of other reasons for that. Until then, add a hack to close the race. A task can hit uprobe U1, but before it calls find_uprobe() this uprobe can be unregistered *AND* another uprobe U2 can be added to uprobes_tree at the same inode/offset. In this case handle_swbp() will use the not-fully-initialized U2, in particular its arch.insn for xol. Add the additional !UPROBE_COPY_INSN check into handle_swbp(), if this flag is not set we simply restart as if the new uprobe was not inserted yet. This is not very nice, we need barriers, but we will remove this hack when we change uprobe_register(). Note: with or without this patch install_breakpoint() can race with itself, yet another reson to kill UPROBE_COPY_INSN altogether. And even the usage of uprobe->flags is not safe. See the next patches. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a1b466d17c17..c718fef28617 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -596,6 +596,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ uprobe->flags |= UPROBE_COPY_INSN; } @@ -1436,6 +1437,14 @@ static void handle_swbp(struct pt_regs *regs) } return; } + /* + * TODO: move copy_insn/etc into _register and remove this hack. + * After we hit the bp, _unregister + _register can install the + * new and not-yet-analyzed uprobe at the same address, restart. + */ + smp_rmb(); /* pairs with wmb() in install_breakpoint() */ + if (unlikely(!(uprobe->flags & UPROBE_COPY_INSN))) + goto restart; utask = current->utask; if (!utask) { From cb9a19fe4aa51afa34786bd383e6614fa0083d58 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Sep 2012 20:11:45 +0200 Subject: [PATCH 17/22] uprobes: Introduce prepare_uprobe() Preparation. Extract the copy_insn/arch_uprobe_analyze_insn code from install_breakpoint() into the new helper, prepare_uprobe(). And move uprobe->flags defines from uprobes.h to uprobes.c, nobody else can use them anyway. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 10 ------- kernel/events/uprobes.c | 60 ++++++++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 18d839da6517..24594571c5a3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,16 +35,6 @@ struct inode; # include #endif -/* flags that denote/change uprobes behaviour */ - -/* Have a copy of original instruction */ -#define UPROBE_COPY_INSN 0x1 - -/* Dont run handlers when first register/ last unregister in progress*/ -#define UPROBE_RUN_HANDLER 0x2 -/* Can skip singlestep */ -#define UPROBE_SKIP_SSTEP 0x4 - struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c718fef28617..4f315fa94c52 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -78,6 +78,13 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; */ static atomic_t uprobe_events = ATOMIC_INIT(0); +/* Have a copy of original instruction */ +#define UPROBE_COPY_INSN 0x1 +/* Dont run handlers when first register/ last unregister in progress*/ +#define UPROBE_RUN_HANDLER 0x2 +/* Can skip singlestep */ +#define UPROBE_SKIP_SSTEP 0x4 + struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; @@ -563,6 +570,37 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); } +static int prepare_uprobe(struct uprobe *uprobe, struct file *file, + struct mm_struct *mm, unsigned long vaddr) +{ + int ret = 0; + + if (uprobe->flags & UPROBE_COPY_INSN) + return ret; + + ret = copy_insn(uprobe, file); + if (ret) + goto out; + + ret = -ENOTSUPP; + if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) + goto out; + + ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); + if (ret) + goto out; + + /* write_opcode() assumes we don't cross page boundary */ + BUG_ON((uprobe->offset & ~PAGE_MASK) + + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + + smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ + uprobe->flags |= UPROBE_COPY_INSN; + + out: + return ret; +} + static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) @@ -580,25 +618,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (!uprobe->consumers) return 0; - if (!(uprobe->flags & UPROBE_COPY_INSN)) { - ret = copy_insn(uprobe, vma->vm_file); - if (ret) - return ret; - - if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) - return -ENOTSUPP; - - ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); - if (ret) - return ret; - - /* write_opcode() assumes we don't cross page boundary */ - BUG_ON((uprobe->offset & ~PAGE_MASK) + - UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - - smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ - uprobe->flags |= UPROBE_COPY_INSN; - } + ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); + if (ret) + return ret; /* * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), From 4710f05fd146d4739e57a8832a3abc5bd3bf0997 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Sep 2012 20:31:41 +0200 Subject: [PATCH 18/22] uprobes: Fix prepare_uprobe() race with itself install_breakpoint() is called under mm->mmap_sem, this protects set_swbp() but not prepare_uprobe(). Two or more different tasks can call install_breakpoint()->prepare_uprobe() at the same time, this leads to numerous problems if UPROBE_COPY_INSN is not set. Just for example, the second copy_insn() can corrupt the already analyzed/fixuped uprobe->arch.insn and race with handle_swbp(). This patch simply adds uprobe->copy_mutex to serialize this code. We could probably reuse ->consumer_rwsem, but this would mean that consumer->handler() can not use mm->mmap_sem, not good. Note: this is another temporary ugly hack until we move this logic into uprobe_register(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4f315fa94c52..7f62b30c4172 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -89,6 +89,7 @@ struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; struct rw_semaphore consumer_rwsem; + struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */ struct list_head pending_list; struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ @@ -444,6 +445,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = igrab(inode); uprobe->offset = offset; init_rwsem(&uprobe->consumer_rwsem); + mutex_init(&uprobe->copy_mutex); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); @@ -578,6 +580,10 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (uprobe->flags & UPROBE_COPY_INSN) return ret; + mutex_lock(&uprobe->copy_mutex); + if (uprobe->flags & UPROBE_COPY_INSN) + goto out; + ret = copy_insn(uprobe, file); if (ret) goto out; @@ -598,6 +604,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, uprobe->flags |= UPROBE_COPY_INSN; out: + mutex_unlock(&uprobe->copy_mutex); + return ret; } From 71434f2fcba5c22d6e0d51878ba8e241a5dea5fb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Sep 2012 21:12:44 +0200 Subject: [PATCH 19/22] uprobes: Fix the racy uprobe->flags manipulation Multiple threads can manipulate uprobe->flags, this is obviously unsafe. For example mmap can set UPROBE_COPY_INSN while register tries to set UPROBE_RUN_HANDLER, the latter can also race with can_skip_sstep() which clears UPROBE_SKIP_SSTEP. Change this code to use bitops. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7f62b30c4172..c92651d619ca 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -79,11 +79,11 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; static atomic_t uprobe_events = ATOMIC_INIT(0); /* Have a copy of original instruction */ -#define UPROBE_COPY_INSN 0x1 +#define UPROBE_COPY_INSN 0 /* Dont run handlers when first register/ last unregister in progress*/ -#define UPROBE_RUN_HANDLER 0x2 +#define UPROBE_RUN_HANDLER 1 /* Can skip singlestep */ -#define UPROBE_SKIP_SSTEP 0x4 +#define UPROBE_SKIP_SSTEP 2 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ @@ -94,7 +94,7 @@ struct uprobe { struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; - int flags; + unsigned long flags; struct arch_uprobe arch; }; @@ -423,7 +423,7 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) spin_unlock(&uprobes_treelock); /* For now assume that the instruction need not be single-stepped */ - uprobe->flags |= UPROBE_SKIP_SSTEP; + __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); return u; } @@ -466,7 +466,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; - if (!(uprobe->flags & UPROBE_RUN_HANDLER)) + if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags)) return; down_read(&uprobe->consumer_rwsem); @@ -577,11 +577,11 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, { int ret = 0; - if (uprobe->flags & UPROBE_COPY_INSN) + if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; mutex_lock(&uprobe->copy_mutex); - if (uprobe->flags & UPROBE_COPY_INSN) + if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; ret = copy_insn(uprobe, file); @@ -601,7 +601,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ - uprobe->flags |= UPROBE_COPY_INSN; + set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: mutex_unlock(&uprobe->copy_mutex); @@ -852,7 +852,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * uprobe->consumers = NULL; __uprobe_unregister(uprobe); } else { - uprobe->flags |= UPROBE_RUN_HANDLER; + set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); } } @@ -885,7 +885,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume if (consumer_del(uprobe, uc)) { if (!uprobe->consumers) { __uprobe_unregister(uprobe); - uprobe->flags &= ~UPROBE_RUN_HANDLER; + clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); } } @@ -1346,10 +1346,10 @@ bool uprobe_deny_signal(void) */ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) { - if (uprobe->flags & UPROBE_SKIP_SSTEP) { + if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) { if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) return true; - uprobe->flags &= ~UPROBE_SKIP_SSTEP; + clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); } return false; } @@ -1473,7 +1473,7 @@ static void handle_swbp(struct pt_regs *regs) * new and not-yet-analyzed uprobe at the same address, restart. */ smp_rmb(); /* pairs with wmb() in install_breakpoint() */ - if (unlikely(!(uprobe->flags & UPROBE_COPY_INSN))) + if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto restart; utask = current->utask; From 8e49f418c9632790bf456634742d34d97120a784 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Wed, 10 Oct 2012 16:40:27 -0700 Subject: [PATCH 20/22] ring-buffer: Check for uninitialized cpu buffer before resizing With a system where, num_present_cpus < num_possible_cpus, even if all CPUs are online, non-present CPUs don't have per_cpu buffers allocated. If per_cpu//buffer_size_kb is modified for such a CPU, it can cause a panic due to NULL dereference in ring_buffer_resize(). To fix this, resize operation is allowed only if the per-cpu buffer has been initialized. Link: http://lkml.kernel.org/r/1349912427-6486-1-git-send-email-vnagarnaik@google.com Cc: stable@vger.kernel.org # 3.5+ Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b32ed0e385a5..b979426d16c6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1567,6 +1567,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, put_online_cpus(); } else { + /* Make sure this CPU has been intitialized */ + if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) + goto out; + cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) From 44009105081b51417f311f4c3be0061870b6b8ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Oct 2012 10:18:35 +0300 Subject: [PATCH 21/22] oprofile, x86: Fix wrapping bug in op_x86_get_ctrl() The "event" variable is a u16 so the shift will always wrap to zero making the line a no-op. Signed-off-by: Dan Carpenter Cc: v2.6.32.. Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 26b8a8514ee5..48768df2471a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; - val |= (event & 0x0F00) << 24; + val |= (u64)(event & 0x0F00) << 24; return val; } From a05123bdd1b9ba961ed262864924a5b3ee81afe8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 21 Aug 2012 17:08:37 +0800 Subject: [PATCH 22/22] perf/x86: Disable uncore on virtualized CPUs Initializing uncore PMU on virtualized CPU may hang the kernel. This is because kvm does not emulate the entire hardware. Thers are lots of uncore related MSRs, making kvm enumerate them all is a non-trival task. So just disable uncore on virtualized CPU. Signed-off-by: Yan, Zheng Tested-by: Pekka Enberg Cc: a.p.zijlstra@chello.nl Cc: eranian@google.com Cc: andi@firstfloor.org Cc: avi@redhat.com Link: http://lkml.kernel.org/r/1345540117-14164-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 99d96a4978b5..5df8d32ba91e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2926,6 +2926,9 @@ static int __init intel_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; + if (cpu_has_hypervisor) + return -ENODEV; + ret = uncore_pci_init(); if (ret) goto fail;