From dcc17d1baef3721d1574e5b2f4f2d4607514bcff Mon Sep 17 00:00:00 2001 From: Peter Keilty Date: Mon, 31 Oct 2005 16:44:47 -0500 Subject: [PATCH 1/9] [IA64] Use bitmaps for efficient context allocation/free Corrects the very inefficent method of finding free context_ids in get_mmu_context(). Instead of walking the task_list of all processes, 2 bitmaps are used to efficently store and lookup state, inuse and needs flushing. The entire rid address space is now used before calling wrap_mmu_context and global tlb flushing. Special thanks to Ken and Rohit for their review and modifications in using a bit flushmap. Signed-off-by: Peter Keilty Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 1 + arch/ia64/mm/tlb.c | 56 ++++++++++++++++------------------ include/asm-ia64/mmu_context.h | 19 +++++++++--- include/asm-ia64/tlbflush.h | 1 + 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fc56ca2da358..c9388a92cf4a 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -454,6 +454,7 @@ setup_arch (char **cmdline_p) #endif cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ #ifdef CONFIG_ACPI acpi_boot_init(); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c79a9b96d02b..39628fca274c 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -8,6 +8,8 @@ * Modified RID allocation for SMP * Goutham Rao * IPI based ptc implementation and A-step IPI implementation. + * Rohit Seth + * Ken Chen */ #include #include @@ -16,12 +18,14 @@ #include #include #include +#include #include #include #include #include #include +#include static struct { unsigned long mask; /* mask of supported purge page-sizes */ @@ -31,49 +35,43 @@ static struct { struct ia64_ctx ia64_ctx = { .lock = SPIN_LOCK_UNLOCKED, .next = 1, - .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ .max_ctx = ~0U }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); +/* + * Initializes the ia64_ctx.bitmap array based on max_ctx+1. + * Called after cpu_init() has setup ia64_ctx.max_ctx based on + * maximum RID that is supported by boot CPU. + */ +void __init +mmu_context_init (void) +{ + ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); + ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); +} + /* * Acquire the ia64_ctx.lock before calling this function! */ void wrap_mmu_context (struct mm_struct *mm) { - unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; - struct task_struct *tsk; int i; + unsigned long flush_bit; - if (ia64_ctx.next > max_ctx) - ia64_ctx.next = 300; /* skip daemons */ - ia64_ctx.limit = max_ctx + 1; - - /* - * Scan all the task's mm->context and set proper safe range - */ - - read_lock(&tasklist_lock); - repeat: - for_each_process(tsk) { - if (!tsk->mm) - continue; - tsk_context = tsk->mm->context; - if (tsk_context == ia64_ctx.next) { - if (++ia64_ctx.next >= ia64_ctx.limit) { - /* empty range: reset the range limit and start over */ - if (ia64_ctx.next > max_ctx) - ia64_ctx.next = 300; - ia64_ctx.limit = max_ctx + 1; - goto repeat; - } - } - if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) - ia64_ctx.limit = tsk_context; + for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { + flush_bit = xchg(&ia64_ctx.flushmap[i], 0); + ia64_ctx.bitmap[i] ^= flush_bit; } - read_unlock(&tasklist_lock); + + /* use offset at 300 to skip daemons */ + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, 300); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ { int cpu = get_cpu(); /* prevent preemption/migration */ diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 8d6e72f7b08e..8d9b30b5f7d4 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -32,13 +32,17 @@ struct ia64_ctx { spinlock_t lock; unsigned int next; /* next context number to use */ - unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ - unsigned int max_ctx; /* max. context value supported by all CPUs */ + unsigned int limit; /* available free range */ + unsigned int max_ctx; /* max. context value supported by all CPUs */ + /* call wrap_mmu_context when next >= max */ + unsigned long *bitmap; /* bitmap size is max_ctx+1 */ + unsigned long *flushmap;/* pending rid to be flushed */ }; extern struct ia64_ctx ia64_ctx; DECLARE_PER_CPU(u8, ia64_need_tlb_flush); +extern void mmu_context_init (void); extern void wrap_mmu_context (struct mm_struct *mm); static inline void @@ -83,9 +87,16 @@ get_mmu_context (struct mm_struct *mm) context = mm->context; if (context == 0) { cpus_clear(mm->cpu_vm_mask); - if (ia64_ctx.next >= ia64_ctx.limit) - wrap_mmu_context(mm); + if (ia64_ctx.next >= ia64_ctx.limit) { + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + if (ia64_ctx.next >= ia64_ctx.max_ctx) + wrap_mmu_context(mm); + } mm->context = context = ia64_ctx.next++; + __set_bit(context, ia64_ctx.bitmap); } } spin_unlock_irqrestore(&ia64_ctx.lock, flags); diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h index b65c62702724..a35b323bae4c 100644 --- a/include/asm-ia64/tlbflush.h +++ b/include/asm-ia64/tlbflush.h @@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm) if (!mm) return; + set_bit(mm->context, ia64_ctx.flushmap); mm->context = 0; if (atomic_read(&mm->mm_users) == 0) From 58cd90829918dabbd81a453de676d41fb7b628ad Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sat, 29 Oct 2005 18:47:04 -0700 Subject: [PATCH 2/9] [IA64] make mmu_context.h and tlb.c 80-column friendly wrap_mmu_context(), delayed_tlb_flush(), get_mmu_context() all have an extra { } block which cause one extra indentation. get_mmu_context() is particularly bad with 5 indentations to the most inner "if". It finally gets on my nerve that I can't keep the code within 80 columns. Remove the extra { } block and while I'm at it, reformat all the comments to 80-column friendly. No functional change at all with this patch. Signed-off-by: Ken Chen Signed-off-by: Tony Luck --- arch/ia64/mm/tlb.c | 33 +++++++------- include/asm-ia64/mmu_context.h | 80 +++++++++++++++++----------------- 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 39628fca274c..41105d454423 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -29,7 +29,7 @@ static struct { unsigned long mask; /* mask of supported purge page-sizes */ - unsigned long max_bits; /* log2() of largest supported purge page-size */ + unsigned long max_bits; /* log2 of largest supported purge page-size */ } purge; struct ia64_ctx ia64_ctx = { @@ -58,7 +58,7 @@ mmu_context_init (void) void wrap_mmu_context (struct mm_struct *mm) { - int i; + int i, cpu; unsigned long flush_bit; for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { @@ -72,20 +72,21 @@ wrap_mmu_context (struct mm_struct *mm) ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, ia64_ctx.max_ctx, ia64_ctx.next); - /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ - { - int cpu = get_cpu(); /* prevent preemption/migration */ - for_each_online_cpu(i) { - if (i != cpu) - per_cpu(ia64_need_tlb_flush, i) = 1; - } - put_cpu(); - } + /* + * can't call flush_tlb_all() here because of race condition + * with O(1) scheduler [EF] + */ + cpu = get_cpu(); /* prevent preemption/migration */ + for_each_online_cpu(i) + if (i != cpu) + per_cpu(ia64_need_tlb_flush, i) = 1; + put_cpu(); local_flush_tlb_all(); } void -ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) +ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) { static DEFINE_SPINLOCK(ptcg_lock); @@ -133,7 +134,8 @@ local_flush_tlb_all (void) } void -flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long size = end - start; @@ -147,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long #endif nbits = ia64_fls(size + 0xfff); - while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) + while (unlikely (((1UL << nbits) & purge.mask) == 0) && + (nbits < purge.max_bits)) ++nbits; if (nbits > purge.max_bits) nbits = purge.max_bits; @@ -189,5 +192,5 @@ ia64_tlb_init (void) local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; - local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ } diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 8d9b30b5f7d4..b5c65081a3aa 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -7,12 +7,13 @@ */ /* - * Routines to manage the allocation of task context numbers. Task context numbers are - * used to reduce or eliminate the need to perform TLB flushes due to context switches. - * Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not - * consider the region number when performing a TLB lookup, we need to assign a unique - * region id to each region in a process. We use the least significant three bits in a - * region id for this purpose. + * Routines to manage the allocation of task context numbers. Task context + * numbers are used to reduce or eliminate the need to perform TLB flushes + * due to context switches. Context numbers are implemented using ia-64 + * region ids. Since the IA-64 TLB does not consider the region number when + * performing a TLB lookup, we need to assign a unique region id to each + * region in a process. We use the least significant three bits in aregion + * id for this purpose. */ #define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ @@ -51,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) } /* - * When the context counter wraps around all TLBs need to be flushed because an old - * context number might have been reused. This is signalled by the ia64_need_tlb_flush - * per-CPU variable, which is checked in the routine below. Called by activate_mm(). - * + * When the context counter wraps around all TLBs need to be flushed because + * an old context number might have been reused. This is signalled by the + * ia64_need_tlb_flush per-CPU variable, which is checked in the routine + * below. Called by activate_mm(). */ static inline void delayed_tlb_flush (void) @@ -64,11 +65,9 @@ delayed_tlb_flush (void) if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { spin_lock_irqsave(&ia64_ctx.lock, flags); - { - if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { - local_flush_tlb_all(); - __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; - } + if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { + local_flush_tlb_all(); + __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; } spin_unlock_irqrestore(&ia64_ctx.lock, flags); } @@ -80,27 +79,27 @@ get_mmu_context (struct mm_struct *mm) unsigned long flags; nv_mm_context_t context = mm->context; - if (unlikely(!context)) { - spin_lock_irqsave(&ia64_ctx.lock, flags); - { - /* re-check, now that we've got the lock: */ - context = mm->context; - if (context == 0) { - cpus_clear(mm->cpu_vm_mask); - if (ia64_ctx.next >= ia64_ctx.limit) { - ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - if (ia64_ctx.next >= ia64_ctx.max_ctx) - wrap_mmu_context(mm); - } - mm->context = context = ia64_ctx.next++; - __set_bit(context, ia64_ctx.bitmap); - } + if (likely(context)) + goto out; + + spin_lock_irqsave(&ia64_ctx.lock, flags); + /* re-check, now that we've got the lock: */ + context = mm->context; + if (context == 0) { + cpus_clear(mm->cpu_vm_mask); + if (ia64_ctx.next >= ia64_ctx.limit) { + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + if (ia64_ctx.next >= ia64_ctx.max_ctx) + wrap_mmu_context(mm); } - spin_unlock_irqrestore(&ia64_ctx.lock, flags); + mm->context = context = ia64_ctx.next++; + __set_bit(context, ia64_ctx.bitmap); } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); +out: /* * Ensure we're not starting to use "context" before any old * uses of it are gone from our TLB. @@ -111,8 +110,8 @@ get_mmu_context (struct mm_struct *mm) } /* - * Initialize context number to some sane value. MM is guaranteed to be a brand-new - * address-space, so no TLB flushing is needed, ever. + * Initialize context number to some sane value. MM is guaranteed to be a + * brand-new address-space, so no TLB flushing is needed, ever. */ static inline int init_new_context (struct task_struct *p, struct mm_struct *mm) @@ -173,7 +172,10 @@ activate_context (struct mm_struct *mm) if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) cpu_set(smp_processor_id(), mm->cpu_vm_mask); reload_context(context); - /* in the unlikely event of a TLB-flush by another thread, redo the load: */ + /* + * in the unlikely event of a TLB-flush by another thread, + * redo the load. + */ } while (unlikely(context != mm->context)); } @@ -186,8 +188,8 @@ static inline void activate_mm (struct mm_struct *prev, struct mm_struct *next) { /* - * We may get interrupts here, but that's OK because interrupt handlers cannot - * touch user-space. + * We may get interrupts here, but that's OK because interrupt + * handlers cannot touch user-space. */ ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); activate_context(next); From 9138d581b0ef855c0314c41c14852a7231b9941c Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Mon, 7 Nov 2005 11:27:13 -0800 Subject: [PATCH 3/9] [IA64] Extend notify_die() hooks for IA64 notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple notification points for these events because they can take many seconds to run which has nasty effects on the behaviour of the rest of the system. DIE_SS replaced by a generic DIE_FAULT which checks the vector number, to allow interception of faults other than SS. DIE_MACHINE_{HALT,RESTART} added to allow last minute close down processing, especially when the halt/restart routines are called from error handlers. DIE_OOPS added. The check for kprobe's break numbers has been moved from traps.c to kprobes.c, allowing DIE_BREAK to be used for any additional break numbers, i.e. it is no longer kprobes specific. Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE. Both of these disable the system for long periods which impact on watchdogs and heartbeat systems in general. More patches to come that use these events to reset watchdogs and heartbeats. unregister_die_notifier() added and both routines exported. Requested by Dean Nelson. Lock removed from {un,}register_die_notifier. notifier_chain_register() already takes a lock. Also the generic notifier chain locking is being reworked to distinguish between callbacks that can block and those that cannot, the lock in {un,}register_die_notifier would interfere with that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 Leading white space removed from arch/ia64/kernel/kprobes.c. Typo in mca.c in original version of this patch found & fixed by Dean Nelson. Signed-off-by: Keith Owens Acked-by: Dean Nelson Acked-by: Anil Keshavamurthy Signed-off-by: Tony Luck --- arch/ia64/kernel/kprobes.c | 22 ++++--- arch/ia64/kernel/mca.c | 120 +++++++++++++++++++++++++++++-------- arch/ia64/kernel/process.c | 6 ++ arch/ia64/kernel/traps.c | 44 +++++++------- include/asm-ia64/kdebug.h | 30 +++++++++- 5 files changed, 162 insertions(+), 60 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 96736a119c91..801eeaeaf3de 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -347,7 +347,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ((struct fnptr *)kretprobe_trampoline)->ip; spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + head = kretprobe_inst_table_head(current); /* * It is possible to have multiple instances associated with a given @@ -363,9 +363,9 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * kretprobe_trampoline */ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) + if (ri->task != current) /* another task is sharing our hash bucket */ - continue; + continue; if (ri->rp && ri->rp->handler) ri->rp->handler(ri, regs); @@ -394,7 +394,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * kprobe_handler() that we don't want the post_handler * to run (and have re-enabled preemption) */ - return 1; + return 1; } /* Called with kretprobe_lock held */ @@ -739,12 +739,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, switch(val) { case DIE_BREAK: - if (pre_kprobes_handler(args)) - ret = NOTIFY_STOP; + /* err is break number from ia64_bad_break() */ + if (args->err == 0x80200 || args->err == 0x80300) + if (pre_kprobes_handler(args)) + ret = NOTIFY_STOP; break; - case DIE_SS: - if (post_kprobes_handler(args->regs)) - ret = NOTIFY_STOP; + case DIE_FAULT: + /* err is vector number from ia64_fault() */ + if (args->err == 36) + if (post_kprobes_handler(args->regs)) + ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 52c47da17246..355af15287c7 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -51,6 +51,9 @@ * * 2005-08-12 Keith Owens * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. */ #include #include @@ -58,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -69,6 +71,7 @@ #include #include +#include #include #include #include @@ -132,6 +135,14 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init; + +static void inline +ia64_mca_spin(const char *func) +{ + printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + while (1) + cpu_relax(); +} /* * IA64_MCA log support */ @@ -526,13 +537,16 @@ ia64_mca_wakeup_all(void) * Outputs : None */ static irqreturn_t -ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) { unsigned long flags; int cpu = smp_processor_id(); /* Mask all interrupts */ local_irq_save(flags); + if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; /* Register with the SAL monarch that the slave has @@ -540,10 +554,18 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) */ ia64_sal_mc_rendez(); + if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ + if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + /* Enable all interrupts */ local_irq_restore(flags); return IRQ_HANDLED; @@ -933,6 +955,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; + if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); ia64_wait_for_slaves(cpu); /* Wakeup all the processors which are spinning in the rendezvous loop. @@ -942,6 +967,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, * spinning in SAL does not work. */ ia64_mca_wakeup_all(); + if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); @@ -960,6 +988,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; } + if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); set_curr_task(cpu, previous_current); monarch_cpu = -1; @@ -1188,6 +1219,37 @@ ia64_mca_cpe_poll (unsigned long dummy) #endif /* CONFIG_ACPI */ +static int +default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) +{ + int c; + struct task_struct *g, *t; + if (val != DIE_INIT_MONARCH_PROCESS) + return NOTIFY_DONE; + printk(KERN_ERR "Processes interrupted by INIT -"); + for_each_online_cpu(c) { + struct ia64_sal_os_state *s; + t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); + s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); + g = s->prev_task; + if (g) { + if (g->pid) + printk(" %d", g->pid); + else + printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); + } + } + printk("\n\n"); + if (read_trylock(&tasklist_lock)) { + do_each_thread (g, t) { + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL); + } while_each_thread (g, t); + read_unlock(&tasklist_lock); + } + return NOTIFY_DONE; +} + /* * C portion of the OS INIT handler * @@ -1212,8 +1274,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, static atomic_t slaves; static atomic_t monarchs; task_t *previous_current; - int cpu = smp_processor_id(), c; - struct task_struct *g, *t; + int cpu = smp_processor_id(); oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ console_loglevel = 15; /* make sure printks make it to console */ @@ -1253,8 +1314,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; while (monarch_cpu == -1) cpu_relax(); /* spin until monarch enters */ + if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ + if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); printk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -1263,6 +1333,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, } monarch_cpu = cpu; + if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); /* * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be @@ -1273,27 +1346,16 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, printk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu); - printk(KERN_ERR "Processes interrupted by INIT -"); - for_each_online_cpu(c) { - struct ia64_sal_os_state *s; - t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); - s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); - g = s->prev_task; - if (g) { - if (g->pid) - printk(" %d", g->pid); - else - printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); - } - } - printk("\n\n"); - if (read_trylock(&tasklist_lock)) { - do_each_thread (g, t) { - printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); - show_stack(t, NULL); - } while_each_thread (g, t); - read_unlock(&tasklist_lock); - } + /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through + * to default_monarch_init_process() above and just print all the + * tasks. + */ + if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); @@ -1462,6 +1524,10 @@ ia64_mca_init(void) s64 rc; struct ia64_sal_retval isrv; u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + static struct notifier_block default_init_monarch_nb = { + .notifier_call = default_monarch_init_process, + .priority = 0/* we need to notified last */ + }; IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); @@ -1555,6 +1621,10 @@ ia64_mca_init(void) "(status %ld)\n", rc); return; } + if (register_die_notifier(&default_init_monarch_nb)) { + printk(KERN_ERR "Failed to register default monarch INIT process\n"); + return; + } IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 051e050359e4..c78355cb8902 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -4,6 +4,9 @@ * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * 04/11/17 Ashok Raj Added CPU Hotplug Support + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. */ #define __KERNEL_SYSCALLS__ /* see */ #include @@ -34,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -804,12 +808,14 @@ cpu_halt (void) void machine_restart (char *restart_cmd) { + (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); } void machine_halt (void) { + (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); cpu_halt(); } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index f970359e7edf..fba5fdd1f968 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -30,17 +30,20 @@ fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); struct notifier_block *ia64die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); -int register_die_notifier(struct notifier_block *nb) +int +register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&ia64die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + return notifier_chain_register(&ia64die_chain, nb); } +EXPORT_SYMBOL_GPL(register_die_notifier); + +int +unregister_die_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ia64die_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_die_notifier); void __init trap_init (void) @@ -105,6 +108,7 @@ die (const char *str, struct pt_regs *regs, long err) if (++die.lock_owner_depth < 3) { printk("%s[%d]: %s %ld [%d]\n", current->comm, current->pid, str, err, ++die_counter); + (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_regs(regs); } else printk(KERN_ERR "Recursive die() failure, output suppressed\n"); @@ -155,9 +159,8 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) { + == NOTIFY_STOP) return; - } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -210,15 +213,6 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; - case 0x80200: - case 0x80300: - if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) { - return; - } - sig = SIGTRAP; code = TRAP_BRKPT; - break; - default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -226,6 +220,9 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) if (break_num < 0x80000) { sig = SIGILL; code = __ILL_BREAK; } else { + if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; sig = SIGTRAP; code = TRAP_BRKPT; } } @@ -578,12 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: - if (notify_die(DIE_SS, "ss", ®s, vector, - vector, SIGTRAP) == NOTIFY_STOP) - return; - siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; } + if (notify_die(DIE_FAULT, "ia64_fault", ®s, vector, siginfo.si_code, SIGTRAP) + == NOTIFY_STOP) + return; siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; siginfo.si_addr = (void __user *) ifa; diff --git a/include/asm-ia64/kdebug.h b/include/asm-ia64/kdebug.h index 4d376e1663f7..8b01a083dde6 100644 --- a/include/asm-ia64/kdebug.h +++ b/include/asm-ia64/kdebug.h @@ -22,6 +22,9 @@ * 2005-Apr Rusty Lynch and Anil S Keshavamurthy * adopted from * include/asm-x86_64/kdebug.h + * + * 2005-Oct Keith Owens . Expand notify_die to cover more + * events. */ #include @@ -35,13 +38,36 @@ struct die_args { int signr; }; -int register_die_notifier(struct notifier_block *nb); +extern int register_die_notifier(struct notifier_block *); +extern int unregister_die_notifier(struct notifier_block *); extern struct notifier_block *ia64die_chain; enum die_val { DIE_BREAK = 1, - DIE_SS, + DIE_FAULT, + DIE_OOPS, DIE_PAGE_FAULT, + DIE_MACHINE_HALT, + DIE_MACHINE_RESTART, + DIE_MCA_MONARCH_ENTER, + DIE_MCA_MONARCH_PROCESS, + DIE_MCA_MONARCH_LEAVE, + DIE_MCA_SLAVE_ENTER, + DIE_MCA_SLAVE_PROCESS, + DIE_MCA_SLAVE_LEAVE, + DIE_MCA_RENDZVOUS_ENTER, + DIE_MCA_RENDZVOUS_PROCESS, + DIE_MCA_RENDZVOUS_LEAVE, + DIE_INIT_MONARCH_ENTER, + DIE_INIT_MONARCH_PROCESS, + DIE_INIT_MONARCH_LEAVE, + DIE_INIT_SLAVE_ENTER, + DIE_INIT_SLAVE_PROCESS, + DIE_INIT_SLAVE_LEAVE, + DIE_KDEBUG_ENTER, + DIE_KDEBUG_LEAVE, + DIE_KDUMP_ENTER, + DIE_KDUMP_LEAVE, }; static inline int notify_die(enum die_val val, char *str, struct pt_regs *regs, From 97835245768a638002722a36ba9a3b76d0910f68 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sat, 29 Oct 2005 17:23:05 -0400 Subject: [PATCH 4/9] [IA64] fix memory less node allocation The original memory less node allocation attempted to use NODEDATA_ALIGN for alignment. The bootmem allocator only allows a power of two alignments. This causes a BUG_ON for some nodes. For cpu only nodes just allocate with a PERCPU_PAGE_SIZE alignment. Some older firmware reports SLIT distances of 0xff and results in bestnode not being computed. This is now treated correctly. The failed allocation check was removed because it's redundant. The bootmem allocator already makes this check. This fix has been boot tested on 4 node machine which has 4 cpu only nodes and 1 memory node. Thanks to Pete Keilty for reporting this and helping me test it. Signed-off-by: Bob Picco Signed-off-by: Tony Luck --- arch/ia64/mm/discontig.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index a88cdb7232f8..0f776b032d31 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -350,14 +350,12 @@ static void __init initialize_pernode_data(void) * for best. * @nid: node id * @pernodesize: size of this node's pernode data - * @align: alignment to use for this node's pernode data */ -static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize, - unsigned long align) +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) { void *ptr = NULL; u8 best = 0xff; - int bestnode = -1, node; + int bestnode = -1, node, anynode = 0; for_each_online_node(node) { if (node_isset(node, memory_less_mask)) @@ -366,13 +364,15 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize, best = node_distance(nid, node); bestnode = node; } + anynode = node; } - ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, - pernodesize, align, __pa(MAX_DMA_ADDRESS)); + if (bestnode == -1) + bestnode = anynode; + + ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize, + PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); - if (!ptr) - panic("NO memory for memory less node\n"); return ptr; } @@ -413,8 +413,7 @@ static void __init memory_less_nodes(void) for_each_node_mask(node, memory_less_mask) { pernodesize = compute_pernodesize(node); - pernode = memory_less_node_alloc(node, pernodesize, - (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024)); + pernode = memory_less_node_alloc(node, pernodesize); fill_pernode(node, __pa(pernode), pernodesize); } From cf20d1eafb648bf395b153cbcd0cde40f88c220a Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 2 Nov 2005 22:40:19 -0800 Subject: [PATCH 5/9] [IA64] align signal-frame even when not using alternate signal-stack At the moment, attempting to invoke a signal-handler on the normal stack is guaranteed to fail if the stack-pointer happens not to be 16-byte aligned. This is because the signal-trampoline will attempt to store fp-regs with stf.spill instructions, which will trap for misaligned addresses. This isn't terribly useful behavior. It's better to just always align the signal frame to the next lower 16-byte boundary. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- arch/ia64/kernel/signal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 774f34b675cf..58ce07efc56e 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -387,15 +387,14 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct sigscratch *scr) { extern char __kernel_sigtramp[]; - unsigned long tramp_addr, new_rbs = 0; + unsigned long tramp_addr, new_rbs = 0, new_sp; struct sigframe __user *frame; long err; - frame = (void __user *) scr->pt.r12; + new_sp = scr->pt.r12; tramp_addr = (unsigned long) __kernel_sigtramp; - if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { - frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size) - & ~(STACK_ALIGN - 1)); + if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(new_sp) == 0) { + new_sp = current->sas_ss_sp + current->sas_ss_size; /* * We need to check for the register stack being on the signal stack * separately, because it's switched separately (memory stack is switched @@ -404,7 +403,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); } - frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return force_sigsegv_info(sig, frame); From a14f25a076a8e5040d6f4e93f84034c81bcddbf7 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 4 Nov 2005 13:39:38 -0600 Subject: [PATCH 6/9] [IA64] MCA recovery based on PSP bits The determination of whether an MCA is recoverable or not must be based on the bits set in the PSP (Processor State Parameter). The specific bits are shown in the Intel IA-64 Architecture Software Developer's Manual, Vol 2, Table 11-6 Software Recovery Bits in Processor State Parameter. Those bits should be consistent across the entire IA-64 family of processors. Signed-off-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/kernel/mca_drv.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index f081c60ab206..eb860e293415 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -546,9 +546,20 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, (pal_processor_state_info_t*)peidx_psp(peidx); /* - * We cannot recover errors with other than bus_check. + * Processor recovery status must key off of the PAL recovery + * status in the Processor State Parameter. */ - if (psp->cc || psp->rc || psp->uc) + + /* + * The machine check is corrected. + */ + if (psp->cm == 1) + return 1; + + /* + * The error was not contained. Software must be reset. + */ + if (psp->us || psp->ci == 0) return 0; /* @@ -569,8 +580,6 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, return 0; if (pbci->eb && pbci->bsi > 0) return 0; - if (psp->ci == 0) - return 0; /* * This is a local MCA and estimated as recoverble external bus error. From 4f41d5a4e665d05b4e74eef164469b7d81932ef1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 7 Nov 2005 15:13:59 -0700 Subject: [PATCH 7/9] [IA64] add the MMIO regions that are translated to I/O port space to /proc/iomem ia64 translates normal loads and stores to special MMIO regions into I/O port accesses. Reserve these special MMIO regions in /proc/iomem. Sample /proc/iomem: f8100000000-f81003fffff : PCI Bus 0000:80 I/O Ports 00000000-00000fff f8100400000-f81007fffff : PCI Bus 0000:8e I/O Ports 00001000-00001fff f8100800000-f8100ffffff : PCI Bus 0000:9c I/O Ports 00002000-00003fff f8101000000-f81017fffff : PCI Bus 0000:aa I/O Ports 00004000-00005fff and corresponding /proc/ioports: 00000000-00000fff : PCI Bus 0000:80 00001000-00001fff : PCI Bus 0000:8e 00002000-00003fff : PCI Bus 0000:9c 00004000-00005fff : PCI Bus 0000:aa Signed-off-by: Bjorn Helgaas Signed-off-by: Tony Luck --- arch/ia64/pci/pci.c | 106 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 24 deletions(-) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 017cfc3f4789..20d76fae24e8 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -95,7 +95,7 @@ pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn, } static struct pci_raw_ops pci_sal_ops = { - .read = pci_sal_read, + .read = pci_sal_read, .write = pci_sal_write }; @@ -137,35 +137,98 @@ alloc_pci_controller (int seg) return controller; } -static u64 __devinit -add_io_space (struct acpi_resource_address64 *addr) +struct pci_root_info { + struct pci_controller *controller; + char *name; +}; + +static unsigned int +new_space (u64 phys_base, int sparse) { - u64 offset; - int sparse = 0; + u64 mmio_base; int i; - if (addr->address_translation_offset == 0) - return IO_SPACE_BASE(0); /* part of legacy IO space */ + if (phys_base == 0) + return 0; /* legacy I/O port space */ - if (addr->attribute.io.translation_attribute == ACPI_SPARSE_TRANSLATION) - sparse = 1; - - offset = (u64) ioremap(addr->address_translation_offset, 0); + mmio_base = (u64) ioremap(phys_base, 0); for (i = 0; i < num_io_spaces; i++) - if (io_space[i].mmio_base == offset && + if (io_space[i].mmio_base == mmio_base && io_space[i].sparse == sparse) - return IO_SPACE_BASE(i); + return i; if (num_io_spaces == MAX_IO_SPACES) { - printk("Too many IO port spaces\n"); + printk(KERN_ERR "PCI: Too many IO port spaces " + "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); return ~0; } i = num_io_spaces++; - io_space[i].mmio_base = offset; + io_space[i].mmio_base = mmio_base; io_space[i].sparse = sparse; - return IO_SPACE_BASE(i); + return i; +} + +static u64 __devinit +add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr) +{ + struct resource *resource; + char *name; + u64 base, min, max, base_port; + unsigned int sparse = 0, space_nr, len; + + resource = kzalloc(sizeof(*resource), GFP_KERNEL); + if (!resource) { + printk(KERN_ERR "PCI: No memory for %s I/O port space\n", + info->name); + goto out; + } + + len = strlen(info->name) + 32; + name = kzalloc(len, GFP_KERNEL); + if (!name) { + printk(KERN_ERR "PCI: No memory for %s I/O port space name\n", + info->name); + goto free_resource; + } + + min = addr->min_address_range; + max = min + addr->address_length - 1; + if (addr->attribute.io.translation_attribute == ACPI_SPARSE_TRANSLATION) + sparse = 1; + + space_nr = new_space(addr->address_translation_offset, sparse); + if (space_nr == ~0) + goto free_name; + + base = __pa(io_space[space_nr].mmio_base); + base_port = IO_SPACE_BASE(space_nr); + snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name, + base_port + min, base_port + max); + + /* + * The SDM guarantees the legacy 0-64K space is sparse, but if the + * mapping is done by the processor (not the bridge), ACPI may not + * mark it as sparse. + */ + if (space_nr == 0) + sparse = 1; + + resource->name = name; + resource->flags = IORESOURCE_MEM; + resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); + resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); + insert_resource(&iomem_resource, resource); + + return base_port; + +free_name: + kfree(name); +free_resource: + kfree(resource); +out: + return ~0; } static acpi_status __devinit resource_to_window(struct acpi_resource *resource, @@ -205,11 +268,6 @@ count_window (struct acpi_resource *resource, void *data) return AE_OK; } -struct pci_root_info { - struct pci_controller *controller; - char *name; -}; - static __devinit acpi_status add_window(struct acpi_resource *res, void *data) { struct pci_root_info *info = data; @@ -231,7 +289,7 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) } else if (addr.resource_type == ACPI_IO_RANGE) { flags = IORESOURCE_IO; root = &ioport_resource; - offset = add_io_space(&addr); + offset = add_io_space(info, &addr); if (offset == ~0) return AE_OK; } else @@ -241,7 +299,7 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) window->resource.name = info->name; window->resource.flags = flags; window->resource.start = addr.min_address_range + offset; - window->resource.end = addr.max_address_range + offset; + window->resource.end = window->resource.start + addr.address_length - 1; window->resource.child = NULL; window->offset = offset; @@ -739,7 +797,7 @@ int pci_vector_resources(int last, int nr_released) { int count = nr_released; - count += (IA64_LAST_DEVICE_VECTOR - last); + count += (IA64_LAST_DEVICE_VECTOR - last); return count; } From baf47fb66020e5c3fe2386680fa2d79d1f8e0052 Mon Sep 17 00:00:00 2001 From: Panagiotis Issaris Date: Wed, 9 Nov 2005 02:08:42 +0100 Subject: [PATCH 8/9] [IA64] Replace kcalloc(1, with kzalloc. Conversion from kcalloc(1, to kzalloc. Signed-off-by: Panagiotis Issaris Signed-off-by: Tony Luck --- arch/ia64/kernel/efi.c | 2 +- arch/ia64/sn/kernel/io_init.c | 2 +- arch/ia64/sn/pci/tioce_provider.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f72ea6aebcb1..a3aa45cbcfa0 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -987,7 +987,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, break; } - if ((res = kcalloc(1, sizeof(struct resource), GFP_KERNEL)) == NULL) { + if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) { printk(KERN_ERR "failed to alocate resource for iomem\n"); return; } diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index b4f5053f5e1b..05e4ea889981 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -349,7 +349,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) return; /*bus # does not exist */ prom_bussoft_ptr = __va(prom_bussoft_ptr); - controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + controller = kzalloc(sizeof(struct pci_controller), GFP_KERNEL); controller->segment = segment; if (!controller) BUG(); diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 9f03d4e5121c..dda196c9e324 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -218,7 +218,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, if (i > last) return 0; - map = kcalloc(1, sizeof(struct tioce_dmamap), GFP_ATOMIC); + map = kzalloc(sizeof(struct tioce_dmamap), GFP_ATOMIC); if (!map) return 0; @@ -555,7 +555,7 @@ tioce_kern_init(struct tioce_common *tioce_common) struct tioce *tioce_mmr; struct tioce_kernel *tioce_kern; - tioce_kern = kcalloc(1, sizeof(struct tioce_kernel), GFP_KERNEL); + tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL); if (!tioce_kern) { return NULL; } @@ -727,7 +727,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont * Allocate kernel bus soft and copy from prom. */ - tioce_common = kcalloc(1, sizeof(struct tioce_common), GFP_KERNEL); + tioce_common = kzalloc(sizeof(struct tioce_common), GFP_KERNEL); if (!tioce_common) return NULL; From 780d09e895032207a6b070a44d392a3c60574b70 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 9 Nov 2005 14:41:57 -0600 Subject: [PATCH 9/9] [IA64] utilize notify_die() for XPC disengage XPC (as in arch/ia64/sn/kernel/xp*) has a need to notify other partitions (SGI Altix) whenever a partition is going down in order to get them to disengage from accessing the halting partition's memory. If this is not done before the reset of the hardware, the other partitions can find themselves encountering MCAs that bring them down. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc.h | 2 +- arch/ia64/sn/kernel/xpc_main.c | 102 ++++++++++++++++++++++++++++ arch/ia64/sn/kernel/xpc_partition.c | 8 +-- 3 files changed, 107 insertions(+), 5 deletions(-) diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index fbcedc7c27fa..5483a9f227d4 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -163,7 +163,7 @@ struct xpc_vars { u8 version; u64 heartbeat; u64 heartbeating_to_mask; - u64 kdb_status; /* 0 = machine running */ + u64 heartbeat_offline; /* if 0, heartbeat should be changing */ int act_nasid; int act_phys_cpuid; u64 vars_part_pa; diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index cece3c7c69be..b617236524c6 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "xpc.h" @@ -188,6 +189,11 @@ static struct notifier_block xpc_reboot_notifier = { .notifier_call = xpc_system_reboot, }; +static int xpc_system_die(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_die_notifier = { + .notifier_call = xpc_system_die, +}; + /* * Timer function to enforce the timelimit on the partition disengage request. @@ -997,6 +1003,9 @@ xpc_do_exit(enum xpc_retval reason) /* take ourselves off of the reboot_notifier_list */ (void) unregister_reboot_notifier(&xpc_reboot_notifier); + /* take ourselves off of the die_notifier list */ + (void) unregister_die_notifier(&xpc_die_notifier); + /* close down protections for IPI operations */ xpc_restrict_IPI_ops(); @@ -1010,6 +1019,63 @@ xpc_do_exit(enum xpc_retval reason) } +/* + * Called when the system is about to be either restarted or halted. + */ +static void +xpc_die_disengage(void) +{ + struct xpc_partition *part; + partid_t partid; + unsigned long engaged; + long time, print_time, disengage_request_timeout; + + + /* keep xpc_hb_checker thread from doing anything (just in case) */ + xpc_exiting = 1; + + xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> + remote_vars_version)) { + + /* just in case it was left set by an earlier XPC */ + xpc_clear_partition_engaged(1UL << partid); + continue; + } + + if (xpc_partition_engaged(1UL << partid) || + part->act_state != XPC_P_INACTIVE) { + xpc_request_partition_disengage(part); + xpc_mark_partition_disengaged(part); + xpc_IPI_send_disengage(part); + } + } + + print_time = rtc_time(); + disengage_request_timeout = print_time + + (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); + + /* wait for all other partitions to disengage from us */ + + while ((engaged = xpc_partition_engaged(-1UL)) && + (time = rtc_time()) < disengage_request_timeout) { + + if (time >= print_time) { + dev_info(xpc_part, "waiting for remote partitions to " + "disengage, engaged=0x%lx\n", engaged); + print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL * + sn_rtc_cycles_per_second); + } + } + dev_info(xpc_part, "finished waiting for remote partitions to " + "disengage, engaged=0x%lx\n", engaged); +} + + /* * This function is called when the system is being rebooted. */ @@ -1038,6 +1104,33 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) } +/* + * This function is called when the system is being rebooted. + */ +static int +xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) +{ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_disengage(); + break; + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_vars->heartbeat++; + xpc_vars->heartbeat_offline = 1; + break; + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_vars->heartbeat++; + xpc_vars->heartbeat_offline = 0; + break; + } + + return NOTIFY_DONE; +} + + int __init xpc_init(void) { @@ -1154,6 +1247,12 @@ xpc_init(void) dev_warn(xpc_part, "can't register reboot notifier\n"); } + /* add ourselves to the die_notifier list (i.e., ia64die_chain) */ + ret = register_die_notifier(&xpc_die_notifier); + if (ret != 0) { + dev_warn(xpc_part, "can't register die notifier\n"); + } + /* * Set the beating to other partitions into motion. This is @@ -1179,6 +1278,9 @@ xpc_init(void) /* take ourselves off of the reboot_notifier_list */ (void) unregister_reboot_notifier(&xpc_reboot_notifier); + /* take ourselves off of the die_notifier list */ + (void) unregister_die_notifier(&xpc_die_notifier); + del_timer_sync(&xpc_hb_timer); free_irq(SGI_XPC_ACTIVATE, NULL); xpc_restrict_IPI_ops(); diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 581e113d2d37..cdd6431853a1 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -436,13 +436,13 @@ xpc_check_remote_hb(void) } dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" - " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, - remote_vars->heartbeat, part->last_heartbeat, - remote_vars->kdb_status, + " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", + partid, remote_vars->heartbeat, part->last_heartbeat, + remote_vars->heartbeat_offline, remote_vars->heartbeating_to_mask); if (((remote_vars->heartbeat == part->last_heartbeat) && - (remote_vars->kdb_status == 0)) || + (remote_vars->heartbeat_offline == 0)) || !xpc_hb_allowed(sn_partition_id, remote_vars)) { XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);