From 76d08bb3f09054edc45326ce5c698a3f6c45f5d0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 5 Jun 2006 13:54:14 -0700 Subject: [PATCH 01/17] [IA64] Add "model name" to /proc/cpuinfo Linux ia64 port tried to decode the processor family number to something human-readable, but Intel brandnames don't change synchronously with updates to the family number. Adopt a more i386-like approach and just print the family number in decimal. Add a new field "model name" that uses PAL_BRAND_INFO to find the official name for the cpu, or on older systems, falls back to using the well-known codenames (Merced, McKinley, Madison). Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 41 +++++++++++++++++++++++++++--------- include/asm-ia64/pal.h | 10 +++++++++ include/asm-ia64/processor.h | 1 + 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index e4dfda1eb7dd..3f7067c68b08 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -509,7 +509,7 @@ show_cpuinfo (struct seq_file *m, void *v) { 1UL << 1, "spontaneous deferral"}, { 1UL << 2, "16-byte atomic ops" } }; - char family[32], features[128], *cp, sep; + char features[128], *cp, sep; struct cpuinfo_ia64 *c = v; unsigned long mask; unsigned long proc_freq; @@ -517,12 +517,6 @@ show_cpuinfo (struct seq_file *m, void *v) mask = c->features; - switch (c->family) { - case 0x07: memcpy(family, "Itanium", 8); break; - case 0x1f: memcpy(family, "Itanium 2", 10); break; - default: sprintf(family, "%u", c->family); break; - } - /* build the feature string: */ memcpy(features, " standard", 10); cp = features; @@ -553,8 +547,9 @@ show_cpuinfo (struct seq_file *m, void *v) "processor : %d\n" "vendor : %s\n" "arch : IA-64\n" - "family : %s\n" + "family : %u\n" "model : %u\n" + "model name : %s\n" "revision : %u\n" "archrev : %u\n" "features :%s\n" /* don't change this---it _is_ right! */ @@ -563,7 +558,8 @@ show_cpuinfo (struct seq_file *m, void *v) "cpu MHz : %lu.%06lu\n" "itc MHz : %lu.%06lu\n" "BogoMIPS : %lu.%02lu\n", - cpunum, c->vendor, family, c->model, c->revision, c->archrev, + cpunum, c->vendor, c->family, c->model, + c->model_name, c->revision, c->archrev, features, c->ppn, c->number, proc_freq / 1000, proc_freq % 1000, c->itc_freq / 1000000, c->itc_freq % 1000000, @@ -611,6 +607,31 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; +static char brandname[128]; + +static char * __cpuinit +get_model_name(__u8 family, __u8 model) +{ + char brand[128]; + + if (ia64_pal_get_brand_info(brand)) { + if (family == 0x7) + memcpy(brand, "Merced", 7); + else if (family == 0x1f) switch (model) { + case 0: memcpy(brand, "McKinley", 9); break; + case 1: memcpy(brand, "Madison", 8); break; + case 2: memcpy(brand, "Madison up to 9M cache", 23); break; + } else + memcpy(brand, "Unknown", 8); + } + if (brandname[0] == '\0') + return strcpy(brandname, brand); + else if (strcmp(brandname, brand) == 0) + return brandname; + else + return kstrdup(brand, GFP_KERNEL); +} + static void __cpuinit identify_cpu (struct cpuinfo_ia64 *c) { @@ -640,7 +661,6 @@ identify_cpu (struct cpuinfo_ia64 *c) pal_status_t status; unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ int i; - for (i = 0; i < 5; ++i) cpuid.bits[i] = ia64_get_cpuid(i); @@ -663,6 +683,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->family = cpuid.field.family; c->archrev = cpuid.field.archrev; c->features = cpuid.field.features; + c->model_name = get_model_name(c->family, c->model); status = ia64_pal_vm_summary(&vm1, &vm2); if (status == PAL_STATUS_SUCCESS) { diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 37e52a2836b0..312109d7be97 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -78,6 +78,7 @@ #define PAL_VM_TR_READ 261 /* read contents of translation register */ #define PAL_GET_PSTATE 262 /* get the current P-state */ #define PAL_SET_PSTATE 263 /* set the P-state */ +#define PAL_BRAND_INFO 274 /* Processor branding information */ #ifndef __ASSEMBLY__ @@ -1133,6 +1134,15 @@ ia64_pal_set_pstate (u64 pstate_index) return iprv.status; } +/* Processor branding information*/ +static inline s64 +ia64_pal_get_brand_info (char *brand_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0); + return iprv.status; +} + /* Cause the processor to enter LIGHT HALT state, where prefetching and execution are * suspended, but cache and TLB coherency is maintained. */ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index b3bd58e80690..03100c4272c6 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -164,6 +164,7 @@ struct cpuinfo_ia64 { __u8 family; __u8 archrev; char vendor[16]; + char *model_name; #ifdef CONFIG_NUMA struct ia64_node_data *node_data; From 2ab561a116e16cdee3ae0e13d51910634c15aee9 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Wed, 21 Jun 2006 11:19:22 -0700 Subject: [PATCH 02/17] [IA64] esi-support Add support for making ESI calls [1]. ESI stands for "Extensible SAL specification" and is basically a way for invoking firmware subroutines which are identified by a GUID. I don't know whether ESI is used by vendors other than HP (if you do, please let me know) but as firmware "backdoors" go, this seems one of the cleaner methods, so it seems reasonable to support it, even though I'm not aware of any publicly documented ESI calls. I'd have liked to make the ESI module completely stand-alone, but unfortunately that is not easily (or not at all) possible because in order to make ESI calls in physical mode, a small stub similar to the EFI stub is needed in the kernel proper. I did try to create a stub that would work in user-level, but it quickly got ugly beyond recognition (e.g., the stub had to make assumptions about how the module-loader generated call-stubs work) and I didn't even get it to work (that's probably fixable, but I didn't bother because I concluded it was too ugly anyhow). While it's not terribly elegant to have kernel code which isn't actively used in the kernel proper, I think it might be worth making an exception here for two reasons: the code is trivially small (all that's really needed is esi_stub.S) and by including it in the normal kernel distro, it might encourage other OEMs to also use ESI, which I think would be far better than each inventing their own firmware "backdoor". The code was originally written by Alex. I just massaged and packaged it a bit (and perhaps messed up some things along the way...). Changes since first version of patch that was posted to mailing list: * Export ia64_esi_call and ia64_esi_call_phys() as GPL symbols. * Disallow building esi.c as a module for now. Building as a module would currently lead to an unresolved reference to "sal_lock" on SMP kernels because that symbol doesn't get exported. * Export esi_call_phys() only if ESI is enabled. * Remove internal stuff from esi.h and add a "proc_type" argument to ia64_esi_call() such that serialization-requirements can be expressed (ESI follows SAL here, where procedure calls may have to be serialized, are MP-safe, or MP-safe andr reentrant). [1] h21007.www2.hp.com/dspp/tech/tech_TechDocumentDetailPage_IDX/1,1701,919,00.html Signed-off-by: David Mosberger Signed-off-by: Alex Williamson Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 8 ++ arch/ia64/kernel/Makefile | 5 + arch/ia64/kernel/esi.c | 205 ++++++++++++++++++++++++++++++++++ arch/ia64/kernel/esi_stub.S | 96 ++++++++++++++++ arch/ia64/kernel/ia64_ksyms.c | 4 + include/asm-ia64/esi.h | 30 +++++ 6 files changed, 348 insertions(+) create mode 100644 arch/ia64/kernel/esi.c create mode 100644 arch/ia64/kernel/esi_stub.S create mode 100644 include/asm-ia64/esi.h diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 0f3076a820c3..bc106519482c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -413,6 +413,14 @@ config IA64_PALINFO config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) +config IA64_ESI + bool "ESI (Extensible SAL Interface) support" + help + If you say Y here, support is built into the kernel to + make ESI calls. ESI calls are used to support vendor-specific + firmware extensions, such as the ability to inject memory-errors + for test-purposes. If you're unsure, say N. + source "drivers/sn/Kconfig" source "drivers/firmware/Kconfig" diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 09a0dbc17fb6..66ac8e3dca9c 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -31,6 +31,11 @@ obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_ESI) += esi.o +ifneq ($(CONFIG_IA64_ESI),) +obj-y += esi_stub.o # must be in kernel proper +endif + # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c new file mode 100644 index 000000000000..ebf4e988e78c --- /dev/null +++ b/arch/ia64/kernel/esi.c @@ -0,0 +1,205 @@ +/* + * Extensible SAL Interface (ESI) support routines. + * + * Copyright (C) 2006 Hewlett-Packard Co + * Alex Williamson + */ +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Alex Williamson "); +MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "esi" + +#define ESI_TABLE_GUID \ + EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \ + 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4) + +enum esi_systab_entry_type { + ESI_DESC_ENTRY_POINT = 0 +}; + +/* + * Entry type: Size: + * 0 48 + */ +#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)] + +typedef struct ia64_esi_desc_entry_point { + u8 type; + u8 reserved1[15]; + u64 esi_proc; + u64 gp; + efi_guid_t guid; +} ia64_esi_desc_entry_point_t; + +struct pdesc { + void *addr; + void *gp; +}; + +static struct ia64_sal_systab *esi_systab; + +static int __init esi_init (void) +{ + efi_config_table_t *config_tables; + struct ia64_sal_systab *systab; + unsigned long esi = 0; + char *p; + int i; + + config_tables = __va(efi.systab->tables); + + for (i = 0; i < (int) efi.systab->nr_tables; ++i) { + if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) { + esi = config_tables[i].table; + break; + } + } + + if (!esi) + return -ENODEV;; + + systab = __va(esi); + + if (strncmp(systab->signature, "ESIT", 4) != 0) { + printk(KERN_ERR "bad signature in ESI system table!"); + return -ENODEV; + } + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case ESI_DESC_ENTRY_POINT: + break; + default: + printk(KERN_WARNING "Unkown table type %d found in " + "ESI table, ignoring rest of table\n", *p); + return -ENODEV; + } + + p += ESI_DESC_SIZE(*p); + } + + esi_systab = systab; + return 0; +} + + +int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp, + enum esi_proc_type proc_type, u64 func, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags = 0; + int i; + char *p; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = __va(esi->esi_proc); + pdesc.gp = __va(esi->gp); + + esi_proc = (ia64_sal_handler) &pdesc; + + ia64_save_scratch_fpregs(fr); + if (proc_type == ESI_PROC_SERIALIZED) + spin_lock_irqsave(&sal_lock, flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_save(flags); + else + preempt_disable(); + *isrvp = (*esi_proc)(func, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + if (proc_type == ESI_PROC_SERIALIZED) + spin_unlock_irqrestore(&sal_lock, + flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_restore(flags); + else + preempt_enable(); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call); + +int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp, + u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4, + u64 arg5, u64 arg6, u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags; + u64 esi_params[8]; + char *p; + int i; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = (void *)esi->esi_proc; + pdesc.gp = (void *)esi->gp; + + esi_proc = (ia64_sal_handler) &pdesc; + + esi_params[0] = func; + esi_params[1] = arg1; + esi_params[2] = arg2; + esi_params[3] = arg3; + esi_params[4] = arg4; + esi_params[5] = arg5; + esi_params[6] = arg6; + esi_params[7] = arg7; + ia64_save_scratch_fpregs(fr); + spin_lock_irqsave(&sal_lock, flags); + *isrvp = esi_call_phys(esi_proc, esi_params); + spin_unlock_irqrestore(&sal_lock, flags); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call_phys); + +static void __exit esi_exit (void) +{ +} + +module_init(esi_init); +module_exit(esi_exit); /* makes module removable... */ diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S new file mode 100644 index 000000000000..6b3d6c1f99b6 --- /dev/null +++ b/arch/ia64/kernel/esi_stub.S @@ -0,0 +1,96 @@ +/* + * ESI call stub. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Alex Williamson + * + * Based on EFI call stub by David Mosberger. The stub is virtually + * identical to the one for EFI phys-mode calls, except that ESI + * calls may have up to 8 arguments, so they get passed to this routine + * through memory. + * + * This stub allows us to make ESI calls in physical mode with interrupts + * turned off. ESI calls may not support calling from virtual mode. + * + * Google for "Extensible SAL specification" for a document describing the + * ESI standard. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include +#include + +/* + * Inputs: + * in0 = address of function descriptor of ESI routine to call + * in1 = address of array of ESI parameters + * + * Outputs: + * r8 = result returned by called function + */ +GLOBAL_ENTRY(esi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,7,8,0 + ld8 r2=[in0],8 // load ESI function's entry point + mov loc0=rp + .body + ;; + ld8 out0=[in1],8 // ESI params loaded from array + ;; // passing all as inputs doesn't work + ld8 out1=[in1],8 + ;; + ld8 out2=[in1],8 + ;; + ld8 out3=[in1],8 + ;; + ld8 out4=[in1],8 + ;; + ld8 out5=[in1],8 + ;; + ld8 out6=[in1],8 + ;; + ld8 out7=[in1] + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load ESI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov loc5=r19 // old ar.bsp + mov loc6=r20 // old sp + br.call.sptk.many rp=b6 // call the ESI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // save virtual mode psr + mov r19=loc5 // save virtual mode bspstore + mov r20=loc6 // save virtual mode sp + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(esi_call_phys) diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index bbcfd08378a6..9583f310d486 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -106,5 +106,9 @@ EXPORT_SYMBOL(ia64_spinlock_contention); # endif #endif +#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE) +extern void esi_call_phys (void); +EXPORT_SYMBOL_GPL(esi_call_phys); +#endif extern char ia64_ivt[]; EXPORT_SYMBOL(ia64_ivt); diff --git a/include/asm-ia64/esi.h b/include/asm-ia64/esi.h new file mode 100644 index 000000000000..84aac0e0b583 --- /dev/null +++ b/include/asm-ia64/esi.h @@ -0,0 +1,30 @@ +/* + * ESI service calls. + * + * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + */ +#ifndef esi_h +#define esi_h + +#include + +#define ESI_QUERY 0x00000001 +#define ESI_OPEN_HANDLE 0x02000000 +#define ESI_CLOSE_HANDLE 0x02000001 + +enum esi_proc_type { + ESI_PROC_SERIALIZED, /* calls need to be serialized */ + ESI_PROC_MP_SAFE, /* MP-safe, but not reentrant */ + ESI_PROC_REENTRANT /* MP-safe and reentrant */ +}; + +extern int ia64_esi_init (void); +extern struct ia64_sal_retval esi_call_phys (void *, u64 *); +extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *, + enum esi_proc_type, + u64, u64, u64, u64, u64, u64, u64, u64); +extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64, + u64, u64, u64, u64, u64, u64); + +#endif /* esi_h */ From 214ddde2f95037e129eff7e895869771719c7c1b Mon Sep 17 00:00:00 2001 From: bibo mao Date: Tue, 26 Sep 2006 11:20:37 -0700 Subject: [PATCH 03/17] [IA64] kprobe opcode 16 bytes alignment on IA64 On IA64 instruction opcode must be 16 bytes alignment, in kprobe structure there is one element to save original instruction, currently saved opcode is not statically allocated in kprobe structure, that can not assure 16 bytes alignment. This patch dynamically allocated kprobe instruction opcode to assure 16 bytes alignment. Signed-off-by: bibo mao Acked-by: Anil S Keshavamurthy Signed-off-by: Tony Luck --- arch/ia64/kernel/kprobes.c | 55 ++++++++++++++++++++------------------ include/asm-ia64/kprobes.h | 9 ++++--- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 781960f80b6f..320cb7a91c9d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -136,10 +136,8 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, static int __kprobes unsupported_inst(uint template, uint slot, uint major_opcode, unsigned long kprobe_inst, - struct kprobe *p) + unsigned long addr) { - unsigned long addr = (unsigned long)p->addr; - if (bundle_encoding[template][slot] == I) { switch (major_opcode) { case 0x0: //I_UNIT_MISC_OPCODE: @@ -217,7 +215,7 @@ static void __kprobes prepare_break_inst(uint template, uint slot, struct kprobe *p) { unsigned long break_inst = BREAK_INST; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle = &p->opcode.bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) @@ -423,11 +421,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); unsigned long kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; - bundle_t *bundle = &p->ainsn.insn.bundle; - - memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); - memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + bundle_t *bundle; + bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; template = bundle->quad0.template; if(valid_kprobe_addr(template, slot, addr)) @@ -440,30 +436,30 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* Get kprobe_inst and major_opcode from the bundle */ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr)) return -EINVAL; + + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); + memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); return 0; } -void __kprobes flush_insn_slot(struct kprobe *p) -{ - unsigned long arm_addr; - - arm_addr = ((unsigned long)&p->opcode.bundle) & ~0xFULL; - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); -} - void __kprobes arch_arm_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - flush_insn_slot(p); - memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_disarm_kprobe(struct kprobe *p) @@ -471,11 +467,18 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - /* p->opcode contains the original unaltered bundle */ - memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); - flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); + /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ + memcpy((char *) arm_addr, (char *) p->ainsn.insn, + sizeof(kprobe_opcode_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + mutex_lock(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + mutex_unlock(&kprobe_mutex); +} /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction @@ -486,12 +489,12 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; unsigned long template; int slot = ((unsigned long)p->addr & 0xf); - template = p->opcode.bundle.quad0.template; + template = p->ainsn.insn->bundle.quad0.template; if (slot == 1 && bundle_encoding[template][1] == L) slot = 2; @@ -553,7 +556,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) { - unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle; unsigned long slot = (unsigned long)p->addr & 0xf; /* single step inline if break instruction */ diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 938904910115..1b45b71c79b9 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -29,7 +29,8 @@ #include #include -#define MAX_INSN_SIZE 16 +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 #define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) typedef union cmp_inst { @@ -94,7 +95,7 @@ struct kprobe_ctlblk { #define IP_RELATIVE_PREDICT_OPCODE (7) #define LONG_BRANCH_OPCODE (0xC) #define LONG_CALL_OPCODE (0xD) -#define arch_remove_kprobe(p) do {} while (0) +#define flush_insn_slot(p) do { } while (0) typedef struct kprobe_opcode { bundle_t bundle; @@ -108,7 +109,7 @@ struct fnptr { /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the instruction to be emulated */ - kprobe_opcode_t insn; + kprobe_opcode_t *insn; #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 #define INST_FLAG_FIX_BRANCH_REG 2 #define INST_FLAG_BREAK_INST 4 @@ -125,6 +126,6 @@ static inline void jprobe_return(void) } extern void invalidate_stacked_regs(void); extern void flush_register_stack(void); -extern void flush_insn_slot(struct kprobe *p); +extern void arch_remove_kprobe(struct kprobe *p); #endif /* _ASM_KPROBES_H */ From fd32cb3a9c9f9399421408e8734cd8a6d9d1a09f Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 25 Sep 2006 16:32:20 -0700 Subject: [PATCH 04/17] [IA64] kprobes: fixup the pagefault exception caused by probehandlers If the user-specified kprobe handler causes the page fault when accessing user space address, fixup this fault since do_page_fault() should not continue as the kprobe handler are run with preemption disabled. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/kernel/kprobes.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 320cb7a91c9d..169ec3a7156c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -771,6 +771,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) */ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (ia64_done_with_exception(regs)) + return 1; /* * Let ia64_do_page_fault() fix it. From dd562c05410e13e878a3ee0deb8ac06db2e132c7 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 21 Sep 2006 10:35:44 -0700 Subject: [PATCH 05/17] [IA64] Add interface so modules can discover whether multithreading is on. Add is_multithreading_enabled() to check whether multi-threading is enabled independently of which cpu is currently online Signed-off-by: stephane eranian Signed-off-by: Tony Luck --- arch/ia64/kernel/smpboot.c | 24 ++++++++++++++++++++++++ include/asm-ia64/smp.h | 1 + 2 files changed, 25 insertions(+) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 6203ed4ec8cf..f7d7f5668144 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -879,3 +879,27 @@ identify_siblings(struct cpuinfo_ia64 *c) c->core_id = info.log1_cid; c->thread_id = info.log1_tid; } + +/* + * returns non zero, if multi-threading is enabled + * on at least one physical package. Due to hotplug cpu + * and (maxcpus=), all threads may not necessarily be enabled + * even though the processor supports multi-threading. + */ +int is_multithreading_enabled(void) +{ + int i, j; + + for_each_present_cpu(i) { + for_each_present_cpu(j) { + if (j == i) + continue; + if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { + if (cpu_data(j)->core_id == cpu_data(i)->core_id) + return 1; + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 719ff309ce09..6533eb4e86ad 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -128,6 +128,7 @@ extern void smp_send_reschedule (int cpu); extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); extern void identify_siblings (struct cpuinfo_ia64 *); +extern int is_multithreading_enabled(void); #else From 35589a8fa8138244e7f2ef9317c440aa580c9335 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Tue, 26 Sep 2006 12:03:13 -0700 Subject: [PATCH 06/17] [IA64] Move perfmon tables from thread_struct to pfm_context This patch renders thread_struct->pmcs[] and thread_struct->pmds[] OBSOLETE. The actual table is moved to pfm_context structure which saves space in thread_struct (in turn saving space in task_struct which frees up more space for kernel stacks). Signed-off-by: Stephane Eranian Signed-off-by: Anil S Keshavamurthy Signed-off-by: Tony Luck --- arch/ia64/kernel/perfmon.c | 113 +++++++++++++++-------------------- include/asm-ia64/processor.h | 12 +--- 2 files changed, 50 insertions(+), 75 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f6..45000d5d0cfa 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -62,6 +62,9 @@ #define PFM_INVALID_ACTIVATION (~0UL) +#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ +#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ + /* * depth of message queue */ @@ -296,14 +299,17 @@ typedef struct pfm_context { unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ + + unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ + unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ u64 ctx_saved_psr_up; /* only contains psr.up value */ @@ -867,7 +873,6 @@ static void pfm_mask_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, val, ovfl_mask; int i; @@ -888,7 +893,7 @@ pfm_mask_monitoring(struct task_struct *task) * So in both cases, the live register contains the owner's * state. We can ONLY touch the PMU registers and NOT the PSR. * - * As a consequence to this call, the thread->pmds[] array + * As a consequence to this call, the ctx->th_pmds[] array * contains stale information which must be ignored * when context is reloaded AND monitoring is active (see * pfm_restart). @@ -923,9 +928,9 @@ pfm_mask_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); - th->pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); + ctx->th_pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } /* * make all of this visible @@ -942,7 +947,6 @@ static void pfm_restore_monitoring(struct task_struct *task) { pfm_context_t *ctx = PFM_GET_CTX(task); - struct thread_struct *th = &task->thread; unsigned long mask, ovfl_mask; unsigned long psr, val; int i, is_system; @@ -1008,9 +1012,9 @@ pfm_restore_monitoring(struct task_struct *task) mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { if ((mask & 0x1) == 0UL) continue; - th->pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, th->pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, ctx->th_pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); } ia64_srlz_d(); @@ -1069,7 +1073,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask) static inline void pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long ovfl_val = pmu_conf->ovfl_val; unsigned long mask = ctx->ctx_all_pmds[0]; unsigned long val; @@ -1091,11 +1094,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) ctx->ctx_pmds[i].val = val & ~ovfl_val; val &= ovfl_val; } - thread->pmds[i] = val; + ctx->th_pmds[i] = val; DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", i, - thread->pmds[i], + ctx->th_pmds[i], ctx->ctx_pmds[i].val)); } } @@ -1106,7 +1109,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) static inline void pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) { - struct thread_struct *thread = &task->thread; unsigned long mask = ctx->ctx_all_pmcs[0]; int i; @@ -1114,8 +1116,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) for (i=0; mask; i++, mask>>=1) { /* masking 0 with ovfl_val yields 0 */ - thread->pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); } } @@ -2859,7 +2861,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) static int pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, pmc_pm; @@ -2880,7 +2881,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (is_loaded) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3035,7 +3035,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). * - * The value in thread->pmcs[] may be modified on overflow, i.e., when + * The value in th_pmcs[] may be modified on overflow, i.e., when * monitoring needs to be stopped. */ if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); @@ -3049,7 +3049,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmcs[cnum] = value; + if (is_system == 0) ctx->th_pmcs[cnum] = value; /* * write hardware register if we can @@ -3101,7 +3101,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) static int pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned long value, hw_value, ovfl_mask; @@ -3125,7 +3124,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * the owner of the local PMU. */ if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3233,7 +3231,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) /* * write thread state */ - if (is_system == 0) thread->pmds[cnum] = hw_value; + if (is_system == 0) ctx->th_pmds[cnum] = hw_value; /* * write hardware register if we can @@ -3299,7 +3297,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) static int pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { - struct thread_struct *thread = NULL; struct task_struct *task; unsigned long val = 0UL, lval, ovfl_mask, sval; pfarg_reg_t *req = (pfarg_reg_t *)arg; @@ -3323,7 +3320,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (state == PFM_CTX_ZOMBIE) return -EINVAL; if (likely(is_loaded)) { - thread = &task->thread; /* * In system wide and when the context is loaded, access can only happen * when the caller is running on the CPU being monitored by the session. @@ -3385,7 +3381,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * if context is zombie, then task does not exist anymore. * In this case, we use the full value saved in the context (pfm_flush_regs()). */ - val = is_loaded ? thread->pmds[cnum] : 0UL; + val = is_loaded ? ctx->th_pmds[cnum] : 0UL; } rd_func = pmu_conf->pmd_desc[cnum].read_check; @@ -4354,8 +4350,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_copy_pmds(task, ctx); pfm_copy_pmcs(task, ctx); - pmcs_source = thread->pmcs; - pmds_source = thread->pmds; + pmcs_source = ctx->th_pmcs; + pmds_source = ctx->th_pmds; /* * always the case for system-wide @@ -5864,14 +5860,12 @@ void pfm_save_regs(struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; u64 psr; ctx = PFM_GET_CTX(task); if (ctx == NULL) return; - t = &task->thread; /* * we always come here with interrupts ALREADY disabled by @@ -5929,19 +5923,19 @@ pfm_save_regs(struct task_struct *task) * guarantee we will be schedule at that same * CPU again. */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * we will need it on the restore path to check * for pending overflow. */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * finally, allow context access. @@ -5986,7 +5980,6 @@ static void pfm_lazy_save_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long flags; { u64 psr = pfm_get_psr(); @@ -5994,7 +5987,6 @@ pfm_lazy_save_regs (struct task_struct *task) } ctx = PFM_GET_CTX(task); - t = &task->thread; /* * we need to mask PMU overflow here to @@ -6019,19 +6011,19 @@ pfm_lazy_save_regs (struct task_struct *task) /* * save all the pmds we use */ - pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); /* * save pmc0 ia64_srlz_d() done in pfm_save_pmds() * it is needed to check for pended overflow * on the restore path */ - t->pmcs[0] = ia64_get_pmc(0); + ctx->th_pmcs[0] = ia64_get_pmc(0); /* * unfreeze PMU if had pending overflows */ - if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); /* * now get can unmask PMU interrupts, they will @@ -6050,7 +6042,6 @@ void pfm_load_regs (struct task_struct *task) { pfm_context_t *ctx; - struct thread_struct *t; unsigned long pmc_mask = 0UL, pmd_mask = 0UL; unsigned long flags; u64 psr, psr_up; @@ -6061,11 +6052,10 @@ pfm_load_regs (struct task_struct *task) BUG_ON(GET_PMU_OWNER()); - t = &task->thread; /* * possible on unload */ - if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; + if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; /* * we always come here with interrupts ALREADY disabled by @@ -6147,21 +6137,21 @@ pfm_load_regs (struct task_struct *task) * * XXX: optimize here */ - if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); + if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6214,7 +6204,6 @@ pfm_load_regs (struct task_struct *task) void pfm_load_regs (struct task_struct *task) { - struct thread_struct *t; pfm_context_t *ctx; struct task_struct *owner; unsigned long pmd_mask, pmc_mask; @@ -6223,7 +6212,6 @@ pfm_load_regs (struct task_struct *task) owner = GET_PMU_OWNER(); ctx = PFM_GET_CTX(task); - t = &task->thread; psr = pfm_get_psr(); BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); @@ -6286,22 +6274,22 @@ pfm_load_regs (struct task_struct *task) */ pmc_mask = ctx->ctx_all_pmcs[0]; - pfm_restore_pmds(t->pmds, pmd_mask); - pfm_restore_pmcs(t->pmcs, pmc_mask); + pfm_restore_pmds(ctx->th_pmds, pmd_mask); + pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); /* * check for pending overflow at the time the state * was saved. */ - if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { /* * reload pmc0 with the overflow information * On McKinley PMU, this will trigger a PMU interrupt */ - ia64_set_pmc(0, t->pmcs[0]); + ia64_set_pmc(0, ctx->th_pmcs[0]); ia64_srlz_d(); - t->pmcs[0] = 0UL; + ctx->th_pmcs[0] = 0UL; /* * will replay the PMU interrupt @@ -6376,11 +6364,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) */ pfm_unfreeze_pmu(); } else { - pmc0 = task->thread.pmcs[0]; + pmc0 = ctx->th_pmcs[0]; /* * clear whatever overflow status bits there were */ - task->thread.pmcs[0] = 0; + ctx->th_pmcs[0] = 0; } ovfl_val = pmu_conf->ovfl_val; /* @@ -6401,7 +6389,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) /* * can access PMU always true in system wide mode */ - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; + val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; if (PMD_IS_COUNTING(i)) { DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", @@ -6433,7 +6421,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - if (is_self) task->thread.pmds[i] = pmd_val; + if (is_self) ctx->th_pmds[i] = pmd_val; ctx->ctx_pmds[i].val = val; } @@ -6677,7 +6665,7 @@ pfm_init(void) ffz(pmu_conf->ovfl_val)); /* sanity check */ - if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { + if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); pmu_conf = NULL; return -1; @@ -6752,7 +6740,6 @@ void dump_pmu_state(const char *from) { struct task_struct *task; - struct thread_struct *t; struct pt_regs *regs; pfm_context_t *ctx; unsigned long psr, dcr, info, flags; @@ -6797,16 +6784,14 @@ dump_pmu_state(const char *from) ia64_psr(regs)->up = 0; ia64_psr(regs)->pp = 0; - t = ¤t->thread; - for (i=1; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); } for (i=1; PMD_IS_LAST(i) == 0; i++) { if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); } if (ctx) { diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index ead68f8eb883..5830d36fd8e6 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -20,12 +20,6 @@ #include #define IA64_NUM_DBG_REGS 8 -/* - * Limits for PMC and PMD are set to less than maximum architected values - * but should be sufficient for a while - */ -#define IA64_NUM_PMC_REGS 64 -#define IA64_NUM_PMD_REGS 64 #define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) #define DEFAULT_TASK_SIZE __IA64_UL_CONST(0xa000000000000000) @@ -263,13 +257,9 @@ struct thread_struct { # define INIT_THREAD_IA32 #endif /* CONFIG_IA32_SUPPORT */ #ifdef CONFIG_PERFMON - __u64 pmcs[IA64_NUM_PMC_REGS]; - __u64 pmds[IA64_NUM_PMD_REGS]; void *pfm_context; /* pointer to detailed PMU context */ unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */ -# define INIT_THREAD_PM .pmcs = {0UL, }, \ - .pmds = {0UL, }, \ - .pfm_context = NULL, \ +# define INIT_THREAD_PM .pfm_context = NULL, \ .pfm_needs_checking = 0UL, #else # define INIT_THREAD_PM From fd1dfc6f0165e2ad426665e517103672d6832f90 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Sep 2006 11:56:51 -0700 Subject: [PATCH 07/17] [IA64-SGI] Do not request DMA memory for BTE The GFP_DMA option usually does nothing on SN2 since all of memory is in thei DMA zone and the BTE has always been capable of addressing all of memory. So there is no need to get memory from a restricted range of memory (which is what GFP_DMA is for). Remove useless __GFP_DMA option. Signed-off-by: Christoph Lameter Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 27dee4584061..7f73ad4408aa 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -277,8 +277,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) } /* temporary buffer used during unaligned transfers */ - bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, - GFP_KERNEL | GFP_DMA); + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); if (bteBlock_unaligned == NULL) { return BTEFAIL_NOTAVAIL; } From a192dc16000241dc02990a36b6830839b73c44de Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 26 Sep 2006 14:00:56 -0700 Subject: [PATCH 08/17] [IA64] Implement futex primitives Implement futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic() on IA64 in order to fully support all futex functionality. Signed-off-by: Jakub Jelinek Signed-off-by: David Woodhouse Signed-off-by: Tony Luck --- include/asm-ia64/futex.h | 122 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 6a332a9f099c..07d77f3a8cbe 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -1,6 +1,124 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#include +#include +#include +#include +#include -#endif +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] " insn ";; \n" \ + " .xdata4 \"__ex_table\", 1b-., 2f-. \n" \ + "[2:]" \ + : "+r" (r8), "=r" (oldval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + ret = r8; \ +} while (0) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + int val, newval; \ + do { \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] ld4 %3=[%4];; \n" \ + " mov %2=%3 \n" \ + insn ";; \n" \ + " mov ar.ccv=%2;; \n" \ + "[2:] cmpxchg4.acq %1=[%4],%3,ar.ccv;; \n" \ + " .xdata4 \"__ex_table\", 1b-., 3f-.\n" \ + " .xdata4 \"__ex_table\", 2b-., 3f-.\n" \ + "[3:]" \ + : "+r" (r8), "=r" (val), "=&r" (oldval), \ + "=&r" (newval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + if (unlikely (r8)) \ + break; \ + } while (unlikely (val != oldval)); \ + ret = r8; \ +} while (0) + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr, + oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr, + ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +{ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + { + register unsigned long r8 __asm ("r8"); + __asm__ __volatile__( + " mf;; \n" + " mov ar.ccv=%3;; \n" + "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" + " .xdata4 \"__ex_table\", 1b-., 2f-. \n" + "[2:]" + : "=r" (r8) + : "r" (uaddr), "r" (newval), + "rO" ((long) (unsigned) oldval) + : "memory"); + return r8; + } +} + +#endif /* _ASM_FUTEX_H */ From 5c55cd63a77a85f603c98c2171a8054ca34b6a9f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 26 Sep 2006 14:04:42 -0700 Subject: [PATCH 09/17] Revert "[IA64] Unwire set/get_robust_list" This reverts commit 2636255488484e04d6d54303d2b0ec30f7ef7e02. Jakub Jelinek provided the missing futex_atomic_cmpxchg_inatomic() function, so now it should be safe to re-enable these syscalls. Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 4 ++-- include/asm-ia64/unistd.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index fef06571be99..12701cf32d99 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1605,8 +1605,8 @@ sys_call_table: data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare data8 sys_splice - data8 sys_ni_syscall // reserved for set_robust_list - data8 sys_ni_syscall // reserved for get_robust_list + data8 sys_set_robust_list + data8 sys_get_robust_list data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index f581662c5ab8..bb0eb727dcd0 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -286,7 +286,8 @@ /* 1294, 1295 reserved for pselect/ppoll */ #define __NR_unshare 1296 #define __NR_splice 1297 -/* 1298, 1299 reserved for set_robust_list/get_robust_list */ +#define __NR_set_robust_list 1298 +#define __NR_get_robust_list 1299 #define __NR_sync_file_range 1300 #define __NR_tee 1301 #define __NR_vmsplice 1302 From f5a3f3dc189485d607fbd42678cc23958acc0a6e Mon Sep 17 00:00:00 2001 From: Zou Nan hai Date: Thu, 14 Sep 2006 08:25:15 +0800 Subject: [PATCH 10/17] [IA64] Make gp value point to Region 5 in mca handler MCA dispatch code take physical address of GP passed from SAL, then call DATA_PA_TO_VA twice on GP before call into C code. The first time is in ia64_set_kernel_register, the second time is in VIRTUAL_MODE_ENTER. The gp is changed to a virtual address in region 7 because DATA_PA_TO_VA is implemented by dep instruction. However when notify blocks were called from MCA handler code, because notify blocks are supported by callback function pointers, gp value value was switched to region 5 again. The patch set gp register to kernel gp of region 5 at entry of MCA dispatch. Signed-off-by: Zou Nan hai Signed-off-by: Tony Luck --- arch/ia64/kernel/mca_asm.S | 5 ----- include/asm-ia64/mca_asm.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 96047491d1b9..ee3ff76c8a35 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -1025,18 +1025,13 @@ ia64_old_stack: ia64_set_kernel_registers: add temp3=MCA_SP_OFFSET, r3 - add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3 mov b0=r2 // save return address GET_IA64_MCA_DATA(temp1) ;; - add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack add r20=temp1, r3 // physical start of MCA/INIT stack ;; - ld8 r1=[temp4] // OS GP from SAL OS state - ;; - DATA_PA_TO_VA(r1,temp1) DATA_PA_TO_VA(r12,temp2) DATA_PA_TO_VA(r13,temp3) ;; diff --git a/include/asm-ia64/mca_asm.h b/include/asm-ia64/mca_asm.h index 27c9203d8ce3..76203f9a8718 100644 --- a/include/asm-ia64/mca_asm.h +++ b/include/asm-ia64/mca_asm.h @@ -197,9 +197,9 @@ movl temp2 = start_addr; \ ;; \ mov cr.iip = temp2; \ + movl gp = __gp \ ;; \ DATA_PA_TO_VA(sp, temp1); \ - DATA_PA_TO_VA(gp, temp2); \ srlz.i; \ ;; \ nop 1; \ From 709a6c1c07673a9f7879b5f7306dc8d723db93a2 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Wed, 13 Sep 2006 08:43:42 -0400 Subject: [PATCH 11/17] [IA64] show_mem() printk levels Use the default sysrq printk level for printing show_mem() output both for disconfig and contig versions. This is consistent with the printk level used on other architectures (well ia32 at least). Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/mm/contig.c | 17 +++++++++-------- arch/ia64/mm/discontig.c | 29 +++++++++++++++-------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index e004143ba86b..537c2f3833ee 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -41,10 +41,11 @@ show_mem (void) int i, total = 0, reserved = 0; int shared = 0, cached = 0; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; for (i = 0; i < max_mapnr; i++) { if (!pfn_valid(i)) { @@ -63,12 +64,12 @@ show_mem (void) else if (page_count(mem_map + i)) shared += page_count(mem_map + i) - 1; } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n", reserved); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); - printk("%ld pages in page table cache\n", - pgtable_quicklist_total_size()); + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); + printk(KERN_INFO "%ld pages in page table cache\n", + pgtable_quicklist_total_size()); } /* physical address where the bootmem map is located */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index d260bffa01ab..0ce247444f21 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -547,15 +547,16 @@ void show_mem(void) unsigned long total_present = 0; pg_data_t *pgdat; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", + nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; - printk("Node ID: %d\n", pgdat->node_id); + printk(KERN_INFO "Node ID: %d\n", pgdat->node_id); pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -579,18 +580,18 @@ void show_mem(void) total_reserved += reserved; total_cached += cached; total_shared += shared; - printk("\t%ld pages of RAM\n", present); - printk("\t%d reserved pages\n", reserved); - printk("\t%d pages shared\n", shared); - printk("\t%d pages swap cached\n", cached); + printk(KERN_INFO "\t%ld pages of RAM\n", present); + printk(KERN_INFO "\t%d reserved pages\n", reserved); + printk(KERN_INFO "\t%d pages shared\n", shared); + printk(KERN_INFO "\t%d pages swap cached\n", cached); } - printk("%ld pages of RAM\n", total_present); - printk("%d reserved pages\n", total_reserved); - printk("%d pages shared\n", total_shared); - printk("%d pages swap cached\n", total_cached); - printk("Total of %ld pages in page table cache\n", - pgtable_quicklist_total_size()); - printk("%d free buffer pages\n", nr_free_buffer_pages()); + printk(KERN_INFO "%ld pages of RAM\n", total_present); + printk(KERN_INFO "%d reserved pages\n", total_reserved); + printk(KERN_INFO "%d pages shared\n", total_shared); + printk(KERN_INFO "%d pages swap cached\n", total_cached); + printk(KERN_INFO "Total of %ld pages in page table cache\n", + pgtable_quicklist_total_size()); + printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } /** From 816add4e986499145135c4014a7c8a8857f9f3c3 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 14 Sep 2006 07:10:30 -0400 Subject: [PATCH 12/17] [IA64] trim output of show_mem() Cut the number of lines of memory info output per node from five to one line. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/mm/discontig.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 0ce247444f21..6ea279766334 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -551,12 +551,12 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; - printk(KERN_INFO "Node ID: %d\n", pgdat->node_id); pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -580,10 +580,9 @@ void show_mem(void) total_reserved += reserved; total_cached += cached; total_shared += shared; - printk(KERN_INFO "\t%ld pages of RAM\n", present); - printk(KERN_INFO "\t%d reserved pages\n", reserved); - printk(KERN_INFO "\t%d pages shared\n", shared); - printk(KERN_INFO "\t%d pages swap cached\n", cached); + printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " + "shrd: %10d, swpd: %10d\n", pgdat->node_id, + present, reserved, shared, cached); } printk(KERN_INFO "%ld pages of RAM\n", total_present); printk(KERN_INFO "%d reserved pages\n", total_reserved); From 43ed3baf623410b3fa6ca14a9d3f6deca3493c56 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 26 Sep 2006 14:44:37 -0700 Subject: [PATCH 13/17] [IA64] printing support for MCA/INIT Printing message to console from MCA/INIT handler is useful, however doing oops_in_progress = 1 in them exactly makes something in kernel wrong. Especially it sounds ugly if system goes wrong after returning from recoverable MCA. This patch adds ia64_mca_printk() function that collects messages into temporary-not-so-large message buffer during in MCA/INIT environment and print them out later, after returning to normal context or when handlers determine to down the system. Also this print function is exported for use in extensional MCA handler. It would be useful to describe detail about recovery. NOTE: I don't think it is sane thing if temporary message buffer is enlarged enough to hold whole stack dumps from INIT, so buffering is disabled during stack dump from INIT-monarch (= default_monarch_init_process). please fix it in future. Signed-off-by: Hidetoshi Seto Acked-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/kernel/mca.c | 216 +++++++++++++++++++++++++++++++++---- arch/ia64/kernel/mca_drv.c | 54 ++++++---- arch/ia64/kernel/mca_drv.h | 4 + arch/ia64/kernel/salinfo.c | 4 + 4 files changed, 242 insertions(+), 36 deletions(-) diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe18..98f3b26d7aff 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -54,6 +54,9 @@ * * 2005-10-07 Keith Owens * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto + * Add printing support for MCA/INIT. */ #include #include @@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init __initdata; +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk(temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk(temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} +EXPORT_SYMBOL(ia64_mlogbuf_finish); + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} static void inline ia64_mca_spin(const char *func) { - printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax(); } @@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type) } if (!missing) goto all_in; - printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - printk(" %d", c); + mprintk(" %d", c); } - printk("\n"); + mprintk("\n"); return; all_in: - printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); return; } @@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; @@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, rh->severity = sal_log_severity_corrected; ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; + } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + printk(KERN_ERR "Processes interrupted by INIT -"); for_each_online_cpu(c) { struct ia64_sal_os_state *s; @@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi } while_each_thread (g, t); read_unlock(&tasklist_lock); } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); return NOTIFY_DONE; } @@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ - console_loglevel = 15; /* make sure printks make it to console */ - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); - printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); @@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", __FUNCTION__, cpu); atomic_dec(&slaves); sos->monarch = 1; @@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * fix their proms and get their customers updated. */ if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", __FUNCTION__, cpu); atomic_dec(&monarchs); sos->monarch = 0; @@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("Slave on cpu %d returning to normal service.\n", cpu); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; atomic_dec(&slaves); @@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * same serial line, the user will need some time to switch out of the BMC before * the dump begins. */ - printk("Delaying for 5 seconds...\n"); + mprintk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu, "INIT"); /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through @@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) == NOTIFY_STOP) ia64_mca_spin(__FUNCTION__); - printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); monarch_cpu = -1; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedadc..a45009d2bc90 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -79,14 +79,30 @@ static int fatal_mca(const char *fmt, ...) { va_list args; + char buf[256]; va_start(args, fmt); - vprintk(fmt, args); + vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); return MCA_NOT_RECOVERED; } +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + /** * mca_page_isolate - isolate a poisoned page in order not to use it later * @paddr: poisoned memory location @@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) void mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { + ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", raw_smp_processor_id(), current->pid, current->uid, @@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is target address valid? */ if (!pbci->tv) - return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); + return fatal_mca("target address not valid"); /* * cpu read or memory-mapped io read @@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, /* Is minstate valid? */ if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) - return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); + return fatal_mca("minstate not valid"); psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); @@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, psr2->bn = 1; psr2->i = 0; - return MCA_RECOVERED; + return mca_recovered("user memory corruption. " + "kill affected process - recovered."); } } - return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," - " iip 0x%lx\n", pmsa->pmsa_iip); + return fatal_mca("kernel context not recovered, iip 0x%lx\n", + pmsa->pmsa_iip); } /** @@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * The machine check is corrected. */ if (psp->cm == 1) - return MCA_RECOVERED; + return mca_recovered("machine check is already corrected."); /* * The error was not contained. Software must be reset. */ if (psp->us || psp->ci == 0) - return fatal_mca(KERN_ALERT "MCA: error not contained\n"); + return fatal_mca("error not contained"); /* * The cache check and bus check bits have four possible states @@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, * 1 1 Memory error, attempt recovery */ if (psp->bc == 0 || pbci == NULL) - return fatal_mca(KERN_ALERT "MCA: No bus check\n"); + return fatal_mca("No bus check"); /* * Sorry, we cannot handle so many. */ if (peidx_bus_check_num(peidx) > 1) - return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); + return fatal_mca("Too many bus checks"); /* * Well, here is only one bus error. */ if (pbci->ib) - return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); + return fatal_mca("Internal Bus error"); if (pbci->cc) - return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); + return fatal_mca("Cache-cache error"); if (pbci->eb && pbci->bsi > 0) - return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); + return fatal_mca("External bus check fatal status"); /* * This is a local MCA and estimated as recoverble external bus error. @@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, /* * On account of strange SAL error record, we cannot recover. */ - return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); + return fatal_mca("Strange SAL record"); } /** @@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Now, OS can recover when there is one processor error section */ if (n_proc_err > 1) - return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); + return fatal_mca("Too Many Errors"); else if (n_proc_err == 0) - /* Weird SAL record ... We need not to recover */ - return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); + /* Weird SAL record ... We can't do anything */ + return fatal_mca("Weird SAL record"); /* Make index of processor error section */ mca_make_peidx((sal_log_processor_info_t*) @@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) /* Check whether MCA is global or not */ if (is_mca_global(&peidx, &pbci, sos)) - return fatal_mca(KERN_ALERT "MCA: global MCA\n"); + return fatal_mca("global MCA"); /* Try to recover a processor error */ return recover_from_processor_error(platform_err, &slidx, &peidx, diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index 31a2e52bb16f..c85e943ba5fd 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h @@ -118,3 +118,7 @@ struct mca_table_entry { extern const struct mca_table_entry *search_mca_tables (unsigned long addr); extern int mca_recover_range(unsigned long); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); +extern void ia64_mlogbuf_dump(void); + diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9065f0f01ba3..e63b8ca5344a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) /* Check for outstanding MCA/INIT records every minute (arbitrary) */ #define SALINFO_TIMER_DELAY (60*HZ) static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); static void salinfo_timeout_check(struct salinfo_data *data) @@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data) static void salinfo_timeout (unsigned long arg) { + ia64_mlogbuf_dump(); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; @@ -332,6 +334,8 @@ salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t if (cpu == -1) goto retry; + ia64_mlogbuf_dump(); + /* for next read, start checking at next CPU */ data->cpu_check = cpu; if (++data->cpu_check == NR_CPUS) From 8f9e146732dcba5161dad3747ee73be1f8c13133 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Mon, 18 Sep 2006 18:37:15 -0500 Subject: [PATCH 14/17] [IA64] ar.fpsr not set on MCA/INIT kernel entry When entering the kernel due to an MCA or INIT, ar.fpsr (ar40) was not getting set to the kernel default value (remaining at the user value). The effect depends on the user setting of ar.fpsr. In the test case, the effect was addresses printing with strange hex values. Setting ar.fpsr in ia64_set_kernel_registers sets it for both the MCA and INIT paths. The user value of ar.fpsr is correctly saved (in ia64_state_save) and restored (in ia64_state_restore). Below is an example of output with very strange hex values. Anyone know the value of hex 'g'? :-) Processes interrupted by INIT - 0 (cpu 14 task 0xdfffg55g7a4c6gA) Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/kernel/mca_asm.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index ee3ff76c8a35..c6b607c00dee 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -1062,6 +1062,10 @@ ia64_set_kernel_registers: mov cr.itir=r18 mov cr.ifa=r13 mov r20=IA64_TR_CURRENT_STACK + + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value ;; itr.d dtr[r20]=r21 ;; From b29e7132b5a9f2496beed37beef7ba4d010afb2c Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Tue, 26 Sep 2006 14:47:48 -0500 Subject: [PATCH 15/17] [IA64] PAL calls need physical mode, stacked PAL_CACHE_READ and PAL_CACHE_WRITE need to be called in physical mode with stacked registers. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- include/asm-ia64/pal.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index d1587e4f5759..2c8fd92d0ece 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -964,7 +964,8 @@ static inline s64 ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr) { struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_READ, line_id.pclid_data, physical_addr, 0); + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data, + physical_addr, 0); return iprv.status; } @@ -986,7 +987,8 @@ static inline s64 ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data) { struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_WRITE, line_id.pclid_data, physical_addr, data); + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data, + physical_addr, data); return iprv.status; } From ddb4f0df0424d174567a011a176782ffa4202071 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 26 Sep 2006 15:27:56 -0700 Subject: [PATCH 16/17] [IA64] CMC/CPE: Reverse the order of fetching log and checking poll threshold This patch reverses the order of fetching log from SAL and checking poll threshold. This will fix following trivial issues: - If SAL_GET_SATE_INFO is unbelievably slow (due to huge system or just its silly implementation) and if it takes more than 1/5 sec, CMCI/CPEI will never switch to CMCP/CPEP. - Assuming terrible flood of interrupt (continuous corrected errors let all CPUs enter to handler at once and bind them in it), CPUs will be serialized by IA64_LOG_LOCK(*). Now we check the poll threshold after the lock and log fetch, so we need to call SAL_GET_STATE_INFO (num_online_cpus() + 4) times in the worst case. if we can check the threshold before the lock, we can shut up interrupts quickly without waiting preceding log fetches, and the number of times will be reduced to (num_online_cpus()) in the same situation. Signed-off-by: Hidetoshi Seto Signed-off-by: Tony Luck --- arch/ia64/kernel/mca.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 98f3b26d7aff..bfbd8986153b 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -511,9 +511,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CPE error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); - spin_lock(&cpe_history_lock); if (!cpe_poll_enabled && cpe_vector >= 0) { @@ -542,7 +539,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cpe_history[index++] = now; if (index == CPE_HISTORY_LENGTH) @@ -550,6 +547,10 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cpe_history_lock); +out: + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + return IRQ_HANDLED; } @@ -1278,9 +1279,6 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) /* SAL spec states this should run w/ interrupts enabled */ local_irq_enable(); - /* Get the CMC error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); - spin_lock(&cmc_history_lock); if (!cmc_polling_enabled) { int i, count = 1; /* we know 1 happened now */ @@ -1313,7 +1311,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); /* lock already released, get out now */ - return IRQ_HANDLED; + goto out; } else { cmc_history[index++] = now; if (index == CMC_HISTORY_LENGTH) @@ -1321,6 +1319,10 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) } } spin_unlock(&cmc_history_lock); +out: + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + return IRQ_HANDLED; } From df8f0ec1a413ae610899f009e25dc9777881e149 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Sat, 12 Aug 2006 11:08:12 -0600 Subject: [PATCH 17/17] [IA64] minor reformatting to vmlinux.lds.S Minor reformatting to vmlinux.lds.S to make it 80-column usable, in accordance with Linux coding style. Signed-off-by: Al Stone Signed-off-by: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5b0d5f64a9b1..b3b2e389d6b2 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -184,7 +184,9 @@ SECTIONS *(.data.gate) __stop_gate_section = .; } - . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ + . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose + * kernel data + */ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } @@ -202,7 +204,9 @@ SECTIONS *(.data.percpu) __per_cpu_end = .; } - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits + * into percpu page size + */ data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET)