From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: [PATCH 001/186] percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 1 + include/linux/percpu.h | 7 +++++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a946247..b2f54fafb8bc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..0f7c29a8c4ea 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,6 +189,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778e916c..69e5c1182fde 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -348,6 +348,7 @@ . = ALIGN(align); \ __per_cpu_start = .; \ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ } \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393e71e1..2edacc8e6b8b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,12 +23,19 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.page_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) From e61d98d8dad0048619bb138b0ff996422ffae53b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:35 -0700 Subject: [PATCH 002/186] x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization code reorganization of the generic Intel vt-d parsing related routines and linux iommu routines specific to Intel vt-d. drivers/pci/dmar.c now contains the generic vt-d parsing related routines drivers/pci/intel_iommu.c contains the iommu routines specific to vt-d Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dma_remapping.h | 155 ++++++++++++++++++++++++++++++++++ drivers/pci/dmar.c | 90 +++++++++++++++++++- drivers/pci/intel-iommu.c | 92 ++------------------ drivers/pci/intel-iommu.h | 163 +++--------------------------------- 4 files changed, 264 insertions(+), 236 deletions(-) create mode 100644 drivers/pci/dma_remapping.h diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h new file mode 100644 index 000000000000..05aac8ef96c7 --- /dev/null +++ b/drivers/pci/dma_remapping.h @@ -0,0 +1,155 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f941f609dbf3..c00e387f5b75 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -19,9 +19,11 @@ * Author: Shaohua Li * Author: Anil S Keshavamurthy * - * This file implements early detection/parsing of DMA Remapping Devices + * This file implements early detection/parsing of Remapping Devices * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI * tables. + * + * These routines are used by both DMA-remapping and Interrupt-remapping */ #include @@ -300,6 +302,37 @@ parse_dmar_table(void) return ret; } +int dmar_pci_device_match(struct pci_dev *devices[], int cnt, + struct pci_dev *dev) +{ + int index; + + while (dev) { + for (index = 0; index < cnt; index++) + if (dev == devices[index]) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd = NULL; + + list_for_each_entry(drhd, &dmar_drhd_units, list) { + if (drhd->include_all || dmar_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + return drhd; + } + + return NULL; +} + int __init dmar_table_init(void) { @@ -343,3 +376,58 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } + +struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int map_size; + u32 ver; + + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = PAGE_ALIGN_4K(map_size); + if (map_size > PAGE_SIZE_4K) { + iounmap(iommu->reg); + iommu->reg = ioremap(drhd->reg_base_addr, map_size); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + } + + ver = readl(iommu->reg + DMAR_VER_REG); + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + iommu->cap, iommu->ecap); + + spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + return iommu; +error: + kfree(iommu); + return NULL; +} + +void free_iommu(struct intel_iommu *iommu) +{ + if (!iommu) + return; + +#ifdef CONFIG_DMAR + free_dmar_iommu(iommu); +#endif + + if (iommu->reg) + iounmap(iommu->reg); + kfree(iommu); +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bb0642318a95..1c0270d3e2e5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -990,6 +990,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } + spin_lock_init(&iommu->lock); + /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -998,62 +1000,15 @@ static int iommu_init_domains(struct intel_iommu *iommu) set_bit(0, iommu->domain_ids); return 0; } -static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) -{ - int ret; - int map_size; - u32 ver; - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - - ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); - ret = iommu_init_domains(iommu); - if (ret) - goto error_unmap; - spin_lock_init(&iommu->lock); - spin_lock_init(&iommu->register_lock); - - drhd->iommu = iommu; - return iommu; -error_unmap: - iounmap(iommu->reg); -error: - kfree(iommu); - return NULL; -} static void domain_exit(struct dmar_domain *domain); -static void free_iommu(struct intel_iommu *iommu) + +void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; - if (!iommu) - return; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; @@ -1078,10 +1033,6 @@ static void free_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); - - if (iommu->reg) - iounmap(iommu->reg); - kfree(iommu); } static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) @@ -1426,37 +1377,6 @@ find_domain(struct pci_dev *pdev) return NULL; } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) -{ - int index; - - while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) - return 1; - - /* Check our parent */ - dev = dev->bus->self; - } - - return 0; -} - -static struct dmar_drhd_unit * -dmar_find_matched_drhd_unit(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd = NULL; - - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; - } - - return NULL; -} - /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { @@ -1764,6 +1684,10 @@ int __init init_dmars(void) goto error; } + ret = iommu_init_domains(iommu); + if (ret) + goto error; + /* * TBD: * we could share the same root & context tables diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index afc0ad96122e..9e5e98c76c05 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,19 +27,7 @@ #include #include "iova.h" #include - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +#include "dma_remapping.h" /* * Intel IOMMU register specification per version 1.0 public spec. @@ -187,158 +175,31 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); - struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - spinlock_t lock; /* protect context, domain ids */ spinlock_t register_lock; /* protect register handling */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ unsigned int irq; unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; +#endif }; -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); #endif From c42d9f32443397aed2d37d37df161392e6a5862f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:36 -0700 Subject: [PATCH 003/186] x64, x2apic/intr-remap: fix the need for sequential array allocation of iommus Clean up the intel-iommu code related to deferred iommu flush logic. There is no need to allocate all the iommu's as a sequential array. This will be used later in the interrupt-remapping patch series to allocate iommu much early and individually for each device remapping hardware unit. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 11 +++++++++-- drivers/pci/intel-iommu.c | 24 +++++++----------------- drivers/pci/intel-iommu.h | 4 ++-- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index c00e387f5b75..1a59423a8eda 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -377,11 +377,18 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) +struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) { + struct intel_iommu *iommu; int map_size; u32 ver; + static int iommu_allocated = 0; + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return NULL; + + iommu->seq_id = iommu_allocated++; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1c0270d3e2e5..4d59a6a1f4dd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -58,8 +58,6 @@ static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); -static struct intel_iommu *g_iommus; - #define HIGH_WATER_MARK 250 struct deferred_flush_tables { int next; @@ -1649,8 +1647,6 @@ int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; g_num_of_iommus++; /* * lock not needed as this is only incremented in the single @@ -1659,26 +1655,17 @@ int __init init_dmars(void) */ } - g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); - if (!g_iommus) { - ret = -ENOMEM; - goto error; - } - deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } - i = 0; for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(&g_iommus[i], drhd); - i++; + iommu = alloc_iommu(drhd); if (!iommu) { ret = -ENOMEM; goto error; @@ -1770,7 +1757,6 @@ int __init init_dmars(void) iommu = drhd->iommu; free_iommu(iommu); } - kfree(g_iommus); return ret; } @@ -1927,7 +1913,10 @@ static void flush_unmaps(void) /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { if (deferred_flush[i].next) { - iommu_flush_iotlb_global(&g_iommus[i], 0); + struct intel_iommu *iommu = + deferred_flush[i].domain[0]->iommu; + + iommu_flush_iotlb_global(iommu, 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -1957,7 +1946,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) if (list_size == HIGH_WATER_MARK) flush_unmaps(); - iommu_id = dom->iommu - g_iommus; + iommu_id = dom->iommu->seq_id; + next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 9e5e98c76c05..75c63f65b3f5 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -182,6 +182,7 @@ struct intel_iommu { int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -198,8 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd); +extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: [PATCH 004/186] x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 89 ++++++++++++++++++++++++++++++++------- drivers/pci/intel-iommu.c | 10 ++--- drivers/pci/intel-iommu.h | 2 +- include/linux/dmar.h | 10 ++++- 4 files changed, 88 insertions(+), 23 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 1a59423a8eda..158bc5bfcf75 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -174,19 +174,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; int ret = 0; - static int include_all; dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); if (!dmaru) return -ENOMEM; + dmaru->hdr = header; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru->reg_base_addr = drhd->address; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + ret = alloc_iommu(dmaru); + if (ret) { + kfree(dmaru); + return ret; + } + dmar_register_drhd_unit(dmaru); + return 0; +} + +static int __init +dmar_parse_dev(struct dmar_drhd_unit *dmaru) +{ + struct acpi_dmar_hardware_unit *drhd; + static int include_all; + int ret; + + drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; + if (!dmaru->include_all) ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + header->length, + ((void *)drhd) + drhd->header.length, &dmaru->devices_cnt, &dmaru->devices, drhd->segment); else { @@ -199,10 +217,10 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) include_all = 1; } - if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) + if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) { + list_del(&dmaru->list); kfree(dmaru); - else - dmar_register_drhd_unit(dmaru); + } return ret; } @@ -211,23 +229,35 @@ dmar_parse_one_rmrr(struct acpi_dmar_header *header) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; - int ret = 0; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) return -ENOMEM; + rmrru->hdr = header; rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; + + dmar_register_rmrr_unit(rmrru); + return 0; +} + +static int __init +rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) +{ + struct acpi_dmar_reserved_memory *rmrr; + int ret; + + rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + header->length, + ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - if (ret || (rmrru->devices_cnt == 0)) + if (ret || (rmrru->devices_cnt == 0)) { + list_del(&rmrru->list); kfree(rmrru); - else - dmar_register_rmrr_unit(rmrru); + } return ret; } @@ -333,15 +363,42 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) return NULL; } +int __init dmar_dev_scope_init(void) +{ + struct dmar_drhd_unit *drhd; + struct dmar_rmrr_unit *rmrr; + int ret = -ENODEV; + + for_each_drhd_unit(drhd) { + ret = dmar_parse_dev(drhd); + if (ret) + return ret; + } + + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } + + return ret; +} + int __init dmar_table_init(void) { - + static int dmar_table_initialized; int ret; + if (dmar_table_initialized) + return 0; + + dmar_table_initialized = 1; + ret = parse_dmar_table(); if (ret) { - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + if (ret != -ENODEV) + printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); return ret; } @@ -377,7 +434,7 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; int map_size; @@ -386,7 +443,7 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) - return NULL; + return -ENOMEM; iommu->seq_id = iommu_allocated++; @@ -419,10 +476,10 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; - return iommu; + return 0; error: kfree(iommu); - return NULL; + return -1; } void free_iommu(struct intel_iommu *iommu) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4d59a6a1f4dd..218a1f357b4d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1665,11 +1665,8 @@ int __init init_dmars(void) for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); - if (!iommu) { - ret = -ENOMEM; - goto error; - } + + iommu = drhd->iommu; ret = iommu_init_domains(iommu); if (ret) @@ -2324,6 +2321,9 @@ int __init intel_iommu_init(void) if (dmar_table_init()) return -ENODEV; + if (dmar_dev_scope_init()) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 75c63f65b3f5..371e3b9caf32 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -199,7 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); +extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b847551..3ab07e425583 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ From aaa9d1dd63bf89b62f4ea9f46de376ab1a3fbc6c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:38 -0700 Subject: [PATCH 005/186] x64, x2apic/intr-remap: use CONFIG_DMAR for DMA-remapping specific code DMA remapping specific code covered with CONFIG_DMAR in the generic code which will also be used later for enabling Interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 158bc5bfcf75..5c99f9973987 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -39,7 +39,6 @@ * these units are not supported by the architecture. */ LIST_HEAD(dmar_drhd_units); -LIST_HEAD(dmar_rmrr_units); static struct acpi_table_header * __initdata dmar_tbl; @@ -55,11 +54,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) list_add(&drhd->list, &dmar_drhd_units); } -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, struct pci_dev **dev, u16 segment) { @@ -224,6 +218,15 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru) return ret; } +#ifdef CONFIG_DMAR +LIST_HEAD(dmar_rmrr_units); + +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + + static int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { @@ -260,6 +263,7 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } +#endif static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) @@ -284,6 +288,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) } } + /** * parse_dmar_table - parses the DMA reporting table */ @@ -316,7 +321,9 @@ parse_dmar_table(void) ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: +#ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); +#endif break; default: printk(KERN_WARNING PREFIX @@ -366,7 +373,6 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) int __init dmar_dev_scope_init(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; int ret = -ENODEV; for_each_drhd_unit(drhd) { @@ -375,11 +381,16 @@ int __init dmar_dev_scope_init(void) return ret; } - for_each_rmrr_units(rmrr) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; +#ifdef CONFIG_DMAR + { + struct dmar_rmrr_unit *rmrr; + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } } +#endif return ret; } @@ -407,10 +418,12 @@ int __init dmar_table_init(void) return -ENODEV; } +#ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) { printk(KERN_INFO PREFIX "No RMRR found\n"); return -ENODEV; } +#endif return 0; } From 2d6b5f85bb4ca919d8ab0f30311309b53fb93bc3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:39 -0700 Subject: [PATCH 006/186] x64, x2apic/intr-remap: Fix the need for RMRR in the DMA-remapping detection Presence of RMRR structures is not compulsory for enabling DMA-remapping. Signed-off-by: Suresh Siddha Signed-off-by: Yong Y Wang Cc: Yong Y Wang Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5c99f9973987..903f1701edff 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -419,10 +419,8 @@ int __init dmar_table_init(void) } #ifdef CONFIG_DMAR - if (list_empty(&dmar_rmrr_units)) { + if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); - return -ENODEV; - } #endif return 0; From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: [PATCH 007/186] x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/Makefile | 2 ++ drivers/pci/dmar.c | 3 ++ drivers/pci/intel-iommu.h | 2 ++ drivers/pci/intr_remapping.c | 70 ++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.h | 6 ++++ include/linux/dmar.h | 1 + 6 files changed, 84 insertions(+) create mode 100644 drivers/pci/intr_remapping.c create mode 100644 drivers/pci/intr_remapping.h diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4d1ce2e7361e..1c409c7b0d56 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o # Build Intel IOMMU support obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o +obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 903f1701edff..127764cfbe27 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -423,6 +423,9 @@ int __init dmar_table_init(void) printk(KERN_INFO PREFIX "No RMRR found\n"); #endif +#ifdef CONFIG_INTR_REMAP + parse_ioapics_under_ir(); +#endif return 0; } diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 371e3b9caf32..eb167e39b464 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -114,6 +114,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) /* IOTLB_REG */ diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c new file mode 100644 index 000000000000..a80b87921c68 --- /dev/null +++ b/drivers/pci/intr_remapping.c @@ -0,0 +1,70 @@ +#include +#include +#include "intel-iommu.h" +#include "intr_remapping.h" + +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static int ir_ioapic_num; + +static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + void *start, *end; + + drhd = (struct acpi_dmar_hardware_unit *)header; + + start = (void *)(drhd + 1); + end = ((void *)drhd) + header->length; + + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { + if (ir_ioapic_num == MAX_IO_APICS) { + printk(KERN_WARNING "Exceeded Max IO APICS\n"); + return -1; + } + + printk(KERN_INFO "IOAPIC id %d under DRHD base" + " 0x%Lx\n", scope->enumeration_id, + drhd->address); + + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; + } + start += scope->length; + } + + return 0; +} + +/* + * Finds the assocaition between IOAPIC's and its Interrupt-remapping + * hardware unit. + */ +int __init parse_ioapics_under_ir(void) +{ + struct dmar_drhd_unit *drhd; + int ir_supported = 0; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (ecap_ir_support(iommu->ecap)) { + if (ir_parse_ioapic_scope(drhd->hdr, iommu)) + return -1; + + ir_supported = 1; + } + } + + if (ir_supported && ir_ioapic_num != nr_ioapics) { + printk(KERN_WARNING + "Not all IO-APIC's listed under remapping hardware\n"); + return -1; + } + + return ir_supported; +} diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h new file mode 100644 index 000000000000..c4a40b2f33fa --- /dev/null +++ b/drivers/pci/intr_remapping.h @@ -0,0 +1,6 @@ +#include "intel-iommu.h" + +struct ioapic_scope { + struct intel_iommu *iommu; + unsigned int id; +}; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e425583..c4e96eb29617 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; From cf1337f0447e5be8e66daa944f0ea3bcac2b6179 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:41 -0700 Subject: [PATCH 008/186] x64, x2apic/intr-remap: move IOMMU_WAIT_OP() macro to intel-iommu.h move IOMMU_WAIT_OP() macro to header file. This will be used by both DMA-remapping and Intr-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 15 --------------- drivers/pci/intel-iommu.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 218a1f357b4d..fb701d9dd8c0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -49,8 +49,6 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) @@ -486,19 +484,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) return 0; } -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} - static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index eb167e39b464..3a650e8cba33 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -177,6 +177,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; From fe962e90cb17a8426e144dee970e77ed789d98ee Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:42 -0700 Subject: [PATCH 009/186] x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d) Queued invalidation (part of Intel Virtualization Technology for Directed I/O architecture) infrastructure. This will be used for invalidating the interrupt entry cache in the case of Interrupt-remapping and IOTLB invalidation in the case of DMA-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 150 ++++++++++++++++++++++++++++++++++++++ drivers/pci/intel-iommu.c | 7 -- drivers/pci/intel-iommu.h | 61 ++++++++++++++++ 3 files changed, 211 insertions(+), 7 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 127764cfbe27..aba151ca6d26 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -28,6 +28,7 @@ #include #include +#include #include "iova.h" #include "intel-iommu.h" @@ -509,3 +510,152 @@ void free_iommu(struct intel_iommu *iommu) iounmap(iommu->reg); kfree(iommu); } + +/* + * Reclaim all the submitted descriptors which have completed its work. + */ +static inline void reclaim_free_desc(struct q_inval *qi) +{ + while (qi->desc_status[qi->free_tail] == QI_DONE) { + qi->desc_status[qi->free_tail] = QI_FREE; + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; + qi->free_cnt++; + } +} + +/* + * Submit the queued invalidation descriptor to the remapping + * hardware unit and wait for its completion. + */ +void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +{ + struct q_inval *qi = iommu->qi; + struct qi_desc *hw, wait_desc; + int wait_index, index; + unsigned long flags; + + if (!qi) + return; + + hw = qi->desc; + + spin_lock(&qi->q_lock); + while (qi->free_cnt < 3) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + index = qi->free_head; + wait_index = (index + 1) % QI_LENGTH; + + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + + hw[index] = *desc; + + wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + + hw[wait_index] = wait_desc; + + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); + + qi->free_head = (qi->free_head + 2) % QI_LENGTH; + qi->free_cnt -= 2; + + spin_lock_irqsave(&iommu->register_lock, flags); + /* + * update the HW tail register indicating the presence of + * new descriptors. + */ + writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + while (qi->desc_status[wait_index] != QI_DONE) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + qi->desc_status[index] = QI_DONE; + + reclaim_free_desc(qi); + spin_unlock(&qi->q_lock); +} + +/* + * Flush the global interrupt entry cache. + */ +void qi_global_iec(struct intel_iommu *iommu) +{ + struct qi_desc desc; + + desc.low = QI_IEC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. + */ +int dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 cmd, sts; + unsigned long flags; + struct q_inval *qi; + + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + /* + * queued invalidation is already setup and enabled. + */ + if (iommu->qi) + return 0; + + iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + + qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); + if (!qi->desc) { + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + spin_lock_init(&qi->q_lock); + + spin_lock_irqsave(&iommu->register_lock, flags); + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + cmd = iommu->gcmd | DMA_GCMD_QIE; + iommu->gcmd |= DMA_GCMD_QIE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fb701d9dd8c0..347bf2e47168 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -181,13 +181,6 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 3a650e8cba33..2983ce895353 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,6 +27,7 @@ #include #include "iova.h" #include +#include #include "dma_remapping.h" /* @@ -51,6 +52,10 @@ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define OFFSET_STRIDE (9) /* @@ -114,6 +119,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) @@ -131,6 +137,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + /* PMEN_REG */ #define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_PRS (((u32)1)<<0) @@ -140,6 +157,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) /* GSTS_REG */ @@ -147,6 +165,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) /* CCMD_REG */ @@ -192,6 +211,40 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) }\ } +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -212,8 +265,16 @@ struct intel_iommu { struct msi_msg saved_msg; struct sys_device sysdev; #endif + struct q_inval *qi; /* Queued invalidation info */ }; +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: [PATCH 010/186] x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dma_remapping.h | 2 + drivers/pci/dmar.c | 16 ++++ drivers/pci/intel-iommu.c | 21 ++---- drivers/pci/intel-iommu.h | 24 +++++- drivers/pci/intr_remapping.c | 137 +++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.h | 2 + include/linux/dmar.h | 124 +++++++++++++++++++++---------- 7 files changed, 274 insertions(+), 52 deletions(-) diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h index 05aac8ef96c7..bff5c65f81dc 100644 --- a/drivers/pci/dma_remapping.h +++ b/drivers/pci/dma_remapping.h @@ -145,6 +145,8 @@ struct device_domain_info { extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int dmar_disabled; + #ifndef CONFIG_DMAR_GFX_WA static inline void iommu_prepare_gfx_mapping(void) { diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index aba151ca6d26..23a119e6485e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -449,6 +449,22 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } +void __init detect_intel_iommu(void) +{ + int ret; + + ret = early_dmar_detect(); + +#ifdef CONFIG_DMAR + { + if (ret && !no_iommu && !iommu_detected && !swiotlb && + !dmar_disabled) + iommu_detected = 1; + } +#endif +} + + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 347bf2e47168..ffccf2341b98 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -76,7 +76,7 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -2238,15 +2238,6 @@ static void __init iommu_exit_mempool(void) } -void __init detect_intel_iommu(void) -{ - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) - return; - if (early_dmar_detect()) { - iommu_detected = 1; - } -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2293,15 +2284,19 @@ int __init intel_iommu_init(void) { int ret = 0; - if (no_iommu || swiotlb || dmar_disabled) - return -ENODEV; - if (dmar_table_init()) return -ENODEV; if (dmar_dev_scope_init()) return -ENODEV; + /* + * Check the need for DMA-remapping initialization now. + * Above initialization will also be used by Interrupt-remapping. + */ + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 2983ce895353..a81a74e2bd9e 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -56,6 +56,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) /* @@ -157,16 +158,20 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -245,6 +250,16 @@ struct q_inval { int free_cnt; }; +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +struct ir_table { + struct irte *base; +}; +#endif + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -266,6 +281,9 @@ struct intel_iommu { struct sys_device sysdev; #endif struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif }; static inline void __iommu_flush_cache( @@ -279,5 +297,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index a80b87921c68..3d10cdc90314 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -1,10 +1,147 @@ #include +#include +#include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; +int intr_remapping_enabled; + +static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) +{ + u64 addr; + u32 cmd, sts; + unsigned long flags; + + addr = virt_to_phys((void *)iommu->ir_table->base); + + spin_lock_irqsave(&iommu->register_lock, flags); + + dmar_writeq(iommu->reg + DMAR_IRTA_REG, + (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); + + /* Set interrupt-remapping table pointer */ + cmd = iommu->gcmd | DMA_GCMD_SIRTP; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRTPS), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* + * global invalidation of interrupt entry cache before enabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + spin_lock_irqsave(&iommu->register_lock, flags); + + /* Enable interrupt-remapping */ + cmd = iommu->gcmd | DMA_GCMD_IRE; + iommu->gcmd |= DMA_GCMD_IRE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRES), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + + +static int setup_intr_remapping(struct intel_iommu *iommu, int mode) +{ + struct ir_table *ir_table; + struct page *pages; + + ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), + GFP_KERNEL); + + if (!iommu->ir_table) + return -ENOMEM; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + + if (!pages) { + printk(KERN_ERR "failed to allocate pages of order %d\n", + INTR_REMAP_PAGE_ORDER); + kfree(iommu->ir_table); + return -ENOMEM; + } + + ir_table->base = page_address(pages); + + iommu_set_intr_remapping(iommu, mode); + return 0; +} + +int __init enable_intr_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + int setup = 0; + + /* + * check for the Interrupt-remapping support + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (eim && !ecap_eim_support(iommu->ecap)) { + printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " + " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); + return -1; + } + } + + /* + * Enable queued invalidation for all the DRHD's. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_enable_qi(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable queued, " + " invalidation, ecap %Lx, ret %d\n", + drhd->reg_base_addr, iommu->ecap, ret); + return -1; + } + } + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (setup_intr_remapping(iommu, eim)) + goto error; + + setup = 1; + } + + if (!setup) + goto error; + + intr_remapping_enabled = 1; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index c4a40b2f33fa..05f2635bbe4e 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -4,3 +4,5 @@ struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; }; + +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb29617..8a0238dd2c11 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,33 +25,9 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; -extern const char *dmar_get_fault_reason(u8 fault_reason); - -/* Can't use the common MSI interrupt functions - * since DMAR is not a pci device - */ -extern void dmar_msi_unmask(unsigned int irq); -extern void dmar_msi_mask(unsigned int irq); -extern void dmar_msi_read(int irq, struct msi_msg *msg); -extern void dmar_msi_write(int irq, struct msi_msg *msg); -extern int dmar_set_interrupt(struct intel_iommu *iommu); -extern int arch_setup_dmar_msi(unsigned int irq); - -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; -extern struct list_head dmar_rmrr_units; - struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -63,6 +39,85 @@ struct dmar_drhd_unit { struct intel_iommu *iommu; }; +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR +extern const char *dmar_get_fault_reason(u8 fault_reason); + +/* Can't use the common MSI interrupt functions + * since DMAR is not a pci device + */ +extern void dmar_msi_unmask(unsigned int irq); +extern void dmar_msi_mask(unsigned int irq); +extern void dmar_msi_read(int irq, struct msi_msg *msg); +extern void dmar_msi_write(int irq, struct msi_msg *msg); +extern int dmar_set_interrupt(struct intel_iommu *iommu); +extern int arch_setup_dmar_msi(unsigned int irq); + +extern int iommu_detected, no_iommu; +extern struct list_head dmar_rmrr_units; struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: [PATCH 011/186] x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.h | 4 + drivers/pci/intr_remapping.c | 243 +++++++++++++++++++++++++++++++++++ include/linux/dmar.h | 12 ++ 3 files changed, 259 insertions(+) diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index a81a74e2bd9e..2142c01e0143 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -123,6 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) /* IOTLB_REG */ @@ -255,6 +256,8 @@ struct q_inval { #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_ENTRIES 65536 + struct ir_table { struct irte *base; }; @@ -300,4 +303,5 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3d10cdc90314..bddb4b19b6c7 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" @@ -10,6 +11,248 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static struct { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +} irq_2_iommu[NR_IRQS]; + +static DEFINE_SPINLOCK(irq_2_ir_lock); + +int irq_remapped(int irq) +{ + if (irq > NR_IRQS) + return 0; + + if (!irq_2_iommu[irq].iommu) + return 0; + + return 1; +} + +int get_irte(int irq, struct irte *entry) +{ + int index; + + if (!entry || irq > NR_IRQS) + return -1; + + spin_lock(&irq_2_ir_lock); + if (!irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + struct ir_table *table = iommu->ir_table; + u16 index, start_index; + unsigned int mask = 0; + int i; + + if (!count) + return -1; + + /* + * start the IRTE search from index 0. + */ + index = start_index = 0; + + if (count > 1) { + count = __roundup_pow_of_two(count); + mask = ilog2(count); + } + + if (mask > ecap_max_handle_mask(iommu->ecap)) { + printk(KERN_ERR + "Requested mask %x exceeds the max invalidation handle" + " mask value %Lx\n", mask, + ecap_max_handle_mask(iommu->ecap)); + return -1; + } + + spin_lock(&irq_2_ir_lock); + do { + for (i = index; i < index + count; i++) + if (table->base[i].present) + break; + /* empty index found */ + if (i == index + count) + break; + + index = (index + count) % INTR_REMAP_TABLE_ENTRIES; + + if (index == start_index) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate an IRTE\n"); + return -1; + } + } while (1); + + for (i = index; i < index + count; i++) + table->base[i].present = 1; + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = mask; + + spin_unlock(&irq_2_ir_lock); + + return index; +} + +static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +{ + struct qi_desc desc; + + desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + | QI_IEC_SELECTIVE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + int index; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + *sub_handle = irq_2_iommu[irq].sub_handle; + index = irq_2_iommu[irq].irte_index; + spin_unlock(&irq_2_ir_lock); + return index; +} + +int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = subhandle; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int modify_irte(int irq, struct irte *irte_modified) +{ + int index; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + qi_flush_iec(iommu, index, 0); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int flush_irte(int irq) +{ + int index; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int free_irte(int irq) +{ + int index, i; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + if (!irq_2_iommu[irq].sub_handle) { + for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) + set_64bit((unsigned long *)irte, 0); + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c11..324bbca85a26 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif From 72b1e22dfcad1daca6906148fd956ffe404bb0bc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:45 -0700 Subject: [PATCH 012/186] x64, x2apic/intr-remap: generic irq migration support from process context Generic infrastructure for migrating the irq from the process context in the presence of CONFIG_GENERIC_PENDING_IRQ. This will be used later for migrating irq in the presence of interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + kernel/irq/manage.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..c211984b55e5 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 46d6611a33bb..628b5572a7c2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -87,7 +87,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) set_balance_irq_affinity(irq, cpumask); #ifdef CONFIG_GENERIC_PENDING_IRQ - set_pending_irq(irq, cpumask); + if (desc->status & IRQ_MOVE_PCNTXT) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->chip->set_affinity(irq, cpumask); + spin_unlock_irqrestore(&desc->lock, flags); + } else + set_pending_irq(irq, cpumask); #else desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); From d94d93ca5cc36cd78c532def62772c98fe8ba5d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:46 -0700 Subject: [PATCH 013/186] x64, x2apic/intr-remap: 8259 specific mask/unmask routines 8259 specific mask/unmask routines which be used later while enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 24 ++++++++++++++++++++++++ include/asm-x86/i8259.h | 3 +++ 2 files changed, 27 insertions(+) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49d9204..4b8a53d841f7 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + void init_8259A(int auto_eoi) { unsigned long flags; diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 2f98df91f1f2..31112b6c595b 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; +extern void mask_8259A(void); +extern void unmask_8259A(void); + #endif /* __ASM_I8259_H__ */ From 4dc2f96cacd1e74c688f94348a3bfd0a980817d5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:47 -0700 Subject: [PATCH 014/186] x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Generic ioapic specific routines which be used later during enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 66 ++++++++++++++++++++++++++++++++++++ include/asm-x86/io_apic.h | 6 ++++ 2 files changed, 72 insertions(+) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9e645cba11c4..84dd63c13d63 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -114,6 +114,9 @@ DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC RTE contents at the OS boot up */ +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -446,6 +449,69 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} + int skip_ioapic_setup; int ioapic_force; diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bbcb5fd..1c4a99d882f5 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -183,6 +183,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); +#ifdef CONFIG_X86_64 +extern int save_mask_IO_APIC_setup(void); +extern void restore_IO_APIC_setup(void); +extern void reinit_intr_remapped_IO_APIC(int); +#endif + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; From 0c81c746f9bdbfaafe64322d540c8b7b59c27314 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:48 -0700 Subject: [PATCH 015/186] x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Move the read_apic_id() to genapic routines. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 5 +++++ arch/x86/kernel/genapic_64.c | 11 ----------- arch/x86/kernel/genapic_flat_64.c | 14 +++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++++++++- include/asm-x86/genapic_64.h | 1 + include/asm-x86/mach-default/mach_apic.h | 1 + include/asm-x86/mach-default/mach_apicdef.h | 3 ++- include/asm-x86/smp.h | 4 +--- 8 files changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c14..9dd4ee4735f5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1096,6 +1096,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..7414871751a7 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -79,17 +79,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..400ed8df8b4e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static cpumask_t flat_target_cpus(void) { @@ -95,9 +97,17 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = GET_XAPIC_ID(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -123,6 +133,7 @@ struct genapic apic_flat = { .send_IPI_mask = flat_send_IPI_mask, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; /* @@ -187,4 +198,5 @@ struct genapic apic_physflat = { .send_IPI_mask = physflat_send_IPI_mask, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..1ef99be18488 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -134,9 +135,19 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int uv_read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + + return id; +} + static unsigned int phys_pkg_id(int index_msb) { - return GET_APIC_ID(read_apic_id()) >> index_msb; + return uv_read_apic_id() >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -159,6 +170,7 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .read_apic_id = uv_read_apic_id, }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 647e4e5c2580..d567abc347a9 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -27,6 +27,7 @@ struct genapic { /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); + unsigned int (*read_apic_id)(void); }; extern struct genapic *genapic; diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 0b2cde5e1b74..d172c554ab9f 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -30,6 +30,7 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) +#define read_apic_id (genapic->read_apic_id) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index e4b29ba37de6..453b58a67e29 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -5,8 +5,9 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24)&0xFFu) +#define GET_APIC_ID(x) (x) #define SET_APIC_ID(x) (((x)<<24)) +#define GET_XAPIC_ID(x) (((x) >> 24) & 0xFFu) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 2e221f1ce0b2..9848715fbd9e 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -169,12 +169,10 @@ static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); } -#else -extern unsigned int read_apic_id(void); #endif -# ifdef APIC_DEFINITION +# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else # include From 2d7a66d02e11af9ab8e16c76d22767e622b4e3d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 14:24:19 -0700 Subject: [PATCH 016/186] x64, x2apic/intr-remap: Interrupt-remapping and x2apic support, fix Yinghai Lu wrote: > Setting APIC routing to physical flat > Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (0 vs 4) > Pid: 1, comm: swapper Not tainted 2.6.26-rc9-tip-01763-g74f94b1-dirty #320 > > Call Trace: > [] ? set_cpu_sibling_map+0x38c/0x3bd > [] ? read_xapic_id+0x25/0x3e > [] ? verify_local_APIC+0x139/0x1b9 > [] ? read_xapic_id+0x25/0x3e > [] ? native_smp_prepare_cpus+0x224/0x2e9 > [] ? kernel_init+0x64/0x341 > [] ? child_rip+0xa/0x11 > [] ? kernel_init+0x0/0x341 > [] ? child_rip+0x0/0x11 > > > guess read_apic_id changing cuase some problem... genapic's read_apic_id() returns the actual apic id extracted from the APIC_ID register. And in some cases like UV, read_apic_id() returns completely different values from APIC ID register. Use the native apic register read, rather than genapic read_apic_id() in verify_local_APIC() And also, lapic_suspend() should also use native apic register read. Reported-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: "jeremy@goop.org" Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9dd4ee4735f5..3963f590c3d4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -626,10 +626,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -1137,7 +1137,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); From 1b374e4d6f8b3eb2fcd034fcc24ea8ba1dfde7aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:49 -0700 Subject: [PATCH 017/186] x64, x2apic/intr-remap: basic apic ops support Introduce basic apic operations which handle the apic programming. This will be used later to introduce another specific operations for x2apic. For the perfomance critial accesses like IPI's, EOI etc, we use the native operations as they are already referenced by different indirections like genapic, irq_chip etc. 64bit Paravirt ops can also define their apic operations accordingly. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +++++ arch/x86/kernel/apic_64.c | 34 ++++++++++++++++++++++++++-- arch/x86/kernel/io_apic_64.c | 8 +++---- arch/x86/kernel/paravirt.c | 8 ++++--- arch/x86/kernel/smpboot.c | 28 +++++++++-------------- include/asm-x86/apic.h | 43 +++++++++++++++++++++++++++++++----- include/asm-x86/ipi.h | 16 +++++++++----- include/asm-x86/paravirt.h | 2 ++ include/asm-x86/smp.h | 2 +- 9 files changed, 109 insertions(+), 38 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3e58b676d23b..2a83c07bd887 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,12 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +void apic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3963f590c3d4..9bb040689b31 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -119,13 +119,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +141,36 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 84dd63c13d63..b62d42ef9283 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1157,6 +1157,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) void __apicdebuginit print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; + unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1200,10 +1201,9 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..b80105a0f474 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,9 +360,11 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, +#ifnded CONFIG_X86_64 + .apic_write = native_apic_mem_write, + .apic_write_atomic = native_apic_mem_write_atomic, + .apic_read = native_apic_mem_read, +#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f35c2d8016ac..c55263b3df02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; -static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 +static int boot_cpu_logical_apicid; + u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_icr_write(APIC_DM_REMRD | regs[i], apicid); timeout = 0; do { @@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, + phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); + apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), + phys_apicid); /* * Give the other CPU some time to accept the IPI. @@ -1147,7 +1139,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ +#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); +#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 4e2c1e517f06..6fda195337c5 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -47,32 +47,59 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else -#define apic_write native_apic_write -#define apic_write_atomic native_apic_write_atomic -#define apic_read native_apic_read +#ifndef CONFIG_X86_64 +#define apic_write native_apic_mem_write +#define apic_write_atomic native_apic_mem_write_atomic +#define apic_read native_apic_mem_read +#endif #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif extern int is_vsmp_box(void); -static inline void native_apic_write(unsigned long reg, u32 v) +static inline void native_apic_mem_write(u32 reg, u32 v) { *((volatile u32 *)(APIC_BASE + reg)) = v; } -static inline void native_apic_write_atomic(unsigned long reg, u32 v) +static inline void native_apic_mem_write_atomic(u32 reg, u32 v) { (void)xchg((u32 *)(APIC_BASE + reg), v); } -static inline u32 native_apic_read(unsigned long reg) +static inline u32 native_apic_mem_read(u32 reg) { return *((volatile u32 *)(APIC_BASE + reg)); } +#ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); +extern void apic_icr_write(u32 low, u32 id); +#else + +struct apic_ops { + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + void (*write_atomic)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); +}; + +extern struct apic_ops *apic_ops; + +#define apic_read (apic_ops->read) +#define apic_write (apic_ops->write) +#define apic_write_atomic (apic_ops->write_atomic) +#define apic_icr_read (apic_ops->icr_read) +#define apic_icr_write (apic_ops->icr_write) +#define apic_wait_icr_idle (apic_ops->wait_icr_idle) +#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +#endif + extern int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC @@ -95,7 +122,11 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ +#ifdef CONFIG_X86_32 apic_write_around(APIC_EOI, 0); +#else + native_apic_mem_write(APIC_EOI, 0); +#endif } extern int lapic_get_maxlvt(void); diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index 196d63c28aa4..3d8d6a6c1f8e 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { @@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Wait for idle. */ - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * No need to touch the target chip field @@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } /* @@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, if (unlikely(vector == NMI_VECTOR)) safe_apic_wait_icr_idle(); else - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); + native_apic_mem_write(APIC_ICR2, cfg); /* * program the ICR @@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index ef5e8ec6a6ab..10adac02e6db 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -891,6 +891,7 @@ static inline void slow_down_io(void) /* * Basic functions accessing APICs. */ +#ifndef CONFIG_X86_64 static inline void apic_write(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); @@ -905,6 +906,7 @@ static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } +#endif static inline void setup_boot_clock(void) { diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 9848715fbd9e..d9d007d22785 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -158,13 +158,13 @@ extern int safe_smp_processor_id(void); #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 static inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#ifndef CONFIG_X86_64 static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); From 32e1d0a0651004f5fe47f85a2a5c725ad579a90c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:50 -0700 Subject: [PATCH 018/186] x64, x2apic/intr-remap: cpuid bits for x2apic feature cpuid feature for x2apic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- include/asm-x86/cpufeature.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..0bf4d37a0483 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 75ef959db329..5be9510ee012 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -90,6 +90,7 @@ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -188,6 +189,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 From 1cb11583a6c4ceda7426eb36f7bf0419da8dfbc2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:51 -0700 Subject: [PATCH 019/186] x64, x2apic/intr-remap: disable DMA-remapping if Interrupt-remapping is detected (temporary quirk) Interrupt-remapping enables queued invalidation. And once queued invalidation is enabled, IOTLB invalidation also needs to use the queued invalidation mechanism and the register based IOTLB invalidation doesn't work. For now, Support for IOTLB invalidation using queued invalidation is missing. Meanwhile, disable DMA-remapping, if Interrupt-remapping support is detected. For the meanwhile, if someone wants to really enable DMA-remapping, they can use nox2apic, which will disable interrupt-remapping and as such doesn't enable queued invalidation. And given that none of the release platforms support intr-remapping yet, we should be ok for this temporary hack. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 23a119e6485e..bd2c01674f5e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -457,6 +457,31 @@ void __init detect_intel_iommu(void) #ifdef CONFIG_DMAR { + struct acpi_table_dmar *dmar; + /* + * for now we will disable dma-remapping when interrupt + * remapping is enabled. + * When support for queued invalidation for IOTLB invalidation + * is added, we will not need this any more. + */ + dmar = (struct acpi_table_dmar *) dmar_tbl; + if (ret && cpu_has_x2apic && dmar->flags & 0x1) { + printk(KERN_INFO + "Queued invalidation will be enabled to support " + "x2apic and Intr-remapping.\n"); + printk(KERN_INFO + "Disabling IOMMU detection, because of missing " + "queued invalidation support for IOTLB " + "invalidation\n"); + printk(KERN_INFO + "Use \"nox2apic\", if you want to use Intel " + " IOMMU for DMA-remapping and don't care about " + " x2apic support\n"); + + dmar_disabled = 1; + return; + } + if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; From 13c88fb58d0112d47f7839f24a755715c6218822 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:52 -0700 Subject: [PATCH 020/186] x64, x2apic/intr-remap: x2apic ops for x2apic mode support x2apic ops for x2apic mode support. This uses MSR interface and differs slightly from the xapic register layout. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++++++++++++++ include/asm-x86/apic.h | 22 ++++++++++++++++++++++ include/asm-x86/apicdef.h | 3 +++ 3 files changed, 60 insertions(+) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9bb040689b31..a969ef78e12a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -171,6 +171,41 @@ struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .write_atomic = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 6fda195337c5..bb54928373ca 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -73,6 +75,26 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u32 low, high; + + if (reg == APIC_DFR) + return -1; + + rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); + return low; +} + #ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 6b9008c78731..bcae297b30b2 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -105,6 +105,7 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 #define APIC_TDR_DIV_TMBASE (1 << 2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 @@ -128,6 +129,8 @@ #define APIC_EILVT3 0x530 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 From cff73a6ffaed726780b001937d2a42efde553922 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:53 -0700 Subject: [PATCH 021/186] x64, x2apic/intr-remap: introcude self IPI to genapic routines Introduce self IPI op for genapic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 2 ++ include/asm-x86/genapic_64.h | 2 ++ include/asm-x86/hw_irq.h | 2 ++ include/asm-x86/mach-default/mach_apic.h | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 7414871751a7..6657de609dcb 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -61,7 +61,7 @@ void __init setup_apic_routing(void) /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 400ed8df8b4e..735586822135 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -131,6 +131,7 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, @@ -196,6 +197,7 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index d567abc347a9..6777d71aabc9 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -24,6 +24,7 @@ struct genapic { void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); @@ -36,6 +37,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; extern int acpi_madt_oem_check(char *, char *); +extern void apic_send_IPI_self(int vector); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 18f067c310f7..2ae47e7c1063 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -72,7 +72,9 @@ extern void enable_IO_APIC(void); #endif /* IPI functions */ +#ifdef CONFIG_X86_32 extern void send_IPI_self(int vector); +#endif extern void send_IPI(int dest, int vector); /* Statistics */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index d172c554ab9f..e06d23975d6a 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -31,6 +31,7 @@ static inline cpumask_t target_cpus(void) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id (genapic->read_apic_id) +#define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio From 12a67cf6851871ca8df42025c94f140c303d0f7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:54 -0700 Subject: [PATCH 022/186] x64, x2apic/intr-remap: x2apic cluster mode support x2apic cluster mode support. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 2 + arch/x86/kernel/genx2apic_cluster.c | 135 ++++++++++++++++++++++++++++ include/asm-x86/genapic_64.h | 1 + 4 files changed, 139 insertions(+) create mode 100644 arch/x86/kernel/genx2apic_cluster.c diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 55ff016e9f69..bde3e9b6fec9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += genx2apic_cluster.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6657de609dcb..1029e178cdf7 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -38,6 +38,8 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; + else if (cpu_has_x2apic && intr_remapping_enabled) + genapic = &apic_x2apic_cluster; else #ifdef CONFIG_ACPI /* diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 000000000000..ed0fdede800a --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +/* + * for now each logical cpu is in its own vector allocation domain. + */ +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +/* + * for now, we send the IPI's one by one in the cpumask. + * TBD: Based on the cpu mask, we can send the IPI's to the cluster group + * at once. We have 16 cpu's in a cluster. This will minimize IPI register + * writes. + */ +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +static void init_x2apic_ldr(void) +{ + int cpu = smp_processor_id(); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); + return; +} + +struct genapic apic_x2apic_cluster = { + .name = "cluster x2apic", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 6777d71aabc9..232460305877 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -35,6 +35,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); From 5c520a6724e912a7e6153b7597192edad6752750 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:55 -0700 Subject: [PATCH 023/186] x64, x2apic/intr-remap: setup init_apic_ldr for UV Signed-off-by: Suresh Siddha Signed-off-by: Jack Steiner Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 1ef99be18488..dcd4fb219daa 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,6 +120,10 @@ static int uv_apic_id_registered(void) return 1; } +static inline void uv_init_apic_ldr(void) +{ +} + static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -164,6 +168,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, + .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: [PATCH 024/186] x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++--- drivers/pci/intr_remapping.c | 10 ++ include/asm-x86/apic.h | 9 + include/asm-x86/io_apic.h | 14 ++ include/asm-x86/irq_remapping.h | 8 + include/linux/dmar.h | 1 + 7 files changed, 321 insertions(+), 22 deletions(-) create mode 100644 include/asm-x86/irq_remapping.h diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef78e12a..d5c06917b5b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42ef9283..9bd02ef049a0 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bddb4b19b6c7..32e55c7a9805 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -220,6 +220,16 @@ int flush_irte(int irq) return 0; } +struct intel_iommu *map_ioapic_to_ir(int apic) +{ + int i; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].id == apic) + return ir_ioapic[i].iommu; + return NULL; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index bb54928373ca..aa746704a5c9 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -134,6 +134,15 @@ extern int get_physical_broadcast(void); # define apic_write_around(x, y) apic_write_atomic((x), (y)) #endif +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + static inline void ack_APIC_irq(void) { /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 1c4a99d882f5..8dc2622714c8 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -107,6 +107,20 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + #ifdef CONFIG_X86_IO_APIC /* diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h new file mode 100644 index 000000000000..78242c6ffa58 --- /dev/null +++ b/include/asm-x86/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IRQ_REMAPPING_H +#define _ASM_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a26..bf41ffa74705 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: [PATCH 025/186] x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 238 +++++++++++++++++++++++++++++++++-- drivers/pci/intr_remapping.c | 11 ++ include/asm-x86/msidef.h | 4 + include/linux/dmar.h | 1 + 4 files changed, 242 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef049a0..877aa2e9d7e8 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,28 +2469,159 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) { + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; struct msi_msg msg; - int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) { - destroy_irq(irq); + if (ret < 0) return ret; - } set_irq_msi(irq, desc); write_msi_msg(irq, &msg); - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); return 0; } +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ + int irq, ret; + + irq = create_irq(); + if (irq < 0) + return irq; + +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) { + destroy_irq(irq); + return ret; + } + return 0; + +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} + +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; + +error: + destroy_irq(irq); + return ret; +} + void arch_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 32e55c7a9805..bb642cc5e18c 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic) return NULL; } +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd; + + drhd = dmar_find_matched_drhd_unit(dev); + if (!drhd) + return NULL; + + return drhd->iommu; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29ce426d..57fd85935e5a 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -48,4 +48,8 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_MSIDEF_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa74705..c360c558e59e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) From 6e1cb38a2aef7680975e71f23de187859ee8b158 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:58 -0700 Subject: [PATCH 026/186] x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping x2apic support. Interrupt-remapping must be enabled before enabling x2apic, this is needed to ensure that IO interrupts continue to work properly after the cpu mode is changed to x2apic(which uses 32bit extended physical/cluster apic id). On systems where apicid's are > 255, BIOS can handover the control to OS in x2apic mode. Or if the OS handover was in legacy xapic mode, check if it is capable of x2apic mode. And if we succeed in enabling Interrupt-remapping, then we can enable x2apic mode in the CPU. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 + arch/x86/kernel/acpi/boot.c | 2 + arch/x86/kernel/apic_64.c | 154 +++++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 2 + arch/x86/kernel/genapic_64.c | 1 + arch/x86/kernel/mpparse.c | 2 + arch/x86/kernel/setup.c | 2 + arch/x86/kernel/smpboot.c | 5 + include/asm-x86/apic.h | 5 + 9 files changed, 171 insertions(+), 4 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 795c487af8e4..56689ef1a159 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1377,6 +1377,8 @@ and is between 256 and 4096 characters. It is defined in the file nolapic_timer [X86-32,APIC] Do not use the local APIC timer. + nox2apic [X86-64,APIC] Do not enable x2APIC mode. + noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 785700a08e9d..8705262ddcda 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d5c06917b5b1..dd0501039f09 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -46,8 +48,12 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; + /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -896,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -943,6 +1068,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -981,6 +1111,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1238,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1381,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 36537ab9e56a..e7bf3c2dc5fe 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,6 +606,8 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1029e178cdf7..792e21ba1a81 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..70e1f3e287fb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 987b6fde3a99..2e78a143dec3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -730,6 +730,8 @@ void __init setup_arch(char **cmdline_p) num_physpages = max_pfn; check_efer(); + if (cpu_has_x2apic) + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c55263b3df02..0c43e1f2e7d3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1145,6 +1145,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); +#ifdef CONFIG_X86_64 + enable_IR_x2apic(); + setup_apic_routing(); +#endif + if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index aa746704a5c9..129752dd2525 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -100,6 +100,11 @@ extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); extern void apic_icr_write(u32 low, u32 id); #else +extern int x2apic, x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void enable_IR_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); struct apic_ops { u32 (*read)(u32 reg); From 2d9579a124d746a3e0e0ba45e57d80800ee80807 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:59 -0700 Subject: [PATCH 027/186] x64, x2apic/intr-remap: support for x2apic physical mode support x2apic Physical mode support. By default we will use x2apic cluster mode. x2apic physical mode can be selected using "x2apic_phys" boot parameter. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 18 ++++- arch/x86/kernel/genx2apic_phys.c | 122 +++++++++++++++++++++++++++++++ include/asm-x86/genapic_64.h | 1 + 4 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/genx2apic_phys.c diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3e9b6fec9..81280e93e792 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 792e21ba1a81..3940d8161f8b 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -30,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; +static int x2apic_phys = 0; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + static enum uv_system_type uv_system_type; /* @@ -39,9 +48,12 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) - genapic = &apic_x2apic_cluster; - else + else if (cpu_has_x2apic && intr_remapping_enabled) { + if (x2apic_phys) + genapic = &apic_x2apic_phys; + else + genapic = &apic_x2apic_cluster; + } else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 000000000000..3c70b9d692b2 --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +void init_x2apic_ldr(void) +{ + return; +} + +struct genapic apic_x2apic_phys = { + .name = "physical x2apic", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 232460305877..122b9242a40f 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -36,6 +36,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); From 9fa8c481b55e80edd8c637573f87853bb6b600f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:17:00 -0700 Subject: [PATCH 028/186] x64, x2apic/intr-remap: introduce CONFIG_INTR_REMAP Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2cfccc987a26..0bf8391a7f2d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1651,6 +1651,14 @@ config DMAR_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16M to make floppy (an ISA device) work. +config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + help + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" From 372e92d8b3e433888bf76c36f1c7e1405eae1584 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 14:56:18 -0700 Subject: [PATCH 029/186] x64, x2apic/intr-remap: Interrupt-remapping and x2apic support On Thu, Jul 10, 2008 at 12:53:20PM -0700, Ingo Molnar wrote: > > Btw., i threw it at the -tip test-cluster and got back a quick build > bugreport: > > arch/x86/xen/enlighten.c: In function 'xen_patch': > arch/x86/xen/enlighten.c:1084: warning: label 'patch_site' defined but not used > arch/x86/xen/enlighten.c: At top level: > arch/x86/xen/enlighten.c:1272: error: expected identifier before '(' token > arch/x86/xen/enlighten.c:1273: error: expected '}' before '.' token > arch/x86/kernel/paravirt.c:376:2: error: invalid preprocessing directive > #ifndedarch/x86/kernel/paravirt.c:384:2: error: #endif without #if > > with this config: > > http://redhat.com/~mingo/misc/config-Thu_Jul_10_21_43_28_CEST_2008.bad fix the typo. Signed-off-by: Suresh Siddha Cc: "Siddha Cc: Suresh B" Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index b80105a0f474..4f29ff847ebe 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,7 +360,7 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifnded CONFIG_X86_64 +#ifndef CONFIG_X86_64 .apic_write = native_apic_mem_write, .apic_write_atomic = native_apic_mem_write_atomic, .apic_read = native_apic_mem_read, From 277d1f5846d84e16760131a93b7a67ebfa8eded4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:55 -0700 Subject: [PATCH 030/186] x2apic: uninline uv_init_apic_ldr() Andrew says: > There's no point in declaring it inline if it's always called indirectly. And point taken! Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dcd4fb219daa..c915f750241e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,7 +120,7 @@ static int uv_apic_id_registered(void) return 1; } -static inline void uv_init_apic_ldr(void) +static void uv_init_apic_ldr(void) { } From ad66dd340f561bdde2285992314d9e4fd9b6191e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:56 -0700 Subject: [PATCH 031/186] x2apic: xen64 paravirt basic apic ops Define the Xen specific basic apic ops, in additon to paravirt apic ops, with some misc warning fixes. Signed-off-by: Suresh Siddha Cc: Jeremy Fitzhardinge Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/lguest/boot.c | 4 ++-- arch/x86/xen/enlighten.c | 41 ++++++++++++++++++++++++++++++++++++-- include/asm-x86/paravirt.h | 14 +++++++------ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50dad44fb542..0c45df20e2b3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -783,11 +783,11 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(unsigned long reg, u32 v) +static void lguest_apic_write(u32 reg, u32 v) { } -static u32 lguest_apic_read(unsigned long reg) +static u32 lguest_apic_read(u32 reg) { return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dcd4e51f2f16..54e255667530 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -548,16 +548,45 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC -static u32 xen_apic_read(unsigned long reg) +static u32 xen_apic_read(u32 reg) { return 0; } -static void xen_apic_write(unsigned long reg, u32 val) +static void xen_apic_write(u32 reg, u32 val) { /* Warn to see if there's any stray references */ WARN_ON(1); } + +#ifdef CONFIG_X86_64 +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void xen_apic_wait_icr_idle(void) +{ + return; +} + +static struct apic_ops xen_basic_apic_ops = { + .read = xen_apic_read, + .write = xen_apic_write, + .write_atomic = xen_apic_write, + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_apic_wait_icr_idle, + .safe_wait_icr_idle = xen_apic_wait_icr_idle, +}; +#endif + #endif static void xen_flush_tlb(void) @@ -1130,9 +1159,11 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 .apic_write = xen_apic_write, .apic_write_atomic = xen_apic_write, .apic_read = xen_apic_read, +#endif .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1291,6 +1322,12 @@ asmlinkage void __init xen_start_kernel(void) pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; +#ifdef CONFIG_X86_64 + /* + * for 64bit, set up the basic apic ops aswell. + */ + apic_ops = &xen_basic_apic_ops; +#endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 10adac02e6db..5e34d26aa3b5 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -200,13 +200,15 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 /* * Direct APIC operations, principally for VMI. Ideally * these shouldn't be in this interface. */ - void (*apic_write)(unsigned long reg, u32 v); - void (*apic_write_atomic)(unsigned long reg, u32 v); - u32 (*apic_read)(unsigned long reg); + void (*apic_write)(u32 reg, u32 v); + void (*apic_write_atomic)(u32 reg, u32 v); + u32 (*apic_read)(u32 reg); +#endif void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -892,17 +894,17 @@ static inline void slow_down_io(void) * Basic functions accessing APICs. */ #ifndef CONFIG_X86_64 -static inline void apic_write(unsigned long reg, u32 v) +static inline void apic_write(u32 reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); } -static inline void apic_write_atomic(unsigned long reg, u32 v) +static inline void apic_write_atomic(u32 reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); } -static inline u32 apic_read(unsigned long reg) +static inline u32 apic_read(u32 reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } From af9d13887f9e3699bbc852c39b076d30bb9dff2f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:57 -0700 Subject: [PATCH 032/186] x2apic: kernel-parameter documentation for "x2apic_phys" Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 56689ef1a159..88b08acf2ff2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1379,6 +1379,10 @@ and is between 256 and 4096 characters. It is defined in the file nox2apic [X86-64,APIC] Do not enable x2APIC mode. + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of + default x2apic cluster mode on platforms + supporting x2apic. + noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. From c535b6a1a685eb23f96e2c221777d6c1e05080d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:41:54 -0700 Subject: [PATCH 033/186] x86: let 32bit use apic_ops too Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 39 +++++++++++++++++++++++++++++++-------- include/asm-x86/apic.h | 13 ++----------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 2a83c07bd887..7413354c9ffd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,19 +145,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_icr_write(u32 low, u32 id) -{ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); -} - -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 129752dd2525..fcd2f01277b6 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -49,11 +49,6 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else -#ifndef CONFIG_X86_64 -#define apic_write native_apic_mem_write -#define apic_write_atomic native_apic_mem_write_atomic -#define apic_read native_apic_mem_read -#endif #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif @@ -95,16 +90,13 @@ static inline u32 native_apic_msr_read(u32 reg) return low; } -#ifdef CONFIG_X86_32 -extern void apic_wait_icr_idle(void); -extern u32 safe_apic_wait_icr_idle(void); -extern void apic_icr_write(u32 low, u32 id); -#else +#ifndef CONFIG_X86_32 extern int x2apic, x2apic_preenabled; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); +#endif struct apic_ops { u32 (*read)(u32 reg); @@ -125,7 +117,6 @@ extern struct apic_ops *apic_ops; #define apic_icr_write (apic_ops->icr_write) #define apic_wait_icr_idle (apic_ops->wait_icr_idle) #define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) -#endif extern int get_physical_broadcast(void); From 4696ca5bfd2697f5686f96d59cf0b6de14869b4e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:43:10 -0700 Subject: [PATCH 034/186] x86: mach_apicdef.h need to include before smp.h smp.h internal has include, so need to include that at first when genericarch use them need to have different apicdef.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/bigsmp.c | 5 ++--- arch/x86/mach-generic/es7000.c | 3 +-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 5 ++--- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 59d771714559..b31f2800638e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -5,17 +5,16 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include -#include #include #include -#include #include +#include +#include #include #include diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4742626f08c4..9b30547d746e 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -4,16 +4,15 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include #include -#include #include #include +#include #include #include #include diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8091e68764c4..95c07efff6b7 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -4,7 +4,6 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include @@ -12,8 +11,9 @@ #include #include #include -#include #include +#include +#include #include #include #include diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index a97ea0f35b1e..752edd96b1bf 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -4,17 +4,16 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include #include -#include #include -#include #include +#include +#include #include #include From 4c9961d56ec20c27ec5d02e49fd7427748312741 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:44:16 -0700 Subject: [PATCH 035/186] x86: make read_apic_id return final apicid also remove GET_APIC_ID when read_apic_id is used. need to apply after [PATCH] x86: mach_apicdef.h need to include before smp.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 6 +++--- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/io_apic_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 6 +++--- include/asm-x86/mach-default/mach_apic.h | 2 +- include/asm-x86/mach-default/mach_apicdef.h | 3 +-- include/asm-x86/mach-es7000/mach_apic.h | 2 +- include/asm-x86/smp.h | 11 ++++++++--- 11 files changed, 25 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8705262ddcda..b314bcd08406 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7413354c9ffd..47ff978aecfd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1230,7 +1230,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1270,7 +1270,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dd0501039f09..c75f58a66d8e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1060,7 +1060,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -1069,7 +1069,7 @@ void __init early_init_lapic_mapping(void) void __init init_apic_mappings(void) { if (x2apic) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return; } @@ -1092,7 +1092,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 735586822135..7dac2f275fad 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -101,7 +101,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_XAPIC_ID(apic_read(APIC_ID)); + id = GET_APIC_ID(apic_read(APIC_ID)); return id; } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c50adb84ea6f..382208d11f8d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1501,7 +1501,7 @@ void /*__init*/ print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(read_apic_id())); + GET_APIC_ID(v)); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1709,8 +1709,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(read_apic_id()); + entry.dest.physical.physical_dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 877aa2e9d7e8..2db0f98e2af5 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1246,7 +1246,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1439,7 +1439,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(read_apic_id()); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0c43e1f2e7d3..6cd002f3e20e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -211,7 +211,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(read_apic_id()); + phys_id = read_apic_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -1157,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { + if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); + read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index e06d23975d6a..925b797e2a26 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -56,7 +56,7 @@ static inline void init_apic_ldr(void) static inline int apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_apic_id(), phys_cpu_present_map); } static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index 453b58a67e29..3e1be6c99b35 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -5,9 +5,8 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (x) +#define GET_APIC_ID(x) (((x)>>24) & 0xFFu) #define SET_APIC_ID(x) (((x)<<24)) -#define GET_XAPIC_ID(x) (((x) >> 24) & 0xFFu) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index fbc8ad256f5a..b3556ec3bca5 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -141,7 +141,7 @@ static inline void setup_portio_remap(void) extern unsigned int boot_cpu_physical_apicid; static inline int check_phys_apicid_present(int cpu_physical_apicid) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return (1); } diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index d9d007d22785..3b43ca202c3b 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -165,9 +165,14 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } +#include static inline unsigned int read_apic_id(void) { - return *(u32 *)(APIC_BASE + APIC_ID); + unsigned int reg; + + reg = *(u32 *)(APIC_BASE + APIC_ID); + + return GET_APIC_ID(reg); } #endif @@ -175,11 +180,11 @@ static inline unsigned int read_apic_id(void) # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else -# include +#include static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(read_apic_id()); + return read_apic_id(); } # endif /* APIC_DEFINITION */ From f910a9dc7c865896815e2a95fe33363e9522f277 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 01:01:20 -0700 Subject: [PATCH 036/186] x86: make 64bit have get_apic_id generalize the x2apic code some more. let read_apic_id become a macro (later on a function/inline) GET_APIC_ID(apic_read(APIC_ID)) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) instead of this weird construct: -#define read_apic_id (genapic->read_apic_id) Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 26 ++++++++++++++++++--- arch/x86/kernel/genx2apic_cluster.c | 20 +++++++++++++++- arch/x86/kernel/genx2apic_phys.c | 20 +++++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 23 +++++++++++++++--- include/asm-x86/genapic_64.h | 4 +++- include/asm-x86/mach-default/mach_apic.h | 2 +- include/asm-x86/mach-default/mach_apicdef.h | 6 ++--- 7 files changed, 88 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7dac2f275fad..2c973cbf054f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -97,11 +97,27 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_APIC_ID(apic_read(APIC_ID)); + id = get_apic_id(apic_read(APIC_ID)); return id; } @@ -134,7 +150,9 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -200,5 +218,7 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ed0fdede800a..40bc0140d89f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -94,6 +94,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -131,5 +147,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3c70b9d692b2..2f3c6ca19de9 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -84,6 +84,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -118,5 +134,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c915f750241e..3ca29cd8c23c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -139,16 +139,31 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } -static unsigned int uv_read_apic_id(void) +static unsigned int get_apic_id(unsigned long x) { unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + id = x | __get_cpu_var(x2apic_extra_bits); return id; } +static long set_apic_id(unsigned int id) +{ + unsigned long x; + + /* maskout x2apic_extra_bits ? */ + x = id; + return x; +} + +static unsigned int uv_read_apic_id(void) +{ + + return get_apic_id(apic_read(APIC_ID)); +} + static unsigned int phys_pkg_id(int index_msb) { return uv_read_apic_id() >> index_msb; @@ -175,7 +190,9 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ - .read_apic_id = uv_read_apic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 122b9242a40f..8ff2589da93b 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -28,7 +28,9 @@ struct genapic { /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*read_apic_id)(void); + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; }; extern struct genapic *genapic; diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 925b797e2a26..3d2b455581ec 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -30,7 +30,7 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) -#define read_apic_id (genapic->read_apic_id) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index 3e1be6c99b35..a55518aa5a2d 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -4,9 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24) & 0xFFu) -#define SET_APIC_ID(x) (((x)<<24)) +#define APIC_ID_MASK (genapic->apic_id_mask) +#define GET_APIC_ID(x) (genapic->get_apic_id(x)) +#define SET_APIC_ID(x) (genapic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) From 94a8c3c2437c8946f1b6c8e0b2c560a7db8ed3c6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Jul 2008 22:19:35 -0700 Subject: [PATCH 037/186] x86: let 32bit use apic_ops too - fix fix for pv - clean up the namespace there too. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 5 ---- arch/x86/kernel/vmi_32.c | 51 +++++++++++++++++++++++++++++++++++--- arch/x86/xen/enlighten.c | 19 +++++++------- include/asm-x86/paravirt.h | 29 ---------------------- 4 files changed, 57 insertions(+), 47 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4f29ff847ebe..e0f139106c7e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,11 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = native_apic_mem_write, - .apic_write_atomic = native_apic_mem_write_atomic, - .apic_read = native_apic_mem_read, -#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..cf3074354553 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +static u32 vmi_apic_read(u32 reg) +{ + return 0; +} + +static void vmi_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u64 vmi_apic_icr_read(void) +{ + return 0; +} + +static void vmi_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void vmi_apic_wait_icr_idle(void) +{ + return; +} + +static u32 vmi_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops vmi_basic_apic_ops = { + .read = vmi_apic_read, + .write = vmi_apic_write, + .write_atomic = vmi_apic_write, + .icr_read = vmi_apic_icr_read, + .icr_write = vmi_apic_icr_write, + .wait_icr_idle = vmi_apic_wait_icr_idle, + .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, +}; +#endif + /* * VMI setup common to all processors */ @@ -904,9 +948,10 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(pv_apic_ops.apic_read, APICRead); - para_fill(pv_apic_ops.apic_write, APICWrite); - para_fill(pv_apic_ops.apic_write_atomic, APICWrite); + para_fill(vmi_basic_apic_ops.read, APICRead); + para_fill(vmi_basic_apic_ops.write, APICWrite); + para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); + apic_ops = &vmi_basic_apic_ops; #endif /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 54e255667530..d11dda7ebd7a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -559,7 +559,6 @@ static void xen_apic_write(u32 reg, u32 val) WARN_ON(1); } -#ifdef CONFIG_X86_64 static u64 xen_apic_icr_read(void) { return 0; @@ -576,6 +575,11 @@ static void xen_apic_wait_icr_idle(void) return; } +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + static struct apic_ops xen_basic_apic_ops = { .read = xen_apic_read, .write = xen_apic_write, @@ -583,9 +587,8 @@ static struct apic_ops xen_basic_apic_ops = { .icr_read = xen_apic_icr_read, .icr_write = xen_apic_icr_write, .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_apic_wait_icr_idle, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, }; -#endif #endif @@ -1159,11 +1162,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = xen_apic_write, - .apic_write_atomic = xen_apic_write, - .apic_read = xen_apic_read, -#endif .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1322,9 +1320,10 @@ asmlinkage void __init xen_start_kernel(void) pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; -#ifdef CONFIG_X86_64 + +#ifdef CONFIG_X86_LOCAL_APIC /* - * for 64bit, set up the basic apic ops aswell. + * set up the basic apic ops. */ apic_ops = &xen_basic_apic_ops; #endif diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 5e34d26aa3b5..08f89e385a92 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -200,15 +200,6 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - /* - * Direct APIC operations, principally for VMI. Ideally - * these shouldn't be in this interface. - */ - void (*apic_write)(u32 reg, u32 v); - void (*apic_write_atomic)(u32 reg, u32 v); - u32 (*apic_read)(u32 reg); -#endif void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -890,26 +881,6 @@ static inline void slow_down_io(void) } #ifdef CONFIG_X86_LOCAL_APIC -/* - * Basic functions accessing APICs. - */ -#ifndef CONFIG_X86_64 -static inline void apic_write(u32 reg, u32 v) -{ - PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); -} - -static inline void apic_write_atomic(u32 reg, u32 v) -{ - PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); -} - -static inline u32 apic_read(u32 reg) -{ - return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); -} -#endif - static inline void setup_boot_clock(void) { PVOP_VCALL0(pv_apic_ops.setup_boot_clock); From 9a8f0e6b5dfe3b4f330fc82b16a4000f5688fce8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 09:59:40 -0700 Subject: [PATCH 038/186] x86: let 32bit use apic_ops too - fix Fix VMI apic_ops. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 ++++ arch/x86/kernel/vmi_32.c | 51 +++------------------------------------ 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 47ff978aecfd..cb54d9e20f94 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index cf3074354553..237082833c14 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,50 +676,6 @@ static inline int __init probe_vmi_rom(void) return 0; } -#ifdef CONFIG_X86_LOCAL_APIC -static u32 vmi_apic_read(u32 reg) -{ - return 0; -} - -static void vmi_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 vmi_apic_icr_read(void) -{ - return 0; -} - -static void vmi_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void vmi_apic_wait_icr_idle(void) -{ - return; -} - -static u32 vmi_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops vmi_basic_apic_ops = { - .read = vmi_apic_read, - .write = vmi_apic_write, - .write_atomic = vmi_apic_write, - .icr_read = vmi_apic_icr_read, - .icr_write = vmi_apic_icr_write, - .wait_icr_idle = vmi_apic_wait_icr_idle, - .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, -}; -#endif - /* * VMI setup common to all processors */ @@ -948,10 +904,9 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(vmi_basic_apic_ops.read, APICRead); - para_fill(vmi_basic_apic_ops.write, APICWrite); - para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); - apic_ops = &vmi_basic_apic_ops; + para_fill(apic_ops->read, APICRead); + para_fill(apic_ops->write, APICWrite); + para_fill(apic_ops->write_atomic, APICWrite); #endif /* From 511d9d34183662aada3890883e860b151d707e22 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 14 Jul 2008 09:49:14 -0700 Subject: [PATCH 039/186] x86: apic_ops for lguest apic_ops for lguest. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/lguest/boot.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0c45df20e2b3..675ee7a6475e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -791,6 +791,37 @@ static u32 lguest_apic_read(u32 reg) { return 0; } + +static u64 lguest_apic_icr_read(void) +{ + return 0; +} + +static void lguest_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void lguest_apic_wait_icr_idle(void) +{ + return; +} + +static u32 lguest_apic_safe_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops lguest_basic_apic_ops = { + .read = lguest_apic_read, + .write = lguest_apic_write, + .write_atomic = lguest_apic_write, + .icr_read = lguest_apic_icr_read, + .icr_write = lguest_apic_icr_write, + .wait_icr_idle = lguest_apic_wait_icr_idle, + .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, +}; #endif /* STOP! Until an interrupt comes in. */ @@ -990,9 +1021,7 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - pv_apic_ops.apic_write = lguest_apic_write; - pv_apic_ops.apic_write_atomic = lguest_apic_write; - pv_apic_ops.apic_read = lguest_apic_read; + apic_ops = &lguest_basic_apic_ops; #endif /* time operations */ From f586bf7df9acc26b68b0feefc0dd32fa63516d3a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 15:58:35 -0700 Subject: [PATCH 040/186] x86: APIC: Remove apic_write_around(); use alternatives, merge fix On Fri, Jul 18, 2008 at 02:03:48PM -0700, Ingo Molnar wrote: > > * Yinghai Lu wrote: > > > > git merge tip/x86/x2apic > > CONFLICT (content): Merge conflict in arch/x86/kernel/Makefile > > CONFLICT (content): Merge conflict in arch/x86/kernel/paravirt.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/smpboot.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/vmi_32.c > > CONFLICT (content): Merge conflict in arch/x86/xen/enlighten.c > > CONFLICT (content): Merge conflict in include/asm-x86/apic.h > > CONFLICT (content): Merge conflict in include/asm-x86/paravirt.h > > that's due to the changes in tip/x86/apic and in tip/x86/uv. > > ok, i've just merged x86/apic into x86/x2apic and x86/uv as well, and > pushed out the result. > > Note: it's a first raw merge and completely untested. It will now merge > cleanly into tip/master. There are probably a few details missing. Ingo, thanks for doing this. While I was testing my merge changes, you posted yours... anyhow we need this piece, which is missing from your merge. Signed-off-by: Suresh Siddha Cc: Yinghai Lu Cc: "Maciej W. Rozycki" Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 ++--- arch/x86/kernel/apic_64.c | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index dcb897f22aa2..8728f54a93d8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -174,8 +174,8 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); } u64 xapic_icr_read(void) @@ -191,7 +191,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46e612408aca..a850bc63fb1c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -167,7 +167,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, @@ -206,7 +205,6 @@ u64 x2apic_icr_read(void) static struct apic_ops x2apic_ops = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .write_atomic = native_apic_msr_write, .icr_read = x2apic_icr_read, .icr_write = x2apic_icr_write, .wait_icr_idle = x2apic_wait_icr_idle, From caf43bf7c6a55e89b6df5179df434d67e24aa32e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 20 Jul 2008 14:06:50 +0200 Subject: [PATCH 041/186] x86, xen: fix apic_ops build on UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/xen/enlighten.c:615: error: variable ‘xen_basic_apic_ops’ has initializer but incomplete type arch/x86/xen/enlighten.c:616: error: unknown field ‘read’ specified in initializer [...] Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 008b7b69581e..e4d1459a63df 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include From 7be42004065ce4df193aeef5befd26805267d0d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 20 Jul 2008 17:04:57 +0200 Subject: [PATCH 042/186] x86, lguest: fix apic_ops build on UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/lguest/boot.c:816: error: variable ‘lguest_basic_apic_ops’ has initializer but incomplete type arch/x86/lguest/boot.c:817: error: unknown field ‘read’ specified in initializer [...] Signed-off-by: Ingo Molnar --- arch/x86/lguest/boot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 35c4349cd668..756fc489652b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include From 1b9b89e7f163336ad84200b66a17284dbf26aced Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 22:08:21 -0700 Subject: [PATCH 043/186] x86: add apic probe for genapic 64bit, v2 introducing an APIC handling probing abstraction: static struct genapic *apic_probe[] __initdata = { &apic_x2apic_uv_x, &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, NULL, }; This way we can remove UV, x2apic specific code from genapic_64.c and move them to their specific genapic files. [ v2: fix compiling when CONFIG_ACPI is not set ] Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 85 +++++++++-------------------- arch/x86/kernel/genapic_flat_64.c | 26 +++++++++ arch/x86/kernel/genx2apic_cluster.c | 11 ++++ arch/x86/kernel/genx2apic_phys.c | 21 +++++++ arch/x86/kernel/genx2apic_uv_x.c | 32 ++++++++++- include/asm-x86/genapic_64.h | 1 + 6 files changed, 116 insertions(+), 60 deletions(-) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 3940d8161f8b..b3ba969c50d2 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,62 +16,37 @@ #include #include #include -#include #include #include #include -#ifdef CONFIG_ACPI -#include -#endif - -DEFINE_PER_CPU(int, x2apic_extra_bits); +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2xpic_uv_x; +extern struct genapic apic_x2apic_phys; +extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; -static int x2apic_phys = 0; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static enum uv_system_type uv_system_type; +static struct genapic *apic_probe[] __initdata = { + &apic_x2apic_uv_x, + &apic_x2apic_phys, + &apic_x2apic_cluster, + &apic_physflat, + NULL, +}; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (uv_system_type == UV_NON_UNIQUE_APIC) - genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) { - if (x2apic_phys) - genapic = &apic_x2apic_phys; - else - genapic = &apic_x2apic_cluster; - } else -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; - else -#endif - - if (max_physical_apicid < 8) - genapic = &apic_flat; - else - genapic = &apic_physflat; - - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + if (genapic == &apic_flat) { + if (max_physical_apicid >= 8) + genapic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } } /* Same for both flat and physical. */ @@ -83,23 +58,15 @@ void apic_send_IPI_self(int vector) int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) - uv_system_type = UV_NON_UNIQUE_APIC; + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + genapic = apic_probe[i]; + printk(KERN_INFO "Setting APIC routing to %s.\n", + genapic->name); + return 1; + } } return 0; } - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 2c973cbf054f..1740b83329f6 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -21,6 +21,15 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif + +static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 1; +} + static cpumask_t flat_target_cpus(void) { return cpu_online_map; @@ -138,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", + .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -160,6 +170,21 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ +static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + return 1; +#endif + + return 0; +} static cpumask_t physflat_target_cpus(void) { @@ -206,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 40bc0140d89f..ef3f3182d50a 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -4,12 +4,22 @@ #include #include #include +#include + #include #include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled) + return 1; + + return 0; +} + /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ static cpumask_t x2apic_target_cpus(void) @@ -135,6 +145,7 @@ static void init_x2apic_ldr(void) struct genapic apic_x2apic_cluster = { .name = "cluster x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2f3c6ca19de9..f35a3bf3a9e5 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -4,10 +4,30 @@ #include #include #include +#include + #include #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + +static int x2apic_phys; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + return 1; + + return 0; +} /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ @@ -122,6 +142,7 @@ void init_x2apic_ldr(void) struct genapic apic_x2apic_phys = { .name = "physical x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a8e5cb4c4d43..a882bc3a2ce9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -27,6 +27,33 @@ #include #include +static enum uv_system_type uv_system_type; + +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) { + uv_system_type = UV_NON_UNIQUE_APIC; + return 1; + } + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} + DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -153,7 +180,7 @@ static unsigned int get_apic_id(unsigned long x) return id; } -static long set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { unsigned long x; @@ -182,6 +209,7 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", + .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, @@ -433,3 +461,5 @@ void __cpuinit uv_cpu_init(void) if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } + + diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 2871b3fccb21..1e832e49f54e 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -14,6 +14,7 @@ struct genapic { char *name; + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); From 026e2c05ef58ef413e2d52696f125d5ea1aa8bce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jul 2008 11:58:14 +0200 Subject: [PATCH 044/186] x86, cyrix: debug Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 20 ++++++++++---------- include/asm-x86/processor-cyrix.h | 8 ++++++++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..db5868cd2443 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -116,7 +116,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -127,11 +127,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); } static void __cpuinit set_cx86_inc(void) @@ -144,10 +144,10 @@ static void __cpuinit set_cx86_inc(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + setCx86_old(CX86_PCR1, getCx86_old(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } @@ -162,14 +162,14 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -286,7 +286,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -309,7 +309,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -424,7 +424,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } diff --git a/include/asm-x86/processor-cyrix.h b/include/asm-x86/processor-cyrix.h index 97568ada1f97..1198f2a0e42c 100644 --- a/include/asm-x86/processor-cyrix.h +++ b/include/asm-x86/processor-cyrix.h @@ -28,3 +28,11 @@ static inline void setCx86(u8 reg, u8 data) outb(reg, 0x22); outb(data, 0x23); } + +#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86_old(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + From df1be4372eae5f104b7fb4991bc4b35f00b11a11 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 22 Jul 2008 10:13:11 -0400 Subject: [PATCH 045/186] x64, apic: use generic apic_write() for ack_APIC_irq() I tested tip/master and found an issue (patch attached) for x2apic support. This is not because of the recent merges we had, but because of something(where we still access memory based interface after enabling x2apic mode) that slipped through my earlier tests. Probably it is a good idea to unmap the memory mapped interface, once we switch to x2apic mode. That will catch the issues much earlier. I will post another patch for this. ack_APIC_irq() is used at too many generic places (and not just during irq_chip handling!) to use the native_apic_mem_write(). For ex, this will break x2apic based systems. Fix ack_APIC_irq() to use the generic apic_write() even for 64-bit. Signed-off-by: Suresh Siddha Cc: suresh.b.siddha@intel.com Cc: yong.y.wang@linux.intel.com Signed-off-by: Ingo Molnar --- include/asm-x86/apic.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 51339910fdc0..502ca6594b1d 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -136,11 +136,7 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ -#ifdef CONFIG_X86_32 apic_write(APIC_EOI, 0); -#else - native_apic_mem_write(APIC_EOI, 0); -#endif } extern int lapic_get_maxlvt(void); From fd60b39f845aa7462453af8aed4f059b162f181f Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 23 Jul 2008 22:45:01 +0800 Subject: [PATCH 046/186] arch/x86/kernel/genx2apic_uv_x.c: Removed duplicated include Removed duplicated include file in arch/x86/kernel/genx2apic_uv_x.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a882bc3a2ce9..14a9772778e5 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: [PATCH 047/186] x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 29 +++++++++++++++++++---------- arch/x86/kernel/io_apic_64.c | 31 +++++++++++++++++++------------ arch/x86/pci/acpi.c | 5 ----- include/asm-x86/hw_irq.h | 1 - 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c5..d54455ec9850 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -50,6 +50,8 @@ #include #include +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1345,7 +1347,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1463,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,7 +1484,7 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1567,12 +1568,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1605,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -2333,8 +2344,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d1707..8cdcc4f287cc 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -53,6 +53,8 @@ #include #include +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +89,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -965,7 +965,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1060,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,7 +1081,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1159,12 +1158,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1195,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1849,8 +1858,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 19af06927fbc..1d88d2b39771 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -250,10 +250,5 @@ int __init pci_acpi_init(void) acpi_pci_irq_enable(dev); } -#ifdef CONFIG_X86_IO_APIC - if (acpi_ioapic) - print_IO_APIC(); -#endif - return 0; } diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51df5668..83e0048dca78 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); From 510b37258dfd61693ca6c039865c78bd996e3718 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Jul 2008 13:20:22 -0700 Subject: [PATCH 048/186] x86: move declaring x2apic_extra_bits it is for uv only Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 2 -- arch/x86/kernel/genx2apic_uv_x.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f35a3bf3a9e5..3229c68aedd4 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -10,8 +10,6 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); - static int x2apic_phys; static int set_x2apic_phys_mode(char *arg) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 14a9772778e5..169388c4cce9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -26,6 +26,8 @@ #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + static enum uv_system_type uv_system_type; static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) From 36a028de785c6e6f15ac84f8b3b087b89137ea26 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:28 +0200 Subject: [PATCH 049/186] x86: apic unification - merge down lapic_get_maxlvt No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c898358371..447dd8c5c0e9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -193,9 +193,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7ee..4fa2a8620c26 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -158,11 +158,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* From d4c63ec060f3315653c0ae5bc3a7fe2419a2282f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:29 +0200 Subject: [PATCH 050/186] x86: apic unification - merge down enable_NMI_through_LVT0 No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 447dd8c5c0e9..01708f128eee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -172,11 +172,15 @@ u32 safe_apic_wait_icr_idle(void) */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; - /* Level triggered for 82489DX */ + /* unmask and set to NMI */ + v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fa2a8620c26..7615b4b9c3f3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,6 +150,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } From a4dbc34d181e87a0d724dee365921e9251f831d4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:14:28 -0700 Subject: [PATCH 051/186] x86: add setup_ioapic_ids for numaq in x86_quirks Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/numaq_32.c | 7 +++++++ include/asm-x86/setup.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f3..72ba06314c7b 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1728,10 +1729,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) return; -#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20a..2434467ddf72 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } +static int __init numaq_setup_ioapic_ids(void) +{ + /* so can skip it */ + return 1; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index b7aeba94e430..80fcc0de80be 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -38,6 +38,7 @@ struct x86_quirks { void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); + int (*setup_ioapic_ids)(void); }; extern struct x86_quirks *x86_quirks; From 1176fa919292887aa738d317d61fe2bba4d764f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:21 -0700 Subject: [PATCH 052/186] x86: mach-bigsmp to bigsmp Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/bigsmp.c | 6 +++--- .../{mach-bigsmp/mach_apic.h => bigsmp/apic.h} | 10 +++++----- include/asm-x86/bigsmp/apicdef.h | 13 +++++++++++++ .../{mach-bigsmp/mach_ipi.h => bigsmp/ipi.h} | 6 +++--- include/asm-x86/mach-bigsmp/mach_apicdef.h | 13 ------------- 5 files changed, 24 insertions(+), 24 deletions(-) rename include/asm-x86/{mach-bigsmp/mach_apic.h => bigsmp/apic.h} (94%) create mode 100644 include/asm-x86/bigsmp/apicdef.h rename include/asm-x86/{mach-bigsmp/mach_ipi.h => bigsmp/ipi.h} (77%) delete mode 100644 include/asm-x86/mach-bigsmp/mach_apicdef.h diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index b31f2800638e..df37fc9d6a26 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -12,10 +12,10 @@ #include #include #include -#include +#include #include -#include -#include +#include +#include #include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/bigsmp/apic.h similarity index 94% rename from include/asm-x86/mach-bigsmp/mach_apic.h rename to include/asm-x86/bigsmp/apic.h index 05362d44a3ee..0a9cd7c5ca0c 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/bigsmp/apic.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H -#define ASM_X86__MACH_BIGSMP__MACH_APIC_H +#ifndef __ASM_MACH_APIC_H +#define __ASM_MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) @@ -11,7 +11,7 @@ static inline int apic_id_registered(void) /* Round robin the irqs amoung the online cpus */ static inline cpumask_t target_cpus(void) -{ +{ static unsigned long cpu = NR_CPUS; do { if (cpu >= NR_CPUS) @@ -23,7 +23,7 @@ static inline cpumask_t target_cpus(void) } #undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 +#define APIC_DEST_LOGICAL 0 #define TARGET_CPUS (target_cpus()) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) @@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ +#endif /* __ASM_MACH_APIC_H */ diff --git a/include/asm-x86/bigsmp/apicdef.h b/include/asm-x86/bigsmp/apicdef.h new file mode 100644 index 000000000000..392c3f5ef2fe --- /dev/null +++ b/include/asm-x86/bigsmp/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_MACH_APICDEF_H +#define __ASM_MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/bigsmp/ipi.h similarity index 77% rename from include/asm-x86/mach-bigsmp/mach_ipi.h rename to include/asm-x86/bigsmp/ipi.h index b1b0f966a009..9404c535b7ec 100644 --- a/include/asm-x86/mach-bigsmp/mach_ipi.h +++ b/include/asm-x86/bigsmp/ipi.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H -#define ASM_X86__MACH_BIGSMP__MACH_IPI_H +#ifndef __ASM_MACH_IPI_H +#define __ASM_MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ +#endif /* __ASM_MACH_IPI_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h deleted file mode 100644 index 811935d9d49b..000000000000 --- a/include/asm-x86/mach-bigsmp/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H -#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */ From c7e7964c9828083aeb41c2664cbf5a32acbfa9d1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:33 -0700 Subject: [PATCH 053/186] x86: mach_es7000 to es7000 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/{mach-es7000 => es7000}/Makefile | 0 arch/x86/{mach-es7000 => es7000}/es7000.h | 18 +++++------ arch/x86/{mach-es7000 => es7000}/es7000plat.c | 2 +- arch/x86/mach-generic/Makefile | 2 +- arch/x86/mach-generic/es7000.c | 10 +++---- .../mach_apic.h => es7000/apic.h} | 30 +++++++++---------- include/asm-x86/es7000/apicdef.h | 13 ++++++++ .../{mach-es7000/mach_ipi.h => es7000/ipi.h} | 6 ++-- .../mach_mpparse.h => es7000/mpparse.h} | 6 ++-- .../mach_wakecpu.h => es7000/wakecpu.h} | 8 ++--- include/asm-x86/mach-es7000/mach_apicdef.h | 13 -------- 11 files changed, 54 insertions(+), 54 deletions(-) rename arch/x86/{mach-es7000 => es7000}/Makefile (100%) rename arch/x86/{mach-es7000 => es7000}/es7000.h (89%) rename arch/x86/{mach-es7000 => es7000}/es7000plat.c (99%) rename include/asm-x86/{mach-es7000/mach_apic.h => es7000/apic.h} (92%) create mode 100644 include/asm-x86/es7000/apicdef.h rename include/asm-x86/{mach-es7000/mach_ipi.h => es7000/ipi.h} (77%) rename include/asm-x86/{mach-es7000/mach_mpparse.h => es7000/mpparse.h} (81%) rename include/asm-x86/{mach-es7000/mach_wakecpu.h => es7000/wakecpu.h} (89%) delete mode 100644 include/asm-x86/mach-es7000/mach_apicdef.h diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/es7000/Makefile similarity index 100% rename from arch/x86/mach-es7000/Makefile rename to arch/x86/es7000/Makefile diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/es7000/es7000.h similarity index 89% rename from arch/x86/mach-es7000/es7000.h rename to arch/x86/es7000/es7000.h index c8d5aa132fa0..4e62f6fa95b8 100644 --- a/arch/x86/mach-es7000/es7000.h +++ b/arch/x86/es7000/es7000.h @@ -1,7 +1,7 @@ /* * Written by: Garry Forsgren, Unisys Corporation * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface + * This file contains the code to configure and interface * with Unisys ES7000 series hardware system manager. * * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. @@ -18,7 +18,7 @@ * with this program; if not, write the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Unisys Corporation, Township Line & Union Meeting + * Contact information: Unisys Corporation, Township Line & Union Meeting * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: * * http://www.unisys.com @@ -41,7 +41,7 @@ #define MIP_VALID 0x0100000000000000ULL #define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) struct mip_reg_info { unsigned long long mip_info; @@ -51,11 +51,11 @@ struct mip_reg_info { }; struct part_info { - unsigned char type; + unsigned char type; unsigned char length; unsigned char part_id; unsigned char apic_mode; - unsigned long snum; + unsigned long snum; char ptype[16]; char sname[64]; char pname[64]; @@ -68,11 +68,11 @@ struct psai { }; struct es7000_mem_info { - unsigned char type; + unsigned char type; unsigned char length; unsigned char resv[6]; - unsigned long long start; - unsigned long long size; + unsigned long long start; + unsigned long long size; }; struct es7000_oem_table { @@ -106,7 +106,7 @@ struct mip_reg { }; #define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) +#define MIP_FUNC(VALUE) (VALUE & 0xff) extern int parse_unisys_oem (char *oemptr); extern void setup_unisys(void); diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/es7000/es7000plat.c similarity index 99% rename from arch/x86/mach-es7000/es7000plat.c rename to arch/x86/es7000/es7000plat.c index 50189af14b85..7789fde13c3f 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/es7000/es7000plat.c @@ -72,7 +72,7 @@ es7000_rename_gsi(int ioapic, int gsi) base += nr_ioapic_registers[i]; } - if (!ioapic && (gsi < 16)) + if (!ioapic && (gsi < 16)) gsi += base; return gsi; } diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 0dbd7803a1d5..4706de7676b1 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -9,4 +9,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o -obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ +obj-$(CONFIG_X86_ES7000) += ../../x86/es7000/ diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 9b30547d746e..520cca0ee04e 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -11,12 +11,12 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include static int probe_es7000(void) { diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/es7000/apic.h similarity index 92% rename from include/asm-x86/mach-es7000/mach_apic.h rename to include/asm-x86/es7000/apic.h index ae877ec267f2..bd2c44d1f7ac 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/es7000/apic.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H -#define ASM_X86__MACH_ES7000__MACH_APIC_H +#ifndef __ASM_ES7000_APIC_H +#define __ASM_ES7000_APIC_H #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) @@ -10,7 +10,7 @@ static inline int apic_id_registered(void) } static inline cpumask_t target_cpus(void) -{ +{ #if defined CONFIG_ES7000_CLUSTERED_APIC return CPU_MASK_ALL; #else @@ -23,24 +23,24 @@ static inline cpumask_t target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE (dest_LowestPrio) #define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) +#define NO_BALANCE_IRQ (1) #undef WAKE_SECONDARY_VIA_INIT #define WAKE_SECONDARY_VIA_MIP #else #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target procs */ -#define NO_BALANCE_IRQ (0) +#define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 #define WAKE_SECONDARY_VIA_INIT #endif static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ +{ return 0; -} -static inline unsigned long check_apicid_present(int bit) +} +static inline unsigned long check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } @@ -80,7 +80,7 @@ static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? + (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); } @@ -150,7 +150,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) int num_bits_set; int cpus_found = 0; int cpu; - int apicid; + int apicid; num_bits_set = cpus_weight(cpumask); /* Return id to all */ @@ -160,16 +160,16 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) #else return cpu_to_logical_apicid(0); #endif - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. */ cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != + if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n",__FUNCTION__); #if defined CONFIG_ES7000_CLUSTERED_APIC @@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ +#endif /* __ASM_ES7000_APIC_H */ diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h new file mode 100644 index 000000000000..8b234a3cb851 --- /dev/null +++ b/include/asm-x86/es7000/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_ES7000_APICDEF_H +#define __ASM_ES7000_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/es7000/ipi.h similarity index 77% rename from include/asm-x86/mach-es7000/mach_ipi.h rename to include/asm-x86/es7000/ipi.h index 3a21240e03dc..632a955fcc0a 100644 --- a/include/asm-x86/mach-es7000/mach_ipi.h +++ b/include/asm-x86/es7000/ipi.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H -#define ASM_X86__MACH_ES7000__MACH_IPI_H +#ifndef __ASM_ES7000_IPI_H +#define __ASM_ES7000_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ +#endif /* __ASM_ES7000_IPI_H */ diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/es7000/mpparse.h similarity index 81% rename from include/asm-x86/mach-es7000/mach_mpparse.h rename to include/asm-x86/es7000/mpparse.h index befde24705b7..7b5c889d8e7d 100644 --- a/include/asm-x86/mach-es7000/mach_mpparse.h +++ b/include/asm-x86/es7000/mpparse.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H -#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H +#ifndef __ASM_ES7000_MPPARSE_H +#define __ASM_ES7000_MPPARSE_H #include @@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void) } #endif -#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ +#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/es7000/wakecpu.h similarity index 89% rename from include/asm-x86/mach-es7000/mach_wakecpu.h rename to include/asm-x86/es7000/wakecpu.h index 97c776ce13f2..3ffc5a7bf667 100644 --- a/include/asm-x86/mach-es7000/mach_wakecpu.h +++ b/include/asm-x86/es7000/wakecpu.h @@ -1,7 +1,7 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H -#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H +#ifndef __ASM_ES7000_WAKECPU_H +#define __ASM_ES7000_WAKECPU_H -/* +/* * This file copes with machines that wakeup secondary CPUs by the * INIT, INIT, STARTUP sequence. */ @@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ +#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h deleted file mode 100644 index a07e56744028..000000000000 --- a/include/asm-x86/mach-es7000/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H -#define ASM_X86__MACH_ES7000__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */ From e8c48efdb971cfb1b043076cf68be535d461a20b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:44 -0700 Subject: [PATCH 054/186] x86: mach_summit to summit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/summit_32.c | 2 +- arch/x86/mach-generic/summit.c | 8 +++---- include/asm-x86/mach-summit/mach_apicdef.h | 13 ---------- .../mach_apic.h => summit/apic.h} | 24 +++++++++---------- include/asm-x86/summit/apicdef.h | 13 ++++++++++ .../{mach-summit/mach_ipi.h => summit/ipi.h} | 6 ++--- .../irq_vectors_limits.h | 6 ++--- .../mach_mpparse.h => summit/mpparse.h} | 13 +++++----- 8 files changed, 42 insertions(+), 43 deletions(-) delete mode 100644 include/asm-x86/mach-summit/mach_apicdef.h rename include/asm-x86/{mach-summit/mach_apic.h => summit/apic.h} (93%) create mode 100644 include/asm-x86/summit/apicdef.h rename include/asm-x86/{mach-summit/mach_ipi.h => summit/ipi.h} (77%) rename include/asm-x86/{mach-summit => summit}/irq_vectors_limits.h (65%) rename include/asm-x86/{mach-summit/mach_mpparse.h => summit/mpparse.h} (95%) diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f044ba..7b987852e876 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 752edd96b1bf..6ad6b67a723d 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -11,11 +11,11 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include static int probe_summit(void) { diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h deleted file mode 100644 index d4bc8590c4f6..000000000000 --- a/include/asm-x86/mach-summit/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H -#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/summit/apic.h similarity index 93% rename from include/asm-x86/mach-summit/mach_apic.h rename to include/asm-x86/summit/apic.h index 7a66758d701d..c5b2e4b10358 100644 --- a/include/asm-x86/mach-summit/mach_apic.h +++ b/include/asm-x86/summit/apic.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H -#define ASM_X86__MACH_SUMMIT__MACH_APIC_H +#ifndef __ASM_SUMMIT_APIC_H +#define __ASM_SUMMIT_APIC_H #include @@ -21,7 +21,7 @@ static inline cpumask_t target_cpus(void) * Just start on cpu 0. IRQ balancing will spread load */ return cpumask_of_cpu(0); -} +} #define TARGET_CPUS (target_cpus()) #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -30,10 +30,10 @@ static inline cpumask_t target_cpus(void) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; -} +} /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long check_apicid_present(int bit) { return 1; } @@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) static inline physid_mask_t apicid_to_cpu_present(int apicid) { - return physid_mask_of_physid(apicid); + return physid_mask_of_physid(0); } static inline void setup_portio_remap(void) @@ -143,22 +143,22 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) int num_bits_set; int cpus_found = 0; int cpu; - int apicid; + int apicid; num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. */ cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != + if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n",__FUNCTION__); return 0xFF; @@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return hard_smp_processor_id() >> index_msb; } -#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ +#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h new file mode 100644 index 000000000000..f3fbca1f61c1 --- /dev/null +++ b/include/asm-x86/summit/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_SUMMIT_APICDEF_H +#define __ASM_SUMMIT_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (x>>24)&0xFF; +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/summit/ipi.h similarity index 77% rename from include/asm-x86/mach-summit/mach_ipi.h rename to include/asm-x86/summit/ipi.h index a3b31c528d90..53bd1e7bd7b4 100644 --- a/include/asm-x86/mach-summit/mach_ipi.h +++ b/include/asm-x86/summit/ipi.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H -#define ASM_X86__MACH_SUMMIT__MACH_IPI_H +#ifndef __ASM_SUMMIT_IPI_H +#define __ASM_SUMMIT_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ +#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h similarity index 65% rename from include/asm-x86/mach-summit/irq_vectors_limits.h rename to include/asm-x86/summit/irq_vectors_limits.h index 22f376ad68e1..890ce3f5e09a 100644 --- a/include/asm-x86/mach-summit/irq_vectors_limits.h +++ b/include/asm-x86/summit/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H -#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H +#ifndef _ASM_IRQ_VECTORS_LIMITS_H +#define _ASM_IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ +#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/summit/mpparse.h similarity index 95% rename from include/asm-x86/mach-summit/mach_mpparse.h rename to include/asm-x86/summit/mpparse.h index 92396f28772b..013ce6fab2d5 100644 --- a/include/asm-x86/mach-summit/mach_mpparse.h +++ b/include/asm-x86/summit/mpparse.h @@ -1,7 +1,6 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H -#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H +#ifndef __ASM_SUMMIT_MPPARSE_H +#define __ASM_SUMMIT_MPPARSE_H -#include #include extern int use_cyclone; @@ -12,11 +11,11 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, +static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) { - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ mark_tsc_unstable("Summit based system"); @@ -107,4 +106,4 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } -#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ +#endif /* __ASM_SUMMIT_MPPARSE_H */ From edb181ac4b0c7c1240503da46349a3fb69af9ef6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:55 -0700 Subject: [PATCH 055/186] x86: mach-numaq to numaq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/numaq.c | 10 +++++----- include/asm-x86/mach-numaq/mach_mpparse.h | 7 ------- .../asm-x86/{mach-numaq/mach_apic.h => numaq/apic.h} | 6 +++--- .../{mach-numaq/mach_apicdef.h => numaq/apicdef.h} | 6 +++--- include/asm-x86/{mach-numaq/mach_ipi.h => numaq/ipi.h} | 6 +++--- include/asm-x86/numaq/mpparse.h | 7 +++++++ .../{mach-numaq/mach_wakecpu.h => numaq/wakecpu.h} | 6 +++--- 7 files changed, 24 insertions(+), 24 deletions(-) delete mode 100644 include/asm-x86/mach-numaq/mach_mpparse.h rename include/asm-x86/{mach-numaq/mach_apic.h => numaq/apic.h} (96%) rename include/asm-x86/{mach-numaq/mach_apicdef.h => numaq/apicdef.h} (55%) rename include/asm-x86/{mach-numaq/mach_ipi.h => numaq/ipi.h} (77%) create mode 100644 include/asm-x86/numaq/mpparse.h rename include/asm-x86/{mach-numaq/mach_wakecpu.h => numaq/wakecpu.h} (88%) diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 95c07efff6b7..8cf58394975e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -11,12 +11,12 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include #include static int mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h deleted file mode 100644 index 74ade184920b..000000000000 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H -#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H - -extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); - -#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/numaq/apic.h similarity index 96% rename from include/asm-x86/mach-numaq/mach_apic.h rename to include/asm-x86/numaq/apic.h index 7a0d39edfcfa..a8344ba6ea15 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/numaq/apic.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H -#define ASM_X86__MACH_NUMAQ__MACH_APIC_H +#ifndef __ASM_NUMAQ_APIC_H +#define __ASM_NUMAQ_APIC_H #include #include @@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ +#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/numaq/apicdef.h similarity index 55% rename from include/asm-x86/mach-numaq/mach_apicdef.h rename to include/asm-x86/numaq/apicdef.h index f870ec5f7782..e012a46cc22a 100644 --- a/include/asm-x86/mach-numaq/mach_apicdef.h +++ b/include/asm-x86/numaq/apicdef.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H -#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H +#ifndef __ASM_NUMAQ_APICDEF_H +#define __ASM_NUMAQ_APICDEF_H #define APIC_ID_MASK (0xF<<24) @@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ +#endif diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/numaq/ipi.h similarity index 77% rename from include/asm-x86/mach-numaq/mach_ipi.h rename to include/asm-x86/numaq/ipi.h index 1e835823f4bc..935588d286cf 100644 --- a/include/asm-x86/mach-numaq/mach_ipi.h +++ b/include/asm-x86/numaq/ipi.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H -#define ASM_X86__MACH_NUMAQ__MACH_IPI_H +#ifndef __ASM_NUMAQ_IPI_H +#define __ASM_NUMAQ_IPI_H void send_IPI_mask_sequence(cpumask_t, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ +#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h new file mode 100644 index 000000000000..252292e077b6 --- /dev/null +++ b/include/asm-x86/numaq/mpparse.h @@ -0,0 +1,7 @@ +#ifndef __ASM_NUMAQ_MPPARSE_H +#define __ASM_NUMAQ_MPPARSE_H + +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/numaq/wakecpu.h similarity index 88% rename from include/asm-x86/mach-numaq/mach_wakecpu.h rename to include/asm-x86/numaq/wakecpu.h index 0db8cea643c0..c577bda5b1c5 100644 --- a/include/asm-x86/mach-numaq/mach_wakecpu.h +++ b/include/asm-x86/numaq/wakecpu.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H -#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H +#ifndef __ASM_NUMAQ_WAKECPU_H +#define __ASM_NUMAQ_WAKECPU_H /* This file copes with machines that wakeup secondary CPUs by NMIs */ @@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} -#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ +#endif /* __ASM_NUMAQ_WAKECPU_H */ From d25ae38b7e005af03843833bbd811ffe8c5f8cb4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 19:39:03 -0700 Subject: [PATCH 056/186] x86: add apic probe for genapic 64bit - fix intr_remapping_enabled get assigned later, so need to check that in setup_apic_routing Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 6 ++++++ arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index b3ba969c50d2..6c9bfc9e1e95 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,11 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { + if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (!intr_remapping_enabled) + genapic = &apic_flat; + } + if (genapic == &apic_flat) { if (max_physical_apicid >= 8) genapic = &apic_physflat; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50a..fed9f68efd66 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled) + if (cpu_has_x2apic) return 1; return 0; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd4..958d537b4cc9 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -21,7 +21,7 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + if (cpu_has_x2apic && x2apic_phys) return 1; return 0; From 8cb22bcb1f3ef70d4d48092e9b057175ad9ec78d Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 18 Jul 2008 16:03:52 -0500 Subject: [PATCH 057/186] x86: L3 cache index disable for 2.6.26 New versions of AMD processors have support to disable parts of their L3 caches if too many MCEs are generated by the L3 cache. This patch provides a /sysfs interface under the cache hierarchy to display which caches indices are disabled (if any) and to monitoring applications to disable a cache index. This patch does not set an automatic policy to disable the L3 cache. Policy decisions would need to be made by a RAS handler. This patch merely makes it easier to see what indices are currently disabled. Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 87 +++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ff517f0b8cc4..d6ea50e270e0 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -16,6 +16,7 @@ #include #include +#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -130,6 +131,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; + unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; @@ -251,6 +253,13 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } +static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +{ + if (index < 3) + return; + this_leaf->can_disable = 1; +} + static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; @@ -258,9 +267,12 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - else + if (boot_cpu_data.x86 >= 0x10) + amd_check_l3_disable(index, this_leaf); + + } else cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ @@ -637,6 +649,61 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + int i; + ssize_t ret = 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + for (i = 0; i < 2; i++) { + unsigned int reg; + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, + (reg & 0x30000) >> 16, reg & 0xfff); + + } + return ret; + } + return sprintf(buf, "Feature not enabled\n"); +} + +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + /* write the MSR value */ + unsigned int ret; + unsigned int index, val; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + if (strlen(buf) > 15) + return -EINVAL; + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + return 1; + } + return 0; +} + struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -657,6 +724,8 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); + static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -667,12 +736,10 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, + &cache_disable.attr, NULL }; -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -689,7 +756,15 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - return 0; + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : + 0; + return ret; } static struct sysfs_ops sysfs_ops = { From 7a4983bb5f94f6521aa3236fe5c035cf9bef543f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Jul 2008 13:34:21 +0200 Subject: [PATCH 058/186] x86: L3 cache index disable for 2.6.26, cleanups No change in functionality. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 111 ++++++++++++++------------ 1 file changed, 59 insertions(+), 52 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d6ea50e270e0..a61c9e399ba9 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -253,14 +253,16 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +static void __cpuinit +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; this_leaf->can_disable = 1; } -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -271,19 +273,20 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le amd_cpuid4(index, &eax, &ebx, &ecx); if (boot_cpu_data.x86 >= 0x10) amd_check_l3_disable(index, this_leaf); - - } else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } else { + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -649,59 +652,63 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - int i; - ssize_t ret = 0; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + ssize_t ret = 0; + int i; - for (i = 0; i < 2; i++) { - unsigned int reg; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, - (reg & 0x30000) >> 16, reg & 0xfff); + if (!this_leaf->can_disable) + return sprintf(buf, "Feature not enabled\n"); - } - return ret; + for (i = 0; i < 2; i++) { + unsigned int reg; + + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", + buf, (reg & 0x30000) >> 16, reg & 0xfff); } - return sprintf(buf, "Feature not enabled\n"); + return ret; } -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +static ssize_t +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - /* write the MSR value */ - unsigned int ret; - unsigned int index, val; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + unsigned int ret, index, val; - if (strlen(buf) > 15) - return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) - return -EINVAL; - val |= 0xc0000000; - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); - pci_write_config_dword(dev, 0x1BC + index * 4, val); - return 1; - } - return 0; + if (!this_leaf->can_disable) + return 0; + + /* write the MSR value */ + + if (strlen(buf) > 15) + return -EINVAL; + + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + + return 1; } struct _cache_attr { From a24e8d36f5fc047dac9af6200322ed393f2e3175 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 22 Jul 2008 13:06:02 -0500 Subject: [PATCH 059/186] x86: L3 cache index disable for 2.6.26 On Monday 21 July 2008, Ingo Molnar wrote: > > applied to tip/x86/cpu, thanks Mark. > > > > I've done some coding style fixes for the new functions you've > > introduced, see that commit below. > > -tip testing found the following build failure: > > arch/x86/kernel/built-in.o: In function `show_cache_disable': > intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges' > arch/x86/kernel/built-in.o: In function `store_cache_disable': > intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges' > > please send a delta fix patch against the tip/x86/cpu branch: > > http://people.redhat.com/mingo/tip.git/README > > which has your patch plus the cleanup applied. delta fix patch follows. It removes the dependency on k8_northbridges. -Mark Langsdorf Operating System Research Center AMD Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 43 +++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a61c9e399ba9..a0c6c6ffed46 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -13,10 +13,10 @@ #include #include #include +#include #include #include -#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -135,6 +135,12 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; + unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -655,16 +661,39 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +static struct pci_dev *get_k8_northbridge(int node) +{ + struct pci_dev *dev = NULL; + int i; + + for (i = 0; i <= node; i++) { + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_id[0], dev)); + if (!dev) + break; + } + return dev; +} + static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; ssize_t ret = 0; int i; if (!this_leaf->can_disable) return sprintf(buf, "Feature not enabled\n"); + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + for (i = 0; i < 2; i++) { unsigned int reg; @@ -686,14 +715,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; unsigned int ret, index, val; if (!this_leaf->can_disable) return 0; - /* write the MSR value */ - if (strlen(buf) > 15) return -EINVAL; @@ -704,6 +731,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, return -EINVAL; val |= 0xc0000000; + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); From cdcf772ed163651cacac8098b4974aba7f9e1c73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:20:08 +0200 Subject: [PATCH 060/186] x86 l3 cache index disable for 2 6 26 fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a0c6c6ffed46..535d662716de 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -136,9 +136,9 @@ struct _cpuid4_info { }; static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} }; unsigned short num_cache_leaves; @@ -190,9 +190,10 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit +amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -264,7 +265,7 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; - this_leaf->can_disable = 1; + this_leaf->can_disable = 1; } static int @@ -474,7 +475,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -511,7 +512,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -593,7 +594,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -675,7 +676,7 @@ static struct pci_dev *get_k8_northbridge(int node) if (!dev) break; } - return dev; + return dev; } static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) @@ -736,7 +737,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; } - + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); @@ -789,7 +790,7 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } @@ -800,9 +801,9 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, struct _index_kobject *this_leaf = to_object(kobj); ssize_t ret; - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count) : + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : 0; return ret; } From 239bd83104ec6bcba90221d8b0973d2565142ef8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:45:49 +0200 Subject: [PATCH 061/186] x86: L3 cache index disable for 2.6.26, fix #2 fix !PCI build failure: arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge': arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id' Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d763d24187c2..1677b55371a5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -135,11 +135,13 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +#ifdef CONFIG_PCI static struct pci_device_id k8_nb_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, {} }; +#endif unsigned short num_cache_leaves; @@ -663,6 +665,7 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +#ifdef CONFIG_PCI static struct pci_dev *get_k8_northbridge(int node) { struct pci_dev *dev = NULL; @@ -679,6 +682,12 @@ static struct pci_dev *get_k8_northbridge(int node) } return dev; } +#else +static struct pci_dev *get_k8_northbridge(int node) +{ + return NULL; +} +#endif static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { From a648bf4632628c787abb0514277f2a231fca39ca Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:18 -0700 Subject: [PATCH 062/186] x86, xsave: xsave cpuid feature bits Add xsave CPU feature bits. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- include/asm-x86/cpufeature.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 0bf4d37a0483..741547225659 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -46,7 +46,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "xsave", NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 42afe9ca3a37..c76b3f67cb3f 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -92,6 +92,7 @@ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -191,6 +192,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 From dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:19 -0700 Subject: [PATCH 063/186] x86, xsave: enable xsave/xrstor on cpus with xsave support Enables xsave/xrstor by turning on cr4.osxsave on cpu's which have the xsave support. For now, features that OS supports/enabled are FP and SSE. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 8 +++ arch/x86/kernel/i387.c | 12 +++++ arch/x86/kernel/traps_32.c | 1 - arch/x86/kernel/traps_64.c | 4 -- arch/x86/kernel/xsave.c | 87 +++++++++++++++++++++++++++++++ include/asm-x86/i387.h | 1 + include/asm-x86/processor-flags.h | 1 + include/asm-x86/processor.h | 12 +++++ include/asm-x86/xsave.h | 26 +++++++++ 10 files changed, 148 insertions(+), 6 deletions(-) create mode 100644 arch/x86/kernel/xsave.c create mode 100644 include/asm-x86/xsave.h diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f3312..d6ea91abaebc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o +obj-y += i387.o xsave.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..fabbcb7020fb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -712,6 +712,14 @@ void __cpuinit cpu_init(void) current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8efb82..e22a9a9dce8a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -61,6 +61,11 @@ void __init init_thread_xstate(void) return; } + if (cpu_has_xsave) { + xsave_cntxt_init(); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -83,6 +88,13 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + xsave_init(); + mxcsr_feature_mask_init(); /* clean state in init */ current_thread_info()->status = 0; diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 03df8e45e5a1..da5a5964fccb 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1228,7 +1228,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caaca7115..3580a7938a2e 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1172,10 +1172,6 @@ void __init trap_init(void) #ifdef CONFIG_IA32_EMULATION set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif - /* - * initialize the per thread extended state: - */ - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 000000000000..c68b7c4ca249 --- /dev/null +++ b/arch/x86/kernel/xsave.c @@ -0,0 +1,87 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha + */ +#include +#include +#include + +/* + * Supported feature mask by the CPU and the kernel. + */ +unsigned int pcntxt_hmask, pcntxt_lmask; + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct *init_xstate_buf; + +/* + * Enable the extended processor state save/restore feature + */ +void __cpuinit xsave_init(void) +{ + if (!cpu_has_xsave) + return; + + set_in_cr4(X86_CR4_OSXSAVE); + + /* + * Enable all the features that the HW is capable of + * and the Linux kernel is aware of. + * + * xsetbv(); + */ + asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), + "a" (pcntxt_lmask), "d" (pcntxt_hmask)); +} + +/* + * setup the xstate image representing the init state + */ +void setup_xstate_init(void) +{ + init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; +} + +/* + * Enable and initialize the xsave feature. + */ +void __init xsave_cntxt_init(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + pcntxt_lmask = eax; + pcntxt_hmask = edx; + + if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", + pcntxt_lmask); + BUG(); + } + + /* + * for now OS knows only about FP/SSE + */ + pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; + pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; + + xsave_init(); + + /* + * Recompute the context size for enabled features + */ + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + xstate_size = ebx; + + setup_xstate_init(); + + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + "cntxt size 0x%x\n", + (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); +} diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 3958de6aad0e..6a6647896670 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -18,6 +18,7 @@ #include #include #include +#include extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 5dd79774f693..dc5f0712f9fa 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -59,6 +59,7 @@ #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ +#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ /* * x86-64 Task Priority Register, CR8 diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index d60b4d81febe..d7c0221c0278 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -346,6 +346,18 @@ struct i387_soft_struct { u32 entry_eip; }; +struct xsave_hdr_struct { + u64 xstate_bv; + u64 reserved1[2]; + u64 reserved2[5]; +} __attribute__((packed)); + +struct xsave_struct { + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; + /* new processor state extensions will go here */ +} __attribute__ ((packed, aligned (64))); + union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h new file mode 100644 index 000000000000..6d70e62c6bdc --- /dev/null +++ b/include/asm-x86/xsave.h @@ -0,0 +1,26 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include +#include + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) + +#define FXSAVE_SIZE 512 + +/* + * These are the features that the OS can handle currently. + */ +#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) +#define XCNTXT_HMASK 0x0 + +extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; +extern struct xsave_struct *init_xstate_buf; + +extern void xsave_cntxt_init(void); +extern void xsave_init(void); + +#endif From b359e8a434cc3d09847010fc4aeccf48d69740e4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:20 -0700 Subject: [PATCH 064/186] x86, xsave: context switch support using xsave/xrstor Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch when available. Introduces TS_XSAVE flag, which determine the need to use xsave/xrstor instructions during context switch instead of the legacy fxsave/fxrstor instructions. Thread-synchronous status word is already in L1 cache during this code patch and thus minimizes the performance penality compared to (cpu_has_xsave) checks. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 5 ++- arch/x86/kernel/i387.c | 5 ++- arch/x86/kernel/traps_64.c | 2 +- include/asm-x86/i387.h | 64 ++++++++++++++++++++++++++++++++--- include/asm-x86/processor.h | 1 + include/asm-x86/thread_info.h | 1 + include/asm-x86/xsave.h | 33 ++++++++++++++++++ 7 files changed, 103 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fabbcb7020fb..6c2b9e756db2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -709,7 +709,10 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e22a9a9dce8a..b778e17e4b01 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -97,7 +97,10 @@ void __cpuinit fpu_init(void) mxcsr_feature_mask_init(); /* clean state in init */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3580a7938a2e..38eb76156a47 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + if (unlikely(restore_fpu_checking(me))) { stts(); force_sig(SIGSEGV, me); return; diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 6a6647896670..a6d256f4ac81 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -36,6 +36,8 @@ extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); #endif +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ @@ -46,7 +48,7 @@ static inline void tolerant_fwait(void) _ASM_EXTABLE(1b, 2b)); } -static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; @@ -66,15 +68,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) return err; } -#define X87_FSW_ES (1 << 7) /* Exception Summary */ +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes new user value. Both should be ok. Use the PDA as safe address because it should be already in L1. */ -static inline void clear_fpu_state(struct i387_fxsave_struct *fx) +static inline void clear_fpu_state(struct task_struct *tsk) { + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + /* + * xsave header may indicate the init state of the FP. + */ + if ((task_thread_info(tsk)->status & TS_XSAVE) && + !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + return; + if (unlikely(fx->swd & X87_FSW_ES)) asm volatile("fnclex"); alternative_input(ASM_NOP8 ASM_NOP2, @@ -107,7 +125,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) return err; } -static inline void __save_init_fpu(struct task_struct *tsk) +static inline void fxsave(struct task_struct *tsk) { /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix @@ -132,7 +150,16 @@ static inline void __save_init_fpu(struct task_struct *tsk) : "=m" (tsk->thread.xstate->fxsave) : "cdaSDb" (&tsk->thread.xstate->fxsave)); #endif - clear_fpu_state(&tsk->thread.xstate->fxsave); +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + xsave(tsk); + else + fxsave(tsk); + + clear_fpu_state(tsk); task_thread_info(tsk)->status &= ~TS_USEDFPU; } @@ -147,6 +174,10 @@ static inline void tolerant_fwait(void) static inline void restore_fpu(struct task_struct *tsk) { + if (task_thread_info(tsk)->status & TS_XSAVE) { + xrstor_checking(&tsk->thread.xstate->xsave); + return; + } /* * The "nop" is needed to make the instructions the same * length. @@ -172,6 +203,27 @@ static inline void restore_fpu(struct task_struct *tsk) */ static inline void __save_init_fpu(struct task_struct *tsk) { + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + xsave(tsk); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + goto end; + + if (unlikely(fx->swd & X87_FSW_ES)) + asm volatile("fnclex"); + + /* + * we can do a simple return here or be paranoid :) + */ + goto clear_state; + } + /* Use more nops than strictly needed in case the compiler varies code */ alternative_input( @@ -181,6 +233,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) X86_FEATURE_FXSR, [fx] "m" (tsk->thread.xstate->fxsave), [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); +clear_state: /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -190,6 +243,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) "fildl %[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); +end: task_thread_info(tsk)->status &= ~TS_USEDFPU; } diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index d7c0221c0278..77b7af6b573b 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -362,6 +362,7 @@ union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; struct i387_soft_struct soft; + struct xsave_struct xsave; }; #ifdef CONFIG_X86_64 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index e64be8863b76..30586f2ee558 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -239,6 +239,7 @@ static inline struct thread_info *stack_thread_info(void) #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ +#define TS_XSAVE 0x0010 /* Use xsave/xrstor */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index 6d70e62c6bdc..e835a917ee19 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -17,10 +17,43 @@ #define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) #define XCNTXT_HMASK 0x0 +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); extern void xsave_init(void); +extern int init_fpu(struct task_struct *child); +static inline int xrstor_checking(struct xsave_struct *fx) +{ + int err; + + asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) + : "memory"); + + return err; +} + +static inline void xsave(struct task_struct *tsk) +{ + /* This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" + : : "D" (&(tsk->thread.xstate->xsave)), + "a" (-1), "d"(-1) : "memory"); +} #endif From 3c1c7f101426cb2ecc79d817a8a65928965fc860 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:21 -0700 Subject: [PATCH 065/186] x86, xsave: dynamically allocate sigframes fpstate instead of static allocation dynamically allocate fpstate on the stack, instead of static allocation in the current sigframe layout on the user stack. This will allow the fpstate structure to grow in the future, which includes extended state information supporting xsave/xrstor. signal handlers will be able to access the fpstate pointer from the sigcontext structure asusual, with no change. For the non RT sigframe's (which are supported only for 32bit apps), current static fpstate layout in the sigframe will be unused(so that we don't change the extramask[] offset in the sigframe and thus prevent breaking app's which modify extramask[]). Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 24 ++++++++++++++++-------- arch/x86/kernel/i387.c | 2 ++ arch/x86/kernel/sigframe.h | 14 ++++++++++++-- arch/x86/kernel/signal_32.c | 18 +++++++++++++----- arch/x86/kernel/signal_64.c | 2 +- arch/x86/kernel/xsave.c | 4 ++++ include/asm-x86/i387.h | 2 ++ 7 files changed, 50 insertions(+), 16 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 20af4c79579a..a05bf0fb7415 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -179,9 +179,10 @@ struct sigframe u32 pretcode; int sig; struct sigcontext_ia32 sc; - struct _fpstate_ia32 fpstate; + struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ unsigned int extramask[_COMPAT_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe @@ -192,8 +193,8 @@ struct rt_sigframe u32 puc; compat_siginfo_t info; struct ucontext_ia32 uc; - struct _fpstate_ia32 fpstate; char retcode[8]; + /* fp state follows here */ }; #define COPY(x) { \ @@ -402,7 +403,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, * Determine which stack to use.. */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) + size_t frame_size, + struct _fpstate_ia32 **fpstate) { unsigned long sp; @@ -421,6 +423,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ka->sa.sa_restorer) sp = (unsigned long) ka->sa.sa_restorer; + if (used_math()) { + sp = sp - sig_xstate_ia32_size; + *fpstate = (struct _fpstate_ia32 *) sp; + } + sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ @@ -434,6 +441,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; + struct _fpstate_ia32 __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -448,7 +456,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -457,8 +465,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, if (err) goto give_sigsegv; - err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, - set->sig[0]); + err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -522,6 +529,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; + struct _fpstate_ia32 __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -537,7 +545,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -556,7 +564,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b778e17e4b01..51fb288a2c97 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -24,6 +24,7 @@ # define save_i387_ia32 save_i387 # define restore_i387_ia32 restore_i387 # define _fpstate_ia32 _fpstate +# define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -36,6 +37,7 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; +unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2dc..6dd7e2b70a4b 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -3,9 +3,18 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate fpstate_unused; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe { @@ -15,13 +24,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; char retcode[8]; + /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; + /* fp state follows here */ }; #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd8933..19a7a5669b5b 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -306,7 +306,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + struct _fpstate **fpstate) { unsigned long sp; @@ -332,6 +333,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) sp = (unsigned long) ka->sa.sa_restorer; } + if (used_math()) { + sp = sp - sig_xstate_size; + *fpstate = (struct _fpstate *) sp; + } + sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -350,8 +356,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -366,7 +373,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -427,8 +434,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -453,7 +461,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ca316b5b742c..0deab8eff33f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,7 +281,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sig_xstate_size); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c68b7c4ca249..7ad169e33528 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -17,6 +17,10 @@ unsigned int pcntxt_hmask, pcntxt_lmask; */ struct xsave_struct *init_xstate_buf; +#ifdef CONFIG_X86_64 +unsigned int sig_xstate_size = sizeof(struct _fpstate); +#endif + /* * Enable the extended processor state save/restore feature */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index a6d256f4ac81..36dca8db1660 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -20,6 +20,7 @@ #include #include +extern unsigned int sig_xstate_size; extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); @@ -31,6 +32,7 @@ extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; #ifdef CONFIG_IA32_EMULATION +extern unsigned int sig_xstate_ia32_size; struct _fpstate_ia32; extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); From ab5137015fed9b948fe835a2d99a4cfbd50a0c40 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:22 -0700 Subject: [PATCH 066/186] x86, xsave: reorganization of signal save/restore fpstate code layout move 64bit routines that saves/restores fpstate in/from user stack from signal_64.c to xsave.c restore_i387_xstate() now handles the condition when user passes NULL fpstate. Other misc changes for prepartion of xsave/xrstor sigcontext support. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 28 ++++--------- arch/x86/kernel/i387.c | 44 ++++++++++++++------ arch/x86/kernel/signal_32.c | 28 ++++--------- arch/x86/kernel/signal_64.c | 83 ++----------------------------------- arch/x86/kernel/xsave.c | 79 +++++++++++++++++++++++++++++++++++ include/asm-x86/i387.h | 13 +++--- 6 files changed, 134 insertions(+), 141 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index a05bf0fb7415..c596eabbe98b 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -216,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, unsigned int *peax) { unsigned int tmpflags, gs, oldgs, err = 0; - struct _fpstate_ia32 __user *buf; + void __user *buf; u32 tmp; /* Always make any pending restarted system calls return -EINTR */ @@ -260,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= __get_user(tmp, &sc->fpstate); buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate_ia32(buf); err |= __get_user(tmp, &sc->ax); *peax = tmp; return err; - -badframe: - return 1; } asmlinkage long sys32_sigreturn(struct pt_regs *regs) @@ -351,7 +337,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) */ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, - struct _fpstate_ia32 __user *fpstate, + void __user *fpstate, struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -382,7 +368,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, err |= __put_user((u32)regs->flags, &sc->flags); err |= __put_user((u32)regs->sp, &sc->sp_at_signal); - tmp = save_i387_ia32(fpstate); + tmp = save_i387_xstate_ia32(fpstate); if (tmp < 0) err = -EFAULT; else { @@ -404,7 +390,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate_ia32 **fpstate) + void **fpstate) { unsigned long sp; @@ -441,7 +427,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; - struct _fpstate_ia32 __user *fpstate = NULL; + void __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -529,7 +515,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; - struct _fpstate_ia32 __user *fpstate = NULL; + void __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 51fb288a2c97..7daf3a011dd9 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -21,9 +21,10 @@ # include # include #else -# define save_i387_ia32 save_i387 -# define restore_i387_ia32 restore_i387 +# define save_i387_xstate_ia32 save_i387_xstate +# define restore_i387_xstate_ia32 restore_i387_xstate # define _fpstate_ia32 _fpstate +# define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct @@ -424,7 +425,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; - unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -438,8 +438,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; - unlazy_fpu(tsk); - convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -455,10 +453,16 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) return 1; } -int save_i387_ia32(struct _fpstate_ia32 __user *buf) +int save_i387_xstate_ia32(void __user *buf) { + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; + struct task_struct *tsk = current; + if (!used_math()) return 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) + return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -468,13 +472,15 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) ? -1 : 1; + NULL, fp) ? -1 : 1; } + unlazy_fpu(tsk); + if (cpu_has_fxsr) - return save_i387_fxsave(buf); + return save_i387_fxsave(fp); else - return save_i387_fsave(buf); + return save_i387_fsave(fp); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -502,14 +508,26 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } -int restore_i387_ia32(struct _fpstate_ia32 __user *buf) +int restore_i387_xstate_ia32(void __user *buf) { int err; struct task_struct *tsk = current; + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) + return -EACCES; + if (!used_math()) { err = init_fpu(tsk); if (err) @@ -518,13 +536,13 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) if (HAVE_HWFP) { if (cpu_has_fxsr) - err = restore_i387_fxsave(buf); + err = restore_i387_fxsave(fp); else - err = restore_i387_fsave(buf); + err = restore_i387_fsave(fp); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + NULL, fp) != 0; } set_used_math(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 19a7a5669b5b..690cc616ac07 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -159,28 +159,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -262,7 +248,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -289,7 +275,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387(fpstate); + tmp = save_i387_xstate(fpstate); if (tmp < 0) err = 1; else @@ -307,7 +293,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, */ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate **fpstate) + void **fpstate) { unsigned long sp; @@ -356,7 +342,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -434,7 +420,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 0deab8eff33f..ddf6123a55c8 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } -/* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. - */ - clear_fpu(tsk); - clear_used_math(); - } - return err; -} - /* * Do a signal return; undo the signal stack. */ @@ -160,25 +97,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user * buf; err |= __get_user(buf, &sc->fpstate); - - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -276,7 +199,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + void __user *fp = NULL; int err = 0; struct task_struct *me = current; @@ -288,7 +211,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) + if (save_i387_xstate(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7ad169e33528..608e72d7ca64 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -12,6 +12,85 @@ */ unsigned int pcntxt_hmask, pcntxt_lmask; +#ifdef CONFIG_X86_64 +/* + * Signal frame handlers. + */ + +int save_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) + return -EACCES; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.xstate->fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387_xstate: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *) + buf); + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + xstate_size)) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +int restore_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err; + + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) + return -EACCES; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; +} +#endif + /* * Represents init state for the supported extended state. */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 36dca8db1660..dc3745e8040a 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -34,8 +34,9 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; #ifdef CONFIG_IA32_EMULATION extern unsigned int sig_xstate_ia32_size; struct _fpstate_ia32; -extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); -extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); +struct _xstate_ia32; +extern int save_i387_xstate_ia32(void __user *buf); +extern int restore_i387_xstate_ia32(void __user *buf); #endif #define X87_FSW_ES (1 << 7) /* Exception Summary */ @@ -249,13 +250,13 @@ static inline void __save_init_fpu(struct task_struct *tsk) task_thread_info(tsk)->status &= ~TS_USEDFPU; } +#endif /* CONFIG_X86_64 */ + /* * Signal frame handlers... */ -extern int save_i387(struct _fpstate __user *buf); -extern int restore_i387(struct _fpstate __user *buf); - -#endif /* CONFIG_X86_64 */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); static inline void __unlazy_fpu(struct task_struct *tsk) { From 9dc89c0f96a6ce6a1b7f9a47dd8bf6f17e2002c9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:23 -0700 Subject: [PATCH 067/186] x86, xsave: xsave/xrstor specific routines Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- include/asm-x86/xsave.h | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index e835a917ee19..b716511aede2 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -48,6 +48,58 @@ static inline int xrstor_checking(struct xsave_struct *fx) return err; } +static inline int xsave_check(struct xsave_struct __user *buf) +{ + int err; + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline int xrestore_user(struct xsave_struct __user *buf, + unsigned int lmask, + unsigned int hmask) +{ + int err; + struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? */ + return err; +} + +static inline void xrstor_state(struct xsave_struct *fx, int lmask, int hmask) +{ + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + static inline void xsave(struct task_struct *tsk) { /* This, however, we can work around by forcing the compiler to select From bdd8caba5ed5bb7ee21c9f061597284ffeb280bf Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:24 -0700 Subject: [PATCH 068/186] x86, xsave: struct _fpstate extensions to include extended state information Bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame, are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes are used to extended the fpstate pointer in the sigcontext, which now includes the extended state information along with fpstate information. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- include/asm-x86/processor.h | 7 ++- include/asm-x86/sigcontext.h | 87 ++++++++++++++++++++++++++++++++-- include/asm-x86/sigcontext32.h | 6 ++- 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 77b7af6b573b..eb4bd8c07730 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -322,7 +322,12 @@ struct i387_fxsave_struct { /* 16*16 bytes for each XMM-reg = 256 bytes: */ u32 xmm_space[64]; - u32 padding[24]; + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; } __attribute__((aligned(16))); diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 24879c85b291..899fe2f8abb9 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -4,6 +4,40 @@ #include #include +#define FP_XSTATE_MAGIC1 0x46505853U +#define FP_XSTATE_MAGIC2 0x46505845U +#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) + +/* + * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame + * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes + * are used to extended the fpstate pointer in the sigcontext, which now + * includes the extended state information along with fpstate information. + * + * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved + * area and FP_XSTATE_MAGIC2 at the end of memory layout + * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the + * extended state information in the memory layout pointed by the fpstate + * pointer in sigcontext. + */ +struct _fpx_sw_bytes { + __u32 magic1; /* FP_XSTATE_MAGIC1 */ + __u32 extended_size; /* total size of the layout referred by + * fpstate pointer in the sigcontext. + */ + __u64 xstate_bv; + /* feature bit mask (including fp/sse/extended + * state) that is present in the memory + * layout. + */ + __u32 xstate_size; /* actual xsave state size, based on the + * features saved in the layout. + * 'extended_size' will be greater than + * 'xstate_size'. + */ + __u32 padding[7]; /* for future use. */ +}; + #ifdef __i386__ /* * As documented in the iBCS2 standard.. @@ -53,7 +87,13 @@ struct _fpstate { unsigned long reserved; struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ struct _xmmreg _xmm[8]; - unsigned long padding[56]; + unsigned long padding1[44]; + + union { + unsigned long padding2[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state info */ + }; }; #define X86_FXSR_MAGIC 0x0000 @@ -79,7 +119,15 @@ struct sigcontext { unsigned long flags; unsigned long sp_at_signal; unsigned short ss, __ssh; - struct _fpstate __user *fpstate; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ unsigned long oldmask; unsigned long cr2; }; @@ -130,7 +178,12 @@ struct _fpstate { __u32 mxcsr_mask; __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ - __u32 reserved2[24]; + __u32 reserved2[12]; + union { + __u32 reserved3[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state information */ + }; }; #ifdef __KERNEL__ @@ -161,7 +214,15 @@ struct sigcontext { unsigned long trapno; unsigned long oldmask; unsigned long cr2; - struct _fpstate __user *fpstate; /* zero when no FPU context */ + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ unsigned long reserved1[8]; }; #else /* __KERNEL__ */ @@ -202,4 +263,22 @@ struct sigcontext { #endif /* !__i386__ */ +struct _xsave_hdr { + u64 xstate_bv; + u64 reserved1[2]; + u64 reserved2[5]; +}; + +/* + * Extended state pointed by the fpstate pointer in the sigcontext. + * In addition to the fpstate, information encoded in the xstate_hdr + * indicates the presence of other extended state information + * supported by the processor and OS. + */ +struct _xstate { + struct _fpstate fpstate; + struct _xsave_hdr xstate_hdr; + /* new processor state extensions go here */ +}; + #endif /* ASM_X86__SIGCONTEXT_H */ diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h index 4e2ec732dd01..8c347032c2f2 100644 --- a/include/asm-x86/sigcontext32.h +++ b/include/asm-x86/sigcontext32.h @@ -40,7 +40,11 @@ struct _fpstate_ia32 { __u32 reserved; struct _fpxreg _fxsr_st[8]; struct _xmmreg _xmm[8]; /* It's actually 16 */ - __u32 padding[56]; + __u32 padding[44]; + union { + __u32 padding2[12]; + struct _fpx_sw_bytes sw_reserved; + }; }; struct sigcontext_ia32 { From c37b5efea43f9e500363f9973dd00e3d2cdcc685 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:25 -0700 Subject: [PATCH 069/186] x86, xsave: save/restore the extended state context in sigframe On cpu's supporting xsave/xrstor, fpstate pointer in the sigcontext, will include the extended state information along with fpstate information. Presence of extended state information is indicated by the presence of FP_XSTATE_MAGIC1 at fpstate.sw_reserved.magic1 and FP_XSTATE_MAGIC2 at fpstate + (fpstate.sw_reserved.extended_size - FP_XSTATE_MAGIC2_SIZE). Extended feature bit mask that is saved in the memory layout is represented by the fpstate.sw_reserved.xstate_bv For RT signal frames, UC_FP_XSTATE in the uc_flags also indicate the presence of extended state information in the sigcontext's fpstate pointer. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 5 +- arch/x86/kernel/i387.c | 82 +++++++++++++++-- arch/x86/kernel/signal_32.c | 5 +- arch/x86/kernel/signal_64.c | 7 +- arch/x86/kernel/xsave.c | 172 +++++++++++++++++++++++++++++++++--- include/asm-x86/i387.h | 4 +- include/asm-x86/ucontext.h | 6 ++ include/asm-x86/xsave.h | 5 +- 8 files changed, 264 insertions(+), 22 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index c596eabbe98b..f25a10124005 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -544,7 +544,10 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7daf3a011dd9..cbb9dc474a21 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -26,6 +26,7 @@ # define _fpstate_ia32 _fpstate # define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size +# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -447,12 +448,30 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, - sizeof(struct i387_fxsave_struct))) + if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) return -1; return 1; } +static int save_i387_xsave(void __user *buf) +{ + struct _fpstate_ia32 __user *fx = buf; + int err = 0; + + if (save_i387_fxsave(fx) < 0) + return -1; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, + sizeof(struct _fpx_sw_bytes)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_ia32_size + - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return -1; + + return 1; +} + int save_i387_xstate_ia32(void __user *buf) { struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; @@ -477,6 +496,8 @@ int save_i387_xstate_ia32(void __user *buf) unlazy_fpu(tsk); + if (cpu_has_xsave) + return save_i387_xsave(fp); if (cpu_has_fxsr) return save_i387_fxsave(fp); else @@ -491,14 +512,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, + unsigned int size) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + size); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -508,6 +530,51 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } +static int restore_i387_xsave(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + struct _fpstate_ia32 __user *fx_user = + ((struct _fpstate_ia32 __user *) buf); + struct i387_fxsave_struct __user *fx = + (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + unsigned int lmask, hmask; + int err; + + if (check_for_xstate(fx, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); + + xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory layout + * and enabled by the OS. + */ + lmask = ~(pcntxt_lmask & ~lmask); + hmask = ~(pcntxt_hmask & ~hmask); + xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + + return err; +fx_only: + /* + * Couldn't find the extended state information in the memory + * layout. Restore the FP/SSE and init the other extended state + * enabled by the OS. + */ + xsave_hdr->xstate_bv = XSTATE_FPSSE; + return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); +} + int restore_i387_xstate_ia32(void __user *buf) { int err; @@ -535,8 +602,11 @@ int restore_i387_xstate_ia32(void __user *buf) } if (HAVE_HWFP) { - if (cpu_has_fxsr) - err = restore_i387_fxsave(fp); + if (cpu_has_xsave) + err = restore_i387_xsave(buf); + else if (cpu_has_fxsr) + err = restore_i387_fxsave(fp, sizeof(struct + i387_fxsave_struct)); else err = restore_i387_fsave(fp); } else { diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 690cc616ac07..0f98d69fbdb0 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -441,7 +441,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ddf6123a55c8..2621b98f5bf6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -192,7 +192,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 16); + return (void __user *)round_down(sp - size, 64); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -226,7 +226,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 608e72d7ca64..dd66d0714c18 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -6,12 +6,68 @@ #include #include #include +#ifdef CONFIG_IA32_EMULATION +#include +#endif /* * Supported feature mask by the CPU and the kernel. */ unsigned int pcntxt_hmask, pcntxt_lmask; +struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +struct _fpx_sw_bytes fx_sw_reserved_ia32; +#endif + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw_user) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xsave_hdr_struct); + unsigned int magic2; + int err; + + err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], + sizeof(struct _fpx_sw_bytes)); + + if (err) + return err; + + /* + * First Magic check failed. + */ + if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) + return -1; + + /* + * Check for error scenarios. + */ + if (fx_sw_user->xstate_size < min_xstate_size || + fx_sw_user->xstate_size > xstate_size || + fx_sw_user->xstate_size > fx_sw_user->extended_size) + return -1; + + err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (err || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + #ifdef CONFIG_X86_64 /* * Signal frame handlers. @@ -28,15 +84,18 @@ int save_i387_xstate(void __user *buf) BUILD_BUG_ON(sizeof(struct user_i387_struct) != sizeof(tsk->thread.xstate->fxsave)); - if ((unsigned long)buf % 16) + if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); if (!used_math()) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = xsave_user(buf); + else + err = fxsave_user(buf); + if (err) return err; task_thread_info(tsk)->status &= ~TS_USEDFPU; @@ -46,23 +105,77 @@ int save_i387_xstate(void __user *buf) xstate_size)) return -1; } + + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct _fpstate __user *fx = buf; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_size + - FP_XSTATE_MAGIC2_SIZE)); + } + return 1; } +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE + * state. + */ +int restore_user_xstate(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + unsigned int lmask, hmask; + int err; + + if (((unsigned long)buf % 64) || + check_for_xstate(buf, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + /* + * restore the state passed by the user. + */ + err = xrestore_user(buf, lmask, hmask); + if (err) + return err; + + /* + * init the state skipped by the user. + */ + lmask = pcntxt_lmask & ~lmask; + hmask = pcntxt_hmask & ~hmask; + + xrstor_state(init_xstate_buf, lmask, hmask); + + return 0; + +fx_only: + /* + * couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, + pcntxt_hmask); + return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +} + /* * This restores directly out of user space. Exceptions are handled. */ int restore_i387_xstate(void __user *buf) { struct task_struct *tsk = current; - int err; + int err = 0; if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - + if (used_math()) + goto clear; return 0; } else if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) @@ -78,12 +191,17 @@ int restore_i387_xstate(void __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = restore_user_xstate(buf); + else + err = fxrstor_checking((__force struct i387_fxsave_struct *) + buf); if (unlikely(err)) { /* * Encountered an error while doing the restore from the * user buffer, clear the fpu state. */ +clear: clear_fpu(tsk); clear_used_math(); } @@ -91,6 +209,38 @@ int restore_i387_xstate(void __user *buf) } #endif +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void prepare_fx_sw_frame(void) +{ + int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + + FP_XSTATE_MAGIC2_SIZE; + + sig_xstate_size = sizeof(struct _fpstate) + size_extended; + +#ifdef CONFIG_IA32_EMULATION + sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; +#endif + + memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.xstate_bv = pcntxt_lmask | + (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_size = xstate_size; +#ifdef CONFIG_IA32_EMULATION + memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; +#endif +} + /* * Represents init state for the supported extended state. */ @@ -162,6 +312,8 @@ void __init xsave_cntxt_init(void) xstate_size = ebx; + prepare_fx_sw_frame(); + setup_xstate_init(); printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index dc3745e8040a..d3dda7161954 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -31,8 +31,10 @@ extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; +extern struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION extern unsigned int sig_xstate_ia32_size; +extern struct _fpx_sw_bytes fx_sw_reserved_ia32; struct _fpstate_ia32; struct _xstate_ia32; extern int save_i387_xstate_ia32(void __user *buf); @@ -104,7 +106,7 @@ static inline void clear_fpu_state(struct task_struct *tsk) X86_FEATURE_FXSAVE_LEAK); } -static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h index 9948dd328084..89eaa5456a7e 100644 --- a/include/asm-x86/ucontext.h +++ b/include/asm-x86/ucontext.h @@ -1,6 +1,12 @@ #ifndef ASM_X86__UCONTEXT_H #define ASM_X86__UCONTEXT_H +#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state + * information in the memory layout pointed + * by the fpstate pointer in the ucontext's + * sigcontext struct (uc_mcontext). + */ + struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index b716511aede2..b7f64b9fcd94 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -29,6 +29,9 @@ extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); extern void xsave_init(void); extern int init_fpu(struct task_struct *child); +extern int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *sw); static inline int xrstor_checking(struct xsave_struct *fx) { @@ -48,7 +51,7 @@ static inline int xrstor_checking(struct xsave_struct *fx) return err; } -static inline int xsave_check(struct xsave_struct __user *buf) +static inline int xsave_user(struct xsave_struct __user *buf) { int err; __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" From 42deec6f2c3688fdaf986225ac901b817cd91568 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:26 -0700 Subject: [PATCH 070/186] x86, xsave: update xsave header bits during ptrace fpregs set FP/SSE bits may be zero in the xsave header(representing the init state). Update these bits during the ptrace fpregs set operation, to indicate the non-init state. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cbb9dc474a21..e0ed59f5c19f 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -214,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + return ret; } @@ -414,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } From b4a091a62c8e91d6077e575600363cff73fa02ef Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:30:29 -0700 Subject: [PATCH 071/186] x86, xsave: add header file for XCR registers Add header file for the XCR registers and their access functions. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- include/asm-x86/xcr.h | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/asm-x86/xcr.h diff --git a/include/asm-x86/xcr.h b/include/asm-x86/xcr.h new file mode 100644 index 000000000000..f2cba4e79a23 --- /dev/null +++ b/include/asm-x86/xcr.h @@ -0,0 +1,49 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * asm-x86/xcr.h + * + * Definitions for the eXtended Control Register instructions + */ + +#ifndef _ASM_X86_XCR_H +#define _ASM_X86_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +#ifdef __KERNEL__ +# ifndef __ASSEMBLY__ + +#include + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +# endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_XCR_H */ From 6152e4b1c99a3689fc318d092cd144597f7dbd14 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:23:16 -0700 Subject: [PATCH 072/186] x86, xsave: keep the XSAVE feature mask as an u64 The XSAVE feature mask is a 64-bit number; keep it that way, in order to avoid the mistake done with rdmsr/wrmsr. Use the xsetbv() function provided in the previous patch. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 12 +++++------ arch/x86/kernel/xsave.c | 45 ++++++++++++++++------------------------- include/asm-x86/xsave.h | 18 ++++++++++------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e0ed59f5c19f..45723f1fe198 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -552,18 +552,17 @@ static int restore_i387_xsave(void __user *buf) (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; struct xsave_hdr_struct *xsave_hdr = ¤t->thread.xstate->xsave.xsave_hdr; - unsigned int lmask, hmask; + u64 mask; int err; if (check_for_xstate(fx, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + xsave_hdr->xstate_bv &= pcntxt_mask; /* * These bits must be zero. */ @@ -573,9 +572,8 @@ static int restore_i387_xsave(void __user *buf) * Init the state that is not present in the memory layout * and enabled by the OS. */ - lmask = ~(pcntxt_lmask & ~lmask); - hmask = ~(pcntxt_hmask & ~hmask); - xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + mask = ~(pcntxt_mask & ~mask); + xsave_hdr->xstate_bv &= mask; return err; fx_only: diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index dd66d0714c18..7415f3e38a51 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -9,11 +9,12 @@ #ifdef CONFIG_IA32_EMULATION #include #endif +#include /* * Supported feature mask by the CPU and the kernel. */ -unsigned int pcntxt_hmask, pcntxt_lmask; +u64 pcntxt_mask; struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION @@ -127,30 +128,28 @@ int save_i387_xstate(void __user *buf) int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; - unsigned int lmask, hmask; + u64 mask; int err; if (((unsigned long)buf % 64) || check_for_xstate(buf, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; /* * restore the state passed by the user. */ - err = xrestore_user(buf, lmask, hmask); + err = xrestore_user(buf, mask); if (err) return err; /* * init the state skipped by the user. */ - lmask = pcntxt_lmask & ~lmask; - hmask = pcntxt_hmask & ~hmask; + mask = pcntxt_mask & ~mask; - xrstor_state(init_xstate_buf, lmask, hmask); + xrstor_state(init_xstate_buf, mask); return 0; @@ -160,8 +159,7 @@ int restore_user_xstate(void __user *buf) * memory layout. Restore just the FP/SSE and init all * the other extended state. */ - xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, - pcntxt_hmask); + xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); return fxrstor_checking((__force struct i387_fxsave_struct *)buf); } @@ -231,8 +229,7 @@ void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_lmask | - (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; #ifdef CONFIG_IA32_EMULATION memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, @@ -263,11 +260,8 @@ void __cpuinit xsave_init(void) /* * Enable all the features that the HW is capable of * and the Linux kernel is aware of. - * - * xsetbv(); */ - asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), - "a" (pcntxt_lmask), "d" (pcntxt_hmask)); + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } /* @@ -287,36 +281,31 @@ void __init xsave_cntxt_init(void) unsigned int eax, ebx, ecx, edx; cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); - pcntxt_lmask = eax; - pcntxt_hmask = edx; - - if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", - pcntxt_lmask); + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); BUG(); } /* * for now OS knows only about FP/SSE */ - pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; - pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; - + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); /* * Recompute the context size for enabled features */ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; prepare_fx_sw_frame(); setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " "cntxt size 0x%x\n", - (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); + pcntxt_mask, xstate_size); } diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index b7f64b9fcd94..08e9a1ac07a9 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -1,6 +1,7 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include #include #include @@ -14,8 +15,7 @@ /* * These are the features that the OS can handle currently. */ -#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) -#define XCNTXT_HMASK 0x0 +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -23,7 +23,8 @@ #define REX_PREFIX #endif -extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; +extern unsigned int xstate_size; +extern u64 pcntxt_mask; extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); @@ -73,12 +74,12 @@ static inline int xsave_user(struct xsave_struct __user *buf) return err; } -static inline int xrestore_user(struct xsave_struct __user *buf, - unsigned int lmask, - unsigned int hmask) +static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { int err; struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" "2:\n" @@ -96,8 +97,11 @@ static inline int xrestore_user(struct xsave_struct __user *buf, return err; } -static inline void xrstor_state(struct xsave_struct *fx, int lmask, int hmask) +static inline void xrstor_state(struct xsave_struct *fx, u64 mask) { + u32 lmask = mask; + u32 hmask = mask >> 32; + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); From 7ab6af7ab69df8c9c5fbc380004fb81187742096 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jul 2008 17:36:48 -0700 Subject: [PATCH 073/186] x86_32: use apic_ops at print_local_APIC() Use apic_icr_read at print_local_APIC() in io_apic_32.c Signed-off-by: Hiroshi Shimamoto Cc: Suresh Siddha Cc: Yinghai Lu Signed-off-by: Ingo Molnar Cc: Suresh Siddha Cc: Yinghai Lu --- arch/x86/kernel/io_apic_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f3..39e063a9a28a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1486,6 +1486,7 @@ static void print_APIC_bitfield(int base) void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1536,10 +1537,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: [PATCH 074/186] x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 48 ---------------------------------- arch/x86/kernel/irqinit_32.c | 49 +++++++++++++++++++++++++++++++++++ arch/x86/mach-default/setup.c | 15 ----------- include/asm-x86/arch_hooks.h | 2 -- 4 files changed, 49 insertions(+), 65 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f93c18f5b79d..9e341c9d9414 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1354,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee1..9200a1e2752d 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 3d317836be9e..b00f5ad10ca1 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -36,15 +36,6 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - /** * intr_init_hook - post gate setup interrupt initialisation * @@ -60,12 +51,6 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } -#ifdef CONFIG_X86_LOCAL_APIC - apic_intr_init(); -#endif - - if (!acpi_ioapic) - setup_irq(2, &irq2); } /** diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 72adc3a109cc..de4596b24c23 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -12,8 +12,6 @@ /* these aren't arch hooks, they are generic routines * that can be used by the hooks */ extern void init_ISA_irqs(void); -extern void apic_intr_init(void); -extern void smp_intr_init(void); extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* these are the defined hooks */ From 49800efcb17afdf973f33e8aa8807b7f83993cc6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: [PATCH 075/186] x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6f9b8924bdc0..8396fd7628a9 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -493,17 +493,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + From b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: [PATCH 076/186] x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8396fd7628a9..cc6efe86249d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,19 +606,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); From 26d809af6397ce5c37f5c44d89734d19cce1ad25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Aug 2008 12:46:26 +0200 Subject: [PATCH 077/186] x86: fix xsave build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this build failure with certain glibc versions: In file included from /usr/include/bits/sigcontext.h:28, from /usr/include/signal.h:333, from Documentation/accounting/getdelays.c:24: /home/mingo/tip/usr/include/asm/sigcontext.h:191: error: expected specifier-qualifier-list before ‘u64’ Signed-off-by: Ingo Molnar --- include/asm-x86/sigcontext.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 899fe2f8abb9..ee813f4fe5d5 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -264,9 +264,9 @@ struct sigcontext { #endif /* !__i386__ */ struct _xsave_hdr { - u64 xstate_bv; - u64 reserved1[2]; - u64 reserved2[5]; + __u64 xstate_bv; + __u64 reserved1[2]; + __u64 reserved2[5]; }; /* From ee2b92a8201a40021ecd1aee6f0625dc03bacc54 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:13 -0700 Subject: [PATCH 078/186] x86, xsave: remove the redundant access_ok() in setup_rt_frame() save_i387_xstate() is already doing the required access_ok(). Remove the redundant access_ok() before it. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 2621b98f5bf6..6c581698ab56 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -208,9 +208,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) - goto give_sigsegv; - if (save_i387_xstate(fp) < 0) err |= -1; } else From ed405958057ca6a8c4c9178a7a3b1167fabb45f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:14 -0700 Subject: [PATCH 079/186] x86, xsave: clear the user buffer before doing fxsave/xsave fxsave/xsave instructions will not touch all the bytes in the fxsave/xsave frame. Clear the user buffer before doing fxsave/xsave directly to user buffer during the sigcontext setup. This is essentially needed in the context of xsave(for example, some of the fields in the xsave header are not touched by the xsave and defined as must be zero). This will also present uniform and clean context to the user (from which user can safely do fxrstor/xrstor). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7415f3e38a51..bb097b1644d8 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -92,6 +92,12 @@ int save_i387_xstate(void __user *buf) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { + /* + * Start with clearing the user buffer. This will present a + * clean context for the bytes not touched by the fxsave/xsave. + */ + __clear_user(buf, sig_xstate_size); + if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); else From f65bc214e042916135256620f900e9599d65e0cb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:15 -0700 Subject: [PATCH 080/186] x86, xsave: use BUG_ON() instead of BUILD_BUG_ON() All these structure sizes are runtime determined. So use a runtime bug check. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bb097b1644d8..07713d64debe 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -82,8 +82,7 @@ int save_i387_xstate(void __user *buf) if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) return -EACCES; - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); + BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); From b76d69ed721e8365739c3bd5dd7891efbea88494 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 14 Aug 2008 17:25:37 -0700 Subject: [PATCH 081/186] x86_64: uml: fix rename header guard In unistd_64.h, the guard macro _ASM_X86_64_UNISTD_H_ is renamed to ASM_X86__UNISTD_64_H. This change should be applied to arch/um/sys-x86_64/syscall_table.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/um/sys-x86_64/syscall_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c index c128eb897008..32f5fbe2d0d2 100644 --- a/arch/um/sys-x86_64/syscall_table.c +++ b/arch/um/sys-x86_64/syscall_table.c @@ -41,12 +41,12 @@ #define stub_rt_sigreturn sys_rt_sigreturn #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include #undef __SYSCALL #define __SYSCALL(nr, sym) [ nr ] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); From ed4e5ec177d20504c51aebb93db12d57716cde9c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: [PATCH 082/186] x86: apic - use SET_APIC_DEST_FIELD instead of hardcoded shift Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 69a876be506f..77c5e5eb820e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,7 +150,7 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } From 36fef0949c14445d231a68663e8a01860f7caca3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: [PATCH 083/186] x86: apic - unify disable_apic_timer Get rid of local_apic_timer_disabled and use disable_apic_timer instead. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++----- arch/x86/kernel/apic_64.c | 16 +++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 12b154822bce..6af20dd12c96 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -63,7 +63,7 @@ int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -574,7 +574,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (local_apic_timer_disabled) { + if (disable_apic_timer) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -1699,12 +1699,19 @@ static int __init parse_nolapic(char *arg) } early_param("nolapic", parse_nolapic); -static int __init parse_disable_lapic_timer(char *arg) +static int __init parse_disable_apic_timer(char *arg) { - local_apic_timer_disabled = 1; + disable_apic_timer = 1; return 0; } -early_param("nolapic_timer", parse_disable_lapic_timer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 77c5e5eb820e..1e925be861e4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -45,6 +45,7 @@ #include #include +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -1583,14 +1584,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static __init int setup_noapictimer(char *str) +static int __init parse_disable_apic_timer(char *arg) { - if (str[0] != ' ' && str[0] != 0) - return 0; disable_apic_timer = 1; - return 1; + return 0; } -__setup("noapictimer", setup_noapictimer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static __init int setup_apicpmtimer(char *s) { From f07f4f9046121ac803bc2f0ded3d77b7c2ab481b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: [PATCH 084/186] x86: apic - unify __setup_APIC_LVTT To be able to unify this function we RE-introduce APIC_DIVISOR for 64bit mode. This snipped was eliminated in some time ago in a sake of clenup but now we need it again since it allow up to get rid of #ifdef(s). And lapic_is_integrated call is added in apic_64.c but since we always have APIC integrated on 64bit cpu compiler will ignore this call. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e925be861e4..ac399d0f65c0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,9 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* Clock divisor is set to 1 */ +#define APIC_DIVISOR 1 + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -262,6 +265,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -276,7 +282,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -446,7 +452,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = result / HZ; + calibration_result = (result * APIC_DIVISOR) / HZ; /* * Do a sanity check on the APIC calibration result From 9ce122c6e55c44ae9a4c4c777579b87d83e7f898 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: [PATCH 085/186] x86: apic - do not clear APIC twice in lapic_shutdown There is no need to clear APIC twice since disable_local_APIC will clear it anyway. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6af20dd12c96..a151d66f948c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -830,10 +830,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); - clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); + else + clear_local_APIC(); local_irq_restore(flags); } From 64e474d168e3cf31e44890b4947644d361f84e43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: [PATCH 086/186] x86: apic - get rid of local_apic_timer_verify_ok We are able to use clock_event_device as it's done in 64bit apic code so lets get rid of local_apic_timer_verify_ok variable. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a151d66f948c..7783c113be24 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -60,8 +60,6 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -301,7 +299,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned int v; /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); @@ -514,7 +512,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - local_apic_timer_verify_ok = 1; + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -548,11 +546,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - local_apic_timer_verify_ok = 0; + levt->features |= CLOCK_EVT_FEAT_DUMMY; } else local_irq_enable(); - if (!local_apic_timer_verify_ok) { + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; From c93baa1ae51cdba25a5f5ad37b2348e700e75daf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: [PATCH 087/186] x86: apic - unify verify_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7783c113be24..7ef7c782a42d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -878,6 +878,12 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; /* * The next two are just to see if we have sane values. From 296cb9511dcc3895fda84d0cd5b411bd926e4bb3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:23 +0200 Subject: [PATCH 088/186] x86: apic - unify sync_Arb_IDs Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +-- arch/x86/kernel/apic_64.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7ef7c782a42d..d07488993ee7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -915,8 +915,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, - APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac399d0f65c0..41aff3460635 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -742,8 +742,11 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - if (modern_apic()) + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* @@ -752,8 +755,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* From 6f6da97faf29f87b3980a6992fb8cab44b4c444d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:19 +0400 Subject: [PATCH 089/186] x86: apic - sync_Arb_IDs style fixup No changes on binary level Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d07488993ee7..60575c05151a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -909,13 +909,15 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; + /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 41aff3460635..72e94ab0e364 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -755,7 +755,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* From 638c0411922540deaf8797cacf73513b17618405 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:18 +0400 Subject: [PATCH 090/186] x86: apic - unify init_bsp_APIC - remove redundant read of APIC_LVR register in 64bit mode - APIC is always integrated for 64bit mode so gcc will eliminate lapic_is_integrated call Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60575c05151a..16d9721788c1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -925,7 +925,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned long value; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -946,11 +946,13 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else +#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 72e94ab0e364..99d18b8976a5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -773,8 +773,6 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; - value = apic_read(APIC_LVR); - /* * Do not trust the local APIC being empty at bootup. */ @@ -786,7 +784,15 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - value |= APIC_SPIV_FOCUS_DISABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -795,6 +801,8 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } From 6764014bc8bb4849f6a4f336477e873ad5861ed2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:50 +0400 Subject: [PATCH 091/186] x86: apic - unify clear_local_APIC - Remove redundant masking of APIC_LVTTHMR register in apic_32.c - Add masking of APIC_LVTTHMR register to apic_64.c. We use a bit complicated #ifdef here: CONFIG_X86_MCE_P4THERMAL is 32bit specific and X86_MCE_INTEL is 64bit specific so the appropriate config variable will be set by Kconfig. - the APIC_ESR register clearing in apic_64.c now uses not straightforward way but this is allowed tradeoff. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +----- arch/x86/kernel/apic_64.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 16d9721788c1..3131603a4d6a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -754,7 +754,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -771,10 +771,6 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 99d18b8976a5..d834b7583624 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -630,6 +630,13 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif /* * Clean APIC state for other OSs: */ @@ -640,8 +647,14 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /** From 92206c909ad1d4f9c355b4842722d5a355d6b76b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:51 +0400 Subject: [PATCH 092/186] x86: apic - unify lapic_resume Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 29 ++++++++++++++++++----------- arch/x86/kernel/apic_64.c | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3131603a4d6a..3d40213d3af2 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1606,16 +1606,21 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1625,7 +1630,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1639,7 +1644,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d834b7583624..e542a2d08de5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1412,13 +1412,22 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - if (!x2apic) { + +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); - } else - enable_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1428,7 +1437,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1442,7 +1451,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } From 24968cfdd4c3cf61338495dd0f9bb8a6907d2087 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:52 +0400 Subject: [PATCH 093/186] x86: apic - unify lapic_suspend Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d40213d3af2..6cb8aaaf10f5 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1582,7 +1582,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index e542a2d08de5..13dea935b4eb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1390,10 +1390,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); From 274cfe5912eebe9d4e1a4c451fe617289a181fcf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:53 +0400 Subject: [PATCH 094/186] x86: apic - rearrange functions and comments Rearrange functions and comments to find differences easier. Also use apic_printk in setup_boot_APIC_clock for 64bit mode. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 69 ++++++++++++++++++++++----------------- arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++------ 2 files changed, 77 insertions(+), 40 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6cb8aaaf10f5..9e8702eebd46 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -251,6 +251,9 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -279,6 +282,36 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Program the next event, relative to now */ @@ -298,7 +331,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used for broadcast ? */ + /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; @@ -680,35 +713,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - /* * Local APIC start and shutdown */ @@ -1542,6 +1546,11 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 13dea935b4eb..46523c0cc6f6 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -81,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); +/* + * The local apic timer can be used for any function which is CPU local. + */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -127,6 +130,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -175,7 +183,6 @@ static struct apic_ops xapic_ops = { }; struct apic_ops __read_mostly *apic_ops = &xapic_ops; - EXPORT_SYMBOL_GPL(apic_ops); static void x2apic_wait_icr_idle(void) @@ -244,6 +251,10 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* + * Local APIC timer + */ + /* Clock divisor is set to 1 */ #define APIC_DIVISOR 1 @@ -257,7 +268,6 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -474,10 +484,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -489,7 +499,9 @@ void __init setup_boot_APIC_clock(void) return; } - printk(KERN_INFO "Using local APIC timer interrupts.\n"); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -508,6 +520,7 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -577,6 +590,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } @@ -1248,6 +1262,13 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1347,9 +1368,11 @@ int hard_smp_processor_id(void) #ifdef CONFIG_PM static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1458,6 +1481,11 @@ static int lapic_resume(struct sys_device *dev) return 0; } +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, From 9c803869f5f5e3d7ad94b2926474b2882e30073b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:54 +0400 Subject: [PATCH 095/186] x86: apic - unify lapic_is_integrated Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 9e8702eebd46..41134d4e6a66 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -128,7 +128,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46523c0cc6f6..18c0b8cd2376 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -111,11 +111,15 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a seperate chip + * Check, if the APIC is integrated or a separate chip */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif } /* From cf9768d7514d59b3179a886c4a47908d72e6cf76 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:55 +0400 Subject: [PATCH 096/186] x86: apic - unify xapic_icr_read Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 18c0b8cd2376..5e0738131e58 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -174,7 +174,7 @@ u64 xapic_icr_read(void) icr2 = apic_read(APIC_ICR2); icr1 = apic_read(APIC_ICR); - return (icr1 | ((u64)icr2 << 32)); + return icr1 | ((u64)icr2 << 32); } static struct apic_ops xapic_ops = { From d5e629a6f88137fb77c4cc857be5ea7c3f27110d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 17 Aug 2008 21:12:27 -0700 Subject: [PATCH 097/186] x86: apic - unify lapic_resume - fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e0738131e58..ad532dccd11e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1446,6 +1446,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1456,6 +1457,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); From 8bfcb3960fde049b863266dab8c3617bb5a541aa Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:20 +0200 Subject: [PATCH 098/186] x86: make movsl_mask definition non-CPU specific movsl_mask is currently defined in arch/x86/kernel/cpu/intel.c, which contains code specific to Intel CPUs. However, movsl_mask is used in the non-CPU specific code in arch/x86/lib/usercopy_32.c, which breaks the compilation when support for Intel CPUs is compiled out. This patch solves this problem by moving movsl_mask's definition close to its users in arch/x86/lib/usercopy_32.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 ------- arch/x86/lib/usercopy_32.c | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f2569b8f8..5c8959b8a42e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,13 +23,6 @@ #include #endif -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 24e60944971a..9e68075544f6 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -14,6 +14,13 @@ #include #include +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { #ifdef CONFIG_X86_INTEL_USERCOPY From 774400a3ba23b63f4de39e67ce6c4e48935809dc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:21 +0200 Subject: [PATCH 099/186] x86: move cmpxchg fallbacks to a generic place arch/x86/kernel/cpu/intel.c defines a few fallback functions (cmpxchg_*()) that are used when the CPU doesn't support cmpxchg and/or cmpxchg64 natively. However, while defined in an Intel-specific file, these functions are also used for CPUs from other vendors when they don't support cmpxchg and/or cmpxchg64. This breaks the compilation when support for Intel CPUs is disabled. This patch moves these functions to a new arch/x86/kernel/cpu/cmpxchg.c file, unconditionally compiled when X86_32 is enabled. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/cmpxchg.c | 72 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 64 ------------------------------- 3 files changed, 73 insertions(+), 65 deletions(-) create mode 100644 arch/x86/kernel/cpu/cmpxchg.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaad3001..25b4c063fbf6 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,7 +5,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o -obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 000000000000..2056ccf572cc --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5c8959b8a42e..77618c717d76 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -307,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - /* arch_initcall(intel_cpu_init); */ From 8d02c2110b3fb8e2700b31596a582a2989fd72ba Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 5 Aug 2008 11:45:19 +0200 Subject: [PATCH 100/186] x86: configuration options to compile out x86 CPU support code This patch adds some configuration options that allow to compile out CPU vendor-specific code in x86 kernels (in arch/x86/kernel/cpu). The new configuration options are only visible when CONFIG_EMBEDDED is selected, as they are mostly interesting for space savings reasons. An example of size saving, on x86 with only Intel CPU support: text data bss dec hex filename 1125479 118760 212992 1457231 163c4f vmlinux.old 1121355 116536 212992 1450883 162383 vmlinux -4124 -2224 0 -6348 -18CC +/- However, I'm not exactly sure that the Kconfig wording is correct with regard to !64BIT / 64BIT. [ mingo@elte.hu: convert macro to inline ] Signed-off-by: Thomas Petazzoni Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 70 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/Makefile | 19 +++++----- include/asm-x86/bugs.h | 5 +++ 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2c518fbc52ec..6156ac25ff8c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -415,3 +415,73 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) + +menuconfig PROCESSOR_SELECT + default y + bool "Supported processor vendors" if EMBEDDED + help + This lets you choose what x86 vendor support code your kernel + will include. + +config CPU_SUP_INTEL_32 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_INTEL_64 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Cyrix processors + +config CPU_SUP_AMD_32 + default y + bool "Support AMD processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for AMD processors + +config CPU_SUP_AMD_64 + default y + bool "Support AMD processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for AMD processors + +config CPU_SUP_CENTAUR_32 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_CENTAUR_64 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Transmeta processors + +config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for UMC processors diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25b4c063fbf6..a0fc6c144384 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,15 +7,16 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o -obj-$(CONFIG_X86_32) += amd.o -obj-$(CONFIG_X86_64) += amd_64.o -obj-$(CONFIG_X86_32) += cyrix.o -obj-$(CONFIG_X86_32) += centaur.o -obj-$(CONFIG_X86_64) += centaur_64.o -obj-$(CONFIG_X86_32) += transmeta.o -obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_64) += intel_64.o -obj-$(CONFIG_X86_32) += umc.o + +obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o +obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index 021cbdd5f258..00e4a0cd6f28 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -2,6 +2,11 @@ #define _ASM_X86_BUGS_H extern void check_bugs(void); + +#ifdef CONFIG_CPU_SUP_INTEL_32 int ppro_with_ram_bug(void); +#else +static inline int ppro_with_ram_bug(void) { return 0; } +#endif #endif /* _ASM_X86_BUGS_H */ From b6c8051311e1a14d229df05ea39d0a1b2ce90cdd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:49 +0400 Subject: [PATCH 101/186] x86: apic - rearrange maxcpu definition Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 41134d4e6a66..8d1febf05da1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -114,6 +114,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static int enabled_via_apicbase; static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; + /* * Get the LAPIC version @@ -1459,8 +1461,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ad532dccd11e..46acb9b47ae7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -98,10 +98,10 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ From f1ee37891dab6014f6b0ec77b18bc07e2369a55f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:50 +0400 Subject: [PATCH 102/186] x86: apic - unify setup_boot_APIC_clock Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8d1febf05da1..80db3e0426c4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -612,6 +612,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; From 990b183e58cb513a62492b6218987750e106cbfb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:51 +0400 Subject: [PATCH 103/186] x86: apic - unify disable_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80db3e0426c4..13c4b79441da 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -796,7 +796,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned long value; + unsigned int value; clear_local_APIC(); @@ -808,6 +808,7 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -819,6 +820,7 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46acb9b47ae7..4fb903b2fc39 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -691,6 +691,20 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif } void lapic_shutdown(void) From fe4024dcb0c01e5399394d2807406a2c13fb1eb7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:52 +0400 Subject: [PATCH 104/186] x86: apic - unify lapic_shutdown Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 9 ++++++--- arch/x86/kernel/apic_64.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 13c4b79441da..d4efe86adc72 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -838,10 +838,13 @@ void lapic_shutdown(void) local_irq_save(flags); - if (enabled_via_apicbase) - disable_local_APIC(); - else +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fb903b2fc39..48806546d49f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -707,6 +707,12 @@ void disable_local_APIC(void) #endif } +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ void lapic_shutdown(void) { unsigned long flags; @@ -716,7 +722,13 @@ void lapic_shutdown(void) local_irq_save(flags); - disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } From 36c9d6742897fa414f51c4e9d0f20ab4e6bf942c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:53 +0400 Subject: [PATCH 105/186] x86: apic - unify connect_bsp_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ arch/x86/kernel/apic_64.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d4efe86adc72..6d230e9d0a7d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1387,6 +1387,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1401,6 +1402,7 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } +#endif enable_apic_mode(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 48806546d49f..5579e213b5d2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1285,10 +1285,26 @@ asmlinkage void smp_error_interrupt(void) } /** - * * connect_bsp_APIC - attach the APIC to the interrupt system - * */ + * connect_bsp_APIC - attach the APIC to the interrupt system + */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif enable_apic_mode(); } From c43da2f5e92fe3bcc256f0c0d6cb858368da5bd9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:54 +0400 Subject: [PATCH 106/186] x86: apic - unify lapic_setup_esr We use 32bit code former for 64bit mode since it's much better implementation and easier to merge. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 ++++++++-------- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6d230e9d0a7d..3c1562afa274 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -983,6 +983,16 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1003,16 +1013,7 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5579e213b5d2..d74abf7e92f7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -863,6 +863,45 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ @@ -968,18 +1007,6 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned maxlvt = lapic_get_maxlvt(); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); -} - void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); From c40aaec6868401671a0ca14ed77e9b2da2d1f223 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:55 +0400 Subject: [PATCH 107/186] x86: apic - unify __setup_APIC_LVTT Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- arch/x86/kernel/apic_64.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3c1562afa274..65419c7d437f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -248,8 +248,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 16 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else #define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -281,8 +285,8 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d74abf7e92f7..fe57db9f3fbb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -259,8 +259,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 1 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 #define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -291,9 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); From c177b0bc03e0e11623e2099db42903fb0caf0fd3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:56 +0400 Subject: [PATCH 108/186] x86: apic - unify disconnect_bsp_APIC - just #ifdef added Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 66 ++++++++++++++++++++------------------- arch/x86/kernel/apic_64.c | 23 ++++++++++++-- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65419c7d437f..3095bb71eaed 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1420,6 +1420,7 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1431,47 +1432,48 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + return; + } +#endif - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + /* Go back to Virtual Wire compatibility mode */ + unsigned int value; - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled + * For LVT0 make it edge triggered, active high, + * external and enabled */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fe57db9f3fbb..0d969103fce7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1348,8 +1348,24 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1375,7 +1391,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* For LVT1 make it edge triggered, active high, nmi and enabled */ + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | From 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:57 +0400 Subject: [PATCH 109/186] x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 +++++++------- arch/x86/kernel/apic_64.c | 41 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3095bb71eaed..c3a252b1a8be 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1480,7 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - physid_mask_t phys_cpu; /* * Validate version @@ -1493,9 +1492,6 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1512,17 +1508,19 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - if (apicid == boot_cpu_physical_apicid) + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - + } if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1542,7 +1540,9 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#ifdef CONFIG_SMP +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1555,6 +1555,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0d969103fce7..76c20773bedf 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1215,6 +1215,8 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1409,15 +1411,26 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -1437,6 +1450,29 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1448,6 +1484,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } +#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); From fa6b95fc7c6f2f3eb1560e1f33cd13197546c5a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:58 +0400 Subject: [PATCH 110/186] x86: apic - unify end_local_APIC_setup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++-- arch/x86/kernel/apic_64.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c3a252b1a8be..f1882329b9cf 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1155,13 +1155,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - unsigned long value; - lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, value); +#endif setup_apic_nmi_watchdog(NULL); apic_pm_activate(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 76c20773bedf..eec10b34dc49 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1014,6 +1014,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); +#endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } From 0b23e8cf553f5e706b0057363f1319867bcd1a7d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:59 +0400 Subject: [PATCH 111/186] x86: apic - unify local_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f1882329b9cf..af227bce23b1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -685,7 +685,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eec10b34dc49..a9ad2cb4ff95 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -567,7 +567,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } From 79af9bec60e6e218a1e48e8830d603d64a7fc441 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:00 +0400 Subject: [PATCH 112/186] x86: apic - unify apic_set_verbosity Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++--- arch/x86/kernel/apic_64.c | 46 +++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index af227bce23b1..acbc4dea2ee8 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1768,13 +1768,24 @@ early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); static int __init apic_set_verbosity(char *arg) { - if (!arg) + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif return -EINVAL; + } - if (strcmp(arg, "debug") == 0) + if (strcmp("debug", arg) == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp(arg, "verbose") == 0) + else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a9ad2cb4ff95..999781810c09 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,27 +1759,6 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static __init int setup_disableapic(char *str) { disable_apic = 1; @@ -1824,6 +1803,31 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + static int __init lapic_insert_resource(void) { if (!apic_phys) From 789fa735712d726b5cfa5c6be57171b5637a4872 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:01 +0400 Subject: [PATCH 113/186] x86: apic - unify disableapic and nolapic setup handlers Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 11 +++++++++-- arch/x86/kernel/apic_64.c | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index acbc4dea2ee8..5680bda62f78 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1737,13 +1737,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init parse_nolapic(char *arg) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("nolapic", parse_nolapic); +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); static int __init parse_disable_apic_timer(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 999781810c09..7a317180ba2e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,7 +1759,7 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static __init int setup_disableapic(char *str) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1768,9 +1768,9 @@ static __init int setup_disableapic(char *str) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static __init int setup_nolapic(char *str) +static int __init setup_nolapic(char *arg) { - return setup_disableapic(str); + return setup_disableapic(arg); } early_param("nolapic", setup_nolapic); From 3415610b8e38b26b52a430b8846665c2d6bb3558 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:02 +0400 Subject: [PATCH 114/186] x86: apic - rearrange parse_lapic_timer_c2_ok Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 5680bda62f78..885e306420ac 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1752,6 +1752,13 @@ static int __init setup_nolapic(char *arg) } early_param("nolapic", setup_nolapic); +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + static int __init parse_disable_apic_timer(char *arg) { disable_apic_timer = 1; @@ -1766,13 +1773,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - static int __init apic_set_verbosity(char *arg) { if (!arg) { From e75bedf415f300a08e9bbcc755784e488574a73e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:03 +0400 Subject: [PATCH 115/186] x86: apic - lapic_resume 32bit - unification fix Just add parenthesis to be identical of current 64bit implementation (so diff will not complain). Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 885e306420ac..e975562a76a5 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1646,6 +1646,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1656,6 +1657,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); From 1b4ee4e4096d430c4c12516c1d30a6b0b4f9e9e4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 23:12:33 +0400 Subject: [PATCH 116/186] x86: apic - compilation warnings fix Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 +++++++++------ arch/x86/kernel/apic_64.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e975562a76a5..b8d80c291650 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1162,11 +1162,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1426,6 +1428,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1443,7 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7a317180ba2e..37e037606f30 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1020,11 +1020,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1363,6 +1365,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1380,7 +1384,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); From e2fe16d91228a005811335fbc4fbad5d4f5b75af Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 15 Aug 2008 15:36:31 -0700 Subject: [PATCH 117/186] x86: boot: stub out unimplemented CPU feature words The CPU feature detection code in the boot code is somewhat minimal, and doesn't include all possible CPUID words. In particular, it doesn't contain the code for CPU feature words 2 (Transmeta), 3 (Linux-specific), 5 (VIA), or 7 (scattered). Zero them out, so we can still set those bits as known at compile time; in particular, this allows creating a Linux-specific NOPL flag and have it required (and therefore resolvable at compile time) in 64-bit mode. Signed-off-by: H. Peter Anvin --- arch/x86/boot/cpucheck.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 7804389ee005..19b14f7ef9f1 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -46,12 +46,12 @@ static const u32 req_flags[NCAPINTS] = { REQUIRED_MASK0, REQUIRED_MASK1, - REQUIRED_MASK2, - REQUIRED_MASK3, + 0, /* REQUIRED_MASK2 not implemented in this file */ + 0, /* REQUIRED_MASK3 not implemented in this file */ REQUIRED_MASK4, - REQUIRED_MASK5, + 0, /* REQUIRED_MASK5 not implemented in this file */ REQUIRED_MASK6, - REQUIRED_MASK7, + 0, /* REQUIRED_MASK7 not implemented in this file */ }; #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) From 7e00df5818964298c9821365a6cb7a8304227c5c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: [PATCH 118/186] x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 32 ++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 36 +++++++++++++++++++++++++++++ arch/x86/kernel/cpu/feature_names.c | 3 ++- include/asm-x86/cpufeature.h | 11 +++++---- include/asm-x86/required-features.h | 8 ++++++- 5 files changed, 82 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..0785b3c8d043 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017e..c3afba5a81a7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..c9017799497c 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 2f5a792b0acc..be8b2ad5d411 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -72,14 +72,15 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index adec887dd7cd..5c2ff4bc2980 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -41,6 +41,12 @@ # define NEED_3DNOW 0 #endif +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) @@ -67,7 +73,7 @@ #define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) #define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 0 +#define REQUIRED_MASK3 (NEED_NOPL) #define REQUIRED_MASK4 0 #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 From f1c5d30e1d79bbfb60eaf189db862d3cb2bcac92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: [PATCH 119/186] x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 36 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..65a0c1b48696 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ From 11494547b1754c4f3bd7f707ab869e2adf54d52f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 01:01:19 -0700 Subject: [PATCH 120/186] x86: fix apic version warning after following patch, commit 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Author: Cyrill Gorcunov Date: Mon Aug 18 20:45:57 2008 +0400 x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now we are getting warning for acpi path on 64 bit system. make the 64-bit side fill in apic_version[] as well. [ mingo@elte.hu: build fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ---- include/asm-x86/mpspec.h | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 12e260e8fb2a..d9770a56511a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,10 +239,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) return; } -#ifdef CONFIG_X86_32 if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; -#endif generic_processor_info(id, ver); } @@ -762,10 +760,8 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = read_apic_id(); -#ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); -#endif } } diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 118da365e371..be2241a818f1 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -5,11 +5,12 @@ #include +extern int apic_version[MAX_APICS]; + #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; -extern int apic_version[MAX_APICS]; extern u8 apicid_2_node[]; extern int pic_mode; From 671eef85a3e885dff4ce210d8774ad50a91d5967 Mon Sep 17 00:00:00 2001 From: "Cihula, Joseph" Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: [PATCH 121/186] x86, e820: add support for AddressRangeUnusuable ACPI memory type Add support for the E820_UNUSABLE memory type, which is defined in Revision 3.0b (Oct. 10, 2006) of the ACPI Specification on p. 394 Table 14-1: AddressRangeUnusuable This range of address contains memory in which errors have been detected. This range must not be used by the OSPM. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 4 ++++ include/asm-x86/e820.h | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7bb..291e6cd9f9c0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; + case E820_UNUSABLE: + printk("(unusable)\n"); + break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index f52daf176bcb..ca433c36af7a 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -43,6 +43,7 @@ #define E820_RESERVED 2 #define E820_ACPI 3 #define E820_NVS 4 +#define E820_UNUSABLE 5 /* reserved RAM used by kernel itself */ #define E820_RESERVED_KERN 128 From bbb65d2d365efe9951290e61678dcf81ec60add4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:10 +0200 Subject: [PATCH 122/186] x86: use cpuid vector 0xb when available for detecting cpu topology cpuid leaf 0xb provides extended topology enumeration. This interface provides the 32-bit x2APIC id of the logical processor and it also provides a new mechanism to detect SMT and core siblings (which provides increased addressability). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 86 ++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 3 + arch/x86/kernel/cpu/intel.c | 11 ++- arch/x86/kernel/cpu/intel_64.c | 5 +- include/asm-x86/cpufeature.h | 1 + include/asm-x86/processor.h | 1 + 6 files changed, 104 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..aa9641ae6703 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,6 +7,8 @@ #include #include +#include + struct cpuid_bit { u16 feature; u8 reg; @@ -48,6 +50,90 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } +/* leaf 0xb SMT level */ +#define SMT_LEVEL 0 + +/* leaf 0xb sub-leaf types */ +#define INVALID_TYPE 0 +#define SMT_TYPE 1 +#define CORE_TYPE 2 + +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) +#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (c->cpuid_level < 0xb) + return; + + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + /* + * check if the cpuid leaf 0xb is actually implemented. + */ + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + return; + + set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + + /* + * initial apic id, which also represents 32-bit extended x2apic id. + */ + c->initial_apicid = edx; + + /* + * Populate HT related information from sub-leaf level 0. + */ + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + + /* + * Check for the Core type in the implemented sub leaves. + */ + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +#ifdef CONFIG_X86_32 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; + c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); +#else + c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; + c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + return; +} + #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index af569a964e74..a2888c7b56ac 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -128,6 +128,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d76..58a6f1a0b297 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -176,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - c->x86_max_cores = num_cpu_cores(c); + detect_extended_topology(c); - detect_ht(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = num_cpu_cores(c); + detect_ht(c); + } /* Work around errata */ Intel_errata_workarounds(c); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f0..42d501a8c418 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -80,7 +80,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 5fc4d55906d4..8d842af4cf7a 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -81,6 +81,7 @@ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 5f58da401b43..79338fe965d2 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -161,6 +161,7 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; +extern void detect_extended_topology(struct cpuinfo_x86 *c); #if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) extern void detect_ht(struct cpuinfo_x86 *c); #else From e17941b0c140562d92e5a3bc12b4ad88281c7926 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: [PATCH 123/186] x86: use x2apic id reported by cpuid during topology discovery use x2apic id reported by cpuid during topology discovery, instead of the apic id configured in the APIC. For most of the systems, x2apic id reported by cpuid leaf 0xb will be same as the physical apic id reported by the APIC_ID register of the APIC. We follow the suggested guidelines and use the apic id reported by the cpuid. No change to non-generic UV platforms, will use the apic id reported in the APIC_ID register as the cpuid reported apic id's may not be unique. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 7 +------ arch/x86/kernel/genx2apic_phys.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50a..0bf0a8624b8c 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -120,14 +120,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd4..e2401ab0c46e 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -118,14 +118,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } void x2apic_send_IPI_self(int vector) From 11c231a962c740b3216eb6565149ae5a7944cba7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: [PATCH 124/186] x86: use x2apic id reported by cpuid during topology discovery, fix v2: Fix for !SMP build Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index aa9641ae6703..a2d1a545767d 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,6 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; @@ -132,6 +133,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); return; +#endif } #ifdef CONFIG_X86_PAT From 83b8e28b14d63db928cb39e5c5ed2a548246bd71 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 27 Aug 2008 14:57:36 -0700 Subject: [PATCH 125/186] x86: xsave: restore xcr0 during resume Add the missing XCR0(XFEATURE_ENABLED_MASK) restore during resume. Reported-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/power/cpu_32.c | 7 +++++++ arch/x86/power/cpu_64.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 7dc5d5cf50a2..02f36f53558c 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -11,6 +11,7 @@ #include #include #include +#include static struct saved_context saved_context; @@ -124,6 +125,12 @@ static void __restore_processor_state(struct saved_context *ctxt) if (boot_cpu_has(X86_FEATURE_SEP)) enable_sep_cpu(); + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + fix_processor_context(); do_fpu_end(); mtrr_ap_init(); diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 66bdfb591fd8..e3b6cf70d62c 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -14,6 +14,7 @@ #include #include #include +#include static void fix_processor_context(void); @@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + fix_processor_context(); do_fpu_end(); From 7414aa41a63348c3bc72d8c37b716024c29b6d50 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 17:56:44 -0700 Subject: [PATCH 126/186] x86: generate names for /proc/cpuinfo from We have had a number of cases where (and its predecessors) have diverged substantially from the names list in /proc/cpuinfo. This patch generates the latter from the former. It retains the option for explicitly overriding the strings, but by making that require a separate action it should at least be less likely to happen. It would be good to do a future pass and rename strings that are gratuituously different in the kernel (/proc/cpuinfo is a userspace interface and must remain constant.) Signed-off-by: H. Peter Anvin --- arch/x86/boot/mkcpustr.c | 2 +- arch/x86/kernel/cpu/Makefile | 11 +++- arch/x86/kernel/cpu/feature_names.c | 84 ---------------------------- arch/x86/kernel/cpu/mkcapflags.pl | 32 +++++++++++ arch/x86/kernel/cpu/powerflags.c | 20 +++++++ include/asm-x86/cpufeature.h | 86 ++++++++++++++++++----------- 6 files changed, 117 insertions(+), 118 deletions(-) delete mode 100644 arch/x86/kernel/cpu/feature_names.c create mode 100644 arch/x86/kernel/cpu/mkcapflags.pl create mode 100644 arch/x86/kernel/cpu/powerflags.c diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index bbe76953bae9..4589caa3e9d1 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,7 +15,7 @@ #include -#include "../kernel/cpu/feature_names.c" +#include "../kernel/cpu/capflags.c" #if NCAPFLAGS > 8 # error "Need to adjust the boot code handling of CPUID strings" diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0fc6c144384..3ede19a4e0b2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,7 +3,7 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o feature_names.o +obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o @@ -23,3 +23,12 @@ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + +cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h + +targets += capflags.c +$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE + $(call if_changed,mkcapflags) diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c deleted file mode 100644 index c9017799497c..000000000000 --- a/arch/x86/kernel/cpu/feature_names.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Strings for the various x86 capability flags. - * - * This file must not contain any executable code. - */ - -#include - -/* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ -const char * const x86_cap_flags[NCAPINTS*32] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, - "nopl", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -}; - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ -}; diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl new file mode 100644 index 000000000000..dfea390e1608 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h +# + +($in, $out) = @ARGV; + +open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; +open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; + +print OUT "#include \n\n"; +print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; + +while (defined($line = )) { + if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { + $macro = $1; + $feature = $2; + $tail = $3; + if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { + $feature = $1; + } + + if ($feature ne '') { + printf OUT "\t%-32s = \"%s\",\n", + "[$macro]", "\L$feature"; + } + } +} +print OUT "};\n"; + +close(IN); +close(OUT); diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c new file mode 100644 index 000000000000..5abbea297e0c --- /dev/null +++ b/arch/x86/kernel/cpu/powerflags.c @@ -0,0 +1,20 @@ +/* + * Strings for the various x86 power flags + * + * This file must not contain any executable code. + */ + +#include + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 9489283a4bcf..611da2898b2b 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -8,6 +8,12 @@ #define NCAPINTS 8 /* N 32-bit words worth of info */ +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ #define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ #define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ @@ -27,18 +33,18 @@ #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DS (0*32+21) /* Debug Store */ +#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" Supports the CLFLUSH instruction */ +#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ - /* of FPU context), and CR4.OSFXSR available */ -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM (0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ #define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ #define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ @@ -46,7 +52,8 @@ #define X86_FEATURE_MP (1*32+19) /* MP Capable. */ #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */ +#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ @@ -64,52 +71,67 @@ #define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ /* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ -#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ +#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ -#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ -#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ -#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX (4*32+ 6) /* "Safer" mode */ #define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ #define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID (4*32+10) /* Context ID */ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ +#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ -#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ -#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ -#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ #define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ #define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ -#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ +#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ +#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ +#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ /* * Auxiliary flags: Linux defined - For features scattered in various From f1240c002679a77990fd7c198991ed15a437d691 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 18:53:07 -0700 Subject: [PATCH 127/186] x86: cpufeature: add Intel features from CPUID and AVX specs Add all Intel CPUID features currently documented in the CPUID spec (AP-485, 241618-032, Dec 2007) and the AVX Programming Reference (319433-003, Aug 2008). Signed-off-by: H. Peter Anvin --- include/asm-x86/cpufeature.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 611da2898b2b..7bd98b724fd5 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -91,6 +91,8 @@ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ #define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ #define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ #define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ @@ -99,11 +101,18 @@ #define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID (4*32+10) /* Context ID */ +#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_AES (4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ @@ -213,7 +222,10 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 From 2798c63e65cb0f05cc12a060b9b0d56ac9523c4d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 21:20:07 -0700 Subject: [PATCH 128/186] x86: : clean up overlong lines, whitespace Clean up overlong lines and stealth whitespace in . Signed-off-by: H. Peter Anvin --- include/asm-x86/cpufeature.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 7bd98b724fd5..7710686c11f9 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -20,7 +20,7 @@ #define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ #define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ #define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ #define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ #define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ #define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ @@ -29,11 +29,12 @@ #define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ #define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ #define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" Supports the CLFLUSH instruction */ +#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ @@ -83,7 +84,7 @@ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ #define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ +#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ @@ -181,7 +182,7 @@ extern const char * const x86_power_flags[32]; } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) From af2e1f276ff08f17192411ea3b71c13a758dfe12 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 22:05:45 -0700 Subject: [PATCH 129/186] x86: cpufeature: fix SMX flag Impact: "smx" flags showed as "safer" in /proc/cpuinfo The SMX feature flag is the so-called "safer mode"... I had put that in quotes, but that flagged it as a cpuinfo flag name. Remove the quotes to correct the flag name in /proc/cpuinfo. Signed-off-by: H. Peter Anvin --- include/asm-x86/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 7710686c11f9..24d99d65741d 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -97,7 +97,7 @@ #define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ #define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ #define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX (4*32+ 6) /* "Safer" mode */ +#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ #define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ #define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ From 1625324d22409e32e3f8eb86018cad72e1c09d61 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 Aug 2008 23:01:16 -0700 Subject: [PATCH 130/186] x86: move dir es7000 to es7000_32.c to be aligned with numaq, summit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/es7000/Makefile | 5 - arch/x86/es7000/es7000.h | 114 ------------------ arch/x86/kernel/Makefile | 1 + .../es7000plat.c => kernel/es7000_32.c} | 85 ++++++++++++- arch/x86/mach-generic/Makefile | 1 - 5 files changed, 85 insertions(+), 121 deletions(-) delete mode 100644 arch/x86/es7000/Makefile delete mode 100644 arch/x86/es7000/es7000.h rename arch/x86/{es7000/es7000plat.c => kernel/es7000_32.c} (78%) diff --git a/arch/x86/es7000/Makefile b/arch/x86/es7000/Makefile deleted file mode 100644 index 3ef8b43b62fc..000000000000 --- a/arch/x86/es7000/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-$(CONFIG_X86_ES7000) := es7000plat.o diff --git a/arch/x86/es7000/es7000.h b/arch/x86/es7000/es7000.h deleted file mode 100644 index 4e62f6fa95b8..000000000000 --- a/arch/x86/es7000/es7000.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) - -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -#endif - -struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; -}; - -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) - -extern int parse_unisys_oem (char *oemptr); -extern void setup_unisys(void); -extern int es7000_start_cpu(int cpu, unsigned long eip); -extern void es7000_sw_apic(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f3312..dac24c4390dd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/x86/es7000/es7000plat.c b/arch/x86/kernel/es7000_32.c similarity index 78% rename from arch/x86/es7000/es7000plat.c rename to arch/x86/kernel/es7000_32.c index 7789fde13c3f..849e5cd485b8 100644 --- a/arch/x86/es7000/es7000plat.c +++ b/arch/x86/kernel/es7000_32.c @@ -39,9 +39,92 @@ #include #include #include -#include "es7000.h" #include +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + /* * ES7000 Globals */ diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 4706de7676b1..6730f4e7c744 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -9,4 +9,3 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o -obj-$(CONFIG_X86_ES7000) += ../../x86/es7000/ From 58f7c98850a226d3fb05b1095af9f7c4ea3507ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Aug 2008 13:52:25 -0700 Subject: [PATCH 131/186] x86: split e820 reserved entries record to late v2 so could let BAR res register at first, or even pnp. v2: insert e820 reserve resources before pnp_system_init Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 20 +++++++++-- arch/x86/pci/i386.c | 79 ++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/e820.h | 1 + 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 291e6cd9f9c0..523d6c5605d1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,13 +1271,15 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ +struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - struct resource *res; u64 end; + struct resource *res; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1291,7 +1293,8 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + insert_resource(&iomem_resource, res); res++; } @@ -1303,6 +1306,19 @@ void __init e820_reserve_resources(void) } } +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + insert_resource(&iomem_resource, res); + res++; + } +} + char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8791fc55e715..40811efaa25a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -31,8 +31,12 @@ #include #include #include +#include #include +#include +#include +#include #include "pci.h" @@ -77,6 +81,77 @@ pcibios_align_resource(void *data, struct resource *res, } EXPORT_SYMBOL(pcibios_align_resource); +static int check_res_with_valid(struct pci_dev *dev, struct resource *res) +{ + unsigned long base; + unsigned long size; + int i; + + base = res->start; + size = (res->start == 0 && res->end == res->start) ? 0 : + (res->end - res->start + 1); + + if (!base || !size) + return 0; + +#ifdef CONFIG_HPET_TIMER + /* for hpet */ + if (base == hpet_address && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } +#endif + +#ifdef CONFIG_X86_IO_APIC + for (i = 0; i < nr_ioapics; i++) { + unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr; + + if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } + } +#endif + +#ifdef CONFIG_PCI_MMCONFIG + for (i = 0; i < pci_mmcfg_config_num; i++) { + unsigned long addr; + + addr = pci_mmcfg_config[i].address; + if (base == addr && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } + } +#endif + + return 0; +} + +static int check_platform(struct pci_dev *dev, struct resource *res) +{ + struct resource *root = NULL; + + /* + * forcibly insert it into the + * resource tree + */ + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + + if (root && check_res_with_valid(dev, res)) { + insert_resource(root, res); + + return 1; + } + + return 0; +} /* * Handle resources of PCI devices. If the world were perfect, we could * just allocate all the resource regions and do nothing more. It isn't. @@ -128,6 +203,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { + if (check_platform(dev, r)) + continue; dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* * Something is wrong with the region. @@ -169,6 +246,8 @@ static void __init pcibios_allocate_resources(int pass) r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { + if (check_platform(dev, r)) + continue; dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index ca433c36af7a..5abbdec06bd2 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -122,6 +122,7 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern u64 e820_hole_size(u64 start, u64 end); extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); extern char *machine_specific_memory_setup(void); From a5444d15b611cf2ffe2bc52aaf11f2ac51882f89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Aug 2008 08:09:23 +0200 Subject: [PATCH 132/186] x86: split e820 reserved entries record to late v4 this one replaces: | commit a2bd7274b47124d2fc4dfdb8c0591f545ba749dd | Author: Yinghai Lu | Date: Mon Aug 25 00:56:08 2008 -0700 | | x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against BAR, v3 v2: insert e820 reserve resources before pnp_system_init v3: fix merging problem in tip/x86/core v4: address Linus's review about comments and condition in _late() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 12 +++++-- arch/x86/pci/i386.c | 80 ++---------------------------------------- 2 files changed, 11 insertions(+), 81 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 523d6c5605d1..a7a71339bfb0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,12 +1271,12 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ -struct resource __initdata *e820_res; +static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - u64 end; struct resource *res; + u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); e820_res = res; @@ -1293,6 +1293,12 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) insert_resource(&iomem_resource, res); res++; @@ -1313,7 +1319,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + if (!res->parent && res->end) insert_resource(&iomem_resource, res); res++; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 40811efaa25a..844df0cbbd3e 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -31,11 +31,8 @@ #include #include #include -#include #include -#include -#include #include #include "pci.h" @@ -81,77 +78,6 @@ pcibios_align_resource(void *data, struct resource *res, } EXPORT_SYMBOL(pcibios_align_resource); -static int check_res_with_valid(struct pci_dev *dev, struct resource *res) -{ - unsigned long base; - unsigned long size; - int i; - - base = res->start; - size = (res->start == 0 && res->end == res->start) ? 0 : - (res->end - res->start + 1); - - if (!base || !size) - return 0; - -#ifdef CONFIG_HPET_TIMER - /* for hpet */ - if (base == hpet_address && (res->flags & IORESOURCE_MEM)) { - dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n", - base, base + size - 1); - return 1; - } -#endif - -#ifdef CONFIG_X86_IO_APIC - for (i = 0; i < nr_ioapics; i++) { - unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr; - - if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) { - dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n", - base, base + size - 1); - return 1; - } - } -#endif - -#ifdef CONFIG_PCI_MMCONFIG - for (i = 0; i < pci_mmcfg_config_num; i++) { - unsigned long addr; - - addr = pci_mmcfg_config[i].address; - if (base == addr && (res->flags & IORESOURCE_MEM)) { - dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n", - base, base + size - 1); - return 1; - } - } -#endif - - return 0; -} - -static int check_platform(struct pci_dev *dev, struct resource *res) -{ - struct resource *root = NULL; - - /* - * forcibly insert it into the - * resource tree - */ - if (res->flags & IORESOURCE_MEM) - root = &iomem_resource; - else if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - - if (root && check_res_with_valid(dev, res)) { - insert_resource(root, res); - - return 1; - } - - return 0; -} /* * Handle resources of PCI devices. If the world were perfect, we could * just allocate all the resource regions and do nothing more. It isn't. @@ -203,8 +129,6 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { - if (check_platform(dev, r)) - continue; dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* * Something is wrong with the region. @@ -246,8 +170,6 @@ static void __init pcibios_allocate_resources(int pass) r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { - if (check_platform(dev, r)) - continue; dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; @@ -306,6 +228,8 @@ void __init pcibios_resource_survey(void) pcibios_allocate_bus_resources(&pci_root_buses); pcibios_allocate_resources(0); pcibios_allocate_resources(1); + + e820_reserve_resources_late(); } /** From fb481dd56adf3c5b0993b8f052cc9ba966e3959d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Sep 2008 13:46:11 +0200 Subject: [PATCH 133/186] x86: drop -funroll-loops for csum_partial_64.c Impact: performance optimization I did some rebenchmarking with modern compilers and dropping -funroll-loops makes the function consistently go faster by a few percent. So drop that flag. Thanks to Richard Guenther for a hint. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/lib/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index aa3fa4119424..55e11aa6d66c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y) lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o - - CFLAGS_csum-partial_64.o := -funroll-loops - lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o From 268364a0f48aee2f851f9d1ef8a6cda0f3039ef1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:02:44 +0200 Subject: [PATCH 134/186] IO resources: add reserve_region_with_split() add reserve_region_with_split() to not lose e820 reserved entries if they overlap with existing IO regions: with test case by extend 0xe0000000 - 0xeffffff to 0xdd800000 - we get: e0000000-efffffff : PCI MMCONFIG 0 e0000000-efffffff : reserved and in /proc/iomem we get: found conflict for reserved [dd800000, efffffff], try to reserve with split __reserve_region_with_split: (PCI Bus #80) [dd000000, ddffffff], res: (reserved) [dd800000, efffffff] __reserve_region_with_split: (PCI Bus #00) [de000000, dfffffff], res: (reserved) [de000000, efffffff] initcall pci_subsys_init+0x0/0x121 returned 0 after 381 msecs in dmesg various fixes and improvements suggested by Linus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 3 ++ kernel/resource.c | 68 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 8d3b7a9afd17..fded376b94e3 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -108,6 +108,9 @@ extern struct resource iomem_resource; extern int request_resource(struct resource *root, struct resource *new); extern int release_resource(struct resource *new); +extern void reserve_region_with_split(struct resource *root, + resource_size_t start, resource_size_t end, + const char *name); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); extern int allocate_resource(struct resource *root, struct resource *new, diff --git a/kernel/resource.c b/kernel/resource.c index 03d796c1b2e9..414d6fc9131e 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -516,6 +516,74 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t return result; } +static void __init __reserve_region_with_split(struct resource *root, + resource_size_t start, resource_size_t end, + const char *name) +{ + struct resource *parent = root; + struct resource *conflict; + struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); + + if (!res) + return; + + res->name = name; + res->start = start; + res->end = end; + res->flags = IORESOURCE_BUSY; + + for (;;) { + conflict = __request_resource(parent, res); + if (!conflict) + break; + if (conflict != parent) { + parent = conflict; + if (!(conflict->flags & IORESOURCE_BUSY)) + continue; + } + + /* Uhhuh, that didn't work out.. */ + kfree(res); + res = NULL; + break; + } + + if (!res) { + printk(KERN_DEBUG " __reserve_region_with_split: (%s) [%llx, %llx], res: (%s) [%llx, %llx]\n", + conflict->name, conflict->start, conflict->end, + name, start, end); + + /* failed, split and try again */ + + /* conflict coverred whole area */ + if (conflict->start <= start && conflict->end >= end) + return; + + if (conflict->start > start) + __reserve_region_with_split(root, start, conflict->start-1, name); + if (!(conflict->flags & IORESOURCE_BUSY)) { + resource_size_t common_start, common_end; + + common_start = max(conflict->start, start); + common_end = min(conflict->end, end); + if (common_start < common_end) + __reserve_region_with_split(root, common_start, common_end, name); + } + if (conflict->end < end) + __reserve_region_with_split(root, conflict->end+1, end, name); + } + +} + +void reserve_region_with_split(struct resource *root, + resource_size_t start, resource_size_t end, + const char *name) +{ + write_lock(&resource_lock); + __reserve_region_with_split(root, start, end, name); + write_unlock(&resource_lock); +} + EXPORT_SYMBOL(adjust_resource); /** From fac8f1e4f99dff7a0c3a929f327d66f46de6fa21 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:59:22 +0200 Subject: [PATCH 135/186] x86: split e820 reserved entries record to late, v7 try to insert_resource second time, by expanding the resource... for case: e820 reserved entry is partially overlapped with bar res... hope it will never happen Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a7a71339bfb0..e24d1bc47b46 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1320,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - insert_resource(&iomem_resource, res); + reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); res++; } } From ebd60cd64f8ab1170102c3ab072eb73042b7a33d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:04:32 +0200 Subject: [PATCH 136/186] x86: unify using pci_mmcfg_insert_resource even with known_bridge insert them late too. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/mmconfig-shared.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index d9635764ce3d..654a2234f8f3 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void) return name != NULL; } -static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) +static void __init pci_mmcfg_insert_resources(void) { #define PCI_MMCFG_RESOURCE_NAME_LEN 19 int i; @@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) cfg->pci_segment); res->start = cfg->address; res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | resource_flags; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); names += PCI_MMCFG_RESOURCE_NAME_LEN; } @@ -434,11 +434,9 @@ static void __init __pci_mmcfg_init(int early) (pci_mmcfg_config[0].address == 0)) return; - if (pci_mmcfg_arch_init()) { - if (known_bridge) - pci_mmcfg_insert_resources(IORESOURCE_BUSY); + if (pci_mmcfg_arch_init()) pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; - } else { + else { /* * Signal not to attempt to insert mmcfg resources because * the architecture mmcfg setup could not initialize. @@ -475,7 +473,7 @@ static int __init pci_mmcfg_late_insert_resources(void) * marked so it won't cause request errors when __request_region is * called. */ - pci_mmcfg_insert_resources(0); + pci_mmcfg_insert_resources(); return 0; } From 5fef55fddb7317585cabc1eae38dbd57f1c59729 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: [PATCH 137/186] x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc3031..18514ed26104 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..a0534c04d38a 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ada50505a5c8..13f8fa16b815 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; From 5031088dbc0cd30e893eb27d53f0e0eee6eb1c00 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: [PATCH 138/186] x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d043..8aab8517642e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* From 3da99c97763703b23cbf2bd6e96252256d4e4617 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: [PATCH 139/186] x86: make (early)_identify_cpu more the same between 32bit and 64 bit 1. add extended_cpuid_level for 32bit 2. add generic_identify for 64bit 3. add early_identify_cpu for 32bit 4. early_identify_cpu not be called by identify_cpu 5. remove early in get_cpu_vendor for 32bit 6. add get_cpu_cap 7. add cpu_detect for 64bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 ++++++++++++------------------- arch/x86/kernel/cpu/common_64.c | 139 ++++++++++++++++++++------------ include/asm-x86/processor.h | 2 +- 3 files changed, 142 insertions(+), 137 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642e..96e1b8698d3a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -96,7 +96,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) unsigned int *v; char *p, *q; - if (cpuid_eax(0x80000000) < 0x80000004) + if (c->extended_cpuid_level < 0x80000004) return 0; v = (unsigned int *) c->x86_model_id; @@ -125,7 +125,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; - n = cpuid_eax(0x80000000); + n = c->extended_cpuid_level; if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); @@ -186,7 +186,7 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -198,8 +198,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; + this_cpu = cpu_devs[i]; return; } } @@ -284,34 +283,30 @@ void __init cpu_detect(struct cpuinfo_x86 *c) } } } -static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - unsigned int ebx; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (have_cpuid_p()) { - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } + u32 ebx; + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; } + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + } } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -321,25 +316,29 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; if (!have_cpuid_p()) return; + c->extended_cpuid_level = 0; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + cpu_detect(c); - get_cpu_vendor(c, 1); + get_cpu_vendor(c); - early_get_cap(c); + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); } /* @@ -373,60 +372,32 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - unsigned int ebx; + if (!have_cpuid_p()) + return; - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); + c->extended_cpuid_level = 0; - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - c->initial_apicid = (ebx >> 24) & 0xFF; + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; #else - c->apicid = c->initial_apicid; + c->apicid = c->initial_apicid; #endif - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } - - init_scattered_cpuid_features(c); - detect_nopl(c); } + + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ + + init_scattered_cpuid_features(c); + detect_nopl(c); } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -651,13 +622,10 @@ void __init early_cpu_init(void) { struct cpu_vendor_dev *cvdev; - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_detect(); - validate_pat_support(&boot_cpu_data); + early_identify_cpu(&boot_cpu_data); } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 699ecbfb63ec..28719fe07941 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -195,6 +195,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) printk(KERN_ERR "CPU: Your system may be unstable.\n"); } c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; } static void __init early_cpu_support_print(void) @@ -249,56 +250,18 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) } } -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -void __init early_cpu_init(void) +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_support_print(); - early_identify_cpu(&boot_cpu_data); -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; @@ -306,17 +269,32 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + if (cap0 & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } +} + + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + u32 ebx; + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -325,8 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[6] = cpuid_ecx(0x80000001); } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ } /* Transmeta-defined flags: level 0x80860001 */ @@ -346,8 +322,26 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +} - detect_nopl(c); +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __init early_identify_cpu(struct cpuinfo_x86 *c) +{ + + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) @@ -356,6 +350,39 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) +{ + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ + + init_scattered_cpuid_features(c); + detect_nopl(c); +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -363,9 +390,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; - early_identify_cpu(c); + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); - init_scattered_cpuid_features(c); + generic_identify(c); c->apicid = phys_pkg_id(0); diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 4df3e2f6fb56..8208cc1debc7 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -77,9 +77,9 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; +#endif /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; -#endif /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; __u32 x86_capability[NCAPINTS]; From 9d31d35b5f9d619bb2482235cc889326de049e29 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: [PATCH 140/186] x86: order functions in cpu/common.c and cpu/common_64.c v2 v2: make 64 bit get c->x86_cache_alignment = c->x86_clfush_size Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 190 ++++++++++++++++++-------------- arch/x86/kernel/cpu/common_64.c | 106 +++++++++--------- 2 files changed, 156 insertions(+), 140 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96e1b8698d3a..10e89ae5a600 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -60,6 +60,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -123,15 +135,15 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ecx, edx, l2size; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24)+(edx>>24); + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -185,6 +197,51 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { @@ -258,7 +315,26 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -void __init cpu_detect(struct cpuinfo_x86 *c) +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -267,19 +343,20 @@ void __init cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; + /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; + c->x86_model += ((tfms >> 16) & 0xf) << 4; if (cap0 & (1<<19)) { - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; } } } @@ -341,6 +418,17 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + /* * The NOPL instruction is supposed to exist on all CPUs with * family >= 6, unfortunately, that's not true in practice because @@ -500,7 +588,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0 ; i < NCAPINTS ; i++) + for (i = 0; i < NCAPINTS; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -529,52 +617,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -592,17 +634,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); + printk(KERN_CONT "%s ", vendor); - if (!c->x86_model_id[0]) - printk("%d86", c->x86); + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); else - printk("%s", c->x86_model_id); + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); + printk(KERN_CONT " stepping %02x\n", c->x86_mask); else - printk("\n"); + printk(KERN_CONT "\n"); } static __init int setup_disablecpuid(char *arg) @@ -618,16 +660,6 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -void __init early_cpu_init(void) -{ - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - - early_identify_cpu(&boot_cpu_data); -} - /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -636,18 +668,6 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) return regs; } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 28719fe07941..522a5f2e405d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,9 +103,8 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; @@ -143,8 +142,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); smp_num_siblings = 1; return; } @@ -182,7 +181,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (cpu_devs[i]) { if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { + !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; this_cpu = cpu_devs[i]; return; @@ -217,39 +216,6 @@ static void __init early_cpu_support_print(void) } } -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -258,6 +224,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); + c->x86 = 4; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -268,12 +235,11 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (cap0 & (1<<19)) + c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; + c->x86_cache_alignment = c->x86_clflush_size; + } } } @@ -283,9 +249,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) u32 tfms, xlvl; u32 ebx; - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; @@ -361,6 +324,39 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; @@ -448,7 +444,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) } -void __cpuinit identify_boot_cpu(void) +void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); } @@ -460,13 +456,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - struct msr_range { unsigned min; unsigned max; @@ -510,6 +499,13 @@ static __init int setup_show_msr(char *arg) } __setup("show_msr=", setup_show_msr); +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) From 10a434fcb23a57c385177a0086955fae01003f64 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:45 +0200 Subject: [PATCH 141/186] x86: remove cpu_vendor_dev 1. add c_x86_vendor into cpu_dev 2. change cpu_devs to static 3. check c_x86_vendor before put that cpu_dev into array 4. remove alignment for 64bit 5. order the sequence in cpu_devs according to link sequence... so could put intel at first, then amd... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 4 +- arch/x86/kernel/cpu/amd.c | 3 +- arch/x86/kernel/cpu/amd_64.c | 4 +- arch/x86/kernel/cpu/centaur.c | 3 +- arch/x86/kernel/cpu/centaur_64.c | 3 +- arch/x86/kernel/cpu/common.c | 71 ++++++++++++++++---------------- arch/x86/kernel/cpu/common_64.c | 71 ++++++++++++++++---------------- arch/x86/kernel/cpu/cpu.h | 18 +++----- arch/x86/kernel/cpu/cyrix.c | 6 ++- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/cpu/intel_64.c | 3 +- arch/x86/kernel/cpu/transmeta.c | 3 +- arch/x86/kernel/cpu/umc.c | 3 +- arch/x86/kernel/vmlinux_32.lds.S | 8 ++-- arch/x86/kernel/vmlinux_64.lds.S | 9 ++-- 15 files changed, 106 insertions(+), 106 deletions(-) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3ede19a4e0b2..403e689df0b8 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,14 +8,14 @@ obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 18514ed26104..d64ea6097ca7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -298,6 +298,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_early_init = early_init_amd, .c_init = init_amd, .c_size_cache = amd_size_cache, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1692b2a41ff..d1c721c0c49f 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -218,7 +218,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_ident = { "AuthenticAMD" }, .c_early_init = early_init_amd, .c_init = init_amd, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); - +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index a0534c04d38a..e5f6d89521bf 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -475,6 +475,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e1..49cfc6d2f2fb 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -29,7 +29,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10e89ae5a600..ee1044ca481d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,7 +75,7 @@ void switch_to_new_gdt(void) static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -93,8 +93,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; static int __init cachesize_setup(char *str) { @@ -250,21 +251,24 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } @@ -315,25 +319,6 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -411,21 +396,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; + + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 522a5f2e405d..b82479c10835 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -66,7 +66,7 @@ void switch_to_new_gdt(void) load_gdt(&gdt_descr); } -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -76,8 +76,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -178,44 +179,28 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -306,21 +291,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; + + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565fe..3cc9d92afd8f 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -21,21 +21,15 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); + int c_x86_vendor; }; -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +#define cpu_dev_register(cpu_devX) \ + static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; -struct cpu_vendor_dev { - int vendor; - struct cpu_dev *cpu_dev; -}; - -#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ - static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ - __attribute__((__section__(".x86cpuvendor.init"))) = \ - { cpu_vendor_id, cpu_dev } - -extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 13f8fa16b815..3f8c7283d816 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -442,14 +442,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, + .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); +cpu_dev_register(cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, + .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); +cpu_dev_register(nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d76..c5ac08124adc 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -303,9 +303,10 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); /* arch_initcall(intel_cpu_init); */ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f0..0a8128a240df 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -90,6 +90,7 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_ident = { "GenuineIntel" }, .c_early_init = early_init_intel, .c_init = init_intel, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8f..7c46e6ecedca 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -102,6 +102,7 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, .c_identify = transmeta_identify, + .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); +cpu_dev_register(transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d75..e777f79e0960 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, + .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); +cpu_dev_register(umc_cpu_dev); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index af5bdad84604..21b4f7eefaab 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - __x86cpuvendor_start = .; - *(.x86cpuvendor.init) - __x86cpuvendor_end = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } SECURITY_INIT . = ALIGN(4); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e88..201e81a91a95 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -168,13 +168,12 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - . = ALIGN(16); - __x86cpuvendor_start = .; - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - *(.x86cpuvendor.init) + __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + *(.x86_cpu_dev.init) } - __x86cpuvendor_end = .; SECURITY_INIT + __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { From a0854a46c5eb82588126421a9cbceb973384a644 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: [PATCH 142/186] x86: make 32bit support show_msr like 64 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 51 ++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ee1044ca481d..a79cf5c52b6a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -616,6 +616,49 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -644,6 +687,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) From 01b2e16a7a9be6573cba5d594d6659b3c6cb46a0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: [PATCH 143/186] x86: make get_mode_name of 64bit the same as 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b82479c10835..f1fb94e766bb 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -83,6 +83,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; + char *p, *q; if (c->extended_cpuid_level < 0x80000004) return 0; @@ -92,6 +93,19 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') + p++; + if (p != q) { + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ + } + return 1; } From 0a488a53d7ca46ac638c30079072c57e50cfcc7b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:47 +0200 Subject: [PATCH 144/186] x86: move 32bit related functions together Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 251 ++++++++++++++++---------------- arch/x86/kernel/cpu/common_64.c | 34 +++-- 2 files changed, 143 insertions(+), 142 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a79cf5c52b6a..2b2c170e8d69 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -58,6 +60,109 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -72,9 +177,6 @@ void switch_to_new_gdt(void) asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); } -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) @@ -95,14 +197,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -133,7 +227,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -150,7 +243,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n < 0x80000006) /* Some chips just has a large L1. */ return; - ecx = cpuid_ecx(0x80000006); + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; /* do processor-specific cache resizing */ @@ -167,48 +260,23 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ + l2size, ecx & 0xFF); } #ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; cpuid(1, &eax, &ebx, &ecx, &edx); - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -225,8 +293,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -236,10 +302,14 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); + } - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); } } #endif @@ -273,52 +343,6 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) this_cpu = &default_cpu; } - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -380,16 +404,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; + c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) return; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->extended_cpuid_level = 0; + cpu_detect(c); get_cpu_vendor(c); @@ -487,31 +511,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - /* * This does the hard work of actually picking apart the CPU stuff... */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f1fb94e766bb..b2da04135326 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -37,6 +37,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -78,7 +80,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -112,7 +113,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ebx, ecx, edx; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; @@ -125,15 +126,17 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_tlbsize = 0; } - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + l2size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -142,14 +145,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -175,6 +177,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); } + out: if ((c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", @@ -182,7 +185,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } - #endif } @@ -405,10 +407,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); generic_identify(c); From 97e4db7c8719f67c52793eeca5ec4df4c3407f2a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:08:59 -0700 Subject: [PATCH 145/186] x86: make detect_ht depend on CONFIG_X86_HT 64-bit has X86_HT set too, so use that instead of SMP. This also removes a include/asm-x86/processor.h ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/common_64.c | 2 +- include/asm-x86/processor.h | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d5a07f0fd24..1f80ce07daa2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -263,9 +263,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) l2size, ecx & 0xFF); } -#ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -311,8 +311,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } -} #endif +} static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index bcb48ce05d23..8e630734e1fd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -141,7 +141,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) void __cpuinit detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index bbbbe1fc5ce1..fc5e961c5b60 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -166,11 +166,7 @@ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; extern void detect_extended_topology(struct cpuinfo_x86 *c); -#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) extern void detect_ht(struct cpuinfo_x86 *c); -#else -static inline void detect_ht(struct cpuinfo_x86 *c) {} -#endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) From f0fc4aff1fa4db1c201593422dcbf85c9897ec4f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:00 -0700 Subject: [PATCH 146/186] x86: make header file the same in arch/x86/kernel/cpu/common_xx.c Make the files more similar in preparation to unification, no code changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 22 +++++++++++++++++++--- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f80ce07daa2..6bbdbc24f2ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,25 +1,41 @@ #include +#include +#include #include +#include +#include +#include +#include +#include #include #include -#include #include -#include -#include #include #include #include +#include #include #include #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include +#include #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include "cpu.h" static struct cpu_dev *this_cpu __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8e630734e1fd..5daec699acf0 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #endif #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include "cpu.h" From 950ad7ff6ec17fc1b47abc95c5c74628eb1adf8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:01 -0700 Subject: [PATCH 147/186] x86: same gdt_page with macro Move the 32-bit and 64-bit gdt_page definitions next to each other, separated with an #ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 17 +++++++++++++++ arch/x86/kernel/cpu/common_64.c | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bbdbc24f2ba..e0ca51f4f2d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -40,6 +40,22 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +#else DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -74,6 +90,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int cachesize_override __cpuinitdata = -1; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 5daec699acf0..b4890504284d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -39,6 +39,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -53,6 +54,42 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, } }; +#else +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +} }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; From ba51dced0b55eb62874e1646d5eeff344c3d4e67 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:02 -0700 Subject: [PATCH 148/186] x86: cpu/common.c, let 64-bit code have 32-bit only functions No effect on 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 +++ arch/x86/kernel/cpu/common_64.c | 111 ++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0ca51f4f2d3..9128ba0c8d33 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -93,6 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -195,6 +196,13 @@ static int __init x86_serial_nr_setup(char *s) return 1; } __setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b4890504284d..40c9d89cc14a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -92,6 +92,117 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, From d5494d4f517158b1d2eea1ae33f6c264eb12675f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:03 -0700 Subject: [PATCH 149/186] x86: cpu/common*.c, make 32-bit have 64-bit only functions Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 ++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 12 +++ 2 files changed, 150 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9128ba0c8d33..dcd3ebd5ba62 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,143 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +#else + /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -757,6 +894,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } +#endif /* * cpu_init() initializes state that is per-CPU. Some data is already diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 40c9d89cc14a..9f2a6ece82dd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -704,6 +704,7 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); @@ -838,6 +839,17 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +#else + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; + return regs; +} +#endif + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT From 1ba76586f778be327e452180d8378e40ee70f066 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:04 -0700 Subject: [PATCH 150/186] x86: cpu/common*.c have same cpu_init(), with copying and #ifdef hard to merge by lines... (as here we have material differences between 32-bit and 64-bit mode) - will try to do it later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 124 ++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 88 ++++++++++++++++++++++- 2 files changed, 211 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dcd3ebd5ba62..f44678db1616 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -901,7 +901,129 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} + +#else + void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -982,3 +1104,5 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif + +#endif diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 9f2a6ece82dd..2bd0ed5abb0a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -855,8 +855,9 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 void __cpuinit cpu_init(void) { int cpu = stack_smp_processor_id(); @@ -974,3 +975,88 @@ void __cpuinit cpu_init(void) if (is_uv_system()) uv_cpu_init(); } + +#else + +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif + +#endif From fab334c1d5f24d23d12f98ad652d399279cd03ce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:05 -0700 Subject: [PATCH 151/186] x86: cpu/common*.c, merge switch_to_new_gdt() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f44678db1616..43d5287bb2a4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -215,7 +215,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2bd0ed5abb0a..d7b996518f86 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -214,6 +214,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; From b9e67f00424e164dcd29391eb48dc941db8691ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:06 -0700 Subject: [PATCH 152/186] x86: cpu/common.c, merge default_init() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 43d5287bb2a4..2c4bfa2e56ad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -224,6 +224,9 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -233,6 +236,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index d7b996518f86..2fda10974813 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -223,7 +223,19 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { From 140fc72709278989f08eb756d16a70008bdcc409 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:07 -0700 Subject: [PATCH 153/186] x86: cpu/common*.c, merge display_cacheinfo() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++++++ arch/x86/kernel/cpu/common_64.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c4bfa2e56ad..f9191207718b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -285,6 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -293,6 +297,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; +#ifdef CONFIG_X86_64 + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -303,6 +310,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2fda10974813..f7a2d524b1e7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -285,8 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -294,7 +296,22 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; + +#ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c, l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if (l2size == 0) + return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; From 1cd78776c7d5487e3000426d0ce1ff203c5d60cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:08 -0700 Subject: [PATCH 154/186] x86: cpu/common*.c, merge detect_ht() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++++++++- arch/x86/kernel/cpu/common_64.c | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f9191207718b..b2018f76c94d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -330,6 +330,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; @@ -346,8 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 + c->phys_proc_id = phys_pkg_id(index_msb); +#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -355,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); +#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); +#endif } out: diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f7a2d524b1e7..7ac0a00743c7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -349,7 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 c->phys_proc_id = phys_pkg_id(index_msb); +#else + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -357,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); +#else + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); +#endif } out: From 5122c890ba969e6efeaba9ed45f5936e62d3c6d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:09 -0700 Subject: [PATCH 155/186] x86: cpu/common.c: merge get_cpu_cap() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 2 ++ 2 files changed, 22 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2018f76c94d..ffc2f5ed09a3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -458,6 +458,26 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[6] = cpuid_ecx(0x80000001); } } + +#ifdef CONFIG_X86_64 + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +#endif } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7ac0a00743c7..75bb7ff99ee3 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -461,6 +461,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_X86_64 /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { @@ -478,6 +479,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#endif } /* Do some early cpuid on the boot CPU to get some parameter that are From 6627d2423067f2c6eedb422a59fba9270a3c5e36 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:10 -0700 Subject: [PATCH 156/186] x86: cpu/common*.c, merge early_identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ffc2f5ed09a3..61a70748213e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -490,7 +490,11 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 75bb7ff99ee3..6475548d64eb 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -482,15 +482,28 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) #endif } -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ +/* + * Do minimum CPU detection early. + * Fields really needed: vendor, cpuid_level, family, model, mask, + * cache alignment. + * The others are not touched to avoid unwanted side effects. + * + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. + */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; + if (!have_cpuid_p()) + return; + memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; From 56f0d033be2ebb983993e5d7f24ae232c9a1e7f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:11 -0700 Subject: [PATCH 157/186] x86: cpu/common*.c: merge print_cpu_info() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6475548d64eb..68b06a39dcac 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -730,8 +730,20 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk(KERN_CONT "%s ", vendor); + if (c->x86_model_id[0]) printk(KERN_CONT "%s", c->x86_model_id); + else + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) printk(KERN_CONT " stepping %02x\n", c->x86_mask); From b89d3b3e2caeab3d895301d1aee3cd2a91eddb79 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:12 -0700 Subject: [PATCH 158/186] x86: cpu/common*.c, merge generic_identify() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 15 ++++++++++++--- arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61a70748213e..f35baa7f3036 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -548,6 +548,10 @@ void __init early_cpu_init(void) * of early VIA chips and (more importantly) broken virtualizers that * are not easy to detect. Hence, probe for it based on first * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { @@ -586,11 +590,16 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; -#else +# else c->apicid = c->initial_apicid; +# endif +#endif + +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; #endif } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 68b06a39dcac..869a6ff9f7dc 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -581,6 +581,9 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + if (!have_cpuid_p()) + return; + c->extended_cpuid_level = 0; cpu_detect(c); @@ -589,11 +592,21 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) get_cpu_cap(c); - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); +# else + c->apicid = c->initial_apicid; +# endif #endif +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +#endif + } + if (c->extended_cpuid_level >= 0x80000004) get_model_name(c); /* Default name */ From 102bbe3ab83c7ac04461d277df23cd5acdb49892 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:13 -0700 Subject: [PATCH 159/186] x86: cpu/common*.c, merge identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 89 ++++++++++++++++-------- arch/x86/kernel/cpu/common_64.c | 116 ++++++++++++++++++++++++-------- 2 files changed, 147 insertions(+), 58 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f35baa7f3036..eef868c97b89 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -104,34 +104,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -197,13 +169,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -620,12 +627,18 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 + c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -644,6 +657,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); +#ifdef CONFIG_X86_64 + c->apicid = phys_pkg_id(0); +#endif + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -677,6 +694,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } +#ifdef CONFIG_X86_64 + detect_ht(c); +#endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -693,24 +714,34 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; +#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); +#endif select_idle_routine(c); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + numa_add_cpu(smp_processor_id()); +#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 enable_sep_cpu(); +#endif mtrr_ap_init(); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 869a6ff9f7dc..6a42371a609f 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,34 +103,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -196,13 +168,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -628,14 +635,35 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (!have_cpuid_p()) { + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + } + generic_identify(c); + if (this_cpu->c_identify) + this_cpu->c_identify(c); + +#ifdef CONFIG_X86_64 c->apicid = phys_pkg_id(0); +#endif /* * Vendor-specific initialization. In this section we @@ -650,7 +678,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* If the model name is still unset, do table lookup. */ + if (!c->x86_model_id[0]) { + char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + +#ifdef CONFIG_X86_64 detect_ht(c); +#endif /* * On SMP, boot_cpu_data holds the common feature set between @@ -669,11 +719,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] &= ~cleared_cpu_caps[i]; #ifdef CONFIG_X86_MCE + /* Init Machine Check Exception if available. */ mcheck_init(c); #endif select_idle_routine(c); -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif @@ -682,12 +733,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 + enable_sep_cpu(); +#endif mtrr_ap_init(); } From 143b604a2d07ff69cc2bc02ecd9395b28e0b5292 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 09:37:15 +0200 Subject: [PATCH 160/186] x86: cpu/common*.c, merge whitespaces Merge leftover whitespaces, to make arch/x86/kernel/cpu/common_64.c exactly identical to arch/x86/kernel/cpu/common.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6a42371a609f..eef868c97b89 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -26,6 +26,7 @@ #include #include #endif + #include #include #include @@ -280,7 +281,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -307,7 +307,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); #else - /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -334,6 +333,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; @@ -443,7 +443,6 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } - static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -452,7 +451,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -488,7 +486,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #endif } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -500,7 +497,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else @@ -722,12 +718,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_init(c); #endif + select_idle_routine(c); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif - } void __init identify_boot_cpu(void) From f5017cfa3591d8eaf5c792e40ff30f18e498b68c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:14 -0700 Subject: [PATCH 161/186] x86: use cpu/common.c on 64 bit Use cpu/common.c on both 64-bit and 32-bit and remove cpu/common_64.c. We started out with this linecount: 816 arch/x86/kernel/cpu/common_64.c 805 arch/x86/kernel/cpu/common.c and the resulting common.c is 1197 lines long, so there's already 424 lines of code eliminated in this phase of the unification. Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 6 +- arch/x86/kernel/cpu/common_64.c | 1197 ------------------------------- 2 files changed, 3 insertions(+), 1200 deletions(-) delete mode 100644 arch/x86/kernel/cpu/common_64.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 403e689df0b8..d031f248dfc0 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,10 +3,10 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o capflags.o powerflags.o +obj-y += proc.o capflags.o powerflags.o common.o -obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o -obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index eef868c97b89..000000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null @@ -1,1197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -static struct cpu_dev *this_cpu __cpuinitdata; - -#ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -#else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; -#endif -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif -} - -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while (*p == ' ') - p++; - if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx, l2size; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - l2size = ecx >> 16; - -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if (l2size == 0) - return; /* Again, no L2 cache is possible */ -#endif - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif - } - -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; - } - } - - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; - } - } -} - -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - u32 ebx; - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } - -#ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -#endif -} -/* - * Do minimum CPU detection early. - * Fields really needed: vendor, cpuid_level, family, model, mask, - * cache alignment. - * The others are not touched to avoid unwanted side effects. - * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. - */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - if (!have_cpuid_p()) - return; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - validate_pat_support(c); -} - -void __init early_cpu_init(void) -{ - struct cpu_dev **cdev; - int count = 0; - - printk("KERNEL supported cpus:\n"); - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; - unsigned int j; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } - - early_identify_cpu(&boot_cpu_data); -} - -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -{ - if (!have_cpuid_p()) - return; - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif - -#ifdef CONFIG_X86_HT - c->phys_proc_id = c->initial_apicid; -#endif - } - - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ - - init_scattered_cpuid_features(c); - detect_nopl(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 - c->x86_coreid_bits = 0; - c->x86_clflush_size = 64; -#else - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - if (this_cpu->c_identify) - this_cpu->c_identify(c); - -#ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); -#endif - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* If the model name is still unset, do table lookup. */ - if (!c->x86_model_id[0]) { - char *p; - p = table_lookup_model(c); - if (p) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif - - select_idle_routine(c); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - numa_add_cpu(smp_processor_id()); -#endif -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -#ifdef CONFIG_X86_32 - sysenter_setup(); - enable_sep_cpu(); -#endif -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); -#ifdef CONFIG_X86_32 - enable_sep_cpu(); -#endif - mtrr_ap_init(); -} - -struct msr_range { - unsigned min; - unsigned max; -}; - -static struct msr_range msr_range_array[] __cpuinitdata = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __cpuinit print_cpu_msr(void) -{ - unsigned index; - u64 val; - int i; - unsigned index_min, index_max; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr __cpuinitdata; -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk(KERN_CONT "%s ", vendor); - - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - else - printk(KERN_CONT "%d86", c->x86); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); - -#ifdef CONFIG_SMP - if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -#ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#else - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->fs = __KERNEL_PERCPU; - return regs; -} -#endif - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit - */ -#ifdef CONFIG_X86_64 -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - if (cpu != 0 && x2apic) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_sp0(t, thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); - - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - - xsave_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif - -#endif From bd220a24a9263eaa70d5e6821383085950b5a13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Sep 2008 00:58:28 -0700 Subject: [PATCH 162/186] x86: move nonx_setup etc from common.c to init_64.c like 32 bit put it in init_32.c Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 54 ------------------------------------ arch/x86/mm/init_64.c | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eef868c97b89..286c89a991bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -847,51 +847,6 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - void pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -957,15 +912,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - unsigned long kernel_eflags; /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d3746efb060d..9c750d6307b0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -88,6 +88,60 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + /* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. From deed05b7c017e49473e8c386efba196410fd18b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 10:23:26 +0200 Subject: [PATCH 163/186] x86, init_64.c: cleanup Clean up comments. Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9c750d6307b0..1f6806b62eb8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -93,12 +93,13 @@ EXPORT_SYMBOL_GPL(__supported_pte_mask); static int do_not_nx __cpuinitdata; -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ +/* + * noexec=on|off + * Control non-executable mappings for 64-bit processes. + * + * on Enable (default) + * off Disable + */ static int __init nonx_setup(char *str) { if (!str) @@ -125,13 +126,14 @@ void __cpuinit check_efer(void) int force_personality32; -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ +/* + * noexec32=on|off + * Control non executable heap for 32bit processes. + * To control the stack too use noexec=off + * + * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) + * off PROT_READ implies PROT_EXEC + */ static int __init nonx32_setup(char *str) { if (!strcmp(str, "on")) From 1b05d60d60e81c6594da8298107a05b506f01797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:27 -0700 Subject: [PATCH 164/186] x86: remove duplicated get_model_name() calling Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 3 --- arch/x86/kernel/cpu/amd_64.c | 3 +-- arch/x86/kernel/cpu/centaur.c | 1 - arch/x86/kernel/cpu/common.c | 9 +++------ arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/transmeta.c | 1 - 7 files changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d64ea6097ca7..e0ba2c7c5a18 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -42,7 +42,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; #ifdef CONFIG_SMP unsigned long long value; @@ -75,8 +74,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ clear_cpu_cap(c, 0*32+31); - r = get_model_name(c); - switch (c->x86) { case 4: /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1c721c0c49f..c5fbf7477ead 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -157,8 +157,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - level = get_model_name(c); - if (!level) { + if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: /* Should distinguish Models here, but this is only diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5f6d89521bf..89bfdd9cacc6 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); - get_model_name(c); display_cacheinfo(c); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 286c89a991bd..a8b9b7242428 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -252,13 +252,13 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -int __cpuinit get_model_name(struct cpuinfo_x86 *c) +static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; if (c->extended_cpuid_level < 0x80000004) - return 0; + return; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -277,8 +277,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } - - return 1; } void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) @@ -610,8 +608,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #endif } - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ + get_model_name(c); /* Default name */ init_scattered_cpuid_features(c); detect_nopl(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3cc9d92afd8f..de4094a39210 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -31,7 +31,6 @@ struct cpu_dev { extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; -extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3f8c7283d816..ffd0f5ed071a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); - get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 7c46e6ecedca..738e03244f95 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ From e3224234717b4228c235cee401af89212f17a3a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:28 -0700 Subject: [PATCH 165/186] x86, cpu init: call early_init_xxx in init_xxx so we: 1. could set some cap to ap 2. restore some cap after memset in identify_cpu for boot cpu esp for CONSTANT_TSC this matters, as: before this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs after this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs so constant_tsc is back... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 ++----- arch/x86/kernel/cpu/amd_64.c | 2 ++ arch/x86/kernel/cpu/centaur_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/cpu/intel_64.c | 2 ++ 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e0ba2c7c5a18..c3175da7bc69 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,11 +26,8 @@ __asm__(".align 4\nvide: ret"); static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - } + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); /* Set MTRR capability flag if appropriate */ if (c->x86_model == 13 || c->x86_model == 9 || diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c5fbf7477ead..8c2d07f06f1d 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -140,6 +140,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 49cfc6d2f2fb..0e5cf17a3c89 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,6 +16,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { + early_init_centaur(c); + if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8b9b7242428..e8045c4ef1c1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -473,9 +473,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); @@ -483,6 +480,10 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } #endif + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 0c0a58dfe099..14a2cd98d480 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,6 +54,8 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + early_init_intel(c); + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); From 2d9cd6c27f00958a31622b8e9909d5e35f069b28 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 13:15:04 +0100 Subject: [PATCH 166/186] x86-64: add two __cpuinit annotations Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e8045c4ef1c1..5cde4b18448c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -845,7 +845,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -void pda_init(int cpu) +void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 726a5fcdf341..4b031a4ac856 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -860,7 +860,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -static void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_remove_device(unsigned int cpu) { int i; From 11fdd252bb5b461289fc5c21dc8fc87dc66f3284 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:50 -0700 Subject: [PATCH 167/186] x86: cpu make amd.c more like amd_64.c v2 1. make 32bit have early_init_amd_mc and amd_detect_cmp 2. seperate init_amd_k5/k6/k7 ... v2: fix compiling for !CONFIG_SMP Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 412 ++++++++++++++++++++--------------- arch/x86/kernel/cpu/amd_64.c | 17 +- arch/x86/kernel/cpu/common.c | 2 +- include/asm-x86/processor.h | 2 +- 4 files changed, 244 insertions(+), 189 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c3175da7bc69..a3a9e3cbdea9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,200 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits; + + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; +#endif +} + static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { + early_init_amd_mc(c); + if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); @@ -37,9 +229,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - #ifdef CONFIG_SMP unsigned long long value; @@ -50,7 +239,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -73,192 +262,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; + init_amd_k5(c); + break; case 5: - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - break; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; + init_amd_k6(c); + break; case 6: /* An Athlon/Duron */ - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_cpu_cap(c, X86_FEATURE_K8); + init_amd_k7(c); break; - case 6: - set_cpu_cap(c, X86_FEATURE_K7); - break; - } - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - display_cacheinfo(c); - - if (cpuid_eax(0x80000000) >= 0x80000008) - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distinguish the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; - - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif - - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; } /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); - if (cpu_has_xmm2) + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + detect_ht(c); + + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } + + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 8c2d07f06f1d..7913e48f6739 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -174,17 +174,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } if (c->x86 >= 0xf && c->x86 <= 0x11) set_cpu_cap(c, X86_FEATURE_K8); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } if (c->x86 == 0x10) { /* do this for boot cpu */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5cde4b18448c..9a57106c1995 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -629,8 +629,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else c->cpuid_level = -1; /* CPUID not detected */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index fc5e961c5b60..62531ecbde16 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; +#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; -#endif /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ From c58606ad551e9a1c85a9bd4f1698ca41ec279b2c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:51 -0700 Subject: [PATCH 168/186] x86: remove duplicated force_mwait Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 2 -- arch/x86/kernel/process.c | 1 - 2 files changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7913e48f6739..466a1eaa81ea 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -9,8 +9,6 @@ #include "cpu.h" -int force_mwait __cpuinitdata; - #ifdef CONFIG_NUMA static int __cpuinit nearby_node(int apicid) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..9f94bb1c8117 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,7 +15,6 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; -static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { From 2b86473604f6e9aef0b5e89c56798a90ccddcdbe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:52 -0700 Subject: [PATCH 169/186] x86: add srat_detect_node for amd64 separate that from amd_detect_cmp() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 466a1eaa81ea..20c3f12dfe78 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -36,19 +36,23 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif + bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; +#endif +} +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ #ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; @@ -76,7 +80,6 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif -#endif } static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) @@ -169,8 +172,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) From 8d71a2ea0ad4ef9b9076ffd44726bad1f0ccf59b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:53 -0700 Subject: [PATCH 170/186] x86: merge header in amd_64.c Singed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 8 ++++++++ arch/x86/kernel/cpu/amd_64.c | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a3a9e3cbdea9..3c8090d10053 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,11 +1,19 @@ #include #include #include + #include #include #include +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif + #include + #include "cpu.h" /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 20c3f12dfe78..1d83601e85ef 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -1,9 +1,16 @@ #include +#include #include -#include -#include -#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif #include From 6c62aa4a3c12989a1f1fcbbe6f1ee5d4de4b2300 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:54 -0700 Subject: [PATCH 171/186] x86: make amd.c have 64bit support code Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 137 +++++++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3c8090d10053..32e73520adf7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -16,6 +16,7 @@ #include "cpu.h" +#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -177,6 +178,26 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K7); } +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. @@ -196,6 +217,42 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -226,13 +283,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -256,18 +319,31 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + u32 level; + + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + switch (c->x86) { case 4: init_amd_k5(c); @@ -283,7 +359,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); +#endif + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); @@ -300,10 +378,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } +#ifdef CONFIG_X86_32 detect_ht(c); +#endif if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) @@ -319,8 +401,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } + +#ifdef CONFIG_X86_64 + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -333,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -349,9 +463,10 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_size_cache = amd_size_cache, .c_x86_vendor = X86_VENDOR_AMD, }; From 2a02505055fdd44958efd0e140dd87cb9fdecaf1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:55 -0700 Subject: [PATCH 172/186] x86: make amd_64 have 32 bit code Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 259 +++++++++++++++++++++++++++++++++-- 1 file changed, 247 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1d83601e85ef..32e73520adf7 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -16,7 +16,171 @@ #include "cpu.h" -#ifdef CONFIG_NUMA +#ifdef CONFIG_X86_32 +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) static int __cpuinit nearby_node(int apicid) { int i, node; @@ -41,7 +205,7 @@ static int __cpuinit nearby_node(int apicid) */ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits; bits = c->x86_coreid_bits; @@ -55,7 +219,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; unsigned apicid = hard_smp_processor_id(); @@ -91,7 +255,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits, ecx; /* Multi core CPU? */ @@ -112,7 +276,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) } c->x86_coreid_bits = bits; - #endif } @@ -124,15 +287,21 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - unsigned level; - #ifdef CONFIG_SMP - unsigned long value; + unsigned long long value; /* * Disable TLB flush filter by setting HWCR.FFDIS on K8 @@ -142,26 +311,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 122 for all steppings (F+ have it disabled by default) */ if (c->x86 == 0xf) { - rdmsrl(MSR_K8_HWCR, value); + rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); + wrmsrl(MSR_K7_HWCR, value); } #endif early_init_amd(c); - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 /* On C+ stepping K8 rep microcode works well for copy/memset */ if (c->x86 == 0xf) { + u32 level; + level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + switch (c->x86) { + case 4: + init_amd_k5(c); + break; + case 5: + init_amd_k6(c); + break; + case 6: /* An Athlon/Duron */ + init_amd_k7(c); + break; + } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); +#endif /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) @@ -176,6 +374,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } } + display_cacheinfo(c); /* Multi core CPU? */ @@ -184,6 +383,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; @@ -199,6 +402,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } +#ifdef CONFIG_X86_64 if (c->x86 == 0x10) { /* do this for boot cpu */ if (c == &boot_cpu_data) @@ -225,11 +429,42 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_memory_4k((unsigned long)__va(tseg), 1); } } +#endif } +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; +} +#endif + static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, From ff73152ced60871f7d5fb7dee52fa499902a3c6d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:56 -0700 Subject: [PATCH 173/186] x86: make 64 bit to use amd.c arch/x86/kernel/cpu/amd.c is now 100% identical to arch/x86/kernel/cpu/amd_64.c, so use amd.c on 64-bit too and fix up the namespace impact. Simplify the Kconfig glue as well. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 10 +- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/amd_64.c | 473 ----------------------------------- 3 files changed, 2 insertions(+), 484 deletions(-) delete mode 100644 arch/x86/kernel/cpu/amd_64.c diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6156ac25ff8c..ab77d409fee0 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -444,17 +444,9 @@ config CPU_SUP_CYRIX_32 help This enables extended support for Cyrix processors -config CPU_SUP_AMD_32 +config CPU_SUP_AMD default y bool "Support AMD processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for AMD processors - -config CPU_SUP_AMD_64 - default y - bool "Support AMD processors" if PROCESSOR_SELECT - depends on 64BIT help This enables extended support for AMD processors diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d031f248dfc0..510d1bcb058a 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,8 +10,7 @@ obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o -obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o -obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index 32e73520adf7..000000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null @@ -1,473 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_X86_64 -# include -# include -# include -#endif - -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_32 -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); -} -#endif - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; -#endif -} - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - int cpu = smp_processor_id(); - int node; - unsigned apicid = hard_smp_processor_id(); - - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned long long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - early_init_amd(c); - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } - -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - - if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} -#endif - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_size_cache = amd_size_cache, -#endif - .c_early_init = early_init_amd, - .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, -}; - -cpu_dev_register(amd_cpu_dev); From f69feff720497237ae9dd2f4604921bd3080c421 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:58 -0700 Subject: [PATCH 174/186] x86: little clean up of intel.c/intel_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- arch/x86/kernel/cpu/intel_64.c | 8 +++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 959417b8cd64..4a8ac9d4ff50 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -76,7 +76,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -183,7 +183,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); detect_ht(c); } @@ -210,9 +210,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) { + if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); - } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 14a2cd98d480..aef4f2826313 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -71,17 +71,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) ds_init_intel(c); + } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) From 185f3b9da24c09c26b784591ed354fe57998a7b1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:35 -0700 Subject: [PATCH 175/186] x86: make intel.c have 64-bit support code prepare for unification. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 119 +++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a66989586a84..365a008080c2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,11 @@ #include #include +#ifdef CONFIG_X86_64 +#include +#include +#endif + #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC @@ -25,14 +30,20 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * @@ -73,6 +84,40 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) } + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif +#endif + +static void __cpuinit srat_detect_node(void) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + /* * find out the number of processor cores on the die */ @@ -91,20 +136,6 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -139,6 +170,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); @@ -176,18 +208,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); - detect_ht(c); - } - - /* Work around errata */ Intel_errata_workarounds(c); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -206,14 +226,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) movsl_mask.mask = 7; break; } +#endif + #endif if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); @@ -224,6 +242,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + if (cpu_has_bts) ptrace_bts_init_intel(c); @@ -240,8 +269,25 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif +#endif + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + + /* Work around errata */ + srat_detect_node(); } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* @@ -254,10 +300,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i size = 256; return size; } +#endif static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = { @@ -307,13 +355,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, - .c_size_cache = intel_size_cache, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); -/* arch_initcall(intel_cpu_init); */ - From 58602c1681bdfa1a0deaa5574b8a72d6e30c0e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:36 -0700 Subject: [PATCH 176/186] x86: make intel_64.c the same as intel.c No change in functionality intended - this only adds the 32-bit side. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_64.c | 305 +++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index aef4f2826313..365a008080c2 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -1,42 +1,108 @@ #include +#include + +#include +#include #include +#include +#include + #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_X86_64 #include #include +#endif #include "cpu.h" +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) + (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 +/* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + return 1; + } + return 0; +} + + /* - * find out the number of processor cores on the die + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { - unsigned int eax, t; + unsigned long lo, hi; - if (c->cpuid_level < 4) - return 1; - - cpuid_count(4, 0, &eax, &t, &t, &t); - - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } } + + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif +#endif + static void __cpuinit srat_detect_node(void) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = hard_smp_processor_id(); @@ -52,11 +118,51 @@ static void __cpuinit srat_detect_node(void) #endif } +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + unsigned int l2 = 0; + char *p = NULL; + early_init_intel(c); - init_intel_cacheinfo(c); +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -64,8 +170,70 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * Names for the Pentium II/Celeron processors + * detectable only by also checking the cache size. + * Dixon is NOT a Celeron. + */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if (p) + strcpy(c->x86_model_id, p); + + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + +#endif + + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { - unsigned int l1, l2; + unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -74,26 +242,125 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + + if (cpu_has_bts) + ptrace_bts_init_intel(c); + + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +#endif detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + /* Work around errata */ srat_detect_node(); } +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* + * Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} +#endif + static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); + From 879d792b66d633bbe466974f61d1acc9aa8c78eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:37 -0700 Subject: [PATCH 177/186] x86: let intel 64-bit use intel.c now that arch/x86/kernel/cpu/intel_64.c and arch/x86/kernel/cpu/intel.c are equal, drop arch/x86/kernel/cpu/intel_64.c and fix up the glue. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 10 +- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/intel_64.c | 366 --------------------------------- include/asm-x86/bugs.h | 2 +- 4 files changed, 3 insertions(+), 378 deletions(-) delete mode 100644 arch/x86/kernel/cpu/intel_64.c diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 1b29d6a87563..6761848329fa 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -423,17 +423,9 @@ menuconfig PROCESSOR_SELECT This lets you choose what x86 vendor support code your kernel will include. -config CPU_SUP_INTEL_32 +config CPU_SUP_INTEL default y bool "Support Intel processors" if PROCESSOR_SELECT - depends on !64BIT - help - This enables extended support for Intel processors - -config CPU_SUP_INTEL_64 - default y - bool "Support Intel processors" if PROCESSOR_SELECT - depends on 64BIT help This enables extended support for Intel processors diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 510d1bcb058a..7f0b45a5d788 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,8 +8,7 @@ obj-y += proc.o capflags.o powerflags.o common.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 365a008080c2..000000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null @@ -1,366 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_64 -#include -#include -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -#endif -} - -#ifdef CONFIG_X86_32 -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - - -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif -#endif - -static void __cpuinit srat_detect_node(void) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - - early_init_intel(c); - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * Names for the Pentium II/Celeron processors - * detectable only by also checking the cache size. - * Dixon is NOT a Celeron. - */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#endif - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); - - if (cpu_has_bts) - ptrace_bts_init_intel(c); - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif -#endif - - detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - - /* Work around errata */ - srat_detect_node(); -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* - * Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} -#endif - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_size_cache = intel_size_cache, -#endif - .c_early_init = early_init_intel, - .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, -}; - -cpu_dev_register(intel_cpu_dev); - diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index ae514c76a96f..dc604985f2ad 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -3,7 +3,7 @@ extern void check_bugs(void); -#ifdef CONFIG_CPU_SUP_INTEL_32 +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else static inline int ppro_with_ram_bug(void) { return 0; } From 4052704d92c3172ebc13cc0cc8df8ba2bd446a5c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:38 -0700 Subject: [PATCH 178/186] x86: intel.c put workaround for old cpus together consolidate the code some more. No change in functionality intended. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 206 ++++++++++++++++++------------------ 1 file changed, 105 insertions(+), 101 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 365a008080c2..5f76bf139fda 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -63,28 +63,6 @@ int __cpuinit ppro_with_ram_bug(void) return 0; } - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - - #ifdef CONFIG_X86_F00F_BUG static void __cpuinit trap_init_f00f_bug(void) { @@ -98,6 +76,88 @@ static void __cpuinit trap_init_f00f_bug(void) load_idt(&idt_descr); } #endif + +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + /* + * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until + * model 3 mask 3 + */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } + + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +} +#else +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +{ +} #endif static void __cpuinit srat_detect_node(void) @@ -139,28 +199,10 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; - char *p = NULL; early_init_intel(c); -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif + intel_workarounds(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { @@ -170,17 +212,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); + } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + char *p = NULL; + switch (c->x86_model) { case 5: if (c->x86_mask == 0) { @@ -203,51 +260,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Celeron (Coppermine)"; break; } + + if (p) + strcpy(c->x86_model_id, p); } - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#endif - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) @@ -256,19 +273,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ptrace_bts_init_intel(c); - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif #endif detect_extended_topology(c); From ec70cae8698632917f06110fdb70c6364281ecc6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:39 -0700 Subject: [PATCH 179/186] x86: centaur_64.c remove duplicated setting of CONSTANT_TSC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 0e5cf17a3c89..a1625f5a1e78 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -20,7 +20,6 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_REP_GOOD); } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); From 315a6558f30a264c88274fa70626615d1c7851c7 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 9 Sep 2008 14:54:53 +0800 Subject: [PATCH 180/186] x86: move VMX MSRs to msr-index.h They are hardware specific MSRs, and we would use them in virtualization feature detection later. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- arch/x86/kvm/vmx.h | 15 --------------- include/asm-x86/msr-index.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 425a13436b3f..b32d4e5b123d 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -331,21 +331,6 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define MSR_IA32_VMX_BASIC 0x480 -#define MSR_IA32_VMX_PINBASED_CTLS 0x481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x482 -#define MSR_IA32_VMX_EXIT_CTLS 0x483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x484 -#define MSR_IA32_VMX_MISC 0x485 -#define MSR_IA32_VMX_CR0_FIXED0 0x486 -#define MSR_IA32_VMX_CR0_FIXED1 0x487 -#define MSR_IA32_VMX_CR4_FIXED0 0x488 -#define MSR_IA32_VMX_CR4_FIXED1 0x489 -#define MSR_IA32_VMX_VMCS_ENUM 0x48a -#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b -#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c - -#define MSR_IA32_FEATURE_CONTROL 0x3a #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 3052f058ab06..0bb43301a202 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -176,6 +176,7 @@ #define MSR_IA32_TSC 0x00000010 #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) @@ -310,4 +311,19 @@ /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c + #endif /* ASM_X86__MSR_INDEX_H */ From e38e05a85828dac23540cd007df5f20985388afc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 10 Sep 2008 18:53:34 +0800 Subject: [PATCH 181/186] x86: extended "flags" to show virtualization HW feature in /proc/cpuinfo The hardware virtualization technology evolves very fast. But currently it's hard to tell if your CPU support a certain kind of HW technology without digging into the source code. The patch add a new catagory in "flags" under /proc/cpuinfo. Now "flags" can indicate the (important) HW virtulization features the CPU supported as well. Current implementation just cover Intel VMX side. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 41 ++++++++++++++++++++++++++++++++++++ include/asm-x86/cpufeature.h | 9 +++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5f76bf139fda..99468dbd08da 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -196,6 +196,44 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + /* Intel VMX MSR indicated features */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + clear_cpu_cap(c, X86_FEATURE_VNMI); + clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + clear_cpu_cap(c, X86_FEATURE_EPT); + clear_cpu_cap(c, X86_FEATURE_VPID); + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -289,6 +327,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(); + + if (cpu_has(c, X86_FEATURE_VMX)) + detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 7ac4d93d20ed..8d45690bef5f 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -6,7 +6,7 @@ #include -#define NCAPINTS 8 /* N 32-bit words worth of info */ +#define NCAPINTS 9 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -150,6 +150,13 @@ */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ +/* Virtualization flags: Linux defined */ +#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include From aef93c8bd506a78983d91cd576a97fee6e934ac1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:15 -0700 Subject: [PATCH 182/186] x86: identify_cpu_without_cpuid v2 Krzysztof found some old cyrix cpu where an mtrr-alike cpu feature was not detected properly. this one is based on Krzysztof' patch, and we call ->c_identify() in early_identify_cpu. need to call c_identify() for cpus without cpuid even earlier ... v2: Krzysztof point out need to give cyrix another chance about cpuid checking again, after ->c_identify() enables cpuid for it Signed-off-by: Yinghai Lu Cc: Krzysztof Helt Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 59 +++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9a57106c1995..f5f25b85be95 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -485,6 +485,33 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = cpuid_edx(0x80000007); } + +static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + int i; + + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + + for (i = 0; i < X86_VENDOR_NUM; i++) + if (cpu_devs[i] && cpu_devs[i]->c_identify) { + c->x86_vendor_id[0] = 0; + cpu_devs[i]->c_identify(c); + if (c->x86_vendor_id[0]) { + get_cpu_vendor(c); + break; + } + } +#endif +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -503,13 +530,16 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - if (!have_cpuid_p()) - return; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; + cpu_detect(c); get_cpu_vendor(c); @@ -583,11 +613,15 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - if (!have_cpuid_p()) - return; - c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; + cpu_detect(c); get_cpu_vendor(c); @@ -639,17 +673,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - generic_identify(c); if (this_cpu->c_identify) From afae865613c8292e8bdcce4f0b161988d761f033 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:16 -0700 Subject: [PATCH 183/186] x86: move transmeta cap read to early_init_transmeta() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 -------- arch/x86/kernel/cpu/transmeta.c | 28 +++++++++++++++------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f5f25b85be95..3e2ce4d33d3a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -465,14 +465,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 738e03244f95..52b3fefbd5af 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -5,6 +5,18 @@ #include #include "cpu.h" +static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; @@ -12,6 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; + early_init_transmeta(c); + display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -84,23 +98,11 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_early_init = early_init_transmeta, .c_init = init_transmeta, - .c_identify = transmeta_identify, .c_x86_vendor = X86_VENDOR_TRANSMETA, }; From a9853dd6d285c30a3ddeb3cce8c05e1678400bef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:46:58 +0200 Subject: [PATCH 184/186] x86: cpuid, fix typo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e2ce4d33d3a..cef3519b3bbb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -529,7 +529,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); @@ -611,7 +611,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); From 97fc0555dae8f4d437c8672c14d7191962429be4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 16 Sep 2008 15:09:26 -0700 Subject: [PATCH 185/186] x86 setup: handle more than 8 CPU flag words Checkin e38e05a85828dac23540cd007df5f20985388afc added a 9th CPU flag word, but didn't adjust the boot code to match. This patch adds the necessary boot code support. Note: due to a typo in an #if statement, it didn't trigger the #error this was supposed to do. Signed-off-by: H. Peter Anvin --- arch/x86/boot/cpu.c | 17 +++++++++-------- arch/x86/boot/mkcpustr.c | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 75298fe2edca..6ec6bb6e9957 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -59,17 +59,18 @@ int validate_cpu(void) u32 e = err_flags[i]; for (j = 0; j < 32; j++) { - int n = (i << 5)+j; - if (*msg_strs < n) { + if (msg_strs[0] < i || + (msg_strs[0] == i && msg_strs[1] < j)) { /* Skip to the next string */ - do { - msg_strs++; - } while (*msg_strs); - msg_strs++; + msg_strs += 2; + while (*msg_strs++) + ; } if (e & 1) { - if (*msg_strs == n && msg_strs[1]) - printf("%s ", msg_strs+1); + if (msg_strs[0] == i && + msg_strs[1] == j && + msg_strs[2]) + printf("%s ", msg_strs+2); else printf("%d:%d ", i, j); } diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 4589caa3e9d1..8ef60f20b371 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -17,31 +17,31 @@ #include "../kernel/cpu/capflags.c" -#if NCAPFLAGS > 8 -# error "Need to adjust the boot code handling of CPUID strings" -#endif - int main(void) { - int i; + int i, j; const char *str; printf("static const char x86_cap_strs[] = \n"); - for (i = 0; i < NCAPINTS*32; i++) { - str = x86_cap_flags[i]; + for (i = 0; i < NCAPINTS; i++) { + for (j = 0; j < 32; j++) { + str = x86_cap_flags[i*32+j]; - if (i == NCAPINTS*32-1) { - /* The last entry must be unconditional; this - also consumes the compiler-added null character */ - if (!str) - str = ""; - printf("\t\"\\x%02x\"\"%s\"\n", i, str); - } else if (str) { - printf("#if REQUIRED_MASK%d & (1 << %d)\n" - "\t\"\\x%02x\"\"%s\\0\"\n" - "#endif\n", - i >> 5, i & 31, i, str); + if (i == NCAPINTS-1 && j == 31) { + /* The last entry must be unconditional; this + also consumes the compiler-added null + character */ + if (!str) + str = ""; + printf("\t\"\\x%02x\\x%02x\"\"%s\"\n", + i, j, str); + } else if (str) { + printf("#if REQUIRED_MASK%d & (1 << %d)\n" + "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n" + "#endif\n", + i, j, i, j, str); + } } } printf("\t;\n"); From 43603c8df97f246e8be7b9cc92a8f968a85108bd Mon Sep 17 00:00:00 2001 From: Hans Schou Date: Thu, 9 Oct 2008 20:47:24 +0200 Subject: [PATCH 186/186] x86, debug: print more information about unknown CPUs Write the name of the unknown vendor_id to output instead of just "unknown". Tag changed to 'vendor_id' as used in /proc/cpuinfo Signed-off-by: Hans Schou Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cef3519b3bbb..84c4040efa80 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -406,7 +406,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); printk(KERN_ERR "CPU: Your system may be unstable.\n"); }