linux/arch/ia64/mm/tlb.c

591 lines
14 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* TLB support routines.
*
* Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 08/02/00 A. Mallick <asit.k.mallick@intel.com>
* Modified RID allocation for SMP
* Goutham Rao <goutham.rao@intel.com>
* IPI based ptc implementation and A-step IPI implementation.
* Rohit Seth <rohit.seth@intel.com>
* Ken Chen <kenneth.w.chen@intel.com>
* Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
* Copyright (C) 2007 Intel Corp
* Fenghua Yu <fenghua.yu@intel.com>
* Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
mm: remove include/linux/bootmem.h Move remaining definitions and declarations from include/linux/bootmem.h into include/linux/memblock.h and remove the redundant header. The includes were replaced with the semantic patch below and then semi-automated removal of duplicated '#include <linux/memblock.h> @@ @@ - #include <linux/bootmem.h> + #include <linux/memblock.h> [sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au [sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au [sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal] Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:09:49 +08:00
#include <linux/memblock.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <asm/delay.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/tlb.h>
static struct {
u64 mask; /* mask of supported purge page-sizes */
unsigned long max_bits; /* log2 of largest supported purge page-size */
} purge;
struct ia64_ctx ia64_ctx = {
.lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
.next = 1,
.max_ctx = ~0U
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/
DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
/*
* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
* Called after cpu_init() has setup ia64_ctx.max_ctx based on
* maximum RID that is supported by boot CPU.
*/
void __init
mmu_context_init (void)
{
memblock: stop using implicit alignment to SMP_CACHE_BYTES When a memblock allocation APIs are called with align = 0, the alignment is implicitly set to SMP_CACHE_BYTES. Implicit alignment is done deep in the memblock allocator and it can come as a surprise. Not that such an alignment would be wrong even when used incorrectly but it is better to be explicit for the sake of clarity and the prinicple of the least surprise. Replace all such uses of memblock APIs with the 'align' parameter explicitly set to SMP_CACHE_BYTES and stop implicit alignment assignment in the memblock internal allocation functions. For the case when memblock APIs are used via helper functions, e.g. like iommu_arena_new_node() in Alpha, the helper functions were detected with Coccinelle's help and then manually examined and updated where appropriate. The direct memblock APIs users were updated using the semantic patch below: @@ expression size, min_addr, max_addr, nid; @@ ( | - memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc(size, 0) + memblock_alloc(size, SMP_CACHE_BYTES) | - memblock_alloc_raw(size, 0) + memblock_alloc_raw(size, SMP_CACHE_BYTES) | - memblock_alloc_from(size, 0, min_addr) + memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_nopanic(size, 0) + memblock_alloc_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_low(size, 0) + memblock_alloc_low(size, SMP_CACHE_BYTES) | - memblock_alloc_low_nopanic(size, 0) + memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_from_nopanic(size, 0, min_addr) + memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_node(size, 0, nid) + memblock_alloc_node(size, SMP_CACHE_BYTES, nid) ) [mhocko@suse.com: changelog update] [akpm@linux-foundation.org: coding-style fixes] [rppt@linux.ibm.com: fix missed uses of implicit alignment] Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Suggested-by: Michal Hocko <mhocko@suse.com> Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Michael Ellerman <mpe@ellerman.id.au> [powerpc] Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:09:57 +08:00
ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3,
SMP_CACHE_BYTES);
ia64: add checks for the return value of memblock_alloc*() Add panic() calls if memblock_alloc*() returns NULL. Most of the changes are simply addition of if(!ptr) panic(); statements after the calls to memblock_alloc*() variants. Exceptions are create_mem_map_page_table() and ia64_log_init() that were slightly refactored to accommodate the change. Link: http://lkml.kernel.org/r/1548057848-15136-15-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Guo Ren <ren_guo@c-sky.com> [c-sky] Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Juergen Gross <jgross@suse.com> [Xen] Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Burton <paul.burton@mips.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:00 +08:00
if (!ia64_ctx.bitmap)
panic("%s: Failed to allocate %u bytes\n", __func__,
(ia64_ctx.max_ctx + 1) >> 3);
memblock: stop using implicit alignment to SMP_CACHE_BYTES When a memblock allocation APIs are called with align = 0, the alignment is implicitly set to SMP_CACHE_BYTES. Implicit alignment is done deep in the memblock allocator and it can come as a surprise. Not that such an alignment would be wrong even when used incorrectly but it is better to be explicit for the sake of clarity and the prinicple of the least surprise. Replace all such uses of memblock APIs with the 'align' parameter explicitly set to SMP_CACHE_BYTES and stop implicit alignment assignment in the memblock internal allocation functions. For the case when memblock APIs are used via helper functions, e.g. like iommu_arena_new_node() in Alpha, the helper functions were detected with Coccinelle's help and then manually examined and updated where appropriate. The direct memblock APIs users were updated using the semantic patch below: @@ expression size, min_addr, max_addr, nid; @@ ( | - memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc(size, 0) + memblock_alloc(size, SMP_CACHE_BYTES) | - memblock_alloc_raw(size, 0) + memblock_alloc_raw(size, SMP_CACHE_BYTES) | - memblock_alloc_from(size, 0, min_addr) + memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_nopanic(size, 0) + memblock_alloc_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_low(size, 0) + memblock_alloc_low(size, SMP_CACHE_BYTES) | - memblock_alloc_low_nopanic(size, 0) + memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_from_nopanic(size, 0, min_addr) + memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_node(size, 0, nid) + memblock_alloc_node(size, SMP_CACHE_BYTES, nid) ) [mhocko@suse.com: changelog update] [akpm@linux-foundation.org: coding-style fixes] [rppt@linux.ibm.com: fix missed uses of implicit alignment] Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Suggested-by: Michal Hocko <mhocko@suse.com> Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Michael Ellerman <mpe@ellerman.id.au> [powerpc] Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:09:57 +08:00
ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3,
SMP_CACHE_BYTES);
ia64: add checks for the return value of memblock_alloc*() Add panic() calls if memblock_alloc*() returns NULL. Most of the changes are simply addition of if(!ptr) panic(); statements after the calls to memblock_alloc*() variants. Exceptions are create_mem_map_page_table() and ia64_log_init() that were slightly refactored to accommodate the change. Link: http://lkml.kernel.org/r/1548057848-15136-15-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Guo Ren <ren_guo@c-sky.com> [c-sky] Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Juergen Gross <jgross@suse.com> [Xen] Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Burton <paul.burton@mips.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:00 +08:00
if (!ia64_ctx.flushmap)
panic("%s: Failed to allocate %u bytes\n", __func__,
(ia64_ctx.max_ctx + 1) >> 3);
}
/*
* Acquire the ia64_ctx.lock before calling this function!
*/
void
wrap_mmu_context (struct mm_struct *mm)
{
int i, cpu;
unsigned long flush_bit;
for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
ia64_ctx.bitmap[i] ^= flush_bit;
}
/* use offset at 300 to skip daemons */
ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, 300);
ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, ia64_ctx.next);
/*
* can't call flush_tlb_all() here because of race condition
* with O(1) scheduler [EF]
*/
cpu = get_cpu(); /* prevent preemption/migration */
for_each_online_cpu(i)
if (i != cpu)
per_cpu(ia64_need_tlb_flush, i) = 1;
put_cpu();
local_flush_tlb_all();
}
/*
* Implement "spinaphores" ... like counting semaphores, but they
* spin instead of sleeping. If there are ever any other users for
* this primitive it can be moved up to a spinaphore.h header.
*/
struct spinaphore {
unsigned long ticket;
unsigned long serve;
};
static inline void spinaphore_init(struct spinaphore *ss, int val)
{
ss->ticket = 0;
ss->serve = val;
}
static inline void down_spin(struct spinaphore *ss)
{
unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
if (time_before(t, ss->serve))
return;
ia64_invala();
for (;;) {
asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
if (time_before(t, serve))
return;
cpu_relax();
}
}
static inline void up_spin(struct spinaphore *ss)
{
ia64_fetchadd(1, &ss->serve, rel);
}
static struct spinaphore ptcg_sem;
static u16 nptcg = 1;
static int need_ptcg_sem = 1;
static int toolatetochangeptcgsem = 0;
/*
* Kernel parameter "nptcg=" overrides max number of concurrent global TLB
* purges which is reported from either PAL or SAL PALO.
*
* We don't have sanity checking for nptcg value. It's the user's responsibility
* for valid nptcg value on the platform. Otherwise, kernel may hang in some
* cases.
*/
static int __init
set_nptcg(char *str)
{
int value = 0;
get_option(&str, &value);
setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
return 1;
}
__setup("nptcg=", set_nptcg);
/*
* Maximum number of simultaneous ptc.g purges in the system can
* be defined by PAL_VM_SUMMARY (in which case we should take
* the smallest value for any cpu in the system) or by the PAL
* override table (in which case we should ignore the value from
* PAL_VM_SUMMARY).
*
* Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
* purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
* we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
*
* Complicating the logic here is the fact that num_possible_cpus()
* isn't fully setup until we start bringing cpus online.
*/
void
setup_ptcg_sem(int max_purges, int nptcg_from)
{
static int kp_override;
static int palo_override;
static int firstcpu = 1;
if (toolatetochangeptcgsem) {
if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
BUG_ON(1 < nptcg);
else
BUG_ON(max_purges < nptcg);
return;
}
if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
kp_override = 1;
nptcg = max_purges;
goto resetsema;
}
if (kp_override) {
need_ptcg_sem = num_possible_cpus() > nptcg;
return;
}
if (nptcg_from == NPTCG_FROM_PALO) {
palo_override = 1;
/* In PALO max_purges == 0 really means it! */
if (max_purges == 0)
panic("Whoa! Platform does not support global TLB purges.\n");
nptcg = max_purges;
if (nptcg == PALO_MAX_TLB_PURGES) {
need_ptcg_sem = 0;
return;
}
goto resetsema;
}
if (palo_override) {
if (nptcg != PALO_MAX_TLB_PURGES)
need_ptcg_sem = (num_possible_cpus() > nptcg);
return;
}
/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
if (max_purges == 0) max_purges = 1;
if (firstcpu) {
nptcg = max_purges;
firstcpu = 0;
}
if (max_purges < nptcg)
nptcg = max_purges;
if (nptcg == PAL_MAX_PURGES) {
need_ptcg_sem = 0;
return;
} else
need_ptcg_sem = (num_possible_cpus() > nptcg);
resetsema:
spinaphore_init(&ptcg_sem, max_purges);
}
void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long nbits)
{
struct mm_struct *active_mm = current->active_mm;
toolatetochangeptcgsem = 1;
if (mm != active_mm) {
/* Restore region IDs for mm */
if (mm && active_mm) {
activate_context(mm);
} else {
flush_tlb_all();
return;
}
}
if (need_ptcg_sem)
down_spin(&ptcg_sem);
do {
/*
* Flush ALAT entries also.
*/
ia64_ptcga(start, (nbits << 2));
ia64_srlz_i();
start += (1UL << nbits);
} while (start < end);
if (need_ptcg_sem)
up_spin(&ptcg_sem);
if (mm != active_mm) {
activate_context(active_mm);
}
}
void
local_flush_tlb_all (void)
{
unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
addr = local_cpu_data->ptce_base;
count0 = local_cpu_data->ptce_count[0];
count1 = local_cpu_data->ptce_count[1];
stride0 = local_cpu_data->ptce_stride[0];
stride1 = local_cpu_data->ptce_stride[1];
local_irq_save(flags);
for (i = 0; i < count0; ++i) {
for (j = 0; j < count1; ++j) {
ia64_ptce(addr);
addr += stride1;
}
addr += stride0;
}
local_irq_restore(flags);
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
static void
__flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long size = end - start;
unsigned long nbits;
#ifndef CONFIG_SMP
if (mm != current->active_mm) {
mm->context = 0;
return;
}
#endif
nbits = ia64_fls(size + 0xfff);
while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
(nbits < purge.max_bits))
++nbits;
if (nbits > purge.max_bits)
nbits = purge.max_bits;
start &= ~((1UL << nbits) - 1);
preempt_disable();
#ifdef CONFIG_SMP
if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
platform_global_tlb_purge(mm, start, end, nbits);
preempt_enable();
return;
}
#endif
do {
ia64_ptcl(start, (nbits<<2));
start += (1UL << nbits);
} while (start < end);
preempt_enable();
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
if (unlikely(end - start >= 1024*1024*1024*1024UL
|| REGION_NUMBER(start) != REGION_NUMBER(end - 1))) {
/*
* If we flush more than a tera-byte or across regions, we're
* probably better off just flushing the entire TLB(s). This
* should be very rare and is not worth optimizing for.
*/
flush_tlb_all();
} else {
/* flush the address range from the tlb */
__flush_tlb_range(vma, start, end);
/* flush the virt. page-table area mapping the addr range */
__flush_tlb_range(vma, ia64_thash(start), ia64_thash(end));
}
}
EXPORT_SYMBOL(flush_tlb_range);
void ia64_tlb_init(void)
{
ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
u64 tr_pgbits;
long status;
pal_vm_info_1_u_t vm_info_1;
pal_vm_info_2_u_t vm_info_2;
int cpu = smp_processor_id();
if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
"defaulting to architected purge page-sizes.\n", status);
purge.mask = 0x115557000UL;
}
purge.max_bits = ia64_fls(purge.mask);
ia64_get_ptce(&ptce_info);
local_cpu_data->ptce_base = ptce_info.base;
local_cpu_data->ptce_count[0] = ptce_info.count[0];
local_cpu_data->ptce_count[1] = ptce_info.count[1];
local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
if (status) {
printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
per_cpu(ia64_tr_num, cpu) = 8;
return;
}
per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
if (per_cpu(ia64_tr_num, cpu) >
(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
per_cpu(ia64_tr_num, cpu) =
vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
static int justonce = 1;
per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
if (justonce) {
justonce = 0;
printk(KERN_DEBUG "TR register number exceeds "
"IA64_TR_ALLOC_MAX!\n");
}
}
}
/*
* is_tr_overlap
*
* Check overlap with inserted TRs.
*/
static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
{
u64 tr_log_size;
u64 tr_end;
u64 va_rr = ia64_get_rr(va);
u64 va_rid = RR_TO_RID(va_rr);
u64 va_end = va + (1<<log_size) - 1;
if (va_rid != RR_TO_RID(p->rr))
return 0;
tr_log_size = (p->itir & 0xff) >> 2;
tr_end = p->ifa + (1<<tr_log_size) - 1;
if (va > tr_end || p->ifa > va_end)
return 0;
return 1;
}
/*
* ia64_insert_tr in virtual mode. Allocate a TR slot
*
* target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
*
* va : virtual address.
* pte : pte entries inserted.
* log_size: range to be covered.
*
* Return value: <0 : error No.
*
* >=0 : slot number allocated for TR.
* Must be called with preemption disabled.
*/
int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
{
int i, r;
unsigned long psr;
struct ia64_tr_entry *p;
int cpu = smp_processor_id();
if (!ia64_idtrs[cpu]) {
treewide: kmalloc() -> kmalloc_array() The kmalloc() function has a 2-factor argument form, kmalloc_array(). This patch replaces cases of: kmalloc(a * b, gfp) with: kmalloc_array(a * b, gfp) as well as handling cases of: kmalloc(a * b * c, gfp) with: kmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The tools/ directory was manually excluded, since it has its own implementation of kmalloc(). The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(char) * COUNT + COUNT , ...) | kmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kmalloc + kmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kmalloc(C1 * C2 * C3, ...) | kmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kmalloc(sizeof(THING) * C2, ...) | kmalloc(sizeof(TYPE) * C2, ...) | kmalloc(C1 * C2 * C3, ...) | kmalloc(C1 * C2, ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kmalloc + kmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kmalloc + kmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 04:55:00 +08:00
ia64_idtrs[cpu] = kmalloc_array(2 * IA64_TR_ALLOC_MAX,
sizeof(struct ia64_tr_entry),
GFP_KERNEL);
if (!ia64_idtrs[cpu])
return -ENOMEM;
}
r = -EINVAL;
/*Check overlap with existing TR entries*/
if (target_mask & 0x1) {
p = ia64_idtrs[cpu];
for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
i++, p++) {
if (p->pte & 0x1)
if (is_tr_overlap(p, va, log_size)) {
printk(KERN_DEBUG "Overlapped Entry"
"Inserted for TR Register!!\n");
goto out;
}
}
}
if (target_mask & 0x2) {
p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
i++, p++) {
if (p->pte & 0x1)
if (is_tr_overlap(p, va, log_size)) {
printk(KERN_DEBUG "Overlapped Entry"
"Inserted for TR Register!!\n");
goto out;
}
}
}
for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
switch (target_mask & 0x3) {
case 1:
if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
goto found;
continue;
case 2:
if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
goto found;
continue;
case 3:
if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
goto found;
continue;
default:
r = -EINVAL;
goto out;
}
}
found:
if (i >= per_cpu(ia64_tr_num, cpu))
return -EBUSY;
/*Record tr info for mca hander use!*/
if (i > per_cpu(ia64_tr_used, cpu))
per_cpu(ia64_tr_used, cpu) = i;
psr = ia64_clear_ic();
if (target_mask & 0x1) {
ia64_itr(0x1, i, va, pte, log_size);
ia64_srlz_i();
p = ia64_idtrs[cpu] + i;
p->ifa = va;
p->pte = pte;
p->itir = log_size << 2;
p->rr = ia64_get_rr(va);
}
if (target_mask & 0x2) {
ia64_itr(0x2, i, va, pte, log_size);
ia64_srlz_i();
p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
p->ifa = va;
p->pte = pte;
p->itir = log_size << 2;
p->rr = ia64_get_rr(va);
}
ia64_set_psr(psr);
r = i;
out:
return r;
}
EXPORT_SYMBOL_GPL(ia64_itr_entry);
/*
* ia64_purge_tr
*
* target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
* slot: slot number to be freed.
*
* Must be called with preemption disabled.
*/
void ia64_ptr_entry(u64 target_mask, int slot)
{
int cpu = smp_processor_id();
int i;
struct ia64_tr_entry *p;
if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
return;
if (target_mask & 0x1) {
p = ia64_idtrs[cpu] + slot;
if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
p->pte = 0;
ia64_ptr(0x1, p->ifa, p->itir>>2);
ia64_srlz_i();
}
}
if (target_mask & 0x2) {
p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
p->pte = 0;
ia64_ptr(0x2, p->ifa, p->itir>>2);
ia64_srlz_i();
}
}
for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
break;
}
per_cpu(ia64_tr_used, cpu) = i;
}
EXPORT_SYMBOL_GPL(ia64_ptr_entry);