2015-06-05 14:35:24 +08:00
|
|
|
/*
|
|
|
|
* IOMMU helpers in MMU context.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2017-02-09 01:51:30 +08:00
|
|
|
#include <linux/sched/signal.h>
|
2015-06-05 14:35:24 +08:00
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/rculist.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/mutex.h>
|
2016-09-06 14:27:31 +08:00
|
|
|
#include <linux/migrate.h>
|
|
|
|
#include <linux/hugetlb.h>
|
|
|
|
#include <linux/swap.h>
|
2018-09-10 16:29:07 +08:00
|
|
|
#include <linux/sizes.h>
|
2015-06-05 14:35:24 +08:00
|
|
|
#include <asm/mmu_context.h>
|
2018-07-17 15:19:13 +08:00
|
|
|
#include <asm/pte-walk.h>
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
/*
 * Taken by mm_iommu_do_alloc() before it walks
 * mm->context.iommu_group_mem_list looking for overlapping regions.
 */
static DEFINE_MUTEX(mem_list_mutex);
|
|
|
|
|
2018-09-10 16:29:07 +08:00
|
|
|
/*
 * NOTE(review): presumably a flag bit kept in the low bits of an hpas[]
 * entry -- confirm against the users of this macro.
 */
#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
/*
 * Mask that clears every bit below SZ_4K.  The expansion is wrapped in
 * parentheses so that it always behaves as a single operand, whatever
 * expression it is pasted into.
 */
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	(~(SZ_4K - 1))
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
struct mm_iommu_table_group_mem_t {
|
|
|
|
struct list_head next;
|
|
|
|
struct rcu_head rcu;
|
|
|
|
unsigned long used;
|
|
|
|
atomic64_t mapped;
|
2018-07-17 15:19:13 +08:00
|
|
|
unsigned int pageshift;
|
2015-06-05 14:35:24 +08:00
|
|
|
u64 ua; /* userspace address */
|
|
|
|
u64 entries; /* number of entries in hpas[] */
|
|
|
|
u64 *hpas; /* vmalloc'ed */
|
2018-12-19 16:52:15 +08:00
|
|
|
#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
|
|
|
|
u64 dev_hpa; /* Device memory base address */
|
2015-06-05 14:35:24 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Charge or uncharge @npages pages against @mm's locked_vm counter.
 *
 * @mm:     address space whose locked_vm is adjusted
 * @npages: number of pages to add or remove; 0 is a no-op
 * @incr:   true to add to locked_vm, false to subtract from it
 *
 * When charging, the request is refused with -ENOMEM if the new total
 * would exceed RLIMIT_MEMLOCK and the caller lacks CAP_IPC_LOCK.  When
 * uncharging, an attempt to remove more than is currently accounted
 * triggers a one-time warning and is clamped to the accounted amount.
 *
 * The update is done with mm->mmap_sem held for writing.
 *
 * Returns 0 on success or -ENOMEM when the limit check fails.
 */
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long rc = 0;

	if (npages == 0)
		return 0;

	down_write(&mm->mmap_sem);

	if (!incr) {
		/* Never let the counter underflow */
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	} else {
		long wanted = mm->locked_vm + npages;
		long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

		/* capable() is only consulted once the limit is exceeded */
		if (wanted <= limit || capable(CAP_IPC_LOCK))
			mm->locked_vm += npages;
		else
			rc = -ENOMEM;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return rc;
}
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
bool mm_iommu_preregistered(struct mm_struct *mm)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
2016-11-30 14:52:00 +08:00
|
|
|
return !list_empty(&mm->context.iommu_group_mem_list);
|
2015-06-05 14:35:24 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
|
|
|
|
|
2016-09-06 14:27:31 +08:00
|
|
|
/*
|
|
|
|
* Taken from alloc_migrate_target with changes to remove CMA allocations
|
|
|
|
*/
|
2018-04-11 07:30:03 +08:00
|
|
|
struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
|
2016-09-06 14:27:31 +08:00
|
|
|
{
|
|
|
|
gfp_t gfp_mask = GFP_USER;
|
|
|
|
struct page *new_page;
|
|
|
|
|
2017-04-11 15:54:57 +08:00
|
|
|
if (PageCompound(page))
|
2016-09-06 14:27:31 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (PageHighMem(page))
|
|
|
|
gfp_mask |= __GFP_HIGHMEM;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't want the allocation to force an OOM if possibe
|
|
|
|
*/
|
|
|
|
new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
|
|
|
|
return new_page;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mm_iommu_move_page_from_cma(struct page *page)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
LIST_HEAD(cma_migrate_pages);
|
|
|
|
|
|
|
|
/* Ignore huge pages for now */
|
2017-04-11 15:54:57 +08:00
|
|
|
if (PageCompound(page))
|
2016-09-06 14:27:31 +08:00
|
|
|
return -EBUSY;
|
|
|
|
|
|
|
|
lru_add_drain();
|
|
|
|
ret = isolate_lru_page(page);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
list_add(&page->lru, &cma_migrate_pages);
|
|
|
|
put_page(page); /* Drop the gup reference */
|
|
|
|
|
|
|
|
ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
|
2018-04-06 07:22:08 +08:00
|
|
|
NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
|
2016-09-06 14:27:31 +08:00
|
|
|
if (ret) {
|
|
|
|
if (!list_empty(&cma_migrate_pages))
|
|
|
|
putback_movable_pages(&cma_migrate_pages);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
|
|
|
|
unsigned long entries, unsigned long dev_hpa,
|
2015-06-05 14:35:24 +08:00
|
|
|
struct mm_iommu_table_group_mem_t **pmem)
|
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem;
|
|
|
|
long i, j, ret = 0, locked_entries = 0;
|
2018-07-17 15:19:13 +08:00
|
|
|
unsigned int pageshift;
|
|
|
|
unsigned long flags;
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
unsigned long cur_ua;
|
2015-06-05 14:35:24 +08:00
|
|
|
struct page *page = NULL;
|
|
|
|
|
|
|
|
mutex_lock(&mem_list_mutex);
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
|
2015-06-05 14:35:24 +08:00
|
|
|
next) {
|
|
|
|
/* Overlap? */
|
|
|
|
if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
|
|
|
|
(ua < (mem->ua +
|
|
|
|
(mem->entries << PAGE_SHIFT)))) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
|
|
|
|
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
|
|
|
|
if (ret)
|
|
|
|
goto unlock_exit;
|
2015-06-05 14:35:24 +08:00
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
locked_entries = entries;
|
|
|
|
}
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
|
|
|
|
if (!mem) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
|
|
|
|
mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
|
|
|
|
mem->dev_hpa = dev_hpa;
|
|
|
|
goto good_exit;
|
|
|
|
}
|
|
|
|
mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
|
|
|
|
|
2018-07-17 15:19:13 +08:00
|
|
|
/*
|
|
|
|
* For a starting point for a maximum page size calculation
|
|
|
|
* we use @ua and @entries natural alignment to allow IOMMU pages
|
|
|
|
* smaller than huge pages but still bigger than PAGE_SIZE.
|
|
|
|
*/
|
|
|
|
mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
|
treewide: Use array_size() in vzalloc()
The vzalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:
vzalloc(a * b)
with:
vzalloc(array_size(a, b))
as well as handling cases of:
vzalloc(a * b * c)
with:
vzalloc(array3_size(a, b, c))
This does, however, attempt to ignore constant size factors like:
vzalloc(4 * 1024)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
vzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
vzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
vzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
vzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
vzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
vzalloc(
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
vzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
vzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
vzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
vzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
vzalloc(C1 * C2 * C3, ...)
|
vzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@
(
vzalloc(C1 * C2, ...)
|
vzalloc(
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 05:27:37 +08:00
|
|
|
mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
|
2015-06-05 14:35:24 +08:00
|
|
|
if (!mem->hpas) {
|
|
|
|
kfree(mem);
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < entries; ++i) {
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
cur_ua = ua + (i << PAGE_SHIFT);
|
|
|
|
if (1 != get_user_pages_fast(cur_ua,
|
2015-06-05 14:35:24 +08:00
|
|
|
1/* pages */, 1/* iswrite */, &page)) {
|
2016-09-06 14:27:31 +08:00
|
|
|
ret = -EFAULT;
|
2015-06-05 14:35:24 +08:00
|
|
|
for (j = 0; j < i; ++j)
|
2016-09-06 14:27:31 +08:00
|
|
|
put_page(pfn_to_page(mem->hpas[j] >>
|
|
|
|
PAGE_SHIFT));
|
2015-06-05 14:35:24 +08:00
|
|
|
vfree(mem->hpas);
|
|
|
|
kfree(mem);
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
2016-09-06 14:27:31 +08:00
|
|
|
/*
|
|
|
|
* If we get a page from the CMA zone, since we are going to
|
|
|
|
* be pinning these entries, we might as well move them out
|
|
|
|
* of the CMA zone if possible. NOTE: faulting in + migration
|
|
|
|
* can be expensive. Batching can be considered later
|
|
|
|
*/
|
2017-02-14 10:44:05 +08:00
|
|
|
if (is_migrate_cma_page(page)) {
|
2016-09-06 14:27:31 +08:00
|
|
|
if (mm_iommu_move_page_from_cma(page))
|
|
|
|
goto populate;
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
if (1 != get_user_pages_fast(cur_ua,
|
2016-09-06 14:27:31 +08:00
|
|
|
1/* pages */, 1/* iswrite */,
|
|
|
|
&page)) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
for (j = 0; j < i; ++j)
|
|
|
|
put_page(pfn_to_page(mem->hpas[j] >>
|
|
|
|
PAGE_SHIFT));
|
|
|
|
vfree(mem->hpas);
|
|
|
|
kfree(mem);
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
populate:
|
2018-07-17 15:19:13 +08:00
|
|
|
pageshift = PAGE_SHIFT;
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
|
2018-07-17 15:19:13 +08:00
|
|
|
pte_t *pte;
|
|
|
|
struct page *head = compound_head(page);
|
|
|
|
unsigned int compshift = compound_order(head);
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
unsigned int pteshift;
|
2018-07-17 15:19:13 +08:00
|
|
|
|
|
|
|
local_irq_save(flags); /* disables as well */
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);
|
2018-07-17 15:19:13 +08:00
|
|
|
|
|
|
|
/* Double check it is still the same pinned page */
|
|
|
|
if (pte && pte_page(*pte) == head &&
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
pteshift == compshift + PAGE_SHIFT)
|
|
|
|
pageshift = max_t(unsigned int, pteshift,
|
2018-07-17 15:19:13 +08:00
|
|
|
PAGE_SHIFT);
|
KVM: PPC: Book3S: Fix guest DMA when guest partially backed by THP pages
Commit 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in
the pinned physical page", 2018-07-17) added some checks to ensure
that guest DMA mappings don't attempt to map more than the guest is
entitled to access. However, errors in the logic mean that legitimate
guest requests to map pages for DMA are being denied in some
situations. Specifically, if the first page of the range passed to
mm_iommu_get() is mapped with a normal page, and subsequent pages are
mapped with transparent huge pages, we end up with mem->pageshift ==
0. That means that the page size checks in mm_iommu_ua_to_hpa() and
mm_iommu_ua_to_hpa_rm() will always fail for every page in that
region, and thus the guest can never map any memory in that region for
DMA, typically leading to a flood of error messages like this:
qemu-system-ppc64: VFIO_MAP_DMA: -22
qemu-system-ppc64: vfio_dma_map(0x10005f47780, 0x800000000000000, 0x10000, 0x7fff63ff0000) = -22 (Invalid argument)
The logic errors in mm_iommu_get() are:
(a) use of 'ua' not 'ua + (i << PAGE_SHIFT)' in the find_linux_pte()
call (meaning that find_linux_pte() returns the pte for the
first address in the range, not the address we are currently up
to);
(b) use of 'pageshift' as the variable to receive the hugepage shift
returned by find_linux_pte() - for a normal page this gets set
to 0, leading to us setting mem->pageshift to 0 when we conclude
that the pte returned by find_linux_pte() didn't match the page
we were looking at;
(c) comparing 'compshift', which is a page order, i.e. log base 2 of
the number of pages, with 'pageshift', which is a log base 2 of
the number of bytes.
To fix these problems, this patch introduces 'cur_ua' to hold the
current user address and uses that in the find_linux_pte() call;
introduces 'pteshift' to hold the hugepage shift found by
find_linux_pte(); and compares 'pteshift' with 'compshift +
PAGE_SHIFT' rather than 'compshift'.
The patch also moves the local_irq_restore to the point after the PTE
pointer returned by find_linux_pte() has been dereferenced because
otherwise the PTE could change underneath us, and adds a check to
avoid doing the find_linux_pte() call once mem->pageshift has been
reduced to PAGE_SHIFT, as an optimization.
Fixes: 76fa4975f3ed ("KVM: PPC: Check if IOMMU page is contained in the pinned physical page")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-08-23 08:08:58 +08:00
|
|
|
local_irq_restore(flags);
|
2018-07-17 15:19:13 +08:00
|
|
|
}
|
|
|
|
mem->pageshift = min(mem->pageshift, pageshift);
|
2015-06-05 14:35:24 +08:00
|
|
|
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
good_exit:
|
2015-06-05 14:35:24 +08:00
|
|
|
atomic64_set(&mem->mapped, 1);
|
|
|
|
mem->used = 1;
|
|
|
|
mem->ua = ua;
|
|
|
|
mem->entries = entries;
|
|
|
|
*pmem = mem;
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
unlock_exit:
|
|
|
|
if (locked_entries && ret)
|
2016-11-30 14:52:00 +08:00
|
|
|
mm_iommu_adjust_locked_vm(mm, locked_entries, false);
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
mutex_unlock(&mem_list_mutex);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2018-12-19 16:52:15 +08:00
|
|
|
|
|
|
|
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
|
|
|
|
struct mm_iommu_table_group_mem_t **pmem)
|
|
|
|
{
|
|
|
|
return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
|
|
|
|
pmem);
|
|
|
|
}
|
2018-12-19 16:52:14 +08:00
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_new);
|
2015-06-05 14:35:24 +08:00
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
/*
 * Register the userspace range [@ua, @ua + @entries pages) for @mm with an
 * explicit device memory base address @dev_hpa.
 *
 * All arguments are forwarded as-is to mm_iommu_do_alloc(); its return value
 * is returned unchanged and the new descriptor is stored in @pmem.
 */
long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	long ret;

	ret = mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
|
|
|
|
{
|
|
|
|
long i;
|
|
|
|
struct page *page = NULL;
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (!mem->hpas)
|
|
|
|
return;
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
for (i = 0; i < mem->entries; ++i) {
|
|
|
|
if (!mem->hpas[i])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
|
|
|
|
if (!page)
|
|
|
|
continue;
|
|
|
|
|
2018-09-10 16:29:07 +08:00
|
|
|
if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
|
|
|
|
SetPageDirty(page);
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
put_page(page);
|
|
|
|
mem->hpas[i] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
|
|
|
|
{
|
|
|
|
|
|
|
|
mm_iommu_unpin(mem);
|
|
|
|
vfree(mem->hpas);
|
|
|
|
kfree(mem);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mm_iommu_free(struct rcu_head *head)
|
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem = container_of(head,
|
|
|
|
struct mm_iommu_table_group_mem_t, rcu);
|
|
|
|
|
|
|
|
mm_iommu_do_free(mem);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
|
|
|
|
{
|
|
|
|
list_del_rcu(&mem->next);
|
|
|
|
call_rcu(&mem->rcu, mm_iommu_free);
|
|
|
|
}
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
|
|
|
long ret = 0;
|
2018-12-19 16:52:15 +08:00
|
|
|
unsigned long entries, dev_hpa;
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
mutex_lock(&mem_list_mutex);
|
|
|
|
|
|
|
|
if (mem->used == 0) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
--mem->used;
|
|
|
|
/* There are still users, exit */
|
|
|
|
if (mem->used)
|
|
|
|
goto unlock_exit;
|
|
|
|
|
|
|
|
/* Are there still mappings? */
|
|
|
|
if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
|
|
|
|
++mem->used;
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto unlock_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* @mapped became 0 so now mappings are disabled, release the region */
|
2018-12-19 16:52:15 +08:00
|
|
|
entries = mem->entries;
|
|
|
|
dev_hpa = mem->dev_hpa;
|
2015-06-05 14:35:24 +08:00
|
|
|
mm_iommu_release(mem);
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
|
|
|
|
mm_iommu_adjust_locked_vm(mm, entries, false);
|
2016-11-30 14:52:00 +08:00
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
unlock_exit:
|
|
|
|
mutex_unlock(&mem_list_mutex);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_put);
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
|
|
|
|
unsigned long ua, unsigned long size)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
|
2015-06-05 14:35:24 +08:00
|
|
|
if ((mem->ua <= ua) &&
|
|
|
|
(ua + size <= mem->ua +
|
|
|
|
(mem->entries << PAGE_SHIFT))) {
|
|
|
|
ret = mem;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
|
|
|
|
|
2017-03-22 12:21:47 +08:00
|
|
|
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
|
|
|
|
unsigned long ua, unsigned long size)
|
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
|
|
|
|
|
|
|
|
list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
|
|
|
|
next) {
|
|
|
|
if ((mem->ua <= ua) &&
|
|
|
|
(ua + size <= mem->ua +
|
|
|
|
(mem->entries << PAGE_SHIFT))) {
|
|
|
|
ret = mem;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:14 +08:00
|
|
|
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
|
2016-11-30 14:52:00 +08:00
|
|
|
unsigned long ua, unsigned long entries)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
|
|
|
|
|
2018-12-19 16:52:14 +08:00
|
|
|
mutex_lock(&mem_list_mutex);
|
|
|
|
|
2016-11-30 14:52:00 +08:00
|
|
|
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
|
2015-06-05 14:35:24 +08:00
|
|
|
if ((mem->ua == ua) && (mem->entries == entries)) {
|
|
|
|
ret = mem;
|
2018-12-19 16:52:14 +08:00
|
|
|
++mem->used;
|
2015-06-05 14:35:24 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-19 16:52:14 +08:00
|
|
|
mutex_unlock(&mem_list_mutex);
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2018-12-19 16:52:14 +08:00
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_get);
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
|
2018-07-17 15:19:13 +08:00
|
|
|
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
|
|
|
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
|
2018-12-19 16:52:15 +08:00
|
|
|
u64 *va;
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
if (entry >= mem->entries)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-07-17 15:19:13 +08:00
|
|
|
if (pageshift > mem->pageshift)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (!mem->hpas) {
|
|
|
|
*hpa = mem->dev_hpa + (ua - mem->ua);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
va = &mem->hpas[entry];
|
2018-09-10 16:29:07 +08:00
|
|
|
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
|
2015-06-05 14:35:24 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
|
|
|
|
|
2017-03-22 12:21:47 +08:00
|
|
|
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
|
2018-07-17 15:19:13 +08:00
|
|
|
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
|
2017-03-22 12:21:47 +08:00
|
|
|
{
|
|
|
|
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
|
|
|
|
unsigned long *pa;
|
|
|
|
|
|
|
|
if (entry >= mem->entries)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-07-17 15:19:13 +08:00
|
|
|
if (pageshift > mem->pageshift)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (!mem->hpas) {
|
|
|
|
*hpa = mem->dev_hpa + (ua - mem->ua);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
|
2017-03-22 12:21:47 +08:00
|
|
|
if (!pa)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2018-09-10 16:29:07 +08:00
|
|
|
*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
|
2017-03-22 12:21:47 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2018-09-10 16:29:07 +08:00
|
|
|
|
|
|
|
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
|
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem;
|
|
|
|
long entry;
|
|
|
|
void *va;
|
|
|
|
unsigned long *pa;
|
|
|
|
|
|
|
|
mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
|
|
|
|
if (!mem)
|
|
|
|
return;
|
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
|
|
|
|
return;
|
|
|
|
|
2018-09-10 16:29:07 +08:00
|
|
|
entry = (ua - mem->ua) >> PAGE_SHIFT;
|
|
|
|
va = &mem->hpas[entry];
|
|
|
|
|
|
|
|
pa = (void *) vmalloc_to_phys(va);
|
|
|
|
if (!pa)
|
|
|
|
return;
|
|
|
|
|
|
|
|
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
|
|
|
|
}
|
2017-03-22 12:21:47 +08:00
|
|
|
|
2018-12-19 16:52:15 +08:00
|
|
|
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
|
|
|
|
unsigned int pageshift, unsigned long *size)
|
|
|
|
{
|
|
|
|
struct mm_iommu_table_group_mem_t *mem;
|
|
|
|
unsigned long end;
|
|
|
|
|
|
|
|
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
|
|
|
|
if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
|
|
|
|
if ((mem->dev_hpa <= hpa) && (hpa < end)) {
|
|
|
|
/*
|
|
|
|
* Since the IOMMU page size might be bigger than
|
|
|
|
* PAGE_SIZE, the amount of preregistered memory
|
|
|
|
* starting from @hpa might be smaller than 1<<pageshift
|
|
|
|
* and the caller needs to distinguish this situation.
|
|
|
|
*/
|
|
|
|
*size = min(1UL << pageshift, end - hpa);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
|
|
|
|
|
2015-06-05 14:35:24 +08:00
|
|
|
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
|
|
|
|
{
|
|
|
|
if (atomic64_inc_not_zero(&mem->mapped))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Last mm_iommu_put() has been called, no more mappings allowed() */
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
|
|
|
|
|
|
|
|
void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
|
|
|
|
{
|
|
|
|
atomic64_add_unless(&mem->mapped, -1, 1);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
|
|
|
|
|
2016-11-30 14:51:59 +08:00
|
|
|
void mm_iommu_init(struct mm_struct *mm)
|
2015-06-05 14:35:24 +08:00
|
|
|
{
|
2016-11-30 14:51:59 +08:00
|
|
|
INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
|
2015-06-05 14:35:24 +08:00
|
|
|
}
|