mirror of https://gitee.com/openkylin/linux.git
vfio/type1: Batch page pinning
Pinning one 4K page at a time is inefficient, so do it in batches of 512 instead. This is just an optimization with no functional change intended, and in particular the driver still calls iommu_map() with the largest physically contiguous range possible. Add two fields in vfio_batch to remember where to start between calls to vfio_pin_pages_remote(), and use vfio_batch_unpin() to handle remaining pages in the batch in case of error. qemu pins pages for guests around 8% faster on my test system, a two-node Broadwell server with 128G memory per node. The qemu process was bound to one node with its allocations constrained there as well. base test guest ---------------- ---------------- mem (GB) speedup avg sec (std) avg sec (std) 1 7.4% 0.61 (0.00) 0.56 (0.00) 2 8.3% 0.93 (0.00) 0.85 (0.00) 4 8.4% 1.46 (0.00) 1.34 (0.00) 8 8.6% 2.54 (0.01) 2.32 (0.00) 16 8.3% 4.66 (0.00) 4.27 (0.01) 32 8.3% 8.94 (0.01) 8.20 (0.01) 64 8.2% 17.47 (0.01) 16.04 (0.03) 120 8.5% 32.45 (0.13) 29.69 (0.01) perf diff confirms less time spent in pup. Here are the top ten functions: Baseline Delta Abs Symbol 78.63% +6.64% clear_page_erms 1.50% -1.50% __gup_longterm_locked 1.27% -0.78% __get_user_pages +0.76% kvm_zap_rmapp.constprop.0 0.54% -0.53% vmacache_find 0.55% -0.51% get_pfnblock_flags_mask 0.48% -0.48% __get_user_pages_remote +0.39% slot_rmap_walk_next +0.32% vfio_pin_map_dma +0.26% kvm_handle_hva_range ... Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org> Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
parent
4b6c33b322
commit
4d83de6da2
|
@ -107,6 +107,8 @@ struct vfio_batch {
|
|||
struct page **pages; /* for pin_user_pages_remote */
|
||||
struct page *fallback_page; /* if pages alloc fails */
|
||||
int capacity; /* length of pages array */
|
||||
int size; /* of batch currently */
|
||||
int offset; /* of next entry in pages */
|
||||
};
|
||||
|
||||
struct vfio_group {
|
||||
|
@ -469,6 +471,9 @@ static int put_pfn(unsigned long pfn, int prot)
|
|||
|
||||
static void vfio_batch_init(struct vfio_batch *batch)
|
||||
{
|
||||
batch->size = 0;
|
||||
batch->offset = 0;
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
goto fallback;
|
||||
|
||||
|
@ -484,6 +489,17 @@ static void vfio_batch_init(struct vfio_batch *batch)
|
|||
batch->capacity = 1;
|
||||
}
|
||||
|
||||
static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
|
||||
{
|
||||
while (batch->size) {
|
||||
unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
|
||||
|
||||
put_pfn(pfn, dma->prot);
|
||||
batch->offset++;
|
||||
batch->size--;
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_batch_fini(struct vfio_batch *batch)
|
||||
{
|
||||
if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
|
||||
|
@ -635,65 +651,88 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
|
|||
long npage, unsigned long *pfn_base,
|
||||
unsigned long limit, struct vfio_batch *batch)
|
||||
{
|
||||
unsigned long pfn = 0;
|
||||
unsigned long pfn;
|
||||
struct mm_struct *mm = current->mm;
|
||||
long ret, pinned = 0, lock_acct = 0;
|
||||
bool rsvd;
|
||||
dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
|
||||
|
||||
/* This code path is only user initiated */
|
||||
if (!current->mm)
|
||||
if (!mm)
|
||||
return -ENODEV;
|
||||
|
||||
ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base,
|
||||
batch->pages);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
pinned++;
|
||||
rsvd = is_invalid_reserved_pfn(*pfn_base);
|
||||
|
||||
/*
|
||||
* Reserved pages aren't counted against the user, externally pinned
|
||||
* pages are already counted against the user.
|
||||
*/
|
||||
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
|
||||
if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
|
||||
put_pfn(*pfn_base, dma->prot);
|
||||
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
|
||||
limit << PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
}
|
||||
lock_acct++;
|
||||
if (batch->size) {
|
||||
/* Leftover pages in batch from an earlier call. */
|
||||
*pfn_base = page_to_pfn(batch->pages[batch->offset]);
|
||||
pfn = *pfn_base;
|
||||
rsvd = is_invalid_reserved_pfn(*pfn_base);
|
||||
} else {
|
||||
*pfn_base = 0;
|
||||
}
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
goto out;
|
||||
while (npage) {
|
||||
if (!batch->size) {
|
||||
/* Empty batch, so refill it. */
|
||||
long req_pages = min_t(long, npage, batch->capacity);
|
||||
|
||||
/* Lock all the consecutive pages from pfn_base */
|
||||
for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
|
||||
pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
|
||||
ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn,
|
||||
batch->pages);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (pfn != *pfn_base + pinned ||
|
||||
rsvd != is_invalid_reserved_pfn(pfn)) {
|
||||
put_pfn(pfn, dma->prot);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
|
||||
if (!dma->lock_cap &&
|
||||
current->mm->locked_vm + lock_acct + 1 > limit) {
|
||||
put_pfn(pfn, dma->prot);
|
||||
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
|
||||
__func__, limit << PAGE_SHIFT);
|
||||
ret = -ENOMEM;
|
||||
ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
|
||||
&pfn, batch->pages);
|
||||
if (ret < 0)
|
||||
goto unpin_out;
|
||||
|
||||
batch->size = ret;
|
||||
batch->offset = 0;
|
||||
|
||||
if (!*pfn_base) {
|
||||
*pfn_base = pfn;
|
||||
rsvd = is_invalid_reserved_pfn(*pfn_base);
|
||||
}
|
||||
lock_acct++;
|
||||
}
|
||||
|
||||
/*
|
||||
* pfn is preset for the first iteration of this inner loop and
|
||||
* updated at the end to handle a VM_PFNMAP pfn. In that case,
|
||||
* batch->pages isn't valid (there's no struct page), so allow
|
||||
* batch->pages to be touched only when there's more than one
|
||||
* pfn to check, which guarantees the pfns are from a
|
||||
* !VM_PFNMAP vma.
|
||||
*/
|
||||
while (true) {
|
||||
if (pfn != *pfn_base + pinned ||
|
||||
rsvd != is_invalid_reserved_pfn(pfn))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Reserved pages aren't counted against the user,
|
||||
* externally pinned pages are already counted against
|
||||
* the user.
|
||||
*/
|
||||
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
|
||||
if (!dma->lock_cap &&
|
||||
mm->locked_vm + lock_acct + 1 > limit) {
|
||||
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
|
||||
__func__, limit << PAGE_SHIFT);
|
||||
ret = -ENOMEM;
|
||||
goto unpin_out;
|
||||
}
|
||||
lock_acct++;
|
||||
}
|
||||
|
||||
pinned++;
|
||||
npage--;
|
||||
vaddr += PAGE_SIZE;
|
||||
iova += PAGE_SIZE;
|
||||
batch->offset++;
|
||||
batch->size--;
|
||||
|
||||
if (!batch->size)
|
||||
break;
|
||||
|
||||
pfn = page_to_pfn(batch->pages[batch->offset]);
|
||||
}
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
|
@ -701,10 +740,11 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
|
|||
|
||||
unpin_out:
|
||||
if (ret < 0) {
|
||||
if (!rsvd) {
|
||||
if (pinned && !rsvd) {
|
||||
for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
|
||||
put_pfn(pfn, dma->prot);
|
||||
}
|
||||
vfio_batch_unpin(batch, dma);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1463,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
|
|||
if (ret) {
|
||||
vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
|
||||
npage, true);
|
||||
vfio_batch_unpin(&batch, dma);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1726,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
|
|||
ret = iommu_map(domain->domain, iova, phys,
|
||||
size, dma->prot | domain->prot);
|
||||
if (ret) {
|
||||
if (!dma->iommu_mapped)
|
||||
if (!dma->iommu_mapped) {
|
||||
vfio_unpin_pages_remote(dma, iova,
|
||||
phys >> PAGE_SHIFT,
|
||||
size >> PAGE_SHIFT,
|
||||
true);
|
||||
vfio_batch_unpin(&batch, dma);
|
||||
}
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue