RDMA/umem: Do not use current->tgid to track the mm_struct
This is just wrong: the process that calls into reg_mr is the process associated with the umem, and that does not have to be the same process that created the context.

When this code was first written mmgrab() didn't exist; these days we can simply hold the mm_struct pointer in the umem directly, so there is no ambiguity, when the umem is released, about which mm it was associated with.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
commit d4b4dd1b97
parent ce92db1ca8
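For readers unfamiliar with the mm lifetime helpers this patch relies on, here is a minimal, stand-alone sketch of the pattern it adopts: pin the registering task's mm_struct with mmgrab() at registration time and release it with mmdrop() at teardown. The sketch is not part of the patch; the struct and function names are illustrative only.

    #include <linux/sched.h>	/* current */
    #include <linux/sched/mm.h>	/* mmgrab(), mmdrop() */
    #include <linux/slab.h>

    /* Illustrative object that remembers which mm registered it. */
    struct pinned_obj {
    	struct mm_struct *owning_mm;
    };

    static struct pinned_obj *pinned_obj_create(void)
    {
    	struct pinned_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

    	if (!obj)
    		return NULL;
    	/*
    	 * mmgrab() pins the mm_struct itself (not its address space),
    	 * so the pointer stays valid until the matching mmdrop(),
    	 * regardless of which task eventually frees the object.
    	 */
    	obj->owning_mm = current->mm;
    	mmgrab(obj->owning_mm);
    	return obj;
    }

    static void pinned_obj_destroy(struct pinned_obj *obj)
    {
    	mmdrop(obj->owning_mm);
    	kfree(obj);
    }

Unlike the old tgid-based lookup, the pointer held this way is guaranteed to be the mm of the task that actually performed the registration.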
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -86,6 +86,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	struct vm_area_struct **vma_list;
 	unsigned long lock_limit;
 	unsigned long cur_base;
+	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
 	int i;
@@ -124,6 +125,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		return umem;
 	}
 
+	umem->owning_mm = mm = current->mm;
+	mmgrab(mm);
 	umem->odp_data = NULL;
 
 	/* We assume the memory is from hugetlb until proved otherwise */
@@ -132,7 +135,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list) {
 		ret = -ENOMEM;
-		goto umem_kfree;
+		goto umem_kfree_drop;
 	}
 
 	/*
@@ -147,14 +150,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->pinned_vm += npages;
-	if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
-		up_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
+	mm->pinned_vm += npages;
+	if ((mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		up_write(&mm->mmap_sem);
 		ret = -ENOMEM;
 		goto vma;
 	}
-	up_write(&current->mm->mmap_sem);
+	up_write(&mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
 
@@ -172,14 +175,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	sg_list_start = umem->sg_head.sgl;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	while (npages) {
 		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     gup_flags, page_list, vma_list);
 		if (ret < 0) {
-			up_read(&current->mm->mmap_sem);
+			up_read(&mm->mmap_sem);
 			goto umem_release;
 		}
 
@@ -197,7 +200,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		/* preparing for next loop */
 		sg_list_start = sg;
 	}
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
@@ -223,6 +226,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (vma_list)
 		free_page((unsigned long) vma_list);
 	free_page((unsigned long) page_list);
+umem_kfree_drop:
+	if (ret)
+		mmdrop(umem->owning_mm);
 umem_kfree:
 	if (ret)
 		kfree(umem);
@@ -230,15 +236,21 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 }
 EXPORT_SYMBOL(ib_umem_get);
 
-static void ib_umem_account(struct work_struct *work)
+static void __ib_umem_release_tail(struct ib_umem *umem)
+{
+	mmdrop(umem->owning_mm);
+	kfree(umem);
+}
+
+static void ib_umem_release_defer(struct work_struct *work)
 {
 	struct ib_umem *umem = container_of(work, struct ib_umem, work);
 
-	down_write(&umem->mm->mmap_sem);
-	umem->mm->pinned_vm -= umem->diff;
-	up_write(&umem->mm->mmap_sem);
-	mmput(umem->mm);
-	kfree(umem);
+	down_write(&umem->owning_mm->mmap_sem);
+	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	up_write(&umem->owning_mm->mmap_sem);
+
+	__ib_umem_release_tail(umem);
 }
 
 /**
@@ -248,9 +260,6 @@ static void ib_umem_account(struct work_struct *work)
 void ib_umem_release(struct ib_umem *umem)
 {
 	struct ib_ucontext *context = umem->context;
-	struct mm_struct *mm;
-	struct task_struct *task;
-	unsigned long diff;
 
 	if (umem->odp_data) {
 		ib_umem_odp_release(umem);
@@ -259,41 +268,27 @@ void ib_umem_release(struct ib_umem *umem)
 
 	__ib_umem_release(umem->context->device, umem, 1);
 
-	task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
-	if (!task)
-		goto out;
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (!mm)
-		goto out;
-
-	diff = ib_umem_num_pages(umem);
-
 	/*
 	 * We may be called with the mm's mmap_sem already held. This
 	 * can happen when a userspace munmap() is the call that drops
 	 * the last reference to our file and calls our release
 	 * method. If there are memory regions to destroy, we'll end
 	 * up here and not be able to take the mmap_sem. In that case
-	 * we defer the vm_locked accounting to the system workqueue.
+	 * we defer the vm_locked accounting a workqueue.
 	 */
 	if (context->closing) {
-		if (!down_write_trylock(&mm->mmap_sem)) {
-			INIT_WORK(&umem->work, ib_umem_account);
-			umem->mm = mm;
-			umem->diff = diff;
-
+		if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
+			INIT_WORK(&umem->work, ib_umem_release_defer);
 			queue_work(ib_wq, &umem->work);
 			return;
 		}
-	} else
-		down_write(&mm->mmap_sem);
+	} else {
+		down_write(&umem->owning_mm->mmap_sem);
+	}
+	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	up_write(&umem->owning_mm->mmap_sem);
 
-	mm->pinned_vm -= diff;
-	up_write(&mm->mmap_sem);
-	mmput(mm);
-out:
-	kfree(umem);
+	__ib_umem_release_tail(umem);
 }
 EXPORT_SYMBOL(ib_umem_release);
 
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,14 +42,13 @@ struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext *context;
+	struct mm_struct *owning_mm;
 	size_t length;
 	unsigned long address;
 	int page_shift;
 	int writable;
 	int hugetlb;
 	struct work_struct work;
-	struct mm_struct *mm;
-	unsigned long diff;
 	struct ib_umem_odp *odp_data;
 	struct sg_table sg_head;
 	int nmap;
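The deferred branch kept by the patch (ib_umem_release_defer) is the standard embedded work_struct idiom: when the final accounting cannot take mmap_sem in the current context, the object queues itself and a worker finishes the job later in process context. A stand-alone sketch of that idiom, with illustrative names rather than the patch's own, looks like this:

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct pinned_obj {
    	struct work_struct work;
    	/* ... accounting state ... */
    };

    /* Runs later in process context, where the sleeping lock can be taken. */
    static void pinned_obj_release_worker(struct work_struct *work)
    {
    	struct pinned_obj *obj = container_of(work, struct pinned_obj, work);

    	/* take the lock, adjust the counters, then free the object */
    	kfree(obj);
    }

    static void pinned_obj_release(struct pinned_obj *obj)
    {
    	/*
    	 * The caller may already hold the lock we need, so defer the
    	 * final accounting to a workqueue instead of risking deadlock.
    	 */
    	INIT_WORK(&obj->work, pinned_obj_release_worker);
    	queue_work(system_wq, &obj->work);
    }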