2008-11-27 00:21:24 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
|
|
|
|
* Author: Joerg Roedel <joerg.roedel@amd.com>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License version 2 as published
|
|
|
|
* by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adapted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
#define pr_fmt(fmt) "%s: " fmt, __func__
|
|
|
|
|
2011-09-06 22:03:26 +08:00
|
|
|
#include <linux/device.h>
|
2011-09-03 01:32:32 +08:00
|
|
|
#include <linux/kernel.h>
|
2008-11-27 00:21:24 +08:00
|
|
|
#include <linux/bug.h>
|
|
|
|
#include <linux/types.h>
|
2009-05-07 07:03:07 +08:00
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/slab.h>
|
2008-11-27 00:21:24 +08:00
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/iommu.h>
|
|
|
|
|
2011-10-22 03:56:05 +08:00
|
|
|
/*
 * sysfs 'show' callback backing the per-device "iommu_group" attribute.
 *
 * Asks iommu_device_group() for the group id of @dev and prints it into
 * @buf as an unsigned decimal number.  When the lookup fails nothing is
 * written and 0 is returned.  @attr is not used.
 */
static ssize_t show_iommu_group(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	unsigned int id;
	int err;

	err = iommu_device_group(dev, &id);
	if (err != 0)
		return 0;

	return sprintf(buf, "%u", id);
}

/* Read-only (S_IRUGO) attribute: a 'show' callback but no 'store' one. */
static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
|
|
|
|
|
|
|
|
static int add_iommu_group(struct device *dev, void *data)
|
|
|
|
{
|
|
|
|
unsigned int groupid;
|
|
|
|
|
|
|
|
if (iommu_device_group(dev, &groupid) == 0)
|
|
|
|
return device_create_file(dev, &dev_attr_iommu_group);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int remove_iommu_group(struct device *dev)
|
|
|
|
{
|
|
|
|
unsigned int groupid;
|
|
|
|
|
|
|
|
if (iommu_device_group(dev, &groupid) == 0)
|
|
|
|
device_remove_file(dev, &dev_attr_iommu_group);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int iommu_device_notifier(struct notifier_block *nb,
|
|
|
|
unsigned long action, void *data)
|
|
|
|
{
|
|
|
|
struct device *dev = data;
|
|
|
|
|
|
|
|
if (action == BUS_NOTIFY_ADD_DEVICE)
|
|
|
|
return add_iommu_group(dev, NULL);
|
|
|
|
else if (action == BUS_NOTIFY_DEL_DEVICE)
|
|
|
|
return remove_iommu_group(dev);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block iommu_device_nb = {
|
|
|
|
.notifier_call = iommu_device_notifier,
|
|
|
|
};
|
|
|
|
|
2011-08-26 22:48:26 +08:00
|
|
|
static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
|
|
|
|
{
|
2011-10-22 03:56:05 +08:00
|
|
|
bus_register_notifier(bus, &iommu_device_nb);
|
|
|
|
bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
|
2011-08-26 22:48:26 +08:00
|
|
|
}
|
2008-11-27 00:21:24 +08:00
|
|
|
|
2011-08-26 22:48:26 +08:00
|
|
|
/**
|
|
|
|
* bus_set_iommu - set iommu-callbacks for the bus
|
|
|
|
* @bus: bus.
|
|
|
|
* @ops: the callbacks provided by the iommu-driver
|
|
|
|
*
|
|
|
|
* This function is called by an iommu driver to set the iommu methods
|
|
|
|
* used for a particular bus. Drivers for devices on that bus can use
|
|
|
|
* the iommu-api after these ops are registered.
|
|
|
|
* This special function is needed because IOMMUs are usually devices on
|
|
|
|
* the bus itself, so the iommu drivers are not initialized when the bus
|
|
|
|
* is set up. With this function the iommu-driver can set the iommu-ops
|
|
|
|
* afterwards.
|
|
|
|
*/
|
|
|
|
int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
|
2008-11-27 00:21:24 +08:00
|
|
|
{
|
2011-08-26 22:48:26 +08:00
|
|
|
if (bus->iommu_ops != NULL)
|
|
|
|
return -EBUSY;
|
2008-11-27 00:21:24 +08:00
|
|
|
|
2011-08-26 22:48:26 +08:00
|
|
|
bus->iommu_ops = ops;
|
|
|
|
|
|
|
|
/* Do IOMMU specific setup for this bus-type */
|
|
|
|
iommu_bus_init(bus, ops);
|
2008-11-27 00:21:24 +08:00
|
|
|
|
2011-08-26 22:48:26 +08:00
|
|
|
return 0;
|
2008-11-27 00:21:24 +08:00
|
|
|
}
|
2011-08-26 22:48:26 +08:00
|
|
|
EXPORT_SYMBOL_GPL(bus_set_iommu);
|
2008-11-27 00:21:24 +08:00
|
|
|
|
2011-09-07 00:46:34 +08:00
|
|
|
bool iommu_present(struct bus_type *bus)
|
2008-11-27 00:21:24 +08:00
|
|
|
{
|
2011-09-07 00:58:54 +08:00
|
|
|
return bus->iommu_ops != NULL;
|
2008-11-27 00:21:24 +08:00
|
|
|
}
|
2011-09-07 00:46:34 +08:00
|
|
|
EXPORT_SYMBOL_GPL(iommu_present);
|
2008-11-27 00:21:24 +08:00
|
|
|
|
2011-09-14 03:25:23 +08:00
|
|
|
/**
|
|
|
|
* iommu_set_fault_handler() - set a fault handler for an iommu domain
|
|
|
|
* @domain: iommu domain
|
|
|
|
* @handler: fault handler
|
2011-09-27 19:36:40 +08:00
|
|
|
*
|
|
|
|
* This function should be used by IOMMU users which want to be notified
|
|
|
|
* whenever an IOMMU fault happens.
|
|
|
|
*
|
|
|
|
* The fault handler itself should return 0 on success, and an appropriate
|
|
|
|
* error code otherwise.
|
2011-09-14 03:25:23 +08:00
|
|
|
*/
|
|
|
|
void iommu_set_fault_handler(struct iommu_domain *domain,
|
|
|
|
iommu_fault_handler_t handler)
|
|
|
|
{
|
|
|
|
BUG_ON(!domain);
|
|
|
|
|
|
|
|
domain->handler = handler;
|
|
|
|
}
|
2011-09-26 21:11:46 +08:00
|
|
|
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
|
2011-09-14 03:25:23 +08:00
|
|
|
|
2011-09-06 22:03:26 +08:00
|
|
|
struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
|
2008-11-27 00:21:24 +08:00
|
|
|
{
|
|
|
|
struct iommu_domain *domain;
|
|
|
|
int ret;
|
|
|
|
|
2011-09-07 00:58:54 +08:00
|
|
|
if (bus == NULL || bus->iommu_ops == NULL)
|
2011-09-06 22:03:26 +08:00
|
|
|
return NULL;
|
|
|
|
|
2011-12-16 20:38:25 +08:00
|
|
|
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
|
2008-11-27 00:21:24 +08:00
|
|
|
if (!domain)
|
|
|
|
return NULL;
|
|
|
|
|
2011-09-07 00:58:54 +08:00
|
|
|
domain->ops = bus->iommu_ops;
|
2011-09-06 22:03:26 +08:00
|
|
|
|
2011-09-07 00:58:54 +08:00
|
|
|
ret = domain->ops->domain_init(domain);
|
2008-11-27 00:21:24 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_free;
|
|
|
|
|
|
|
|
return domain;
|
|
|
|
|
|
|
|
out_free:
|
|
|
|
kfree(domain);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
|
|
|
|
|
|
|
|
void iommu_domain_free(struct iommu_domain *domain)
|
|
|
|
{
|
2011-09-06 22:44:29 +08:00
|
|
|
if (likely(domain->ops->domain_destroy != NULL))
|
|
|
|
domain->ops->domain_destroy(domain);
|
|
|
|
|
2008-11-27 00:21:24 +08:00
|
|
|
kfree(domain);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_domain_free);
|
|
|
|
|
|
|
|
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
|
|
|
|
{
|
2011-09-06 22:44:29 +08:00
|
|
|
if (unlikely(domain->ops->attach_dev == NULL))
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
return domain->ops->attach_dev(domain, dev);
|
2008-11-27 00:21:24 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_attach_device);
|
|
|
|
|
|
|
|
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
|
|
|
|
{
|
2011-09-06 22:44:29 +08:00
|
|
|
if (unlikely(domain->ops->detach_dev == NULL))
|
|
|
|
return;
|
|
|
|
|
|
|
|
domain->ops->detach_dev(domain, dev);
|
2008-11-27 00:21:24 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_detach_device);
|
|
|
|
|
|
|
|
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
|
|
|
|
unsigned long iova)
|
|
|
|
{
|
2011-09-06 22:44:29 +08:00
|
|
|
if (unlikely(domain->ops->iova_to_phys == NULL))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return domain->ops->iova_to_phys(domain, iova);
|
2008-11-27 00:21:24 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
|
2009-03-18 15:33:06 +08:00
|
|
|
|
|
|
|
int iommu_domain_has_cap(struct iommu_domain *domain,
|
|
|
|
unsigned long cap)
|
|
|
|
{
|
2011-09-06 22:44:29 +08:00
|
|
|
if (unlikely(domain->ops->domain_has_cap == NULL))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return domain->ops->domain_has_cap(domain, cap);
|
2009-03-18 15:33:06 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
|
2010-01-08 20:35:09 +08:00
|
|
|
|
|
|
|
int iommu_map(struct iommu_domain *domain, unsigned long iova,
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
phys_addr_t paddr, size_t size, int prot)
|
2010-01-08 20:35:09 +08:00
|
|
|
{
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
unsigned long orig_iova = iova;
|
|
|
|
unsigned int min_pagesz;
|
|
|
|
size_t orig_size = size;
|
|
|
|
int ret = 0;
|
2010-01-08 20:35:09 +08:00
|
|
|
|
2011-09-06 22:44:29 +08:00
|
|
|
if (unlikely(domain->ops->map == NULL))
|
|
|
|
return -ENODEV;
|
2010-01-08 20:35:09 +08:00
|
|
|
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
/* find out the minimum page size supported */
|
|
|
|
min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* both the virtual address and the physical one, as well as
|
|
|
|
* the size of the mapping, must be aligned (at least) to the
|
|
|
|
* size of the smallest page supported by the hardware
|
|
|
|
*/
|
|
|
|
if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
|
|
|
|
pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
|
|
|
|
"0x%x\n", iova, (unsigned long)paddr,
|
|
|
|
(unsigned long)size, min_pagesz);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
|
|
|
|
(unsigned long)paddr, (unsigned long)size);
|
|
|
|
|
|
|
|
while (size) {
|
|
|
|
unsigned long pgsize, addr_merge = iova | paddr;
|
|
|
|
unsigned int pgsize_idx;
|
|
|
|
|
|
|
|
/* Max page size that still fits into 'size' */
|
|
|
|
pgsize_idx = __fls(size);
|
|
|
|
|
|
|
|
/* need to consider alignment requirements ? */
|
|
|
|
if (likely(addr_merge)) {
|
|
|
|
/* Max page size allowed by both iova and paddr */
|
|
|
|
unsigned int align_pgsize_idx = __ffs(addr_merge);
|
|
|
|
|
|
|
|
pgsize_idx = min(pgsize_idx, align_pgsize_idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* build a mask of acceptable page sizes */
|
|
|
|
pgsize = (1UL << (pgsize_idx + 1)) - 1;
|
|
|
|
|
|
|
|
/* throw away page sizes not supported by the hardware */
|
|
|
|
pgsize &= domain->ops->pgsize_bitmap;
|
2010-01-08 20:35:09 +08:00
|
|
|
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
/* make sure we're still sane */
|
|
|
|
BUG_ON(!pgsize);
|
2010-01-08 20:35:09 +08:00
|
|
|
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adopted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
/* pick the biggest page */
|
|
|
|
pgsize_idx = __fls(pgsize);
|
|
|
|
pgsize = 1UL << pgsize_idx;
|
|
|
|
|
|
|
|
pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
|
|
|
|
(unsigned long)paddr, pgsize);
|
|
|
|
|
|
|
|
ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
|
|
|
|
iova += pgsize;
|
|
|
|
paddr += pgsize;
|
|
|
|
size -= pgsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* unroll mapping in case something went wrong */
|
|
|
|
if (ret)
|
|
|
|
iommu_unmap(domain, orig_iova, orig_size - size);
|
|
|
|
|
|
|
|
return ret;
|
2010-01-08 20:35:09 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_map);
|
|
|
|
|
iommu/core: split mapping to page sizes as supported by the hardware
When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.
The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.
This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.
Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.
register_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.
Mainline users of the IOMMU API (kvm and omap-iovmm) are adapted
to deal with bytes instead of page order.
Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
2011-11-10 17:32:26 +08:00
|
|
|
/**
 * iommu_unmap - remove mappings from an iommu domain
 * @domain: domain to remove the mappings from
 * @iova: io virtual address at which to start unmapping
 * @size: number of bytes to unmap
 *
 * Repeatedly calls the driver's unmap() callback, advancing by the number
 * of bytes each call reports as unmapped, until at least @size bytes are
 * gone or the callback reports 0 (an area that isn't mapped).
 *
 * @iova and @size must be aligned to the smallest page size advertised in
 * the ops' pgsize_bitmap.
 *
 * Returns the number of bytes actually unmapped, which may be more than
 * @size.  Because the return type is unsigned, failures are reported as 0
 * bytes unmapped rather than as a negative errno (which a caller would
 * see as an enormous byte count): this covers a missing unmap() callback,
 * an empty pgsize_bitmap and misaligned arguments.
 */
size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
	size_t unmapped_page, unmapped = 0;
	unsigned int min_pagesz;

	/*
	 * No callback, or no page size advertised (__ffs() below would be
	 * undefined for 0): nothing can be unmapped.
	 */
	if (unlikely(domain->ops->unmap == NULL ||
		     domain->ops->pgsize_bitmap == 0UL))
		return 0;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

	/*
	 * The virtual address, as well as the size of the mapping, must be
	 * aligned (at least) to the size of the smallest page supported
	 * by the hardware
	 */
	if (!IS_ALIGNED(iova | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
					iova, (unsigned long)size, min_pagesz);
		return 0;
	}

	pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
							(unsigned long)size);

	/*
	 * Keep iterating until we either unmap 'size' bytes (or more)
	 * or we hit an area that isn't mapped.
	 */
	while (unmapped < size) {
		size_t left = size - unmapped;

		unmapped_page = domain->ops->unmap(domain, iova, left);
		if (!unmapped_page)
			break;

		pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
					(unsigned long)unmapped_page);

		iova += unmapped_page;
		unmapped += unmapped_page;
	}

	return unmapped;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
|
2011-10-22 03:56:05 +08:00
|
|
|
|
|
|
|
int iommu_device_group(struct device *dev, unsigned int *groupid)
|
|
|
|
{
|
|
|
|
if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
|
|
|
|
return dev->bus->iommu_ops->device_group(dev, groupid);
|
|
|
|
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(iommu_device_group);
|