/*
 * Copyright(c) 2016 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/percpu-refcount.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include "../nvdimm/pfn.h"
#include "../nvdimm/nd.h"
#include "device-dax.h"

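/*
 * Driver-private state: the percpu_ref and completion track outstanding
 * references to the pages described by @pgmap, so teardown can wait until
 * every page reference has been dropped.
 */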
struct dax_pmem {
	struct device *dev;
	struct percpu_ref ref;
	struct dev_pagemap pgmap;
	struct completion cmp;
};

static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
{
	return container_of(ref, struct dax_pmem, ref);
}

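/*
 * Called once the percpu_ref drops to zero (all device pages are idle);
 * signal the completion that dax_pmem_percpu_exit() is waiting on.
 */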
static void dax_pmem_percpu_release(struct percpu_ref *ref)
{
	struct dax_pmem *dax_pmem = to_dax_pmem(ref);

	dev_dbg(dax_pmem->dev, "trace\n");
	complete(&dax_pmem->cmp);
}

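/*
 * devm action registered before devm_memremap_pages(), so it runs after the
 * page map has been torn down: wait for the last page reference to be
 * dropped, then free the percpu_ref's resources.
 */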
static void dax_pmem_percpu_exit(void *data)
{
	struct percpu_ref *ref = data;
	struct dax_pmem *dax_pmem = to_dax_pmem(ref);

	dev_dbg(dax_pmem->dev, "trace\n");
	wait_for_completion(&dax_pmem->cmp);
	percpu_ref_exit(ref);
}

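/*
 * Wired up as pgmap.kill: the devm_memremap_pages() teardown path calls this
 * to begin shutting down the page reference count; dax_pmem_percpu_exit()
 * then waits for it to drain.
 */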
static void dax_pmem_percpu_kill(struct percpu_ref *ref)
{
	struct dax_pmem *dax_pmem = to_dax_pmem(ref);

	dev_dbg(dax_pmem->dev, "trace\n");
	percpu_ref_kill(ref);
}

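/*
 * Probe flow: validate the namespace, parse the 'pfn' info block, map the
 * namespace with devm_memremap_pages(), then register a dax_region and a
 * child dev_dax instance over the resulting address range.
 */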
static int dax_pmem_probe(struct device *dev)
{
	void *addr;
	struct resource res;
	int rc, id, region_id;
	struct nd_pfn_sb *pfn_sb;
	struct dev_dax *dev_dax;
	struct dax_pmem *dax_pmem;
	struct nd_namespace_io *nsio;
	struct dax_region *dax_region;
	struct nd_namespace_common *ndns;
	struct nd_dax *nd_dax = to_nd_dax(dev);
	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);
	nsio = to_nd_namespace_io(&ndns->dev);

	dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
	if (!dax_pmem)
		return -ENOMEM;

	/* parse the 'pfn' info block via ->rw_bytes */
	rc = devm_nsio_enable(dev, nsio);
	if (rc)
		return rc;
	rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
	if (rc)
		return rc;
	devm_nsio_disable(dev, nsio);

	pfn_sb = nd_pfn->pfn_sb;

	if (!devm_request_mem_region(dev, nsio->res.start,
			resource_size(&nsio->res),
			dev_name(&ndns->dev))) {
		dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
		return -EBUSY;
	}

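	/*
	 * Initialize the reference count and completion that gate teardown;
	 * the dax_pmem_percpu_exit() devm action waits for the ref to drain
	 * before freeing it.
	 */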
	dax_pmem->dev = dev;
	init_completion(&dax_pmem->cmp);
	rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
			GFP_KERNEL);
	if (rc)
		return rc;

	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
	if (rc) {
		percpu_ref_exit(&dax_pmem->ref);
		return rc;
	}

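	/*
	 * Hand the ref and its kill routine to devm_memremap_pages() via the
	 * pgmap so its release action can shut the ref down itself.
	 */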
	dax_pmem->pgmap.ref = &dax_pmem->ref;
	dax_pmem->pgmap.kill = dax_pmem_percpu_kill;
	addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	/* adjust the dax_region resource to the start of data */
	memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
	res.start += le64_to_cpu(pfn_sb->dataoff);

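	/* derive the region and namespace ids from the namespace device name */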
	rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
	if (rc != 2)
		return -EINVAL;

	dax_region = alloc_dax_region(dev, region_id, &res,
			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
	if (!dax_region)
		return -ENOMEM;

	/* TODO: support for subdividing a dax region... */
	dev_dax = devm_create_dev_dax(dax_region, id, &res, 1);

	/* child dev_dax instances now own the lifetime of the dax_region */
	dax_region_put(dax_region);

	return PTR_ERR_OR_ZERO(dev_dax);
}

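/* Match against ND_DEVICE_DAX_PMEM devices registered by the libnvdimm bus. */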
static struct nd_device_driver dax_pmem_driver = {
	.probe = dax_pmem_probe,
	.drv = {
		.name = "dax_pmem",
	},
	.type = ND_DRIVER_DAX_PMEM,
};

module_nd_driver(dax_pmem_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);