linux/drivers/nvdimm/nd-core.h

119 lines
4.6 KiB
C
Raw Normal View History

/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __ND_CORE_H__
#define __ND_CORE_H__
#include <linux/libnvdimm.h>
#include <linux/device.h>
2015-05-02 01:11:27 +08:00
#include <linux/libnvdimm.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/nd.h>
extern struct list_head nvdimm_bus_list;
extern struct mutex nvdimm_bus_list_mutex;
2015-06-09 02:27:06 +08:00
extern int nvdimm_major;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
2015-05-02 01:11:27 +08:00
wait_queue_head_t probe_wait;
struct list_head list;
struct device dev;
2015-05-02 01:11:27 +08:00
int id, probe_active;
struct list_head poison_list;
struct list_head mapping_list;
struct mutex reconfig_mutex;
libnvdimm: fix clear poison locking with spinlock and GFP_NOWAIT allocation The following warning results from holding a lane spinlock, preempt_disable(), or the btt map spinlock and then trying to take the reconfig_mutex to walk the poison list and potentially add new entries. BUG: sleeping function called from invalid context at kernel/locking/mutex. c:747 in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd [..] Call Trace: dump_stack+0x85/0xc8 ___might_sleep+0x184/0x250 __might_sleep+0x4a/0x90 __mutex_lock+0x58/0x9b0 ? nvdimm_bus_lock+0x21/0x30 [libnvdimm] ? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm] ? acpi_nfit_forget_poison+0x79/0x80 [nfit] ? _raw_spin_unlock+0x27/0x40 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_forget_poison+0x25/0x50 [libnvdimm] nvdimm_clear_poison+0x106/0x140 [libnvdimm] nsio_rw_bytes+0x164/0x270 [libnvdimm] btt_write_pg+0x1de/0x3e0 [nd_btt] ? blk_queue_enter+0x30/0x290 btt_make_request+0x11a/0x310 [nd_btt] ? blk_queue_enter+0xb7/0x290 ? blk_queue_enter+0x30/0x290 generic_make_request+0x118/0x3b0 A spinlock is introduced to protect the poison list. This allows us to not having to acquire the reconfig_mutex for touching the poison list. The add_poison() function has been broken out into two helper functions. One to allocate the poison entry and the other to apppend the entry. This allows us to unlock the poison_lock in non-I/O path and continue to be able to allocate the poison entry with GFP_KERNEL. We will use GFP_NOWAIT in the I/O path in order to satisfy being in atomic context. Reviewed-by: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2017-04-14 05:25:17 +08:00
spinlock_t poison_lock;
};
struct nvdimm {
unsigned long flags;
void *provider_data;
unsigned long cmd_mask;
struct device dev;
2015-05-02 01:11:27 +08:00
atomic_t busy;
int id, num_flush;
struct resource *flush_wpq;
};
/**
* struct blk_alloc_info - tracking info for BLK dpa scanning
* @nd_mapping: blk region mapping boundaries
* @available: decremented in alias_dpa_busy as aliased PMEM is scanned
* @busy: decremented in blk_dpa_busy to account for ranges already
* handled by alias_dpa_busy
* @res: alias_dpa_busy interprets this a free space range that needs to
* be truncated to the valid BLK allocation starting DPA, blk_dpa_busy
* treats it as a busy range that needs the aliased PMEM ranges
* truncated.
*/
struct blk_alloc_info {
struct nd_mapping *nd_mapping;
resource_size_t available, busy;
struct resource *res;
};
bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev);
bool is_nd_blk(struct device *dev);
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
2015-05-02 01:11:27 +08:00
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region;
void nd_region_create_ns_seed(struct nd_region *nd_region);
void nd_region_create_btt_seed(struct nd_region *nd_region);
void nd_region_create_pfn_seed(struct nd_region *nd_region);
void nd_region_create_dax_seed(struct nd_region *nd_region);
2015-05-02 01:11:27 +08:00
void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
void nd_synchronize(void);
libnvdimm, nfit: regions (block-data-window, persistent memory, volatile memory) A "region" device represents the maximum capacity of a BLK range (mmio block-data-window(s)), or a PMEM range (DAX-capable persistent memory or volatile memory), without regard for aliasing. Aliasing, in the dimm-local address space (DPA), is resolved by metadata on a dimm to designate which exclusive interface will access the aliased DPA ranges. Support for the per-dimm metadata/label arrvies is in a subsequent patch. The name format of "region" devices is "regionN" where, like dimms, N is a global ida index assigned at discovery time. This id is not reliable across reboots nor in the presence of hotplug. Look to attributes of the region or static id-data of the sub-namespace to generate a persistent name. However, if the platform configuration does not change it is reasonable to expect the same region id to be assigned at the next boot. "region"s have 2 generic attributes "size", and "mapping"s where: - size: the BLK accessible capacity or the span of the system physical address range in the case of PMEM. - mappingN: a tuple describing a dimm's contribution to the region's capacity in the format (<nmemX>,<dpa>,<size>). For a PMEM-region there will be at least one mapping per dimm in the interleave set. For a BLK-region there is only "mapping0" listing the starting DPA of the BLK-region and the available DPA capacity of that space (matches "size" above). The max number of mappings per "region" is hard coded per the constraints of sysfs attribute groups. That said the number of mappings per region should never exceed the maximum number of possible dimms in the system. If the current number turns out to not be enough then the "mappings" attribute clarifies how many there are supposed to be. "32 should be enough for anybody...". Cc: Neil Brown <neilb@suse.de> Cc: <linux-acpi@vger.kernel.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Robert Moore <robert.moore@intel.com> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Christoph Hellwig <hch@lst.de> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Toshi Kani <toshi.kani@hp.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-06-10 08:13:14 +08:00
int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
2015-05-02 01:11:27 +08:00
int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
void __nd_device_register(struct device *dev);
libnvdimm, nfit: regions (block-data-window, persistent memory, volatile memory) A "region" device represents the maximum capacity of a BLK range (mmio block-data-window(s)), or a PMEM range (DAX-capable persistent memory or volatile memory), without regard for aliasing. Aliasing, in the dimm-local address space (DPA), is resolved by metadata on a dimm to designate which exclusive interface will access the aliased DPA ranges. Support for the per-dimm metadata/label arrvies is in a subsequent patch. The name format of "region" devices is "regionN" where, like dimms, N is a global ida index assigned at discovery time. This id is not reliable across reboots nor in the presence of hotplug. Look to attributes of the region or static id-data of the sub-namespace to generate a persistent name. However, if the platform configuration does not change it is reasonable to expect the same region id to be assigned at the next boot. "region"s have 2 generic attributes "size", and "mapping"s where: - size: the BLK accessible capacity or the span of the system physical address range in the case of PMEM. - mappingN: a tuple describing a dimm's contribution to the region's capacity in the format (<nmemX>,<dpa>,<size>). For a PMEM-region there will be at least one mapping per dimm in the interleave set. For a BLK-region there is only "mapping0" listing the starting DPA of the BLK-region and the available DPA capacity of that space (matches "size" above). The max number of mappings per "region" is hard coded per the constraints of sysfs attribute groups. That said the number of mappings per region should never exceed the maximum number of possible dimms in the system. If the current number turns out to not be enough then the "mappings" attribute clarifies how many there are supposed to be. "32 should be enough for anybody...". Cc: Neil Brown <neilb@suse.de> Cc: <linux-acpi@vger.kernel.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Robert Moore <robert.moore@intel.com> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Christoph Hellwig <hch@lst.de> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Toshi Kani <toshi.kani@hp.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-06-10 08:13:14 +08:00
int nd_match_dimm(struct device *dev, void *data);
struct nd_label_id;
char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
struct nd_region;
struct nvdimm_drvdata;
struct nd_mapping;
void nd_mapping_free_labels(struct nd_mapping *nd_mapping);
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, resource_size_t *overlap);
resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id);
int alias_dpa_busy(struct device *dev, void *data);
struct resource *nsblk_add_resource(struct nd_region *nd_region,
struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
resource_size_t start);
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
void get_ndd(struct nvdimm_drvdata *ndd);
resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
struct nd_namespace_common **_ndns);
bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
struct nd_namespace_common **_ndns);
ssize_t nd_namespace_store(struct device *dev,
struct nd_namespace_common **_ndns, const char *buf,
size_t len);
struct nd_pfn *to_nd_pfn_safe(struct device *dev);
#endif /* __ND_CORE_H__ */