filesystem-dax: Introduce dax_lock_mapping_entry()
In preparation for implementing support for memory poison (media
error) handling via dax mappings, implement a lock_page() equivalent.
Poison error handling requires rmap and needs guarantees that the
page->mapping association is maintained / valid (inode not freed) for
the duration of the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
parent ae1139ece1
commit c2a7d2a115

 fs/dax.c            | 109
 include/linux/dax.h |  13
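A usage sketch, not part of this commit: the memory poison (memory-failure)
path is expected to pin the dax entry before doing its rmap walk. Only
dax_lock_mapping_entry() / dax_unlock_mapping_entry() below come from this
patch; collect_procs_and_kill() is a hypothetical placeholder for the
eventual rmap walk, and the function itself is illustrative only.

#include <linux/dax.h>
#include <linux/mm.h>

/*
 * Sketch of a memory-failure style caller: hold the dax entry lock so
 * that page->mapping (and the inode behind it) stays valid while the
 * poisoned pfn is reverse-mapped. collect_procs_and_kill() is a
 * placeholder, not an existing kernel API.
 */
static int dax_poison_sketch(unsigned long pfn)
{
	struct page *page = pfn_to_page(pfn);

	/*
	 * Returns false when the page is no longer part of a dax
	 * mapping (e.g. the file was truncated), in which case there
	 * is nothing to unmap. For device-dax (S_ISCHR inode) it
	 * returns true without taking the entry lock; the dev_pagemap
	 * pin is sufficient there.
	 */
	if (!dax_lock_mapping_entry(page))
		return -EBUSY;

	collect_procs_and_kill(page);	/* hypothetical rmap walk */

	dax_unlock_mapping_entry(page);
	return 0;
}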
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-		pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-		pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
diff --git a/include/linux/dax.h b/include/linux/dax.h
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);