Commit c2a7d2a1 authored by Dan Williams, committed by Dave Jiang

filesystem-dax: Introduce dax_lock_mapping_entry()

In preparation for implementing support for memory poison (media error)
handling via dax mappings, implement a lock_page() equivalent. Poison
error handling requires rmap and needs guarantees that the page->mapping
association is maintained / valid (inode not freed) for the duration of
the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().
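
For context, the caller pattern this API is designed for looks roughly
like the sketch below. This is illustrative only and is not part of the
patch: the function name and the rmap-walk placeholder are hypothetical
stand-ins for the memory-failure consumer that arrives in a follow-on
patch; only dax_lock_mapping_entry() and dax_unlock_mapping_entry() are
introduced here.

	/* Hypothetical consumer, for illustration only. */
	static int memory_failure_dax_sketch(struct page *page)
	{
		/* Returns false if page->mapping is no longer dax. */
		if (!dax_lock_mapping_entry(page))
			return -EBUSY;

		/*
		 * page->mapping and page->index are now stable:
		 * filesystem-dax holds the radix tree entry lock,
		 * device-dax relies on the dev_pagemap pin, so the
		 * inode cannot be freed during the rmap walk.
		 */
		walk_rmap_and_kill_procs(page);	/* hypothetical */

		dax_unlock_mapping_entry(page);
		return 0;
	}

A false return from dax_lock_mapping_entry() means the mapping was
invalidated (or was never a dax mapping) before the entry lookup ran,
so the caller can give up without dereferencing the inode.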

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
parent ae1139ec
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-		pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);