Commit 2765cfbb, authored by Ross Zwisler, committed by Dan Williams

dax: update I/O path to do proper PMEM flushing

Update the DAX I/O path so that all operations that store data (I/O
writes, zeroing blocks, punching holes, etc.) properly synchronize the
stores to media using the PMEM API.  This ensures that the data DAX is
writing is durable on media before the operation completes.
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 5de490da
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/memcontrol.h> #include <linux/memcontrol.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/pmem.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/vmstat.h> #include <linux/vmstat.h>
...@@ -46,10 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) ...@@ -46,10 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
unsigned pgsz = PAGE_SIZE - offset_in_page(addr); unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
if (pgsz > count) if (pgsz > count)
pgsz = count; pgsz = count;
if (pgsz < PAGE_SIZE) clear_pmem((void __pmem *)addr, pgsz);
memset(addr, 0, pgsz);
else
clear_page(addr);
addr += pgsz; addr += pgsz;
size -= pgsz; size -= pgsz;
count -= pgsz; count -= pgsz;
...@@ -59,6 +57,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) ...@@ -59,6 +57,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
} }
} while (size); } while (size);
wmb_pmem();
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(dax_clear_blocks); EXPORT_SYMBOL_GPL(dax_clear_blocks);
...@@ -70,15 +69,16 @@ static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) ...@@ -70,15 +69,16 @@ static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size); return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
} }
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos, static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
loff_t end) loff_t end)
{ {
loff_t final = end - pos + first; /* The final byte of the buffer */ loff_t final = end - pos + first; /* The final byte of the buffer */
if (first > 0) if (first > 0)
memset(addr, 0, first); clear_pmem((void __pmem *)addr, first);
if (final < size) if (final < size)
memset(addr + final, 0, size - final); clear_pmem((void __pmem *)addr + final, size - final);
} }
static bool buffer_written(struct buffer_head *bh) static bool buffer_written(struct buffer_head *bh)
...@@ -108,12 +108,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, ...@@ -108,12 +108,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
loff_t bh_max = start; loff_t bh_max = start;
void *addr; void *addr;
bool hole = false; bool hole = false;
bool need_wmb = false;
if (iov_iter_rw(iter) != WRITE) if (iov_iter_rw(iter) != WRITE)
end = min(end, i_size_read(inode)); end = min(end, i_size_read(inode));
while (pos < end) { while (pos < end) {
unsigned len; size_t len;
if (pos == max) { if (pos == max) {
unsigned blkbits = inode->i_blkbits; unsigned blkbits = inode->i_blkbits;
sector_t block = pos >> blkbits; sector_t block = pos >> blkbits;
...@@ -145,18 +146,22 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, ...@@ -145,18 +146,22 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
retval = dax_get_addr(bh, &addr, blkbits); retval = dax_get_addr(bh, &addr, blkbits);
if (retval < 0) if (retval < 0)
break; break;
if (buffer_unwritten(bh) || buffer_new(bh)) if (buffer_unwritten(bh) || buffer_new(bh)) {
dax_new_buf(addr, retval, first, pos, dax_new_buf(addr, retval, first, pos,
end); end);
need_wmb = true;
}
addr += first; addr += first;
size = retval - first; size = retval - first;
} }
max = min(pos + size, end); max = min(pos + size, end);
} }
if (iov_iter_rw(iter) == WRITE) if (iov_iter_rw(iter) == WRITE) {
len = copy_from_iter_nocache(addr, max - pos, iter); len = copy_from_iter_pmem((void __pmem *)addr,
else if (!hole) max - pos, iter);
need_wmb = true;
} else if (!hole)
len = copy_to_iter(addr, max - pos, iter); len = copy_to_iter(addr, max - pos, iter);
else else
len = iov_iter_zero(max - pos, iter); len = iov_iter_zero(max - pos, iter);
...@@ -168,6 +173,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, ...@@ -168,6 +173,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
addr += len; addr += len;
} }
if (need_wmb)
wmb_pmem();
return (pos == start) ? retval : pos - start; return (pos == start) ? retval : pos - start;
} }
...@@ -303,8 +311,10 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, ...@@ -303,8 +311,10 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
goto out; goto out;
} }
if (buffer_unwritten(bh) || buffer_new(bh)) if (buffer_unwritten(bh) || buffer_new(bh)) {
clear_page(addr); clear_pmem((void __pmem *)addr, PAGE_SIZE);
wmb_pmem();
}
error = vm_insert_mixed(vma, vaddr, pfn); error = vm_insert_mixed(vma, vaddr, pfn);
...@@ -542,7 +552,8 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, ...@@ -542,7 +552,8 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
err = dax_get_addr(&bh, &addr, inode->i_blkbits); err = dax_get_addr(&bh, &addr, inode->i_blkbits);
if (err < 0) if (err < 0)
return err; return err;
memset(addr + offset, 0, length); clear_pmem((void __pmem *)addr + offset, length);
wmb_pmem();
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment