Commit f0fe1e54, authored by Jeff Layton, committed by Ilya Dryomov

ceph: plumb in decryption during reads

Force the use of sparse reads when the inode is encrypted, and add the
appropriate code to decrypt the extent map after receiving.

Note that the crypto block may be smaller than a page, but the reverse
cannot be true.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Reviewed-and-tested-by: Luís Henriques <lhenriques@suse.de>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent d5520771
......@@ -18,6 +18,7 @@
#include "mds_client.h"
#include "cache.h"
#include "metric.h"
#include "crypto.h"
#include <linux/ceph/osd_client.h>
#include <linux/ceph/striper.h>
......@@ -242,7 +243,8 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
static void finish_netfs_read(struct ceph_osd_request *req)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
struct inode *inode = req->r_inode;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
struct netfs_io_subrequest *subreq = req->r_priv;
struct ceph_osd_req_op *op = &req->r_ops[0];
......@@ -256,16 +258,31 @@ static void finish_netfs_read(struct ceph_osd_request *req)
subreq->len, i_size_read(req->r_inode));
/* no object means success but no data */
if (sparse && err >= 0)
err = ceph_sparse_ext_map_end(op);
else if (err == -ENOENT)
if (err == -ENOENT)
err = 0;
else if (err == -EBLOCKLISTED)
fsc->blocklisted = true;
if (err >= 0 && err < subreq->len)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (err >= 0) {
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
if (err < subreq->len)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
err = ceph_fscrypt_decrypt_extents(inode,
osd_data->pages, subreq->start,
op->extent.sparse_ext,
op->extent.sparse_ext_cnt);
if (err > subreq->len)
err = subreq->len;
}
}
if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
ceph_put_page_vector(osd_data->pages,
calc_pages_for(osd_data->alignment,
osd_data->length), false);
}
netfs_subreq_terminated(subreq, err, false);
iput(req->r_inode);
}
......@@ -336,7 +353,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct iov_iter iter;
int err = 0;
u64 len = subreq->len;
bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 off = subreq->start;
if (ceph_inode_is_shutdown(inode)) {
err = -EIO;
......@@ -346,8 +364,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
if (IS_ERR(req)) {
......@@ -363,8 +383,37 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
}
dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
osd_req_op_extent_osd_iter(req, 0, &iter);
/*
* FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
* encrypted inodes. We'd need infrastructure that handles an iov_iter
* instead of page arrays, and we don't have that as of yet. Once the
* dust settles on the write helpers and encrypt/decrypt routines for
* netfs, we should be able to rework this.
*/
if (IS_ENCRYPTED(inode)) {
struct page **pages;
size_t page_off;
err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
if (err < 0) {
dout("%s: iov_ter_get_pages_alloc returned %d\n",
__func__, err);
goto out;
}
/* should always give us a page-aligned read */
WARN_ON_ONCE(page_off);
len = err;
err = 0;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
false);
} else {
osd_req_op_extent_osd_iter(req, 0, &iter);
}
req->r_callback = finish_netfs_read;
req->r_priv = subreq;
req->r_inode = inode;
......
......@@ -970,7 +970,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
u64 off = *ki_pos;
u64 len = iov_iter_count(to);
u64 i_size = i_size_read(inode);
bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 objver = 0;
dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
......@@ -1001,10 +1001,19 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
int idx;
size_t left;
struct ceph_osd_req_op *op;
u64 read_off = off;
u64 read_len = len;
/* determine new offset/length if encrypted */
ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
dout("sync_read orig %llu~%llu reading %llu~%llu",
off, len, read_off, read_len);
req = ceph_osdc_new_request(osdc, &ci->i_layout,
ci->i_vino, off, &len, 0, 1,
sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
ci->i_vino, read_off, &read_len, 0, 1,
sparse ? CEPH_OSD_OP_SPARSE_READ :
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
......@@ -1013,10 +1022,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
/* adjust len downward if the request truncated the len */
if (off + len > read_off + read_len)
len = read_off + read_len - off;
more = len < iov_iter_count(to);
num_pages = calc_pages_for(off, len);
page_off = off & ~PAGE_MASK;
num_pages = calc_pages_for(read_off, read_len);
page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
......@@ -1024,7 +1036,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
offset_in_page(read_off),
false, false);
op = &req->r_ops[0];
......@@ -1042,7 +1055,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
len, ret);
read_len, ret);
if (ret > 0)
objver = req->r_version;
......@@ -1057,8 +1070,35 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
else if (ret == -ENOENT)
ret = 0;
if (ret > 0 && IS_ENCRYPTED(inode)) {
int fret;
fret = ceph_fscrypt_decrypt_extents(inode, pages,
read_off, op->extent.sparse_ext,
op->extent.sparse_ext_cnt);
if (fret < 0) {
ret = fret;
ceph_osdc_put_request(req);
break;
}
/* account for any partial block at the beginning */
fret -= (off - read_off);
/*
* Short read after big offset adjustment?
* Nothing is usable, just call it a zero
* len read.
*/
fret = max(fret, 0);
/* account for partial block at the end */
ret = min_t(ssize_t, fret, len);
}
ceph_osdc_put_request(req);
/* Short read but not EOF? Zero out the remainder. */
if (ret >= 0 && ret < len && (off + ret < i_size)) {
int zlen = min(len - ret, i_size - off - ret);
int zoff = page_off + ret;
......@@ -1072,15 +1112,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
idx = 0;
left = ret > 0 ? ret : 0;
while (left > 0) {
size_t len, copied;
page_off = off & ~PAGE_MASK;
len = min_t(size_t, left, PAGE_SIZE - page_off);
size_t plen, copied;
plen = min_t(size_t, left, PAGE_SIZE - page_off);
SetPageUptodate(pages[idx]);
copied = copy_page_to_iter(pages[idx++],
page_off, len, to);
page_off, plen, to);
off += copied;
left -= copied;
if (copied < len) {
page_off = 0;
if (copied < plen) {
ret = -EFAULT;
break;
}
......@@ -1097,20 +1138,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
if (off > *ki_pos) {
if (off >= i_size) {
*retry_op = CHECK_EOF;
ret = i_size - *ki_pos;
*ki_pos = i_size;
} else {
ret = off - *ki_pos;
*ki_pos = off;
if (ret > 0) {
if (off > *ki_pos) {
if (off >= i_size) {
*retry_op = CHECK_EOF;
ret = i_size - *ki_pos;
*ki_pos = i_size;
} else {
ret = off - *ki_pos;
*ki_pos = off;
}
}
}
if (last_objver && ret > 0)
*last_objver = objver;
if (last_objver)
*last_objver = objver;
}
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment