Commit cff7f223 authored by Linus Torvalds

Merge tag 'ceph-for-5.17-rc3' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A patch to make it possible to disable zero copy path in the messenger
  to avoid checksum or authentication tag mismatches and ensuing session
  resets in case the destination buffer isn't guaranteed to be stable"

* tag 'ceph-for-5.17-rc3' of git://github.com/ceph/ceph-client:
  libceph: optionally use bounce buffer on recv path in crc mode
  libceph: make recv path in secure mode work the same as send path
parents 1eb7de17 038b8d1d
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */ #define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */ #define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */
#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
......
...@@ -383,6 +383,10 @@ struct ceph_connection_v2_info { ...@@ -383,6 +383,10 @@ struct ceph_connection_v2_info {
struct ceph_gcm_nonce in_gcm_nonce; struct ceph_gcm_nonce in_gcm_nonce;
struct ceph_gcm_nonce out_gcm_nonce; struct ceph_gcm_nonce out_gcm_nonce;
struct page **in_enc_pages;
int in_enc_page_cnt;
int in_enc_resid;
int in_enc_i;
struct page **out_enc_pages; struct page **out_enc_pages;
int out_enc_page_cnt; int out_enc_page_cnt;
int out_enc_resid; int out_enc_resid;
...@@ -457,6 +461,7 @@ struct ceph_connection { ...@@ -457,6 +461,7 @@ struct ceph_connection {
struct ceph_msg *out_msg; /* sending message (== tail of struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */ out_sent) */
struct page *bounce_page;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
......
...@@ -246,6 +246,7 @@ enum { ...@@ -246,6 +246,7 @@ enum {
Opt_cephx_sign_messages, Opt_cephx_sign_messages,
Opt_tcp_nodelay, Opt_tcp_nodelay,
Opt_abort_on_full, Opt_abort_on_full,
Opt_rxbounce,
}; };
enum { enum {
...@@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = { ...@@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica, fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica), ceph_param_read_from_replica),
fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode, fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode), ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret), fsparam_string ("secret", Opt_secret),
...@@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, ...@@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full: case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL; opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break; break;
case Opt_rxbounce:
opt->flags |= CEPH_OPT_RXBOUNCE;
break;
default: default:
BUG(); BUG();
...@@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, ...@@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,"); seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,"); seq_puts(m, "abort_on_full,");
if (opt->flags & CEPH_OPT_RXBOUNCE)
seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,", seq_printf(m, "mount_timeout=%d,",
......
...@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con) ...@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg); ceph_msg_put(con->out_msg);
con->out_msg = NULL; con->out_msg = NULL;
} }
if (con->bounce_page) {
__free_page(con->bounce_page);
con->bounce_page = NULL;
}
if (ceph_msgr2(from_msgr(con->msgr))) if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con); ceph_con_v2_reset_protocol(con);
......
...@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con, ...@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con) static int read_partial_msg_data(struct ceph_connection *con)
{ {
struct ceph_msg *msg = con->in_msg; struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page; struct page *page;
size_t page_offset; size_t page_offset;
...@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con) ...@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0; u32 crc = 0;
int ret; int ret;
if (!msg->num_data_items)
return -EIO;
if (do_datacrc) if (do_datacrc)
crc = con->in_data_crc; crc = con->in_data_crc;
while (cursor->total_resid) { while (cursor->total_resid) {
...@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con) ...@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */ return 1; /* must return > 0 to indicate success */
} }
/*
 * Read (part of) the data portion of the incoming message through
 * con->bounce_page instead of receiving straight into the message's
 * own pages.
 *
 * Each chunk is received from the socket into the bounce page, folded
 * into the running data crc, and only then copied to its position in
 * the destination page reported by the message data cursor.
 *
 * The bounce page is allocated on first use and kept on the connection.
 *
 * Returns 1 once the cursor has no data left, the <= 0 value from
 * ceph_tcp_recvpage() if a receive did not make progress, or -ENOMEM
 * if the bounce page could not be allocated.  con->in_data_crc is
 * updated with the running crc whenever the receive loop is left.
 */
static int read_partial_msg_data_bounce(struct ceph_connection *con)
{
	struct ceph_msg_data_cursor *cur = &con->in_msg->cursor;
	u32 running_crc;
	int rc = 1;	/* must return > 0 to indicate success */

	if (unlikely(!con->bounce_page)) {
		con->bounce_page = alloc_page(GFP_NOIO);
		if (!con->bounce_page) {
			pr_err("failed to allocate bounce page\n");
			return -ENOMEM;
		}
	}

	running_crc = con->in_data_crc;
	while (cur->total_resid) {
		struct page *dst_page;
		size_t dst_off, chunk_len;
		void *bounce;
		int nread;

		if (!cur->resid) {
			/* current piece is used up, step to the next one */
			ceph_msg_data_advance(cur, 0);
			continue;
		}

		dst_page = ceph_msg_data_next(cur, &dst_off, &chunk_len, NULL);
		nread = ceph_tcp_recvpage(con->sock, con->bounce_page, 0,
					  chunk_len);
		if (nread <= 0) {
			/* hand the receive result back to the caller as is */
			rc = nread;
			break;
		}

		/* checksum the bounced bytes before they reach the target */
		bounce = page_address(con->bounce_page);
		running_crc = crc32c(running_crc, bounce, nread);
		memcpy_to_page(dst_page, dst_off, bounce, nread);

		ceph_msg_data_advance(cur, nread);
	}

	/* remember the crc so far, whether we finished or stopped early */
	con->in_data_crc = running_crc;
	return rc;
}
/* /*
* read (part of) a message. * read (part of) a message.
*/ */
...@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con) ...@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */ /* (page) data */
if (data_len) { if (data_len) {
ret = read_partial_msg_data(con); if (!m->num_data_items)
return -EIO;
if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
ret = read_partial_msg_data(con);
if (ret <= 0) if (ret <= 0)
return ret; return ret;
} }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment