Commit ba11ad9a authored by Lars Ellenberg, committed by Jens Axboe

drbd: improve usage of MSG_MORE

It seems to improve performance if we allow the "p_data" header in its
own frame (no MSG_MORE), but sendpage all but the last page with MSG_MORE.
This is also in preparation for a later zero-copy receive implementation.

Suggested by Eduard.Guzovsky@stratus.com on drbd-dev.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
parent 5dbf1673
...@@ -2272,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * ...@@ -2272,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
* with page_count == 0 or PageSlab. * with page_count == 0 or PageSlab.
*/ */
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size) int offset, size_t size, unsigned msg_flags)
{ {
int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
kunmap(page); kunmap(page);
if (sent == size) if (sent == size)
mdev->send_cnt += size>>9; mdev->send_cnt += size>>9;
...@@ -2282,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, ...@@ -2282,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
} }
static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size) int offset, size_t size, unsigned msg_flags)
{ {
mm_segment_t oldfs = get_fs(); mm_segment_t oldfs = get_fs();
int sent, ok; int sent, ok;
...@@ -2295,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, ...@@ -2295,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
* __page_cache_release a page that would actually still be referenced * __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */ * by someone, leading to some obscure delayed Oops somewhere else. */
if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
return _drbd_no_send_page(mdev, page, offset, size); return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
msg_flags |= MSG_NOSIGNAL;
drbd_update_congested(mdev); drbd_update_congested(mdev);
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
do { do {
sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
offset, len, offset, len,
MSG_NOSIGNAL); msg_flags);
if (sent == -EAGAIN) { if (sent == -EAGAIN) {
if (we_should_drop_the_connection(mdev, if (we_should_drop_the_connection(mdev,
mdev->data.socket)) mdev->data.socket))
...@@ -2331,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) ...@@ -2331,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
/* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_no_send_page(mdev, bvec->bv_page, if (!_drbd_no_send_page(mdev, bvec->bv_page,
bvec->bv_offset, bvec->bv_len)) bvec->bv_offset, bvec->bv_len,
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
return 0; return 0;
} }
return 1; return 1;
...@@ -2343,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) ...@@ -2343,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
/* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_send_page(mdev, bvec->bv_page, if (!_drbd_send_page(mdev, bvec->bv_page,
bvec->bv_offset, bvec->bv_len)) bvec->bv_offset, bvec->bv_len,
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
return 0; return 0;
} }
return 1; return 1;
} }
...@@ -2356,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) ...@@ -2356,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
{ {
struct page *page = e->pages; struct page *page = e->pages;
unsigned len = e->size; unsigned len = e->size;
/* hint all but last page with MSG_MORE */
page_chain_for_each(page) { page_chain_for_each(page) {
unsigned l = min_t(unsigned, len, PAGE_SIZE); unsigned l = min_t(unsigned, len, PAGE_SIZE);
if (!_drbd_send_page(mdev, page, 0, l)) if (!_drbd_send_page(mdev, page, 0, l,
page_chain_next(page) ? MSG_MORE : 0))
return 0; return 0;
len -= l; len -= l;
} }
...@@ -2438,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) ...@@ -2438,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
p.dp_flags = cpu_to_be32(dp_flags); p.dp_flags = cpu_to_be32(dp_flags);
set_bit(UNPLUG_REMOTE, &mdev->flags); set_bit(UNPLUG_REMOTE, &mdev->flags);
ok = (sizeof(p) == ok = (sizeof(p) ==
drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
if (ok && dgs) { if (ok && dgs) {
dgb = mdev->int_dig_out; dgb = mdev->int_dig_out;
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
} }
if (ok) { if (ok) {
if (mdev->net_conf->wire_protocol == DRBD_PROT_A) if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
...@@ -2491,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, ...@@ -2491,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
return 0; return 0;
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
sizeof(p), MSG_MORE); sizeof(p), dgs ? MSG_MORE : 0);
if (ok && dgs) { if (ok && dgs) {
dgb = mdev->int_dig_out; dgb = mdev->int_dig_out;
drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
} }
if (ok) if (ok)
ok = _drbd_send_zc_ee(mdev, e); ok = _drbd_send_zc_ee(mdev, e);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment