Commit 0b70a13d authored by Philipp Reisner

drbd: Sending of big packets, for payloads from 64KByte to 4GByte

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 204bba99
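
The hunks below introduce a second wire-header format. The legacy 8-byte header stores the payload length in a u16, which limits a single packet's payload to just under 64 KiB; the new big-packet header keeps the same 8-byte size but uses a 16-bit magic and a 32-bit length, extending the limit to roughly 4 GiB. As a standalone illustration (hypothetical local names, not the kernel structs themselves), the two layouts can be mirrored like this:

/* Illustrative sketch only: mirrors of the two header layouts added by this
 * commit, under hypothetical names, to show the size trade-off. */
#include <stdint.h>

struct hdr80 {                  /* legacy: 32-bit magic, 16-bit length */
	uint32_t magic;
	uint16_t command;
	uint16_t length;        /* payload bytes after the header, < 64 KiB */
} __attribute__((packed));

struct hdr95 {                  /* big packets: 16-bit magic, 32-bit length */
	uint16_t magic;         /* distinct magic value identifies this format */
	uint16_t command;
	uint32_t length;        /* payload bytes after the header, up to ~4 GiB */
} __attribute__((packed));

/* Both layouts stay 8 bytes, so the header size on the wire is unchanged. */
_Static_assert(sizeof(struct hdr80) == 8, "legacy header must be 8 bytes");
_Static_assert(sizeof(struct hdr95) == 8, "big-packet header must be 8 bytes");
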
@@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
* NOTE that the payload starts at a long aligned offset,
* regardless of 32 or 64 bit arch!
*/
struct p_header {
struct p_header80 {
u32 magic;
u16 command;
u16 length; /* bytes of data after this header */
u8 payload[0];
} __packed;
/* 8 bytes. packet FIXED for the next century! */
/* Header for big packets, used for data packets exceeding 64kB */
struct p_header95 {
u16 magic; /* use DRBD_MAGIC_BIG here */
u16 command;
u32 length;
u8 payload[0];
} __packed;
union p_header {
struct p_header80 h80;
struct p_header95 h95;
};
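
The receive-path changes are in a part of this diff that is collapsed on this page, so the following is only a hedged sketch of how the two formats can coexist on one connection: DRBD_MAGIC_BIG (0x835a) differs from the leading 16 bits of the big-endian DRBD_MAGIC (0x8374), so a peer can tell the layouts apart from the first two bytes of a received header. Hypothetical helpers, not code from this commit:

/* Assumed helpers for illustration: decide the header format and extract the
 * payload length after reading 8 header bytes into a union p_header. */
static int header_is_95(const union p_header *h)
{
	/* p_header95.magic occupies the first 16 bits on the wire */
	return h->h95.magic == BE_DRBD_MAGIC_BIG;
}

static u32 header_payload_length(const union p_header *h)
{
	return header_is_95(h) ? be32_to_cpu(h->h95.length)
			       : (u32)be16_to_cpu(h->h80.length);
}
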
/*
* short commands, packets without payload, plain p_header:
@@ -367,7 +379,7 @@ struct p_header {
#define DP_MAY_SET_IN_SYNC 4
struct p_data {
struct p_header head;
union p_header head;
u64 sector; /* 64 bits sector number */
u64 block_id; /* to identify the request in protocol B&C */
u32 seq_num;
@@ -383,7 +395,7 @@ struct p_data {
* P_DATA_REQUEST, P_RS_DATA_REQUEST
*/
struct p_block_ack {
struct p_header head;
struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@@ -392,7 +404,7 @@ struct p_block_ack {
struct p_block_req {
struct p_header head;
struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@@ -409,7 +421,7 @@ struct p_block_req {
*/
struct p_handshake {
struct p_header head; /* 8 bytes */
struct p_header80 head; /* 8 bytes */
u32 protocol_min;
u32 feature_flags;
u32 protocol_max;
@@ -424,19 +436,19 @@ struct p_handshake {
/* 80 bytes, FIXED for the next century */
struct p_barrier {
struct p_header head;
struct p_header80 head;
u32 barrier; /* barrier number _handle_ only */
u32 pad; /* to multiple of 8 Byte */
} __packed;
struct p_barrier_ack {
struct p_header head;
struct p_header80 head;
u32 barrier;
u32 set_size;
} __packed;
struct p_rs_param {
struct p_header head;
struct p_header80 head;
u32 rate;
/* Since protocol version 88 and higher. */
@@ -444,7 +456,7 @@ struct p_rs_param {
} __packed;
struct p_rs_param_89 {
struct p_header head;
struct p_header80 head;
u32 rate;
/* protocol version 89: */
char verify_alg[SHARED_SECRET_MAX];
@@ -452,7 +464,7 @@ struct p_rs_param_89 {
} __packed;
struct p_rs_param_95 {
struct p_header head;
struct p_header80 head;
u32 rate;
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
@@ -468,7 +480,7 @@ enum drbd_conn_flags {
};
struct p_protocol {
struct p_header head;
struct p_header80 head;
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
@@ -482,17 +494,17 @@ struct p_protocol {
} __packed;
struct p_uuids {
struct p_header head;
struct p_header80 head;
u64 uuid[UI_EXTENDED_SIZE];
} __packed;
struct p_rs_uuid {
struct p_header head;
struct p_header80 head;
u64 uuid;
} __packed;
struct p_sizes {
struct p_header head;
struct p_header80 head;
u64 d_size; /* size of disk */
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
@@ -502,18 +514,18 @@ struct p_sizes {
} __packed;
struct p_state {
struct p_header head;
struct p_header80 head;
u32 state;
} __packed;
struct p_req_state {
struct p_header head;
struct p_header80 head;
u32 mask;
u32 val;
} __packed;
struct p_req_state_reply {
struct p_header head;
struct p_header80 head;
u32 retcode;
} __packed;
@@ -528,7 +540,7 @@ struct p_drbd06_param {
} __packed;
struct p_discard {
struct p_header head;
struct p_header80 head;
u64 block_id;
u32 seq_num;
u32 pad;
@@ -544,7 +556,7 @@ enum drbd_bitmap_code {
};
struct p_compressed_bm {
struct p_header head;
struct p_header80 head;
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
* (encoding & 0x80): polarity (set/unset) of first runlength
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -555,8 +567,8 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
struct p_delay_probe {
struct p_header head;
struct p_delay_probe93 {
struct p_header80 head;
u32 seq_num; /* sequence number to match the two probe packets */
u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
@@ -605,7 +617,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
* so we need to use the fixed size 4KiB page size
* most architectures have used for a long time.
*/
#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
#if (PAGE_SIZE < 4096)
@@ -614,7 +626,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
#endif
union p_polymorph {
struct p_header header;
struct p_header80 header;
struct p_handshake handshake;
struct p_data data;
struct p_block_ack block_ack;
@@ -1188,12 +1200,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
enum drbd_packets cmd, struct p_header *h,
enum drbd_packets cmd, struct p_header80 *h,
size_t size, unsigned msg_flags);
#define USE_DATA_SOCKET 1
#define USE_META_SOCKET 0
extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
enum drbd_packets cmd, struct p_header *h,
enum drbd_packets cmd, struct p_header80 *h,
size_t size);
extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
char *data, size_t size);
@@ -1936,19 +1948,19 @@ static inline void request_ping(struct drbd_conf *mdev)
static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
enum drbd_packets cmd)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
}
static inline int drbd_send_ping(struct drbd_conf *mdev)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
}
static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
}
@@ -1647,7 +1647,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
/* the appropriate socket mutex must be held already */
int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
enum drbd_packets cmd, struct p_header *h,
enum drbd_packets cmd, struct p_header80 *h,
size_t size, unsigned msg_flags)
{
int sent, ok;
@@ -1657,7 +1657,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
h->magic = BE_DRBD_MAGIC;
h->command = cpu_to_be16(cmd);
h->length = cpu_to_be16(size-sizeof(struct p_header));
h->length = cpu_to_be16(size-sizeof(struct p_header80));
sent = drbd_send(mdev, sock, h, size, msg_flags);
@@ -1672,7 +1672,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
* when we hold the appropriate socket mutex.
*/
int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
enum drbd_packets cmd, struct p_header *h, size_t size)
enum drbd_packets cmd, struct p_header80 *h, size_t size)
{
int ok = 0;
struct socket *sock;
@@ -1700,7 +1700,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
size_t size)
{
struct p_header h;
struct p_header80 h;
int ok;
h.magic = BE_DRBD_MAGIC;
@@ -1807,7 +1807,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
(struct p_header *)p, size);
(struct p_header80 *)p, size);
kfree(p);
return rv;
}
@@ -1833,7 +1833,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
put_ldev(mdev);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
}
int drbd_send_uuids(struct drbd_conf *mdev)
@@ -1854,7 +1854,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
p.uuid = cpu_to_be64(val);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
}
int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
@@ -1884,7 +1884,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
p.dds_flags = cpu_to_be16(flags);
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
return ok;
}
@@ -1909,7 +1909,7 @@ int drbd_send_state(struct drbd_conf *mdev)
if (likely(sock != NULL)) {
ok = _drbd_send_cmd(mdev, sock, P_STATE,
(struct p_header *)&p, sizeof(p), 0);
(struct p_header80 *)&p, sizeof(p), 0);
}
mutex_unlock(&mdev->data.mutex);
@@ -1927,7 +1927,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
p.val = cpu_to_be32(val.i);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
}
int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
@@ -1937,7 +1937,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
p.retcode = cpu_to_be32(retcode);
return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
}
int fill_bitmap_rle_bits(struct drbd_conf *mdev,
@@ -2036,7 +2036,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
enum { OK, FAILED, DONE }
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
struct p_header *h, struct bm_xfer_ctx *c)
struct p_header80 *h, struct bm_xfer_ctx *c)
{
struct p_compressed_bm *p = (void*)h;
unsigned long num_words;
@@ -2066,12 +2066,12 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
if (len)
drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
h, sizeof(struct p_header) + len, 0);
h, sizeof(struct p_header80) + len, 0);
c->word_offset += num_words;
c->bit_offset = c->word_offset * BITS_PER_LONG;
c->packets[1]++;
c->bytes[1] += sizeof(struct p_header) + len;
c->bytes[1] += sizeof(struct p_header80) + len;
if (c->bit_offset > c->bm_bits)
c->bit_offset = c->bm_bits;
@@ -2087,14 +2087,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
int _drbd_send_bitmap(struct drbd_conf *mdev)
{
struct bm_xfer_ctx c;
struct p_header *p;
struct p_header80 *p;
int ret;
ERR_IF(!mdev->bitmap) return FALSE;
/* maybe we should use some per thread scratch page,
* and allocate that during initial device creation? */
p = (struct p_header *) __get_free_page(GFP_NOIO);
p = (struct p_header80 *) __get_free_page(GFP_NOIO);
if (!p) {
dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
return FALSE;
@@ -2152,7 +2152,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
if (mdev->state.conn < C_CONNECTED)
return FALSE;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
return ok;
}
@@ -2180,7 +2180,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
return FALSE;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
return ok;
}
@@ -2188,8 +2188,8 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_data *dp)
{
const int header_size = sizeof(struct p_data)
- sizeof(struct p_header);
int data_size = ((struct p_header *)dp)->length - header_size;
- sizeof(struct p_header80);
int data_size = ((struct p_header80 *)dp)->length - header_size;
return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
dp->block_id);
@@ -2238,7 +2238,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
p.blksize = cpu_to_be32(size);
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
return ok;
}
@@ -2256,7 +2256,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev,
p.head.magic = BE_DRBD_MAGIC;
p.head.command = cpu_to_be16(cmd);
p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size);
p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
mutex_lock(&mdev->data.mutex);
@@ -2278,7 +2278,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
p.blksize = cpu_to_be32(size);
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
(struct p_header *)&p, sizeof(p));
(struct p_header80 *)&p, sizeof(p));
return ok;
}
@@ -2447,10 +2447,17 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
p.head.magic = BE_DRBD_MAGIC;
p.head.command = cpu_to_be16(P_DATA);
p.head.length =
cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size);
if (req->size <= (1 << 15)) {
p.head.h80.magic = BE_DRBD_MAGIC;
p.head.h80.command = cpu_to_be16(P_DATA);
p.head.h80.length =
cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
} else {
p.head.h95.magic = BE_DRBD_MAGIC_BIG;
p.head.h95.command = cpu_to_be16(P_DATA);
p.head.h95.length =
cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
}
p.sector = cpu_to_be64(req->sector);
p.block_id = (unsigned long)req;
@@ -2511,10 +2518,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
p.head.magic = BE_DRBD_MAGIC;
p.head.command = cpu_to_be16(cmd);
p.head.length =
cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size);
if (e->size <= (1 << 15)) {
p.head.h80.magic = BE_DRBD_MAGIC;
p.head.h80.command = cpu_to_be16(cmd);
p.head.h80.length =
cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
} else {
p.head.h95.magic = BE_DRBD_MAGIC_BIG;
p.head.h95.command = cpu_to_be16(cmd);
p.head.h95.length =
cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
}
p.sector = cpu_to_be64(e->sector);
p.block_id = e->block_id;
@@ -2527,8 +2541,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
if (!drbd_get_data_sock(mdev))
return 0;
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
sizeof(p), dgs ? MSG_MORE : 0);
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
if (ok && dgs) {
dgb = mdev->int_dig_out;
drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
This diff is collapsed.
@@ -1204,7 +1204,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* dec_ap_pending will be done in got_BarrierAck
* or (on connection loss) in w_clear_epoch. */
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
(struct p_header *)p, sizeof(*p), 0);
(struct p_header80 *)p, sizeof(*p), 0);
drbd_put_data_sock(mdev);
return ok;
@@ -318,6 +318,8 @@ enum drbd_timeout_flag {
#define DRBD_MAGIC 0x83740267
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
#define DRBD_MAGIC_BIG 0x835a
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1