Commit 668700b4 authored by Philipp Reisner, committed by Jens Axboe

drbd: Create a dedicated workqueue for sending acks on the control connection

The intention is to reduce CPU utilization. Recent measurements
unveiled that the current performance bottleneck is CPU utilization
on the receiving node. The asender thread became CPU limited.

One of the main points is to eliminate the idr_for_each_entry() loop
from the sending acks code path.

One exception to that is sending back ping_acks. These stay
in the ack-receiver thread. Otherwise the logic becomes too
complicated for no added value.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 1c03e520
...@@ -77,13 +77,6 @@ extern int fault_devs; ...@@ -77,13 +77,6 @@ extern int fault_devs;
extern char usermode_helper[]; extern char usermode_helper[];
/* I don't remember why XCPU ...
* This is used to wake the asender,
* and to interrupt sending the sending task
* on disconnect.
*/
#define DRBD_SIG SIGXCPU
/* This is used to stop/restart our threads. /* This is used to stop/restart our threads.
* Cannot use SIGTERM nor SIGKILL, since these * Cannot use SIGTERM nor SIGKILL, since these
* are sent out by init on runlevel changes * are sent out by init on runlevel changes
...@@ -647,8 +640,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size); ...@@ -647,8 +640,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
enum { enum {
NET_CONGESTED, /* The data socket is congested */ NET_CONGESTED, /* The data socket is congested */
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */ RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
SEND_PING, /* whether asender should send a ping asap */ SEND_PING,
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */ CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_OKAY,
...@@ -755,6 +747,7 @@ struct drbd_connection { ...@@ -755,6 +747,7 @@ struct drbd_connection {
struct drbd_thread receiver; struct drbd_thread receiver;
struct drbd_thread worker; struct drbd_thread worker;
struct drbd_thread ack_receiver; struct drbd_thread ack_receiver;
struct workqueue_struct *ack_sender;
/* cached pointers, /* cached pointers,
* so we can look up the oldest pending requests more quickly. * so we can look up the oldest pending requests more quickly.
...@@ -823,6 +816,7 @@ struct drbd_peer_device { ...@@ -823,6 +816,7 @@ struct drbd_peer_device {
struct list_head peer_devices; struct list_head peer_devices;
struct drbd_device *device; struct drbd_device *device;
struct drbd_connection *connection; struct drbd_connection *connection;
struct work_struct send_acks_work;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_peer_dev; struct dentry *debugfs_peer_dev;
#endif #endif
...@@ -1558,6 +1552,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); ...@@ -1558,6 +1552,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */ /* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_ack_receiver(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi);
extern void drbd_send_ping_wf(struct work_struct *ws);
extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting); bool throttle_if_app_is_waiting);
...@@ -1968,16 +1964,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit) ...@@ -1968,16 +1964,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
static inline void wake_asender(struct drbd_connection *connection) /* To get the ack_receiver out of the blocking network stack,
* so it can change its sk_rcvtimeo from idle- to ping-timeout,
* and send a ping, we need to send a signal.
* Which signal we send is irrelevant. */
static inline void wake_ack_receiver(struct drbd_connection *connection)
{ {
if (test_bit(SIGNAL_ASENDER, &connection->flags)) struct task_struct *task = connection->ack_receiver.task;
force_sig(DRBD_SIG, connection->ack_receiver.task); if (task && get_t_state(&connection->ack_receiver) == RUNNING)
force_sig(SIGXCPU, task);
} }
static inline void request_ping(struct drbd_connection *connection) static inline void request_ping(struct drbd_connection *connection)
{ {
set_bit(SEND_PING, &connection->flags); set_bit(SEND_PING, &connection->flags);
wake_asender(connection); wake_ack_receiver(connection);
} }
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *); extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
......
...@@ -1794,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, ...@@ -1794,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
drbd_update_congested(connection); drbd_update_congested(connection);
} }
do { do {
/* STRANGE
* tcp_sendmsg does _not_ use its size parameter at all ?
*
* -EAGAIN on timeout, -EINTR on signal.
*/
/* THINK
* do we need to block DRBD_SIG if sock == &meta.socket ??
* otherwise wake_asender() might interrupt some send_*Ack !
*/
rv = kernel_sendmsg(sock, &msg, &iov, 1, size); rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
if (rv == -EAGAIN) { if (rv == -EAGAIN) {
if (we_should_drop_the_connection(connection, sock)) if (we_should_drop_the_connection(connection, sock))
...@@ -2821,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig ...@@ -2821,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
goto out_idr_remove_from_resource; goto out_idr_remove_from_resource;
} }
kref_get(&connection->kref); kref_get(&connection->kref);
INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
} }
if (init_submitter(device)) { if (init_submitter(device)) {
......
...@@ -1258,8 +1258,8 @@ static void conn_reconfig_done(struct drbd_connection *connection) ...@@ -1258,8 +1258,8 @@ static void conn_reconfig_done(struct drbd_connection *connection)
connection->cstate == C_STANDALONE; connection->cstate == C_STANDALONE;
spin_unlock_irq(&connection->resource->req_lock); spin_unlock_irq(&connection->resource->req_lock);
if (stop_threads) { if (stop_threads) {
/* asender is implicitly stopped by receiver /* ack_receiver thread and ack_sender workqueue are implicitly
* in conn_disconnect() */ * stopped by receiver in conn_disconnect() */
drbd_thread_stop(&connection->receiver); drbd_thread_stop(&connection->receiver);
drbd_thread_stop(&connection->worker); drbd_thread_stop(&connection->worker);
} }
......
...@@ -23,7 +23,7 @@ enum drbd_packet { ...@@ -23,7 +23,7 @@ enum drbd_packet {
P_AUTH_RESPONSE = 0x11, P_AUTH_RESPONSE = 0x11,
P_STATE_CHG_REQ = 0x12, P_STATE_CHG_REQ = 0x12,
/* asender (meta socket */ /* (meta socket) */
P_PING = 0x13, P_PING = 0x13,
P_PING_ACK = 0x14, P_PING_ACK = 0x14,
P_RECV_ACK = 0x15, /* Used in protocol B */ P_RECV_ACK = 0x15, /* Used in protocol B */
......
This diff is collapsed.
...@@ -453,7 +453,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, ...@@ -453,7 +453,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
kref_get(&req->kref); /* wait for the DONE */ kref_get(&req->kref); /* wait for the DONE */
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
/* potentially already completed in the asender thread */ /* potentially already completed in the ack_receiver thread */
if (!(s & RQ_NET_DONE)) { if (!(s & RQ_NET_DONE)) {
atomic_add(req->i.size >> 9, &device->ap_in_flight); atomic_add(req->i.size >> 9, &device->ap_in_flight);
set_if_null_req_not_net_done(peer_device, req); set_if_null_req_not_net_done(peer_device, req);
......
...@@ -113,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -113,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
unsigned long flags = 0; unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device; struct drbd_peer_device *peer_device = peer_req->peer_device;
struct drbd_device *device = peer_device->device; struct drbd_device *device = peer_device->device;
struct drbd_connection *connection = peer_device->connection;
struct drbd_interval i; struct drbd_interval i;
int do_wake; int do_wake;
u64 block_id; u64 block_id;
...@@ -145,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -145,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR) if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR); __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
kref_put(&device->kref, drbd_destroy_device);
}
spin_unlock_irqrestore(&device->resource->req_lock, flags); spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (block_id == ID_SYNCER) if (block_id == ID_SYNCER)
...@@ -156,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -156,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
if (do_al_complete_io) if (do_al_complete_io)
drbd_al_complete_io(device, &i); drbd_al_complete_io(device, &i);
wake_asender(peer_device->connection);
put_ldev(device); put_ldev(device);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment