Commit b7acfc95 authored by Liang Zhen, committed by Greg Kroah-Hartman

staging: lustre: LNet network latency simulation

An incoming LNet message can be delayed for seconds if it matches
any of the LNet Delay Rules.

Users can add/remove/list/reset Delay Rules with lctl commands:
- lctl net_delay_add
  Add a new Delay Rule to LNet, options:
  <-s | --source SRC_NID>
  <-d | --dest DST_NID>
  <<-r | --rate RATE_NUMBER>
  <-i | --interval SECONDS>>
  <-l | --latency DELAY_LATENCY>

- lctl net_delay_del
  Remove matched Delay Rule from LNet, options:
  <[-a | --all] |
  <-s | --source SRC_NID>
  <-d | --dest DST_NID>>

- lctl net_delay_list
  List all Delay Rules in LNet

- lctl net_delay_reset
  Reset statistic counters for all Delay Rules
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5435
Reviewed-on: http://review.whamcloud.com/11409
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 0fbbced2
......@@ -559,13 +559,22 @@ void lnet_portals_destroy(void);
/* message functions */
int lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr,
lnet_nid_t fromnid, void *private, int rdma_req);
int lnet_parse_local(lnet_ni_t *ni, lnet_msg_t *msg);
int lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg);
void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
unsigned int offset, unsigned int mlen, unsigned int rlen);
void lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
int delayed, unsigned int offset,
unsigned int mlen, unsigned int rlen);
lnet_msg_t *lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *get_msg);
void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len);
void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc);
void lnet_drop_message(lnet_ni_t *ni, int cpt, void *private,
unsigned int nob);
void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
void lnet_recv_delayed_msg_list(struct list_head *head);
......@@ -586,6 +595,14 @@ void lnet_fault_fini(void);
bool lnet_drop_rule_match(lnet_hdr_t *hdr);
/*
 * Delay Rule (network latency simulation) interface.
 * NOTE(review): add/del/list/reset presumably back the lctl
 * net_delay_add/del/list/reset commands -- confirm against the ioctl path.
 */
/* Add a new Delay Rule to LNet. */
int lnet_delay_rule_add(struct lnet_fault_attr *attr);
/*
 * Remove matched Delay Rule(s) from LNet; presumably matched on the
 * \a src and \a dst NIDs. The meaning of \a shutdown is not visible
 * here -- TODO confirm.
 */
int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
/* List Delay Rules; presumably reports the rule at index \a pos. */
int lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr,
struct lnet_fault_stat *stat);
/* Reset statistic counters for all Delay Rules. */
void lnet_delay_rule_reset(void);
/*
 * Called from lnet_finalize() with the net lock dropped whenever
 * ln_delay_rules is not empty.
 */
void lnet_delay_rule_check(void);
/*
 * Called from lnet_parse() with the net lock held. When it returns true,
 * lnet_parse() unlocks and returns 0 without parsing or forwarding
 * \a msg itself.
 */
bool lnet_delay_rule_match_locked(lnet_hdr_t *hdr, struct lnet_msg *msg);
/** @} lnet_fault_simulation */
void lnet_counters_get(lnet_counters_t *counters);
......
......@@ -88,6 +88,7 @@ typedef struct lnet_msg {
unsigned int msg_rtrcredit:1; /* taken a global router credit */
unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
unsigned int msg_onactivelist:1; /* on the activelist */
unsigned int msg_rdma_get:1;
struct lnet_peer *msg_txpeer; /* peer I'm sending to */
struct lnet_peer *msg_rxpeer; /* peer I received from */
......@@ -574,6 +575,7 @@ typedef struct {
/* failure simulation */
struct list_head ln_test_peers;
struct list_head ln_drop_rules;
struct list_head ln_delay_rules;
struct list_head ln_nis; /* LND instances */
/* NIs bond on specific CPT(s) */
......@@ -610,6 +612,7 @@ typedef struct {
struct mutex ln_api_mutex;
struct mutex ln_lnd_mutex;
struct mutex ln_delay_mutex;
/* Have I called LNetNIInit myself? */
int ln_niinit_self;
/* LNetNIInit/LNetNIFini counter */
......
......@@ -26,6 +26,10 @@ enum {
LNET_CTL_DROP_DEL,
LNET_CTL_DROP_RESET,
LNET_CTL_DROP_LIST,
LNET_CTL_DELAY_ADD,
LNET_CTL_DELAY_DEL,
LNET_CTL_DELAY_RESET,
LNET_CTL_DELAY_LIST,
};
#define LNET_ACK_BIT BIT(0)
......@@ -71,7 +75,17 @@ struct lnet_fault_attr {
*/
__u32 da_interval;
} drop;
/** TODO: add more */
/** message latency simulation */
struct {
__u32 la_rate;
/**
* time interval of message delay, it is exclusive
* with la_rate
*/
__u32 la_interval;
/** latency to delay */
__u32 la_latency;
} delay;
__u64 space[8];
} u;
};
......@@ -93,7 +107,10 @@ struct lnet_fault_stat {
/** total # dropped messages */
__u64 ds_dropped;
} drop;
/** TODO: add more */
struct {
/** total # delayed messages */
__u64 ls_delayed;
} delay;
__u64 space[8];
} u;
};
......
......@@ -551,6 +551,7 @@ lnet_prepare(lnet_pid_t requested_pid)
INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
INIT_LIST_HEAD(&the_lnet.ln_routers);
INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
rc = lnet_create_remote_nets_table();
if (rc)
......
......@@ -42,11 +42,6 @@
#include "../../include/linux/lnet/lib-lnet.h"
/** lnet message has credit and can be submitted to lnd for send/receive */
#define LNET_CREDIT_OK 0
/** lnet message is waiting for credit */
#define LNET_CREDIT_WAIT 1
static int local_nid_dist_zero = 1;
module_param(local_nid_dist_zero, int, 0444);
MODULE_PARM_DESC(local_nid_dist_zero, "Reserved");
......@@ -570,7 +565,7 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
}
EXPORT_SYMBOL(lnet_extract_kiov);
static void
void
lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
unsigned int offset, unsigned int mlen, unsigned int rlen)
{
......@@ -1431,7 +1426,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */
}
static void
void
lnet_drop_message(lnet_ni_t *ni, int cpt, void *private, unsigned int nob)
{
lnet_net_lock(cpt);
......@@ -1705,7 +1700,7 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
* \retval LNET_CREDIT_WAIT If \a msg is blocked because w/o buffer
* \retval -ve error code
*/
static int
int
lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg)
{
int rc = 0;
......@@ -1729,6 +1724,33 @@ lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg)
return rc;
}
/**
 * Hand a message over to the parser that matches its type.
 *
 * ACK, PUT and REPLY go to lnet_parse_ack(), lnet_parse_put() and
 * lnet_parse_reply(); GET goes to lnet_parse_get(), which additionally
 * receives msg->msg_rdma_get.
 *
 * \param ni	passed through unchanged to the per-type parser
 * \param msg	message to parse; msg->msg_type selects the parser
 *
 * \retval 0		the parser returned success
 * \retval -ENOENT	the parser returned -ENOENT (the only non-zero
 *			result the parsers are asserted to produce)
 * \retval -EPROTO	msg->msg_type is none of ACK/PUT/GET/REPLY
 */
int
lnet_parse_local(lnet_ni_t *ni, lnet_msg_t *msg)
{
	int result;

	if (msg->msg_type == LNET_MSG_ACK) {
		result = lnet_parse_ack(ni, msg);
	} else if (msg->msg_type == LNET_MSG_PUT) {
		result = lnet_parse_put(ni, msg);
	} else if (msg->msg_type == LNET_MSG_GET) {
		result = lnet_parse_get(ni, msg, msg->msg_rdma_get);
	} else if (msg->msg_type == LNET_MSG_REPLY) {
		result = lnet_parse_reply(ni, msg);
	} else {
		/* any other message type is asserted to be unreachable */
		LASSERT(0);
		return -EPROTO;
	}

	LASSERT(!result || result == -ENOENT);
	return result;
}
char *
lnet_msgtyp2str(int type)
{
......@@ -1953,6 +1975,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
msg->msg_type = type;
msg->msg_private = private;
msg->msg_receiving = 1;
msg->msg_rdma_get = rdma_req;
msg->msg_wanted = payload_length;
msg->msg_len = payload_length;
msg->msg_offset = 0;
......@@ -2000,6 +2023,13 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
lnet_msg_commit(msg, cpt);
/* message delay simulation */
if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&
lnet_delay_rule_match_locked(hdr, msg))) {
lnet_net_unlock(cpt);
return 0;
}
if (!for_me) {
rc = lnet_parse_forward_locked(ni, msg);
lnet_net_unlock(cpt);
......@@ -2016,29 +2046,10 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
lnet_net_unlock(cpt);
switch (type) {
case LNET_MSG_ACK:
rc = lnet_parse_ack(ni, msg);
break;
case LNET_MSG_PUT:
rc = lnet_parse_put(ni, msg);
break;
case LNET_MSG_GET:
rc = lnet_parse_get(ni, msg, rdma_req);
break;
case LNET_MSG_REPLY:
rc = lnet_parse_reply(ni, msg);
break;
default:
LASSERT(0);
rc = -EPROTO;
goto free_drop; /* prevent an unused label if !kernel */
}
if (!rc)
return 0;
LASSERT(rc == -ENOENT);
rc = lnet_parse_local(ni, msg);
if (rc)
goto free_drop;
return 0;
free_drop:
LASSERT(!msg->msg_md);
......
......@@ -535,6 +535,12 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status)
break;
}
if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) {
lnet_net_unlock(cpt);
lnet_delay_rule_check();
lnet_net_lock(cpt);
}
container->msc_finalizers[my_slot] = NULL;
lnet_net_unlock(cpt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment