Commit f5b2b966 authored by Jay Vosburgh's avatar Jay Vosburgh Committed by Jeff Garzik

[PATCH] bonding: Validate probe replies in ARP monitor

	Add logic to check ARP request / reply packets used for ARP
monitor link integrity checking.

	The current method simply examines the slave device to see if it
has sent and received traffic; this can be fooled by extraneous traffic.
For example, if multiple hosts running bonding are behind a common
switch, the probe traffic from the multiple instances of bonding will
update the tx/rx times on each other's slave devices.
Signed-off-by: default avatarJay Vosburgh <fubar@us.ibm.com>
Signed-off-by: default avatarJeff Garzik <jeff@garzik.org>
parent 70298705
......@@ -192,6 +192,17 @@ or, for backwards compatibility, the option value. E.g.,
arp_interval
Specifies the ARP link monitoring frequency in milliseconds.
The ARP monitor works by periodically checking the slave
devices to determine whether they have sent or received
traffic recently (the precise criteria depends upon the
bonding mode, and the state of the slave). Regular traffic is
generated via ARP probes issued for the addresses specified by
the arp_ip_target option.
This behavior can be modified by the arp_validate option,
below.
If ARP monitoring is used in an etherchannel compatible mode
(modes 0 and 2), the switch should be configured in a mode
that evenly distributes packets across all links. If the
......@@ -213,6 +224,54 @@ arp_ip_target
maximum number of targets that can be specified is 16. The
default value is no IP addresses.
arp_validate
Specifies whether or not ARP probes and replies should be
validated in the active-backup mode. This causes the ARP
monitor to examine the incoming ARP requests and replies, and
only consider a slave to be up if it is receiving the
appropriate ARP traffic.
Possible values are:
none or 0
No validation is performed. This is the default.
active or 1
Validation is performed only for the active slave.
backup or 2
Validation is performed only for backup slaves.
all or 3
Validation is performed for all slaves.
For the active slave, the validation checks ARP replies to
confirm that they were generated by an arp_ip_target. Since
backup slaves do not typically receive these replies, the
validation performed for backup slaves is on the ARP request
sent out via the active slave. It is possible that some
switch or network configurations may result in situations
wherein the backup slaves do not receive the ARP requests; in
such a situation, validation of backup slaves must be
disabled.
This option is useful in network configurations in which
multiple bonding hosts are concurrently issuing ARPs to one or
more targets beyond a common switch. Should the link between
the switch and target fail (but not the switch itself), the
probe traffic generated by the multiple bonding instances will
fool the standard ARP monitor into considering the links as
still up. Use of the arp_validate option can resolve this, as
the ARP monitor will only consider ARP requests and replies
associated with its own instance of bonding.
This option was added in bonding version 3.1.0.
downdelay
Specifies the time, in milliseconds, to wait before disabling
......
......@@ -96,6 +96,7 @@ static char *lacp_rate = NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
static char *arp_validate = NULL;
struct bond_params bonding_defaults;
module_param(max_bonds, int, 0);
......@@ -127,6 +128,8 @@ module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
/*----------------------------- Global variables ----------------------------*/
......@@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ NULL, -1},
};
struct bond_parm_tbl arp_validate_tbl[] = {
{ "none", BOND_ARP_VALIDATE_NONE},
{ "active", BOND_ARP_VALIDATE_ACTIVE},
{ "backup", BOND_ARP_VALIDATE_BACKUP},
{ "all", BOND_ARP_VALIDATE_ALL},
{ NULL, -1},
};
/*-------------------------- Forward declarations ---------------------------*/
static void bond_send_gratuitous_arp(struct bonding *bond);
......@@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_compute_features(bond);
new_slave->last_arp_rx = jiffies;
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
......@@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
dev_set_mac_address(slave_dev, &addr);
slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
IFF_SLAVE_INACTIVE | IFF_BONDING);
IFF_SLAVE_INACTIVE | IFF_BONDING |
IFF_SLAVE_NEEDARP);
kfree(slave);
......@@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond)
return 0;
}
static int bond_has_this_ip(struct bonding *bond, u32 ip)
{
struct vlan_entry *vlan, *vlan_next;
if (ip == bond->master_ip)
return 1;
if (list_empty(&bond->vlan_list))
return 0;
list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
vlan_list) {
if (ip == vlan->vlan_ip)
return 1;
}
return 0;
}
/*
* We go to the (large) trouble of VLAN tagging ARP frames because
* switches in VLAN mode (especially if ports are configured as
......@@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
}
}
static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
{
int i;
u32 *targets = bond->params.arp_targets;
targets = bond->params.arp_targets;
for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
"%u.%u.%u.%u bhti(tip) %d\n",
NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
bond_has_this_ip(bond, tip));
if (sip == targets[i]) {
if (bond_has_this_ip(bond, tip))
slave->last_arp_rx = jiffies;
return;
}
}
}
static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct arphdr *arp;
struct slave *slave;
struct bonding *bond;
unsigned char *arp_ptr;
u32 sip, tip;
if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
goto out;
bond = dev->priv;
read_lock(&bond->lock);
dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
bond->dev->name, skb->dev ? skb->dev->name : "NULL",
orig_dev ? orig_dev->name : "NULL");
slave = bond_get_slave_by_dev(bond, orig_dev);
if (!slave || !slave_do_arp_validate(bond, slave))
goto out_unlock;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
(2 * dev->addr_len) +
(2 * sizeof(u32)))))
goto out_unlock;
arp = skb->nh.arph;
if (arp->ar_hln != dev->addr_len ||
skb->pkt_type == PACKET_OTHERHOST ||
skb->pkt_type == PACKET_LOOPBACK ||
arp->ar_hrd != htons(ARPHRD_ETHER) ||
arp->ar_pro != htons(ETH_P_IP) ||
arp->ar_pln != 4)
goto out_unlock;
arp_ptr = (unsigned char *)(arp + 1);
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4);
arp_ptr += 4 + dev->addr_len;
memcpy(&tip, arp_ptr, 4);
dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
" tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
slave->state, bond->params.arp_validate,
slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));
/*
* Backup slaves won't see the ARP reply, but do come through
* here for each ARP probe (so we swap the sip/tip to validate
* the probe). In a "redundant switch, common router" type of
* configuration, the ARP probe will (hopefully) travel from
* the active, through one switch, the router, then the other
* switch before reaching the backup.
*/
if (slave->state == BOND_STATE_ACTIVE)
bond_validate_arp(bond, slave, sip, tip);
else
bond_validate_arp(bond, slave, tip, sip);
out_unlock:
read_unlock(&bond->lock);
out:
dev_kfree_skb(skb);
return NET_RX_SUCCESS;
}
/*
* this function is called regularly to monitor each slave's link
* ensuring that traffic is being sent and received when arp monitoring
......@@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
*/
bond_for_each_slave(bond, slave, i) {
if (slave->link != BOND_LINK_UP) {
if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {
if ((jiffies - slave_last_rx(bond, slave)) <=
delta_in_ticks) {
slave->link = BOND_LINK_UP;
......@@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
if ((slave != bond->curr_active_slave) &&
(!bond->current_arp_slave) &&
(((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
(((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
bond_has_ip(bond))) {
/* a backup slave has gone down; three times
* the delta allows the current slave to be
......@@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
* if it is up and needs to take over as the curr_active_slave
*/
if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
(((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
(((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
bond_has_ip(bond))) &&
((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
......@@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond)
dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
}
void bond_register_arp(struct bonding *bond)
{
struct packet_type *pt = &bond->arp_mon_pt;
pt->type = htons(ETH_P_ARP);
pt->dev = NULL; /*bond->dev;XXX*/
pt->func = bond_arp_rcv;
dev_add_pack(pt);
}
void bond_unregister_arp(struct bonding *bond)
{
dev_remove_pack(&bond->arp_mon_pt);
}
/*---------------------------- Hashing Policies -----------------------------*/
/*
......@@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev)
} else {
arp_timer->function = (void *)&bond_loadbalance_arp_mon;
}
if (bond->params.arp_validate)
bond_register_arp(bond);
add_timer(arp_timer);
}
......@@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev)
bond_unregister_lacpdu(bond);
}
if (bond->params.arp_validate)
bond_unregister_arp(bond);
write_lock_bh(&bond->lock);
......@@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
static int bond_check_params(struct bond_params *params)
{
int arp_validate_value;
/*
* Convert string parameters.
*/
......@@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params)
arp_interval = 0;
}
if (arp_validate) {
if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
printk(KERN_ERR DRV_NAME
": arp_validate only supported in active-backup mode\n");
return -EINVAL;
}
if (!arp_interval) {
printk(KERN_ERR DRV_NAME
": arp_validate requires arp_interval\n");
return -EINVAL;
}
arp_validate_value = bond_parse_parm(arp_validate,
arp_validate_tbl);
if (arp_validate_value == -1) {
printk(KERN_ERR DRV_NAME
": Error: invalid arp_validate \"%s\"\n",
arp_validate == NULL ? "NULL" : arp_validate);
return -EINVAL;
}
} else
arp_validate_value = 0;
if (miimon) {
printk(KERN_INFO DRV_NAME
": MII link monitoring set to %d ms\n",
......@@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params)
int i;
printk(KERN_INFO DRV_NAME
": ARP monitoring set to %d ms with %d target(s):",
arp_interval, arp_ip_count);
": ARP monitoring set to %d ms, validate %s, with %d target(s):",
arp_interval,
arp_validate_tbl[arp_validate_value].modename,
arp_ip_count);
for (i = 0; i < arp_ip_count; i++)
printk (" %s", arp_ip_target[i]);
......@@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params)
params->xmit_policy = xmit_hashtype;
params->miimon = miimon;
params->arp_interval = arp_interval;
params->arp_validate = arp_validate_value;
params->updelay = updelay;
params->downdelay = downdelay;
params->use_carrier = use_carrier;
......
......@@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults;
extern struct bond_parm_tbl bond_mode_tbl[];
extern struct bond_parm_tbl bond_lacp_tbl[];
extern struct bond_parm_tbl xmit_hashtype_tbl[];
extern struct bond_parm_tbl arp_validate_tbl[];
static int expected_refcount = -1;
static struct class *netdev_class;
......@@ -502,6 +503,53 @@ static ssize_t bonding_store_xmit_hash(struct class_device *cd, const char *buf,
}
static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash);
/*
* Show and set arp_validate.
*/
static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf)
{
struct bonding *bond = to_bond(cd);
return sprintf(buf, "%s %d\n",
arp_validate_tbl[bond->params.arp_validate].modename,
bond->params.arp_validate) + 1;
}
static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count)
{
int new_value;
struct bonding *bond = to_bond(cd);
new_value = bond_parse_parm((char *)buf, arp_validate_tbl);
if (new_value < 0) {
printk(KERN_ERR DRV_NAME
": %s: Ignoring invalid arp_validate value %s\n",
bond->dev->name, buf);
return -EINVAL;
}
if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) {
printk(KERN_ERR DRV_NAME
": %s: arp_validate only supported in active-backup mode.\n",
bond->dev->name);
return -EINVAL;
}
printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n",
bond->dev->name, arp_validate_tbl[new_value].modename,
new_value);
if (!bond->params.arp_validate && new_value) {
bond_register_arp(bond);
} else if (bond->params.arp_validate && !new_value) {
bond_unregister_arp(bond);
}
bond->params.arp_validate = new_value;
return count;
}
static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);
/*
* Show and set the arp timer interval. There are two tricky bits
* here. First, if ARP monitoring is activated, then we must disable
......@@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si
"ARP monitoring. Disabling ARP monitoring...\n",
bond->dev->name);
bond->params.arp_interval = 0;
if (bond->params.arp_validate) {
bond_unregister_arp(bond);
bond->params.arp_validate =
BOND_ARP_VALIDATE_NONE;
}
/* Kill ARP timer, else it brings bond's link down */
if (bond->mii_timer.function) {
printk(KERN_INFO DRV_NAME
......@@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N
static struct attribute *per_bond_attrs[] = {
&class_device_attr_slaves.attr,
&class_device_attr_mode.attr,
&class_device_attr_arp_validate.attr,
&class_device_attr_arp_interval.attr,
&class_device_attr_arp_ip_target.attr,
&class_device_attr_downdelay.attr,
......
......@@ -22,8 +22,8 @@
#include "bond_3ad.h"
#include "bond_alb.h"
#define DRV_VERSION "3.0.3"
#define DRV_RELDATE "March 23, 2006"
#define DRV_VERSION "3.1.0-test"
#define DRV_RELDATE "September 9, 2006"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
......@@ -126,6 +126,7 @@ struct bond_params {
int xmit_policy;
int miimon;
int arp_interval;
int arp_validate;
int use_carrier;
int updelay;
int downdelay;
......@@ -151,6 +152,7 @@ struct slave {
struct slave *prev;
int delay;
u32 jiffies;
u32 last_arp_rx;
s8 link; /* one of BOND_LINK_XXXX */
s8 state; /* one of BOND_STATE_XXXX */
u32 original_flags;
......@@ -198,6 +200,7 @@ struct bonding {
struct bond_params params;
struct list_head vlan_list;
struct vlan_group *vlgrp;
struct packet_type arp_mon_pt;
};
/**
......@@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
return (struct bonding *)slave->dev->master->priv;
}
#define BOND_ARP_VALIDATE_NONE 0
#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE)
#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \
BOND_ARP_VALIDATE_BACKUP)
extern inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave)
{
return bond->params.arp_validate & (1 << slave->state);
}
extern inline u32 slave_last_rx(struct bonding *bond, struct slave *slave)
{
if (slave_do_arp_validate(bond, slave))
return slave->last_arp_rx;
return slave->dev->last_rx;
}
static inline void bond_set_slave_inactive_flags(struct slave *slave)
{
struct bonding *bond = slave->dev->master->priv;
......@@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
bond->params.mode != BOND_MODE_ALB)
slave->state = BOND_STATE_BACKUP;
slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
if (slave_do_arp_validate(bond, slave))
slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
}
static inline void bond_set_slave_active_flags(struct slave *slave)
{
slave->state = BOND_STATE_ACTIVE;
slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
}
static inline void bond_set_master_3ad_flags(struct bonding *bond)
......@@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl);
const char *bond_mode_name(int mode);
void bond_select_active_slave(struct bonding *bond);
void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
void bond_register_arp(struct bonding *);
void bond_unregister_arp(struct bonding *);
#endif /* _LINUX_BONDING_H */
......@@ -60,6 +60,7 @@
#define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */
#define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */
#define IFF_BONDING 0x20 /* bonding master or slave */
#define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */
#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
......
......@@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
}
/* On bonding slaves other than the currently active slave, suppress
* duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast.
* duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
* ARP on active-backup slaves with arp_validate enabled.
*/
static inline int skb_bond_should_drop(struct sk_buff *skb)
{
......@@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
if (master &&
(dev->priv_flags & IFF_SLAVE_INACTIVE)) {
if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
skb->protocol == __constant_htons(ETH_P_ARP))
return 0;
if (master->priv_flags & IFF_MASTER_ALB) {
if (skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment