Commit 66f2a2e3 authored by Ranko Zivojnovic, committed by Willy Tarreau

[PATCH] gen estimator deadlock fix

[NET]: gen_estimator deadlock fix

-Fixes ABBA deadlock noted by Patrick McHardy <kaber@trash.net>:

> There is at least one ABBA deadlock, est_timer() does:
> read_lock(&est_lock)
> spin_lock(e->stats_lock) (which is dev->queue_lock)
>
> and qdisc_destroy calls htb_destroy under dev->queue_lock, which
> calls htb_destroy_class, then gen_kill_estimator and this
> write_locks est_lock.

To fix the ABBA deadlock the rate estimators are now kept on an rcu list.

-The est_lock changes its role from protecting the list to protecting
the update of the 'bstats' pointer, in order to avoid a NULL dereference.

-The 'interval' member of the gen_estimator structure is removed as it is
not needed.
Signed-off-by: Ranko Zivojnovic <ranko@spidernet.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Willy Tarreau <w@1wt.eu>
parent 6bfc898d
...@@ -79,27 +79,27 @@ ...@@ -79,27 +79,27 @@
struct gen_estimator struct gen_estimator
{ {
struct gen_estimator *next; struct list_head list;
struct gnet_stats_basic *bstats; struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est; struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock; spinlock_t *stats_lock;
unsigned interval;
int ewma_log; int ewma_log;
u64 last_bytes; u64 last_bytes;
u32 last_packets; u32 last_packets;
u32 avpps; u32 avpps;
u32 avbps; u32 avbps;
struct rcu_head e_rcu;
}; };
struct gen_estimator_head struct gen_estimator_head
{ {
struct timer_list timer; struct timer_list timer;
struct gen_estimator *list; struct list_head list;
}; };
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */ /* Protects against NULL dereference */
static DEFINE_RWLOCK(est_lock); static DEFINE_RWLOCK(est_lock);
static void est_timer(unsigned long arg) static void est_timer(unsigned long arg)
...@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg) ...@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg)
int idx = (int)arg; int idx = (int)arg;
struct gen_estimator *e; struct gen_estimator *e;
read_lock(&est_lock); rcu_read_lock();
for (e = elist[idx].list; e; e = e->next) { list_for_each_entry_rcu(e, &elist[idx].list, list) {
u64 nbytes; u64 nbytes;
u32 npackets; u32 npackets;
u32 rate; u32 rate;
spin_lock(e->stats_lock); spin_lock(e->stats_lock);
read_lock(&est_lock);
if (e->bstats == NULL)
goto skip;
nbytes = e->bstats->bytes; nbytes = e->bstats->bytes;
npackets = e->bstats->packets; npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx); rate = (nbytes - e->last_bytes)<<(7 - idx);
...@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg) ...@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg)
e->last_packets = npackets; e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10; e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
read_unlock(&est_lock);
spin_unlock(e->stats_lock); spin_unlock(e->stats_lock);
} }
if (elist[idx].list != NULL) if (!list_empty(&elist[idx].list))
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock); rcu_read_unlock();
} }
/** /**
...@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg) ...@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg)
* &rate_est with the statistics lock grabed during this period. * &rate_est with the statistics lock grabed during this period.
* *
* Returns 0 on success or a negative error code. * Returns 0 on success or a negative error code.
*
* NOTE: Called under rtnl_mutex
*/ */
int gen_new_estimator(struct gnet_stats_basic *bstats, int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) struct gnet_stats_rate_est *rate_est,
spinlock_t *stats_lock,
struct rtattr *opt)
{ {
struct gen_estimator *est; struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt); struct gnet_estimator *parm = RTA_DATA(opt);
int idx;
if (RTA_PAYLOAD(opt) < sizeof(*parm)) if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL; return -EINVAL;
...@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, ...@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
if (est == NULL) if (est == NULL)
return -ENOBUFS; return -ENOBUFS;
est->interval = parm->interval + 2; idx = parm->interval + 2;
est->bstats = bstats; est->bstats = bstats;
est->rate_est = rate_est; est->rate_est = rate_est;
est->stats_lock = stats_lock; est->stats_lock = stats_lock;
...@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, ...@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
est->last_packets = bstats->packets; est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10; est->avpps = rate_est->pps<<10;
est->next = elist[est->interval].list; if (!elist[idx].timer.function) {
if (est->next == NULL) { INIT_LIST_HEAD(&elist[idx].list);
init_timer(&elist[est->interval].timer); setup_timer(&elist[idx].timer, est_timer, idx);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
} }
write_lock_bh(&est_lock);
elist[est->interval].list = est; if (list_empty(&elist[idx].list))
write_unlock_bh(&est_lock); mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
list_add_rcu(&est->list, &elist[idx].list);
return 0; return 0;
} }
static void __gen_kill_estimator(struct rcu_head *head)
{
struct gen_estimator *e = container_of(head,
struct gen_estimator, e_rcu);
kfree(e);
}
/** /**
* gen_kill_estimator - remove a rate estimator * gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics * @bstats: basic statistics
...@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, ...@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
* *
* Removes the rate estimator specified by &bstats and &rate_est * Removes the rate estimator specified by &bstats and &rate_est
* and deletes the timer. * and deletes the timer.
*
* NOTE: Called under rtnl_mutex
*/ */
void gen_kill_estimator(struct gnet_stats_basic *bstats, void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est) struct gnet_stats_rate_est *rate_est)
{ {
int idx; int idx;
struct gen_estimator *est, **pest; struct gen_estimator *e, *n;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list; /* Skip non initialized indexes */
while ((est=*pest) != NULL) { if (!elist[idx].timer.function)
if (est->rate_est != rate_est || est->bstats != bstats) { continue;
pest = &est->next;
list_for_each_entry_safe(e, n, &elist[idx].list, list) {
if (e->rate_est != rate_est || e->bstats != bstats)
continue; continue;
}
write_lock_bh(&est_lock); write_lock_bh(&est_lock);
*pest = est->next; e->bstats = NULL;
write_unlock_bh(&est_lock); write_unlock_bh(&est_lock);
kfree(est); list_del_rcu(&e->list);
killed++; call_rcu(&e->e_rcu, __gen_kill_estimator);
} }
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment