Commit 4167d838 authored by Stephen Hemminger, committed by David S. Miller

[PKT_SCHED]: Update to network emulation QOS scheduler.

This patch updates the network emulation packet scheduler.
	* name changed from delay to netem since it does more than just delay
	* Catalin's merged code to do packet reordering
	* uses socket queues directly rather than layering on qdisc(fifo)
	  because this is used in performance tests.
	* adds placeholder in API for future enhancements (rate and duplicate).
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@redhat.com>
parent 5f6de72e
...@@ -429,11 +429,14 @@ enum { ...@@ -429,11 +429,14 @@ enum {
#define TCA_ATM_MAX TCA_ATM_STATE #define TCA_ATM_MAX TCA_ATM_STATE
/* Delay section */ /* Network emulator */
struct tc_dly_qopt struct tc_netem_qopt
{ {
__u32 latency; __u32 latency; /* added delay (us) */
__u32 limit; __u32 limit; /* fifo limit (packets) */
__u32 loss; __u32 loss; /* random packet loss (0=none ~0=100%) */
__u32 gap; /* re-ordering gap (0 for delay all) */
__u32 duplicate; /* random packet dup (0=none ~0=100%) */
__u32 rate; /* maximum transmit rate (bytes/sec) */
}; };
#endif #endif
...@@ -164,12 +164,12 @@ config NET_SCH_DSMARK ...@@ -164,12 +164,12 @@ config NET_SCH_DSMARK
To compile this code as a module, choose M here: the To compile this code as a module, choose M here: the
module will be called sch_dsmark. module will be called sch_dsmark.
config NET_SCH_DELAY config NET_SCH_NETEM
tristate "Delay simulator" tristate "Network emulator"
depends on NET_SCHED depends on NET_SCHED
help help
Say Y if you want to delay packets by a fixed amount of Say Y if you want to emulate network delay, loss, and packet
time. This is often useful to simulate network delay when re-ordering. This is often useful to simulate networks when
testing applications or protocols. testing applications or protocols.
To compile this driver as a module, choose M here: the module To compile this driver as a module, choose M here: the module
......
...@@ -24,7 +24,7 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o ...@@ -24,7 +24,7 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_DELAY) += sch_delay.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
......
/*
* net/sched/sch_delay.c Simple constant delay
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Stephen Hemminger <shemminger@osdl.org>
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
/* Network delay simulator
 *
 * This scheduler adds a fixed delay to all packets.
 * Similar to NISTnet and BSD Dummynet.
 * It uses a byte FIFO underneath, similar to TBF.
 */
/* Per-qdisc private state of the delay scheduler (lives in sch->data). */
struct dly_sched_data {
	/* Delay a packet must age before dly_dequeue() releases it; compared
	 * against PSCHED_TDIFF() and converted with PSCHED_US2JIFFIE(). */
	u32 latency;
	/* Limit handed to the child FIFO through change_limit(). */
	u32 limit;
	/* Random drop threshold compared with net_random() in dly_enqueue(). */
	u32 loss;
	/* Fires dly_timer() to clear the throttle and reschedule the device. */
	struct timer_list timer;
	/* Child qdisc that actually holds packets; &noop_qdisc until set up. */
	struct Qdisc *qdisc;
};
/* Time stamp put into socket buffer control block.
 * Overlaid on skb->cb by dly_enqueue() and read back by dly_dequeue().
 */
struct dly_skb_cb {
	/* Time at which the packet was enqueued (PSCHED_GET_TIME). */
	psched_time_t queuetime;
};
/* Enqueue packets with underlying discipline (fifo)
 * but mark them with current time first.
 *
 * Returns 0 when the packet was queued or deliberately dropped by the
 * random-loss emulation; otherwise the child's enqueue() return value.
 * The packet is always consumed: on the random-loss path it is freed here.
 */
static int dly_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct dly_sched_data *q = (struct dly_sched_data *)sch->data;
	struct dly_skb_cb *cb = (struct dly_skb_cb *)skb->cb;
	int ret;

	/* Random packet drop 0 => none, ~0 => all.
	 * The explicit q->loss test keeps "0 => none" true even when
	 * net_random() happens to return 0.
	 */
	if (q->loss && q->loss >= net_random()) {
		sch->stats.drops++;
		/* We own the skb at this point; free it or it is leaked. */
		kfree_skb(skb);
		return 0;	/* lie about loss so TCP doesn't know */
	}

	PSCHED_GET_TIME(cb->queuetime);

	/* Queue to underlying scheduler */
	ret = q->qdisc->enqueue(skb, q->qdisc);
	if (ret)
		sch->stats.drops++;
	else {
		sch->q.qlen++;
		sch->stats.bytes += skb->len;
		sch->stats.packets++;
	}
	return ret;
}
/* Requeue packets but don't change time stamp.
 *
 * Hands the packet back to the child qdisc's requeue() and, when the child
 * reports 0, accounts for it in this qdisc's queue length.
 * Returns whatever the child returned.
 */
static int dly_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct dly_sched_data *priv = (struct dly_sched_data *)sch->data;
	int err = priv->qdisc->ops->requeue(skb, priv->qdisc);

	if (err != 0)
		return err;

	sch->q.qlen++;
	return 0;
}
/* Ask the child qdisc to drop one packet.
 *
 * Returns the value reported by the child's drop() operation; a non-zero
 * value means a packet was dropped and the counters were updated.
 */
static unsigned int dly_drop(struct Qdisc *sch)
{
	struct dly_sched_data *priv = (struct dly_sched_data *)sch->data;
	unsigned int freed = priv->qdisc->ops->drop(priv->qdisc);

	if (freed == 0)
		return 0;

	sch->q.qlen--;
	sch->stats.drops++;
	return freed;
}
/* Dequeue packet.
 * If the head packet has not yet aged by q->latency, push it back onto
 * the child, mark the qdisc throttled and arm the timer to wake up later.
 * A packet the child refuses to take back is counted as dropped and the
 * next one is tried.
 *
 * Returns the packet to transmit, or NULL when nothing is ready.
 */
static struct sk_buff *dly_dequeue(struct Qdisc *sch)
{
	struct dly_sched_data *q = (struct dly_sched_data *)sch->data;
	struct sk_buff *skb;

	while ((skb = q->qdisc->dequeue(q->qdisc)) != NULL) {
		struct dly_skb_cb *cb = (struct dly_skb_cb *)skb->cb;
		psched_time_t now;
		long diff, delay;

		PSCHED_GET_TIME(now);
		/* Remaining time this packet still has to wait. */
		diff = q->latency - PSCHED_TDIFF(now, cb->queuetime);

		if (diff <= 0) {
			/* Old enough: release it. */
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		if (q->qdisc->ops->requeue(skb, q->qdisc) == NET_XMIT_SUCCESS) {
			/* Packet is parked again; sleep for at least one jiffy. */
			delay = PSCHED_US2JIFFIE(diff);
			if (delay <= 0)
				delay = 1;
			mod_timer(&q->timer, jiffies+delay);
			sch->flags |= TCQ_F_THROTTLED;
			return NULL;
		}

		/* Child would not take it back: account the loss, try the next. */
		sch->q.qlen--;
		sch->stats.drops++;
	}

	return NULL;
}
/* Reset the qdisc: reset the child, zero the queue length, clear the
 * throttled flag and cancel any pending wakeup timer.
 */
static void dly_reset(struct Qdisc *sch)
{
	struct dly_sched_data *priv = (struct dly_sched_data *)sch->data;
	struct Qdisc *child = priv->qdisc;

	qdisc_reset(child);
	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer(&priv->timer);
}
/* Timer callback: the delay has elapsed, so clear the throttled flag and
 * schedule the device. @arg is the struct Qdisc pointer stored in
 * timer.data by dly_init().
 */
static void dly_timer(unsigned long arg)
{
	struct Qdisc *qd = (struct Qdisc *)arg;

	qd->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(qd->dev);
}
/* Tell Fifo the new limit.
 *
 * Builds a temporary rtattr carrying a struct tc_fifo_qopt with @limit and
 * passes it to the qdisc's change() operation.
 * Returns -ENOMEM if the attribute cannot be allocated, otherwise the
 * result of change().
 */
static int change_limit(struct Qdisc *q, u32 limit)
{
	const size_t attr_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
	struct tc_fifo_qopt *fifo_opt;
	struct rtattr *attr;
	int err;

	attr = kmalloc(attr_len, GFP_KERNEL);
	if (attr == NULL)
		return -ENOMEM;

	attr->rta_type = RTM_NEWQDISC;
	attr->rta_len = attr_len;
	fifo_opt = RTA_DATA(attr);
	fifo_opt->limit = limit;

	err = q->ops->change(q, attr);
	kfree(attr);
	return err;
}
/* Setup underlying FIFO discipline */
static int dly_change(struct Qdisc *sch, struct rtattr *opt)
{
struct dly_sched_data *q = (struct dly_sched_data *)sch->data;
struct tc_dly_qopt *qopt = RTA_DATA(opt);
int err;
if (q->qdisc == &noop_qdisc) {
struct Qdisc *child
= qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops);
if (!child)
return -EINVAL;
q->qdisc = child;
}
err = change_limit(q->qdisc, qopt->limit);
if (err) {
qdisc_destroy(q->qdisc);
q->qdisc = &noop_qdisc;
} else {
q->latency = qopt->latency;
q->limit = qopt->limit;
q->loss = qopt->loss;
}
return err;
}
static int dly_init(struct Qdisc *sch, struct rtattr *opt)
{
struct dly_sched_data *q = (struct dly_sched_data *)sch->data;
if (!opt)
return -EINVAL;
init_timer(&q->timer);
q->timer.function = dly_timer;
q->timer.data = (unsigned long) sch;
q->qdisc = &noop_qdisc;
return dly_change(sch, opt);
}
/* Tear down the qdisc: cancel the timer, destroy the child and point the
 * child slot back at &noop_qdisc.
 */
static void dly_destroy(struct Qdisc *sch)
{
	struct dly_sched_data *priv = (struct dly_sched_data *)sch->data;
	struct Qdisc *child = priv->qdisc;

	del_timer(&priv->timer);
	qdisc_destroy(child);
	priv->qdisc = &noop_qdisc;
}
static int dly_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct dly_sched_data *q = (struct dly_sched_data *)sch->data;
unsigned char *b = skb->tail;
struct tc_dly_qopt qopt;
qopt.latency = q->latency;
qopt.limit = q->limit;
qopt.loss = q->loss;
RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
return skb->len;
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
/* Operations table registered for the "delay" queueing discipline. */
static struct Qdisc_ops dly_qdisc_ops = {
	/* identity */
	.id		= "delay",
	.owner		= THIS_MODULE,
	.priv_size	= sizeof(struct dly_sched_data),

	/* data path */
	.enqueue	= dly_enqueue,
	.dequeue	= dly_dequeue,
	.requeue	= dly_requeue,
	.drop		= dly_drop,

	/* life cycle and configuration */
	.init		= dly_init,
	.reset		= dly_reset,
	.destroy	= dly_destroy,
	.change		= dly_change,
	.dump		= dly_dump,
};
/* Module entry point: register the "delay" qdisc and return the result
 * of register_qdisc().
 */
static int __init dly_module_init(void)
{
	int err = register_qdisc(&dly_qdisc_ops);

	return err;
}
/* Module exit point: unregister the "delay" qdisc. */
static void __exit dly_module_exit(void)
{
	(void) unregister_qdisc(&dly_qdisc_ops);
}
/* Hook the registration helpers into module load/unload. */
module_init(dly_module_init)
module_exit(dly_module_exit)
/* License tag for the module. */
MODULE_LICENSE("GPL");
/*
* net/sched/sch_netem.c Network emulator
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Stephen Hemminger <shemminger@osdl.org>
* Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
*/
#include <linux/config.h>
#include <linux/module.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/* Network emulator
 *
 * This scheduler can alter packet spacing and order.
 * Similar to NISTnet and BSD Dummynet.
 */
/* Per-qdisc private state of the network emulator (lives in sch->data). */
struct netem_sched_data {
	/* Packets as they arrive from netem_enqueue()/netem_requeue(). */
	struct sk_buff_head qnormal;
	/* Packets being held back until their time_to_send is reached. */
	struct sk_buff_head qdelay;
	/* Fires netem_timer() to reschedule the device. */
	struct timer_list timer;
	/* Added to the enqueue time to form each packet's time_to_send. */
	u32 latency;
	/* Random drop threshold compared with net_random() in netem_enqueue(). */
	u32 loss;
	/* Packets sent undelayed since the last one was held back. */
	u32 counter;
	/* Number of packets sent undelayed between held-back packets;
	 * with 0 every packet goes through the delay queue. */
	u32 gap;
};
/* Time stamp put into socket buffer control block.
 * Overlaid on skb->cb by netem_enqueue() and read by netem_get_delayed().
 */
struct netem_skb_cb {
	/* Enqueue time plus latency: earliest time the packet may be sent. */
	psched_time_t time_to_send;
};
/* Enqueue a packet on the normal queue, stamping it with the earliest
 * time at which it may be sent (current time plus q->latency).
 *
 * A packet selected by the random-loss emulation is freed here but 0 is
 * still returned, so the sender is not told about the loss.  When qnormal
 * already holds dev->tx_queue_len packets, the packet is freed and
 * NET_XMIT_DROP is returned.  The packet is consumed on every path.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = (struct netem_sched_data *)sch->data;
	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;

	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);

	/* Random packet drop 0 => none, ~0 => all.
	 * The explicit q->loss test keeps "0 => none" true even when
	 * net_random() happens to return 0.
	 */
	if (q->loss && q->loss >= net_random()) {
		sch->stats.drops++;
		/* We own the skb at this point; free it or it is leaked. */
		kfree_skb(skb);
		return 0;	/* lie about loss so TCP doesn't know */
	}

	if (q->qnormal.qlen < sch->dev->tx_queue_len) {
		PSCHED_GET_TIME(cb->time_to_send);
		PSCHED_TADD(cb->time_to_send, q->latency);

		__skb_queue_tail(&q->qnormal, skb);
		sch->q.qlen++;
		sch->stats.bytes += skb->len;
		sch->stats.packets++;
		return 0;
	}

	/* Queue full */
	sch->stats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
/* Requeue packets but don't change time stamp.
 * The packet goes back to the head of the normal queue; always returns 0.
 */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *priv = (struct netem_sched_data *)sch->data;
	struct sk_buff_head *normal = &priv->qnormal;

	__skb_queue_head(normal, skb);
	sch->q.qlen++;

	return 0;
}
/*
 * Look at the head of the delay queue.
 *
 * If its time_to_send has been reached (remaining delay, in jiffies, is
 * <= 0) the packet is unlinked, the timer is cancelled and the packet is
 * returned.  Otherwise the timer is armed for the remaining delay unless
 * it is already pending, and NULL is returned.  NULL is also returned
 * when the delay queue is empty.
 */
static struct sk_buff *netem_get_delayed(struct netem_sched_data *q)
{
	const struct netem_skb_cb *cb;
	struct sk_buff *head;
	psched_time_t now;
	long delay;

	head = skb_peek(&q->qdelay);
	if (head == NULL)
		return NULL;

	cb = (const struct netem_skb_cb *)head->cb;
	PSCHED_GET_TIME(now);
	delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
	pr_debug("netem_dequeue: delay queue %p@%lu %ld\n",
		 head, jiffies, delay);

	if (delay > 0) {
		/* Not ready yet: make sure a wakeup is scheduled. */
		if (!timer_pending(&q->timer)) {
			q->timer.expires = jiffies + delay;
			add_timer(&q->timer);
		}
		return NULL;
	}

	/* it's baked enough */
	__skb_unlink(head, &q->qdelay);
	del_timer(&q->timer);
	return head;
}
/* Dequeue packet.
* If packet needs to be held up, then put in the delay
* queue and set timer to wakeup later.
*/
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = (struct netem_sched_data *)sch->data;
struct sk_buff *skb;
skb = netem_get_delayed(q);
if (!skb && (skb = __skb_dequeue(&q->qnormal))) {
/* are we doing out of order packet skip? */
if (q->counter < q->gap) {
pr_debug("netem_dequeue: send %p normally\n", skb);
q->counter++;
} else {
/* don't send now hold for later */
pr_debug("netem_dequeue: hold [%p]@%lu\n", skb, jiffies);
__skb_queue_tail(&q->qdelay, skb);
q->counter = 0;
skb = netem_get_delayed(q);
}
}
if (skb)
sch->q.qlen--;
return skb;
}
/* Timer callback: schedule the device. @arg is the struct Qdisc pointer
 * stored in timer.data by netem_init().
 */
static void netem_timer(unsigned long arg)
{
	struct Qdisc *qd = (struct Qdisc *)arg;

	pr_debug("netem_timer: fired @%lu\n", jiffies);
	netif_schedule(qd->dev);
}
/* Reset the qdisc: free every packet on both queues, zero the queue
 * length and cancel the wakeup timer.
 */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *priv = (struct netem_sched_data *)sch->data;

	skb_queue_purge(&priv->qnormal);
	skb_queue_purge(&priv->qdelay);

	sch->q.qlen = 0;
	del_timer_sync(&priv->timer);
}
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
struct netem_sched_data *q = (struct netem_sched_data *)sch->data;
struct tc_netem_qopt *qopt = RTA_DATA(opt);
if (qopt->limit)
sch->dev->tx_queue_len = qopt->limit;
q->gap = qopt->gap;
q->loss = qopt->loss;
q->latency = qopt->latency;
return 0;
}
static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
struct netem_sched_data *q = (struct netem_sched_data *)sch->data;
if (!opt)
return -EINVAL;
skb_queue_head_init(&q->qnormal);
skb_queue_head_init(&q->qdelay);
init_timer(&q->timer);
q->timer.function = netem_timer;
q->timer.data = (unsigned long) sch;
q->counter = 0;
return netem_change(sch, opt);
}
/* Tear down the qdisc: cancel the wakeup timer.  This function does not
 * free queued packets itself; netem_reset() does that.
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *priv = (struct netem_sched_data *)sch->data;
	struct timer_list *wakeup = &priv->timer;

	del_timer_sync(wakeup);
}
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct netem_sched_data *q = (struct netem_sched_data *)sch->data;
unsigned char *b = skb->tail;
struct tc_netem_qopt qopt;
qopt.latency = q->latency;
qopt.limit = sch->dev->tx_queue_len;
qopt.loss = q->loss;
qopt.gap = q->gap;
RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
return skb->len;
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
/* Operations table registered for the "netem" queueing discipline. */
static struct Qdisc_ops netem_qdisc_ops = {
	/* identity */
	.id		= "netem",
	.owner		= THIS_MODULE,
	.priv_size	= sizeof(struct netem_sched_data),

	/* data path */
	.enqueue	= netem_enqueue,
	.dequeue	= netem_dequeue,
	.requeue	= netem_requeue,

	/* life cycle and configuration */
	.init		= netem_init,
	.reset		= netem_reset,
	.destroy	= netem_destroy,
	.change		= netem_change,
	.dump		= netem_dump,
};
/* Module entry point: register the "netem" qdisc and return the result
 * of register_qdisc().
 */
static int __init netem_module_init(void)
{
	int err = register_qdisc(&netem_qdisc_ops);

	return err;
}
/* Module exit point: unregister the "netem" qdisc. */
static void __exit netem_module_exit(void)
{
	(void) unregister_qdisc(&netem_qdisc_ops);
}
/* Hook the registration helpers into module load/unload. */
module_init(netem_module_init)
module_exit(netem_module_exit)
/* License tag for the module. */
MODULE_LICENSE("GPL");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment