Commit 4e54c481 authored by Jamal Hadi Salim, committed by David S. Miller

[NET]: Add tc extensions infrastructure.

Signed-off-by: Jamal Hadi Salim <hadi@znyx.com>
Signed-off-by: David S. Miller <davem@redhat.com>
parent 71279d2b
......@@ -366,6 +366,8 @@ struct net_device
struct Qdisc *qdisc_ingress;
unsigned long tx_queue_len; /* Max frames per queue allowed */
/* ingress path synchronizer */
spinlock_t ingress_lock;
/* hard_start_xmit synchronizer */
spinlock_t xmit_lock;
/* cpu id of processor entered to hard_start_xmit or -1,
......
#ifndef __LINUX_PKT_CLS_H
#define __LINUX_PKT_CLS_H
/* I think I could have done better macros; for now this is stolen from
 * some arch/mips code - jhs
 */
#define _TC_MAKE32(x) ((x))
#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n))
#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n))
#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n))
#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n))
/* verdict bit breakdown
*
bit 0: when set -> this packet has been munged already
bit 1: when set -> It is ok to munge this packet
bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded
assume loop
bit 6,7: Where this packet was last seen
0: Above the transmit path, at the socket level
1: on the Ingress
2: on the Egress
bit 8: when set --> Request not to classify on ingress.
bits 9,10,11: redirect counter - redirect TTL. Loop avoidance
*
* */
#define TC_MUNGED _TC_MAKEMASK1(0)
#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED))
#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED)
#define TC_OK2MUNGE _TC_MAKEMASK1(1)
#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE))
#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE)
#define S_TC_VERD _TC_MAKE32(2)
#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD)
#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD)
#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD)
#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD))
#define S_TC_FROM _TC_MAKE32(6)
#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM)
#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM)
#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM))
#define AT_STACK 0x0
#define AT_INGRESS 0x1
#define AT_EGRESS 0x2
#define TC_NCLS _TC_MAKEMASK1(8)
#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS))
#define CLR_TC_NCLS(v) ( v & ~TC_NCLS)
#define S_TC_RTTL _TC_MAKE32(9)
#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL)
#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL)
#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL)
#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL))
#define S_TC_AT _TC_MAKE32(12)
#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT)
#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT)
#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT))
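/* Example (illustrative, not part of this patch): how the helpers
 * above compose to pack and unpack the per-skb verdict word. This
 * compiles as a userspace test if the macros above are in scope.
 */
#include <stdio.h>

int main(void)
{
	unsigned int verd = 0;

	verd = SET_TC_AT(verd, AT_INGRESS);	/* bits 12-13: where we are */
	verd = SET_TC_VERD(verd, 3);		/* bits 2-5: reclassify count */
	verd = SET_TC_NCLS(verd);		/* bit 8: skip ingress classify */

	printf("at=%u verd=%u ncls=%u\n",
	       G_TC_AT(verd), G_TC_VERD(verd),
	       (verd & TC_NCLS) ? 1U : 0U);
	return 0;
}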
/* Action types */
enum
{
TCA_ACT_UNSPEC=0,
TCA_ACT_KIND=1,
TCA_ACT_OPTIONS=2,
TCA_ACT_INDEX=3,
TCA_ACT_POLICE=4,
/* other actions go here */
__TCA_ACT_MAX=255
};
#define TCA_ACT_MAX __TCA_ACT_MAX
#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
#define TCA_ACT_MAX_PRIO 32
#define TCA_ACT_BIND 1
#define TCA_ACT_NOBIND 0
#define TCA_ACT_UNBIND 1
#define TCA_ACT_NOUNBIND 0
#define TCA_ACT_REPLACE 1
#define TCA_ACT_NOREPLACE 0
#define MAX_REC_LOOP 4
#define MAX_RED_LOOP 4
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
#define TC_ACT_RECLASSIFY 1
#define TC_ACT_SHOT 2
#define TC_ACT_PIPE 3
#define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6
#define TC_ACT_JUMP 0x10000000
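/* Sketch (not part of this patch): the caller-side contract for the
 * TC_ACT_* verdicts, mirroring the fw/u32 classify paths changed
 * below. Negative (TC_ACT_UNSPEC) means the action had no opinion;
 * anything >= 0 is a final verdict returned up the stack.
 */
static int example_classify(struct sk_buff *skb, struct tc_action *act)
{
	int pol_res = tcf_action_exec(skb, act);

	if (pol_res >= 0)
		return pol_res;	/* e.g. TC_ACT_SHOT drops, TC_ACT_OK accepts */
	return 0;		/* no opinion: fall through to default */
}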
struct tc_police
{
__u32 index;
#ifdef CONFIG_NET_CLS_ACT
int refcnt;
int bindcnt;
#endif
/* Turned off because it requires new tc
* to work (for now maintain ABI)
*
#ifdef CONFIG_NET_CLS_ACT
__u32 capab;
#endif
*/
int action;
#define TC_POLICE_UNSPEC (-1)
#define TC_POLICE_OK 0
#define TC_POLICE_RECLASSIFY 1
#define TC_POLICE_SHOT 2
#define TC_POLICE_UNSPEC TC_ACT_UNSPEC
#define TC_POLICE_OK TC_ACT_OK
#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY
#define TC_POLICE_SHOT TC_ACT_SHOT
#define TC_POLICE_PIPE TC_ACT_PIPE
__u32 limit;
__u32 burst;
......@@ -17,6 +133,26 @@ struct tc_police
struct tc_ratespec peakrate;
};
struct tcf_t
{
__u32 install;
__u32 lastuse;
__u32 expires;
};
struct tc_cnt
{
int refcnt;
int bindcnt;
};
#define tc_gen \
__u32 index; \
__u32 capab; \
int action; \
int refcnt; \
int bindcnt
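/* Sketch (illustrative): a per-action userspace struct reuses the
 * shared header via tc_gen; the struct name and extra field here are
 * made up, not part of this patch.
 */
struct tc_mydefact
{
	tc_gen;		/* index, capab, action, refcnt, bindcnt */
	int	extra;	/* action-specific parameter */
};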
enum
{
TCA_POLICE_UNSPEC,
......@@ -25,8 +161,8 @@ enum
TCA_POLICE_PEAKRATE,
TCA_POLICE_AVRATE,
TCA_POLICE_RESULT,
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
......@@ -50,6 +186,12 @@ enum
TCA_U32_DIVISOR,
TCA_U32_SEL,
TCA_U32_POLICE,
#ifdef CONFIG_NET_CLS_ACT
TCA_U32_ACT,
#endif
#ifdef CONFIG_NET_CLS_IND
TCA_U32_INDEV,
#endif
__TCA_U32_MAX
};
......@@ -61,6 +203,9 @@ struct tc_u32_key
__u32 val;
int off;
int offmask;
#ifdef CONFIG_CLS_U32_PERF
unsigned long kcnt;
#endif
};
struct tc_u32_sel
......@@ -68,6 +213,7 @@ struct tc_u32_sel
unsigned char flags;
unsigned char offshift;
unsigned char nkeys;
unsigned char fshift; /* fold shift */
__u16 offmask;
__u16 off;
......@@ -75,7 +221,10 @@ struct tc_u32_sel
short hoff;
__u32 hmask;
#ifdef CONFIG_CLS_U32_PERF
unsigned long rcnt;
unsigned long rhit;
#endif
struct tc_u32_key keys[0];
};
......@@ -102,7 +251,7 @@ enum
__TCA_RSVP_MAX
};
#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1)
#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
struct tc_rsvp_gpi
{
......@@ -143,6 +292,12 @@ enum
TCA_FW_UNSPEC,
TCA_FW_CLASSID,
TCA_FW_POLICE,
#ifdef CONFIG_NET_CLS_IND
TCA_FW_INDEV,
#endif
#ifdef CONFIG_NET_CLS_ACT
TCA_FW_ACT,
#endif
__TCA_FW_MAX
};
......@@ -162,6 +317,6 @@ enum
__TCA_TCINDEX_MAX
};
#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
#endif
......@@ -37,6 +37,7 @@ struct tc_stats
__u32 bps; /* Current flow byte rate */
__u32 pps; /* Current flow packet rate */
__u32 qlen;
__u32 reqs; /* number of requeue events */
__u32 backlog;
#ifdef __KERNEL__
spinlock_t *lock;
......
......@@ -44,6 +44,10 @@
#define RTM_DELTFILTER (RTM_BASE+29)
#define RTM_GETTFILTER (RTM_BASE+30)
#define RTM_NEWACTION (RTM_BASE+32)
#define RTM_DELACTION (RTM_BASE+33)
#define RTM_GETACTION (RTM_BASE+34)
#define RTM_NEWPREFIX (RTM_BASE+36)
#define RTM_GETPREFIX (RTM_BASE+38)
......@@ -639,6 +643,7 @@ enum
TCA_STATS,
TCA_XSTATS,
TCA_RATE,
TCA_FCNT,
__TCA_MAX
};
......@@ -673,6 +678,18 @@ enum
#define RTMGRP_IPV6_PREFIX 0x20000
/* TC action piece */
struct tcamsg
{
unsigned char tca_family;
unsigned char tca__pad1;
unsigned short tca__pad2;
};
#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
#define TCA_ACT_TAB 1 /* attr type must be >=1 */
#define TCAA_MAX 1
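/* Sketch (not part of this patch): how a userspace dump request for
 * actions could be laid out with the tcamsg header above; a nested
 * TCA_ACT_TAB attribute would be appended starting at TA_RTA().
 * Error handling omitted.
 */
#include <sys/socket.h>
#include <string.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static struct {
	struct nlmsghdr	n;
	struct tcamsg	t;
	char		buf[256];	/* room for nested attributes */
} req;

static void build_getaction(void)
{
	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcamsg));
	req.n.nlmsg_type  = RTM_GETACTION;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.t.tca_family  = AF_UNSPEC;
}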
/* End of information exported to user level */
#ifdef __KERNEL__
......
......@@ -155,6 +155,7 @@ struct skb_shared_info {
* @sk: Socket we are owned by
* @stamp: Time we arrived
* @dev: Device we arrived on/are leaving by
* @input_dev: Device we arrived on
* @real_dev: The real device we are using
* @h: Transport layer header
* @nh: Network layer header
......@@ -197,6 +198,7 @@ struct sk_buff {
struct sock *sk;
struct timeval stamp;
struct net_device *dev;
struct net_device *input_dev;
struct net_device *real_dev;
union {
......@@ -262,9 +264,15 @@ struct sk_buff {
} private;
#endif
#ifdef CONFIG_NET_SCHED
__u32 tc_index; /* traffic control index */
__u32 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u32 tc_verd; /* traffic control verdict */
__u32 tc_classid; /* traffic control classid */
#endif
#endif
/* These elements must be at the end, see alloc_skb() for details. */
unsigned int truesize;
atomic_t users;
......
#ifndef __NET_PKT_ACT_H
#define __NET_PKT_ACT_H
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#define tca_st(val) (struct tcf_##val *)
#define PRIV(a,name) ( tca_st(name) (a)->priv)
#if 0 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif
#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif
static __inline__ unsigned
tcf_hash(u32 index)
{
return index & MY_TAB_MASK;
}
/* probably move this from being inline
* and put into act_generic
*/
static inline void
tcf_hash_destroy(struct tcf_st *p)
{
unsigned h = tcf_hash(p->index);
struct tcf_st **p1p;
for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) {
if (*p1p == p) {
write_lock_bh(&tcf_t_lock);
*p1p = p->next;
write_unlock_bh(&tcf_t_lock);
#ifdef CONFIG_NET_ESTIMATOR
qdisc_kill_estimator(&p->stats);
#endif
kfree(p);
return;
}
}
BUG_TRAP(0);
}
static inline void
tcf_hash_release(struct tcf_st *p, int bind )
{
if (p) {
if (bind) {
p->bindcnt--;
}
p->refcnt--;
if (p->refcnt > 0)
MOD_DEC_USE_COUNT;
if(p->bindcnt <=0 && p->refcnt <= 0) {
tcf_hash_destroy(p);
}
}
}
static __inline__ int
tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
struct tc_action *a)
{
struct tcf_st *p;
int err =0, index = -1,i= 0, s_i = 0, n_i = 0;
struct rtattr *r ;
read_lock(&tcf_t_lock);
s_i = cb->args[0];
for (i = 0; i < MY_TAB_SIZE; i++) {
p = tcf_ht[tcf_hash(i)];
for (; p; p = p->next) {
index++;
if (index < s_i)
continue;
a->priv = p;
a->order = n_i;
r = (struct rtattr*) skb->tail;
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, 0, 0);
if (0 > err) {
index--;
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
r->rta_len = skb->tail - (u8*)r;
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO) {
printk("Jamal Dump Exceeded batch limit\n");
goto done;
}
}
}
done:
read_unlock(&tcf_t_lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
rtattr_failure:
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
static __inline__ int
tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
{
struct tcf_st *p, *s_p;
struct rtattr *r ;
int i= 0, n_i = 0;
r = (struct rtattr*) skb->tail;
RTA_PUT(skb, a->order, 0, NULL);
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
for (i = 0; i < MY_TAB_SIZE; i++) {
p = tcf_ht[tcf_hash(i)];
while (p != NULL) {
s_p = p->next;
printk("tcf_del_walker deleting ..\n");
tcf_hash_release(p, 0);
n_i++;
p = s_p;
}
}
RTA_PUT(skb, TCA_FCNT, 4, &n_i);
r->rta_len = skb->tail - (u8*)r;
return n_i;
rtattr_failure:
skb_trim(skb, (u8*)r - skb->data);
return -EINVAL;
}
static __inline__ int
tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type,
struct tc_action *a)
{
if (type == RTM_DELACTION) {
return tcf_del_walker(skb,a);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(skb,cb,a);
} else {
printk("tcf_generic_walker: unknown action %d\n",type);
return -EINVAL;
}
}
static __inline__ struct tcf_st *
tcf_hash_lookup(u32 index)
{
struct tcf_st *p;
read_lock(&tcf_t_lock);
for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) {
if (p->index == index)
break;
}
read_unlock(&tcf_t_lock);
return p;
}
static __inline__ u32
tcf_hash_new_index(void)
{
do {
if (++idx_gen == 0)
idx_gen = 1;
} while (tcf_hash_lookup(idx_gen));
return idx_gen;
}
static inline int
tcf_hash_search(struct tc_action *a, u32 index)
{
struct tcf_st *p = tcf_hash_lookup(index);
if (p != NULL) {
a->priv = p;
return 1;
} else {
return 0;
}
}
#ifdef CONFIG_NET_ACT_INIT
static inline struct tcf_st *
tcf_hash_check(struct tc_st *parm, struct tc_action *a, int ovr, int bind)
{
struct tcf_st *p = NULL;
if (parm->index && (p = tcf_hash_lookup(parm->index)) != NULL) {
spin_lock(&p->lock);
if (bind) {
p->bindcnt++;
p->refcnt++;
}
spin_unlock(&p->lock);
a->priv = (void *) p;
}
return p;
}
static inline struct tcf_st *
tcf_hash_create(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind)
{
unsigned h;
struct tcf_st *p = NULL;
p = kmalloc(size, GFP_KERNEL);
if (p == NULL)
return p;
memset(p, 0, size);
p->refcnt = 1;
if (bind) {
p->bindcnt = 1;
}
MOD_INC_USE_COUNT;
spin_lock_init(&p->lock);
p->stats.lock = &p->lock;
p->index = parm->index ? : tcf_hash_new_index();
p->tm.install = jiffies;
p->tm.lastuse = jiffies;
#ifdef CONFIG_NET_ESTIMATOR
if (est) {
qdisc_new_estimator(&p->stats, est);
}
#endif
h = tcf_hash(p->index);
write_lock_bh(&tcf_t_lock);
p->next = tcf_ht[h];
tcf_ht[h] = p;
write_unlock_bh(&tcf_t_lock);
a->priv = (void *) p;
return p;
}
static inline struct tcf_st *
tcf_hash_init(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind)
{
struct tcf_st *p;
p = tcf_hash_check(parm, a, ovr, bind);
if (NULL == p)
	return tcf_hash_create(parm, est, a, size, ovr, bind);
return p;	/* found and bound an existing entry */
}
#endif
#endif
......@@ -71,12 +71,38 @@ static inline int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct
{
int err = 0;
u32 protocol = skb->protocol;
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto *otp = tp;
reclassify:
#endif
protocol = skb->protocol;
for ( ; tp; tp = tp->next) {
if ((tp->protocol == protocol ||
tp->protocol == __constant_htons(ETH_P_ALL)) &&
(err = tp->classify(skb, tp, res)) >= 0)
tp->protocol == __constant_htons(ETH_P_ALL)) &&
(err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
if ( TC_ACT_RECLASSIFY == err) {
__u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
tp = otp;
if (MAX_REC_LOOP < verd++) {
printk("rule prio %d protocol %02x: reclassify loop, packet dropped\n",tp->prio&0xffff, ntohs(tp->protocol));
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
goto reclassify;
} else {
if (skb->tc_verd)
skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
return err;
}
#else
return err;
#endif
}
}
return -1;
}
......@@ -90,6 +116,8 @@ static inline void tcf_destroy(struct tcf_proto *tp)
extern int register_tcf_proto_ops(struct tcf_proto_ops *ops);
extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
extern int ing_filter(struct sk_buff *skb);
......
......@@ -12,11 +12,14 @@
#include <linux/types.h>
#include <linux/pkt_sched.h>
#include <net/pkt_cls.h>
#include <linux/module.h>
#include <linux/rtnetlink.h>
#ifdef CONFIG_X86_TSC
#include <asm/msr.h>
#endif
struct rtattr;
struct Qdisc;
......@@ -390,14 +393,15 @@ struct tcf_police
{
struct tcf_police *next;
int refcnt;
#ifdef CONFIG_NET_CLS_ACT
int bindcnt;
#endif
u32 index;
int action;
int result;
u32 ewma_rate;
u32 burst;
u32 mtu;
u32 toks;
u32 ptoks;
psched_time_t t_c;
......@@ -408,16 +412,84 @@ struct tcf_police
struct tc_stats stats;
};
#ifdef CONFIG_NET_CLS_ACT
#define tca_gen(name) \
struct tcf_##name *next; \
u32 index; \
int refcnt; \
int bindcnt; \
u32 capab; \
int action; \
struct tcf_t tm; \
struct tc_stats stats; \
spinlock_t lock
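/* Sketch (illustrative): tca_gen() stamps the shared bookkeeping
 * fields into each action's private kernel state; a hypothetical
 * action would declare its state like this.
 */
struct tcf_mymark
{
	tca_gen(mymark);	/* next, index, refcnt, bindcnt, capab,
				 * action, tm, stats, lock */
	u32 mark;		/* action-specific data */
};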
struct tc_action
{
void *priv;
struct tc_action_ops *ops;
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
__u32 order;
struct tc_action *next;
};
#define TCA_CAP_NONE 0
struct tc_action_ops
{
struct tc_action_ops *next;
char kind[IFNAMSIZ];
__u32 type; /* TBD to match kind */
__u32 capab; /* capabilities includes 4 bit version */
int (*act)(struct sk_buff **, struct tc_action *);
int (*get_stats)(struct sk_buff *, struct tc_action *);
int (*dump)(struct sk_buff *, struct tc_action *,int , int);
void (*cleanup)(struct tc_action *, int bind);
int (*lookup)(struct tc_action *, u32 );
int (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int );
int (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *);
};
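/* Sketch (illustrative, not part of this patch): the smallest action
 * a module could register through this ops table. A real action also
 * fills in init, lookup and walk, as the police action does.
 */
static int tcf_nop_act(struct sk_buff **pskb, struct tc_action *a)
{
	return TC_ACT_OK;	/* let the packet continue */
}

static struct tc_action_ops act_nop_ops = {
	.kind	= "nop",
	.type	= TCA_ACT_UNSPEC,
	.capab	= TCA_CAP_NONE,
	.act	= tcf_nop_act,
};
/* registered from module init with tcf_register_action(&act_nop_ops) */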
extern int tcf_register_action(struct tc_action_ops *a);
extern int tcf_unregister_action(struct tc_action_ops *a);
extern void tcf_action_destroy(struct tc_action *a, int bind);
extern int tcf_action_exec(struct sk_buff *skb, struct tc_action *a);
extern int tcf_action_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,char *n, int ovr, int bind);
extern int tcf_action_init_1(struct rtattr *rta, struct rtattr *est, struct tc_action *a,char *n, int ovr, int bind);
extern int tcf_action_dump(struct sk_buff *skb, struct tc_action *a, int, int);
extern int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
extern int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
extern int tcf_action_copy_stats (struct sk_buff *,struct tc_action *);
extern int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_action *,int , int );
extern int tcf_act_police_dump(struct sk_buff *, struct tc_action *, int, int);
extern int tcf_act_police(struct sk_buff **skb, struct tc_action *a);
#endif
extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st);
extern void tcf_police_destroy(struct tcf_police *p);
extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est);
extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
static inline void tcf_police_release(struct tcf_police *p)
static inline void tcf_police_release(struct tcf_police *p, int bind)
{
#ifdef CONFIG_NET_CLS_ACT
if (p) {
if (bind) {
p->bindcnt--;
}
p->refcnt--;
if (p->refcnt > 0)
MOD_DEC_USE_COUNT;
if (p->refcnt <= 0 && !p->bindcnt)
tcf_police_destroy(p);
}
#else
if (p && --p->refcnt == 0)
tcf_police_destroy(p);
#endif
}
extern struct Qdisc noop_qdisc;
......
......@@ -1351,6 +1351,9 @@ int dev_queue_xmit(struct sk_buff *skb)
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
q = dev->qdisc;
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
rc = q->enqueue(skb, q);
......@@ -1731,6 +1734,48 @@ static inline int __handle_bridge(struct sk_buff *skb,
return 0;
}
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for a few useless
 * instructions (a compare and 2 stores) when it is off but
 * CONFIG_NET_CLS_ACT is on.
 * NOTE: This doesn't remove any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;
if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected, dropping packet (%s->%s)\n",
skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
if (NULL == skb->input_dev) {
skb->input_dev = skb->dev;
printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
}
spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);
}
return result;
}
#endif
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
......@@ -1762,6 +1807,13 @@ int netif_receive_skb(struct sk_buff *skb)
skb->mac_len = skb->nh.raw - skb->mac.raw;
pt_prev = NULL;
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
......@@ -1771,6 +1823,26 @@ int netif_receive_skb(struct sk_buff *skb)
}
}
#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
atomic_inc(&skb->users);
ret = pt_prev->func(skb, skb->dev, pt_prev);
pt_prev = NULL; /* no one else should process this after */
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}
ret = ing_filter(skb);
if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}
skb->tc_verd = 0;
ncls:
#endif
handle_diverter(skb);
if (__handle_bridge(skb, &pt_prev, &ret))
......@@ -2824,6 +2896,10 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock);
dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
#endif
#ifdef CONFIG_NET_FASTROUTE
dev->fastpath_lock = RW_LOCK_UNLOCKED;
#endif
......@@ -3358,4 +3434,9 @@ EXPORT_SYMBOL(netdev_fastroute);
EXPORT_SYMBOL(netdev_fastroute_obstacles);
#endif
#ifdef CONFIG_NET_CLS_ACT
EXPORT_SYMBOL(ing_filter);
#endif
EXPORT_PER_CPU_SYMBOL(softnet_data);
......@@ -93,7 +93,8 @@ static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
NLMSG_LENGTH(sizeof(struct rtmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcmsg))
NLMSG_LENGTH(sizeof(struct tcmsg)),
NLMSG_LENGTH(sizeof(struct tcamsg))
};
static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
......@@ -105,7 +106,8 @@ static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
RTA_MAX,
TCA_MAX,
TCA_MAX,
TCA_MAX
TCA_MAX,
TCAA_MAX
};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
......
......@@ -49,6 +49,9 @@
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
......@@ -241,6 +244,15 @@ void __kfree_skb(struct sk_buff *skb)
nf_bridge_put(skb->nf_bridge);
#endif
#endif
/* XXX: Is this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
skb->tc_classid = 0;
#endif
#endif
kfree_skbmem(skb);
}
......@@ -312,6 +324,14 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
C(input_dev);
C(tc_classid);
#endif
#endif
C(truesize);
atomic_set(&n->users, 1);
......@@ -366,6 +386,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
new->tc_verd = old->tc_verd;
#endif
new->tc_index = old->tc_index;
#endif
atomic_set(&new->users, 1);
......
......@@ -164,6 +164,9 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->mac.raw=skb->data;
skb_pull(skb,ETH_HLEN);
eth= skb->mac.ethernet;
#ifdef CONFIG_NET_CLS_ACT
skb->input_dev = dev;
#endif
if(*eth->h_dest&1)
{
......
......@@ -177,7 +177,7 @@ config NET_SCH_DELAY
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_SCHED && NETFILTER
depends on NET_SCHED
help
If you say Y here, you will be able to police incoming bandwidth
and drop packets when this bandwidth exceeds your desired rate.
......@@ -274,6 +274,22 @@ config NET_CLS_U32
To compile this code as a module, choose M here: the
module will be called cls_u32.
config CLS_U32_PERF
bool "U32 classifier performance counters"
depends on NET_CLS_U32
help
Gathers statistics that can be used to tune u32 classifier performance.
Requires a new iproute2.
config NET_CLS_IND
bool "Classify by input device (slows down u32/fw)"
depends on NET_CLS_U32 || NET_CLS_FW
help
This option will eventually be removed when a metadata
action becomes available, because it slows things down a little.
Available only for the u32 and fw classifiers.
Requires a new iproute2.
config NET_CLS_RSVP
tristate "Special RSVP classifier"
depends on NET_CLS && NET_QOS
......@@ -303,9 +319,24 @@ config NET_CLS_RSVP6
To compile this code as a module, choose M here: the
module will be called cls_rsvp6.
config NET_CLS_ACT
bool "Packet ACTION"
depends on NET_CLS && NET_QOS
---help---
This option requires a new iproute2. It enables tc extensions
that can be used with tc classifiers. Only the u32 and fw
classifiers are supported at the moment.
config NET_ACT_POLICE
tristate "Policing Actions"
depends on NET_CLS_ACT
---help---
If you are using a newer iproute2, select this one; otherwise use
NET_CLS_POLICE below.
config NET_CLS_POLICE
bool "Traffic policing (needed for in/egress)"
depends on NET_CLS && NET_QOS
depends on NET_CLS && NET_QOS && !NET_ACT_POLICE
help
Say Y to support traffic policing (bandwidth limits). Needed for
ingress and egress rate limiting.
......
......@@ -7,7 +7,9 @@ obj-y := sch_generic.o
obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o
obj-$(CONFIG_NET_ESTIMATOR) += estimator.o
obj-$(CONFIG_NET_CLS) += cls_api.o
obj-$(CONFIG_NET_CLS_POLICE) += police.o
obj-$(CONFIG_NET_CLS_ACT) += act_api.o
obj-$(CONFIG_NET_ACT_POLICE) += police.o
obj-$(CONFIG_NET_CLS_POLICE) += police.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_CSZ) += sch_csz.o
......
......@@ -11,6 +11,7 @@
* Changes:
*
* Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
*
*/
#include <asm/uaccess.h>
......@@ -36,6 +37,12 @@
#include <net/sock.h>
#include <net/pkt_sched.h>
#if 0 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif
/* The list of all installed classifier types */
static struct tcf_proto_ops *tcf_proto_base;
......@@ -132,7 +139,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
struct tcf_proto_ops *tp_ops;
struct Qdisc_class_ops *cops;
unsigned long cl = 0;
unsigned long fh;
unsigned long fh, fh_s;
int err;
if (prio == 0) {
......@@ -238,7 +245,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
} else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
goto errout;
fh = tp->ops->get(tp, t->tcm_handle);
fh_s = fh = tp->ops->get(tp, t->tcm_handle);
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
......@@ -247,6 +254,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
*back = tp->next;
spin_unlock_bh(&dev->queue_lock);
write_unlock(&qdisc_tree_lock);
tfilter_notify(skb, n, tp, fh_s, RTM_DELTFILTER);
tcf_destroy(tp);
err = 0;
goto errout;
......@@ -264,6 +272,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
if (err == 0)
tfilter_notify(skb, n, tp, fh_s, RTM_DELTFILTER);
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
......@@ -298,11 +308,14 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm_ifindex = tp->q->dev->ifindex;
tcm->tcm_parent = tp->classid;
tcm->tcm_handle = 0;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
goto rtattr_failure;
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
goto rtattr_failure;
}
nlh->nlmsg_len = skb->tail - b;
return skb->len;
......
......@@ -11,6 +11,11 @@
* Changes:
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
* Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
*
* JHS: We should remove the CONFIG_NET_CLS_IND from here
* eventually when the meta match extension is made available
*
*/
#include <linux/config.h>
......@@ -50,9 +55,16 @@ struct fw_filter
struct fw_filter *next;
u32 id;
struct tcf_result res;
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *action;
#ifdef CONFIG_NET_CLS_IND
char indev[IFNAMSIZ];
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
struct tcf_police *police;
#endif
#endif
};
static __inline__ int fw_hash(u32 handle)
......@@ -77,9 +89,28 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
for (f=head->ht[fw_hash(id)]; f; f=f->next) {
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NET_CLS_IND
if (0 != f->indev[0]) {
if (NULL == skb->input_dev) {
continue;
} else {
if (0 != strcmp(f->indev, skb->input_dev->name)) {
continue;
}
}
}
#endif
if (f->action) {
int pol_res = tcf_action_exec(skb, f->action);
if (pol_res >= 0)
return pol_res;
} else
#else
#ifdef CONFIG_NET_CLS_POLICE
if (f->police)
return tcf_police(skb, f->police);
#endif
#endif
return 0;
}
......@@ -136,9 +167,16 @@ static void fw_destroy(struct tcf_proto *tp)
if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_ACT
if (f->action) {
tcf_action_destroy(f->action,TCA_ACT_UNBIND);
}
#else
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
#endif
kfree(f);
}
}
......@@ -164,8 +202,14 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg)
if ((cl = cls_set_class(tp, &f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_ACT
if (f->action) {
tcf_action_destroy(f->action,TCA_ACT_UNBIND);
}
#else
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
#endif
kfree(f);
return 0;
......@@ -185,6 +229,11 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
struct rtattr *opt = tca[TCA_OPTIONS-1];
struct rtattr *tb[TCA_FW_MAX];
int err;
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *act = NULL;
int ret;
#endif
if (!opt)
return handle ? -EINVAL : 0;
......@@ -206,6 +255,58 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
if (cl)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
}
#ifdef CONFIG_NET_CLS_ACT
if (tb[TCA_FW_POLICE-1]) {
act = kmalloc(sizeof(*act),GFP_KERNEL);
if (NULL == act)
return -ENOMEM;
memset(act,0,sizeof(*act));
ret = tcf_action_init_1(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1] ,act,"police",TCA_ACT_NOREPLACE,TCA_ACT_BIND);
if (0 > ret){
tcf_action_destroy(act,TCA_ACT_UNBIND);
return ret;
}
act->type = TCA_OLD_COMPAT;
sch_tree_lock(tp->q);
act = xchg(&f->action, act);
sch_tree_unlock(tp->q);
tcf_action_destroy(act,TCA_ACT_UNBIND);
}
if(tb[TCA_FW_ACT-1]) {
act = kmalloc(sizeof(*act),GFP_KERNEL);
if (NULL == act)
return -ENOMEM;
memset(act,0,sizeof(*act));
ret = tcf_action_init(tb[TCA_FW_ACT-1], tca[TCA_RATE-1],act,NULL, TCA_ACT_NOREPLACE,TCA_ACT_BIND);
if (0 > ret) {
tcf_action_destroy(act,TCA_ACT_UNBIND);
return ret;
}
sch_tree_lock(tp->q);
act = xchg(&f->action, act);
sch_tree_unlock(tp->q);
tcf_action_destroy(act,TCA_ACT_UNBIND);
}
#ifdef CONFIG_NET_CLS_IND
if(tb[TCA_FW_INDEV-1]) {
struct rtattr *idev = tb[TCA_FW_INDEV-1];
if (RTA_PAYLOAD(idev) >= IFNAMSIZ) {
printk("cls_fw: bad indev name %s\n",(char*)RTA_DATA(idev));
err = -EINVAL;
goto errout;
}
memset(f->indev,0,IFNAMSIZ);
sprintf(f->indev, "%s", (char*)RTA_DATA(idev));
}
#endif
#else /* only POLICE defined */
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_FW_POLICE-1]) {
struct tcf_police *police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]);
......@@ -214,8 +315,9 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
police = xchg(&f->police, police);
tcf_tree_unlock(tp);
tcf_police_release(police);
tcf_police_release(police,TCA_ACT_UNBIND);
}
#endif
#endif
return 0;
}
......@@ -249,9 +351,36 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
}
#ifdef CONFIG_NET_CLS_ACT
if(tb[TCA_FW_ACT-1]) {
act = kmalloc(sizeof(*act),GFP_KERNEL);
if (NULL == act)
return -ENOMEM;
memset(act,0,sizeof(*act));
ret = tcf_action_init(tb[TCA_FW_ACT-1], tca[TCA_RATE-1],act,NULL,TCA_ACT_NOREPLACE,TCA_ACT_BIND);
if (0 > ret) {
tcf_action_destroy(act,TCA_ACT_UNBIND);
return ret;
}
f->action= act;
}
#ifdef CONFIG_NET_CLS_IND
if(tb[TCA_FW_INDEV-1]) {
struct rtattr *idev = tb[TCA_FW_INDEV-1];
if (RTA_PAYLOAD(idev) >= IFNAMSIZ) {
printk("cls_fw: bad indev name %s\n",(char*)RTA_DATA(idev));
err = -EINVAL;
goto errout;
}
memset(f->indev,0,IFNAMSIZ);
sprintf(f->indev, "%s", (char*)RTA_DATA(idev));
}
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_FW_POLICE-1])
f->police = tcf_police_locate(tb[TCA_FW_POLICE-1], tca[TCA_RATE-1]);
#endif
#endif
f->next = head->ht[fw_hash(handle)];
......@@ -309,8 +438,12 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
t->tcm_handle = f->id;
if (!f->res.classid
#ifdef CONFIG_NET_CLS_ACT
&& !f->action
#else
#ifdef CONFIG_NET_CLS_POLICE
&& !f->police
#endif
#endif
)
return skb->len;
......@@ -320,6 +453,36 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
if (f->res.classid)
RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
#ifdef CONFIG_NET_CLS_ACT
/* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
if (f->action) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
if (f->action->type != TCA_OLD_COMPAT) {
RTA_PUT(skb, TCA_FW_ACT, 0, NULL);
if (tcf_action_dump(skb,f->action,0,0) < 0) {
goto rtattr_failure;
}
} else {
RTA_PUT(skb, TCA_FW_POLICE, 0, NULL);
if (tcf_action_dump_old(skb,f->action,0,0) < 0) {
goto rtattr_failure;
}
}
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#ifdef CONFIG_NET_CLS_IND
if(strlen(f->indev)) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
if (f->police) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
......@@ -331,14 +494,22 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#endif
#endif
rta->rta_len = skb->tail - b;
#ifdef CONFIG_NET_CLS_ACT
if (f->action && f->action->type == TCA_OLD_COMPAT) {
if (tcf_action_copy_stats(skb,f->action))
goto rtattr_failure;
}
#else
#ifdef CONFIG_NET_CLS_POLICE
if (f->police) {
if (qdisc_copy_stats(skb, &f->police->stats))
goto rtattr_failure;
}
#endif
#endif
return skb->len;
......
......@@ -297,7 +297,7 @@ static void route4_destroy(struct tcf_proto *tp)
if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
kfree(f);
}
......@@ -336,7 +336,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
kfree(f);
......@@ -398,7 +398,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
police = xchg(&f->police, police);
tcf_tree_unlock(tp);
tcf_police_release(police);
tcf_police_release(police,TCA_ACT_UNBIND);
}
#endif
return 0;
......
......@@ -278,7 +278,7 @@ static void rsvp_destroy(struct tcf_proto *tp)
if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
kfree(f);
}
......@@ -310,7 +310,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(f->police);
tcf_police_release(f->police,TCA_ACT_UNBIND);
#endif
kfree(f);
......@@ -452,7 +452,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
police = xchg(&f->police, police);
tcf_tree_unlock(tp);
tcf_police_release(police);
tcf_police_release(police,TCA_ACT_UNBIND);
}
#endif
return 0;
......
......@@ -190,7 +190,7 @@ static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
if (cl)
tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(r->police);
tcf_police_release(r->police, TCA_ACT_UNBIND);
#endif
if (f)
kfree(f);
......@@ -333,7 +333,7 @@ static int tcindex_change(struct tcf_proto *tp,unsigned long base,u32 handle,
tcf_tree_lock(tp);
police = xchg(&r->police,police);
tcf_tree_unlock(tp);
tcf_police_release(police);
tcf_police_release(police,TCA_ACT_UNBIND);
}
#endif
if (r != &new_filter_result)
......
......@@ -23,6 +23,10 @@
* It is especially useful for link sharing combined with QoS;
* pure RSVP doesn't need such a general approach and can use
* much simpler (and faster) schemes, sort of cls_rsvp.c.
*
* JHS: We should remove the CONFIG_NET_CLS_IND from here
* eventually when the meta match extension is made available
*
*/
#include <asm/uaccess.h>
......@@ -58,8 +62,15 @@ struct tc_u_knode
struct tc_u_knode *next;
u32 handle;
struct tc_u_hnode *ht_up;
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *action;
#ifdef CONFIG_NET_CLS_IND
char indev[IFNAMSIZ];
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
struct tcf_police *police;
#endif
#endif
struct tcf_result res;
struct tc_u_hnode *ht_down;
......@@ -90,10 +101,12 @@ static struct tc_u_common *u32_list;
static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel)
{
unsigned h = key & sel->hmask;
unsigned h = (key & sel->hmask)>>sel->fshift;
/*
h ^= h>>16;
h ^= h>>8;
*/
return h;
}
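/* Sketch (not part of this patch): fshift is the number of trailing
 * zero bits in hmask, so the masked bits land at the bottom of the
 * bucket index.
 */
static unsigned fold_shift(u32 hmask)
{
	unsigned s = 0;

	while (hmask && !(hmask & 1)) {
		hmask >>= 1;
		s++;
	}
	return s;	/* e.g. fold_shift(0x0000ff00) == 8 */
}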
......@@ -119,22 +132,61 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
if (n) {
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
n->sel.rcnt +=1;
#endif
for (i = n->sel.nkeys; i>0; i--, key++) {
if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
n = n->next;
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
key->kcnt +=1;
#endif
}
if (n->ht_down == NULL) {
check_terminal:
if (n->sel.flags&TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NET_CLS_IND
/* yes, I know it sucks but the feature is
 * optional dammit! - JHS */
if (0 != n->indev[0]) {
if (NULL == skb->input_dev) {
n = n->next;
goto next_knode;
} else {
if (0 != strcmp(n->indev, skb->input_dev->name)) {
n = n->next;
goto next_knode;
}
}
}
#endif
#ifdef CONFIG_CLS_U32_PERF
n->sel.rhit +=1;
#endif
if (n->action) {
int pol_res = tcf_action_exec(skb, n->action);
if (skb->tc_classid > 0) {
res->classid = skb->tc_classid;
skb->tc_classid = 0;
}
if (pol_res >= 0)
return pol_res;
} else
#else
#ifdef CONFIG_NET_CLS_POLICE
if (n->police) {
int pol_res = tcf_police(skb, n->police);
if (pol_res >= 0)
return pol_res;
} else
#endif
#endif
return 0;
}
......@@ -298,8 +350,14 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
if ((cl = __cls_set_class(&n->res.class, 0)) != 0)
tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_ACT
if (n->action) {
tcf_action_destroy(n->action, TCA_ACT_UNBIND);
}
#else
#ifdef CONFIG_NET_CLS_POLICE
tcf_police_release(n->police);
tcf_police_release(n->police, TCA_ACT_UNBIND);
#endif
#endif
if (n->ht_down)
n->ht_down->refcnt--;
......@@ -438,6 +496,10 @@ static int u32_set_parms(struct Qdisc *q, unsigned long base,
struct tc_u_knode *n, struct rtattr **tb,
struct rtattr *est)
{
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *act = NULL;
int ret;
#endif
if (tb[TCA_U32_LINK-1]) {
u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
struct tc_u_hnode *ht_down = NULL;
......@@ -470,17 +532,73 @@ static int u32_set_parms(struct Qdisc *q, unsigned long base,
if (cl)
q->ops->cl_ops->unbind_tcf(q, cl);
}
#ifdef CONFIG_NET_CLS_ACT
/*backward compatibility */
if (tb[TCA_U32_POLICE-1])
{
act = kmalloc(sizeof(*act),GFP_KERNEL);
if (NULL == act)
return -ENOMEM;
memset(act,0,sizeof(*act));
ret = tcf_action_init_1(tb[TCA_U32_POLICE-1], est,act,"police", TCA_ACT_NOREPLACE, TCA_ACT_BIND);
if (0 > ret){
tcf_action_destroy(act, TCA_ACT_UNBIND);
return ret;
}
act->type = TCA_OLD_COMPAT;
sch_tree_lock(q);
act = xchg(&n->action, act);
sch_tree_unlock(q);
tcf_action_destroy(act, TCA_ACT_UNBIND);
}
if(tb[TCA_U32_ACT-1]) {
act = kmalloc(sizeof(*act),GFP_KERNEL);
if (NULL == act)
return -ENOMEM;
memset(act,0,sizeof(*act));
ret = tcf_action_init(tb[TCA_U32_ACT-1], est,act,NULL,TCA_ACT_NOREPLACE, TCA_ACT_BIND);
if (0 > ret) {
tcf_action_destroy(act, TCA_ACT_UNBIND);
return ret;
}
sch_tree_lock(q);
act = xchg(&n->action, act);
sch_tree_unlock(q);
tcf_action_destroy(act, TCA_ACT_UNBIND);
}
#ifdef CONFIG_NET_CLS_IND
n->indev[0] = 0;
if(tb[TCA_U32_INDEV-1]) {
struct rtattr *input_dev = tb[TCA_U32_INDEV-1];
if (RTA_PAYLOAD(input_dev) >= IFNAMSIZ) {
printk("cls_u32: bad indev name %s\n",(char*)RTA_DATA(input_dev));
/* should we clear state first? */
return -EINVAL;
}
sprintf(n->indev, "%s", (char*)RTA_DATA(input_dev));
}
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
if (tb[TCA_U32_POLICE-1]) {
struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1], est);
sch_tree_lock(q);
police = xchg(&n->police, police);
sch_tree_unlock(q);
tcf_police_release(police);
tcf_police_release(police, TCA_ACT_UNBIND);
}
#endif
#endif
return 0;
}
......@@ -656,26 +774,79 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
if (n->ht_down)
RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
#ifdef CONFIG_CLS_U32_PERF2
printk("fh %x cnt %lu hit %lu\n",n->handle,n->sel.rcnt,n->sel.rhit);
n->sel.rcnt = n->sel.rhit = 0;
/* dump key cnt */
{
int i = 0;
struct tc_u32_key *key = n->sel.keys;
for (i = n->sel.nkeys; i>0; i--, key++) {
printk("\t key%d success %lu\n",i,key->kcnt);
key->kcnt = 0;
}
}
#endif
#ifdef CONFIG_NET_CLS_ACT
/* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
if (n->action) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
if (n->action->type != TCA_OLD_COMPAT) {
RTA_PUT(skb, TCA_U32_ACT, 0, NULL);
if (tcf_action_dump(skb,n->action, 0, 0) < 0) {
goto rtattr_failure;
}
} else {
RTA_PUT(skb, TCA_U32_POLICE, 0, NULL);
if (tcf_action_dump_old(skb,n->action,0,0) < 0) {
goto rtattr_failure;
}
}
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#ifdef CONFIG_NET_CLS_IND
if(strlen(n->indev)) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#endif
#else
#ifdef CONFIG_NET_CLS_POLICE
if (n->police) {
struct rtattr * p_rta = (struct rtattr*)skb->tail;
RTA_PUT(skb, TCA_U32_POLICE, 0, NULL);
if (tcf_police_dump(skb, n->police) < 0)
goto rtattr_failure;
p_rta->rta_len = skb->tail - (u8*)p_rta;
}
#endif
#endif
}
rta->rta_len = skb->tail - b;
#ifdef CONFIG_NET_CLS_ACT
if (TC_U32_KEY(n->handle) && n->action && n->action->type == TCA_OLD_COMPAT) {
if (tcf_action_copy_stats(skb,n->action))
goto rtattr_failure;
}
#else
#ifdef CONFIG_NET_CLS_POLICE
if (TC_U32_KEY(n->handle) && n->police) {
if (qdisc_copy_stats(skb, &n->police->stats))
if (qdisc_copy_stats(skb,&n->police->stats))
goto rtattr_failure;
}
#endif
#endif
return skb->len;
......
......@@ -7,6 +7,7 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* J Hadi Salim (action changes)
*/
#include <asm/uaccess.h>
......@@ -26,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
......@@ -33,9 +35,13 @@
#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
#define PRIV(a) ((struct tcf_police *) (a)->priv)
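/* Sketch (illustrative): L2T()/L2T_P() charge a packet by indexing a
 * precomputed rate table in steps of 2^cell_log bytes.
 */
static unsigned rate_slot(unsigned len, unsigned cell_log)
{
	return len >> cell_log;	/* rate_slot(100, 3) == 12 */
}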
/* use generic hash table */
#define MY_TAB_SIZE 16
#define MY_TAB_MASK 15
static u32 idx_gen;
static struct tcf_police *tcf_police_ht[16];
static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
/* Policer hash table lock */
static rwlock_t police_lock = RW_LOCK_UNLOCKED;
......@@ -59,6 +65,68 @@ static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
return p;
}
#ifdef CONFIG_NET_CLS_ACT
static __inline__ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a)
{
struct tcf_police *p;
int err =0, index = -1,i= 0, s_i = 0, n_i = 0;
struct rtattr *r ;
read_lock(&police_lock);
s_i = cb->args[0];
for (i = 0; i < MY_TAB_SIZE; i++) {
p = tcf_police_ht[tcf_police_hash(i)];
for (; p; p = p->next) {
index++;
if (index < s_i)
continue;
a->priv = p;
a->order = index;
r = (struct rtattr*) skb->tail;
RTA_PUT(skb, a->order, 0, NULL);
if (type == RTM_DELACTION)
err = tcf_action_dump_1(skb, a, 0, 1);
else
err = tcf_action_dump_1(skb, a, 0, 0);
if (0 > err) {
index--;
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
r->rta_len = skb->tail - (u8*)r;
n_i++;
}
}
done:
read_unlock(&police_lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
rtattr_failure:
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
static inline int
tcf_hash_search(struct tc_action *a, u32 index)
{
struct tcf_police *p = tcf_police_lookup(index);
if (p != NULL) {
a->priv = p;
return 1;
} else {
return 0;
}
}
#endif
static __inline__ u32 tcf_police_new_index(void)
{
do {
......@@ -94,6 +162,264 @@ void tcf_police_destroy(struct tcf_police *p)
BUG_TRAP(0);
}
#ifdef CONFIG_NET_CLS_ACT
int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,struct tc_action *a, int ovr, int bind)
{
unsigned h;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
struct tcf_police *p;
if (NULL == a) {
if (net_ratelimit())
printk("BUG: tcf_police_locate called with NULL params\n");
return -1;
}
if (rtattr_parse(tb, TCA_POLICE_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta)) < 0)
return -1;
if (tb[TCA_POLICE_TBF-1] == NULL)
return -1;
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
a->priv = (void *)p;
spin_lock(&p->lock);
if (bind) {
p->bindcnt += 1;
p->refcnt += 1;
}
if (ovr) {
goto override;
}
spin_unlock(&p->lock);
return 1;
}
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return -1;
memset(p, 0, sizeof(*p));
MOD_INC_USE_COUNT;
p->refcnt = 1;
spin_lock_init(&p->lock);
p->stats.lock = &p->lock;
if (bind)
p->bindcnt = 1;
override:
if (parm->rate.rate) {
if ((p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1])) == NULL)
goto failure;
if (parm->peakrate.rate &&
(p->P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1])) == NULL)
goto failure;
}
if (tb[TCA_POLICE_RESULT-1])
p->result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
#ifdef CONFIG_NET_ESTIMATOR
if (tb[TCA_POLICE_AVRATE-1])
p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
#endif
p->toks = p->burst = parm->burst;
p->mtu = parm->mtu;
if (p->mtu == 0) {
p->mtu = ~0;
if (p->R_tab)
p->mtu = 255<<p->R_tab->rate.cell_log;
}
if (p->P_tab)
p->ptoks = L2T_P(p, p->mtu);
p->action = parm->action;
if (ovr) {
spin_unlock(&p->lock);
return 1;
}
PSCHED_GET_TIME(p->t_c);
p->index = parm->index ? : tcf_police_new_index();
#ifdef CONFIG_NET_ESTIMATOR
if (est)
qdisc_new_estimator(&p->stats, est);
#endif
h = tcf_police_hash(p->index);
write_lock_bh(&police_lock);
p->next = tcf_police_ht[h];
tcf_police_ht[h] = p;
write_unlock_bh(&police_lock);
MOD_INC_USE_COUNT;
a->priv = (void *)p;
return 1;
failure:
if (p->R_tab)
qdisc_put_rtab(p->R_tab);
if (ovr)
spin_unlock(&p->lock);
kfree(p);
return -1;
}
void tcf_act_police_cleanup(struct tc_action *a, int bind)
{
struct tcf_police *p;
p = PRIV(a);
if (NULL != p)
tcf_police_release(p, bind);
}
int tcf_act_police_stats(struct sk_buff *skb, struct tc_action *a)
{
struct tcf_police *p;
p = PRIV(a);
if (NULL != p)
return qdisc_copy_stats(skb, &p->stats);
return 1;
}
int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
{
psched_time_t now;
struct sk_buff *skb = *pskb;
struct tcf_police *p;
long toks;
long ptoks = 0;
p = PRIV(a);
if (NULL == p) {
printk("BUG: tcf_police called with NULL params\n");
return -1;
}
spin_lock(&p->lock);
p->stats.bytes += skb->len;
p->stats.packets++;
#ifdef CONFIG_NET_ESTIMATOR
if (p->ewma_rate && p->stats.bps >= p->ewma_rate) {
p->stats.overlimits++;
spin_unlock(&p->lock);
return p->action;
}
#endif
if (skb->len <= p->mtu) {
if (p->R_tab == NULL) {
spin_unlock(&p->lock);
return p->result;
}
PSCHED_GET_TIME(now);
toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst, 0);
if (p->P_tab) {
ptoks = toks + p->ptoks;
if (ptoks > (long)L2T_P(p, p->mtu))
ptoks = (long)L2T_P(p, p->mtu);
ptoks -= L2T_P(p, skb->len);
}
toks += p->toks;
if (toks > (long)p->burst)
toks = p->burst;
toks -= L2T(p, skb->len);
if ((toks|ptoks) >= 0) {
p->t_c = now;
p->toks = toks;
p->ptoks = ptoks;
spin_unlock(&p->lock);
return p->result;
}
}
p->stats.overlimits++;
spin_unlock(&p->lock);
return p->action;
}
int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
struct tc_police opt;
struct tcf_police *p;
p = PRIV(a);
if (NULL == p) {
printk("BUG: tcf_police_dump called with NULL params\n");
goto rtattr_failure;
}
opt.index = p->index;
opt.action = p->action;
opt.mtu = p->mtu;
opt.burst = p->burst;
opt.refcnt = p->refcnt - ref;
opt.bindcnt = p->bindcnt - bind;
if (p->R_tab)
opt.rate = p->R_tab->rate;
else
memset(&opt.rate, 0, sizeof(opt.rate));
if (p->P_tab)
opt.peakrate = p->P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
if (p->result)
RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
#ifdef CONFIG_NET_ESTIMATOR
if (p->ewma_rate)
RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
#endif
return skb->len;
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
struct tc_action_ops act_police_ops = {
NULL,
"police",
TCA_ACT_POLICE,
TCA_CAP_NONE,
tcf_act_police,
tcf_act_police_stats,
tcf_act_police_dump,
tcf_act_police_cleanup,
tcf_hash_search,
tcf_act_police_locate,
tcf_generic_walker
};
static int __init
police_init_module(void)
{
return tcf_register_action(&act_police_ops);
}
static void __exit
police_cleanup_module(void)
{
tcf_unregister_action(&act_police_ops);
}
module_init(police_init_module);
module_exit(police_cleanup_module);
#endif
struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
{
unsigned h;
......@@ -111,6 +437,7 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
p->refcnt++;
MOD_INC_USE_COUNT;
return p;
}
......@@ -156,6 +483,7 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
p->next = tcf_police_ht[h];
tcf_police_ht[h] = p;
write_unlock_bh(&police_lock);
MOD_INC_USE_COUNT;
return p;
failure:
......
......@@ -45,7 +45,11 @@
/* Thanks to Doron Oz for this hack
*/
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
static int nf_registered;
#endif
#endif
struct ingress_qdisc_data {
struct Qdisc *q;
......@@ -146,27 +150,52 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
* Unlike normal "enqueue" functions, ingress_enqueue returns a
* firewall FW_* code.
*/
#ifdef CONFIG_NET_CLS_POLICE
#ifdef CONFIG_NET_CLS_ACT
sch->stats.packets++;
sch->stats.bytes += skb->len;
switch (result) {
case TC_POLICE_SHOT:
result = NF_DROP;
case TC_ACT_SHOT:
result = TC_ACT_SHOT;
sch->stats.drops++;
break;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
result = TC_ACT_STOLEN;
break;
case TC_ACT_RECLASSIFY:
case TC_ACT_OK:
case TC_ACT_UNSPEC:
default:
skb->tc_index = TC_H_MIN(res.classid);
result = TC_ACT_OK;
break;
};
/* backward compat */
#else
#ifdef CONFIG_NET_CLS_POLICE
switch (result) {
case TC_POLICE_SHOT:
result = NF_DROP;
sch->stats.drops++;
break;
case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? */
case TC_POLICE_OK:
case TC_POLICE_UNSPEC:
default:
sch->stats.packets++;
sch->stats.bytes += skb->len;
result = NF_ACCEPT;
break;
sch->stats.packets++;
sch->stats.bytes += skb->len;
result = NF_ACCEPT;
break;
};
#else
D2PRINTK("Overriding result to ACCEPT\n");
result = NF_ACCEPT;
sch->stats.packets++;
sch->stats.bytes += skb->len;
#endif
#endif
skb->tc_index = TC_H_MIN(res.classid);
return result;
}
......@@ -199,6 +228,8 @@ static unsigned int ingress_drop(struct Qdisc *sch)
return 0;
}
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
static unsigned int
ing_hook(unsigned int hook, struct sk_buff **pskb,
const struct net_device *indev,
......@@ -240,10 +271,29 @@ static struct nf_hook_ops ing_ops = {
.priority = NF_IP_PRI_FILTER + 1,
};
#endif
#endif
int ingress_init(struct Qdisc *sch,struct rtattr *opt)
{
struct ingress_qdisc_data *p = PRIV(sch);
/* Make sure either netfilter or preferably CLS_ACT is
* compiled in */
#ifndef CONFIG_NET_CLS_ACT
#ifndef CONFIG_NETFILTER
printk("You MUST compile classifier actions into the kernel\n");
goto error;
#else
printk("Ingress scheduler: Classifier actions preferred over netfilter\n");
#endif
#endif
if (NULL == p)
goto error;
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
if (!nf_registered) {
if (nf_register_hook(&ing_ops) < 0) {
printk("ingress qdisc registration error \n");
......@@ -251,6 +301,8 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt)
}
nf_registered++;
}
#endif
#endif
DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
memset(p, 0, sizeof(*p));
......@@ -364,8 +416,12 @@ static int __init ingress_module_init(void)
static void __exit ingress_module_exit(void)
{
unregister_qdisc(&ingress_qdisc_ops);
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
if (nf_registered)
nf_unregister_hook(&ing_ops);
#endif
#endif
}
module_init(ingress_module_init)
module_exit(ingress_module_exit)
......
......@@ -47,60 +47,104 @@ struct prio_sched_data
};
static __inline__ unsigned prio_classify(struct sk_buff *skb, struct Qdisc *sch)
struct Qdisc *prio_classify(struct sk_buff *skb, struct Qdisc *sch,int *r)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
struct tcf_result res;
u32 band;
int result = 0;
band = skb->priority;
if (TC_H_MAJ(skb->priority) != sch->handle) {
#ifdef CONFIG_NET_CLS_ACT
*r = result = tc_classify(skb, q->filter_list, &res);
switch (result) {
case TC_ACT_SHOT:
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
case TC_ACT_RECLASSIFY:
case TC_ACT_OK:
case TC_ACT_UNSPEC:
default:
break;
};
if (!q->filter_list ) {
#else
if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
#endif
if (TC_H_MAJ(band))
band = 0;
return q->prio2band[band&TC_PRIO_MAX];
return q->queues[q->prio2band[band&TC_PRIO_MAX]];
}
band = res.classid;
}
band = TC_H_MIN(band) - 1;
return band < q->bands ? band : q->prio2band[0];
if (band >= q->bands)
return q->queues[q->prio2band[0]];
return q->queues[band];
}
static int
prio_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
struct Qdisc *qdisc;
int ret;
qdisc = q->queues[prio_classify(skb, sch)];
/* Moving these up here changes the accounting from before:
 * packets counted now include everything that was ever seen,
 * not just what was successfully enqueued.
 */
sch->stats.bytes += skb->len;
sch->stats.packets++;
qdisc = prio_classify(skb, sch, &ret);
if (NULL == qdisc)
goto dropped;
if ((ret = qdisc->enqueue(skb, qdisc)) == 0) {
sch->stats.bytes += skb->len;
sch->stats.packets++;
sch->q.qlen++;
return 0;
return NET_XMIT_SUCCESS;
}
sch->stats.drops++;
return ret;
dropped:
#ifdef CONFIG_NET_CLS_ACT
if (TC_ACT_SHOT == ret || NET_XMIT_DROP == ret) {
#endif
sch->stats.drops++;
return NET_XMIT_DROP;
#ifdef CONFIG_NET_CLS_ACT
} else {
sch->stats.overlimits++; /* abuse, but no one uses it */
return NET_XMIT_BYPASS; /* we don't want to confuse TCP */
}
#endif
}
static int
prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
//struct prio_sched_data *q = (struct prio_sched_data *)sch->data;
struct Qdisc *qdisc;
int ret;
int ret = NET_XMIT_DROP;
qdisc = q->queues[prio_classify(skb, sch)];
sch->stats.reqs++;
qdisc = prio_classify(skb, sch, &ret);
if (qdisc == NULL)
goto dropped;
if ((ret = qdisc->ops->requeue(skb, qdisc)) == 0) {
sch->q.qlen++;
return 0;
}
dropped:
sch->stats.drops++;
return ret;
return NET_XMIT_DROP;
}
......