Commit 8b6f322e authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'net-sched-transition-act_pedit-to-rcu-and-percpu-stats'

Pedro Tammela says:

====================
net/sched: transition act_pedit to rcu and percpu stats

The software pedit action didn't get the same love as some of the
other actions and it's still using spinlocks and shared stats.
Therefore, transition the action to rcu and percpu stats which
improves the action's performance.

We test this change with a very simple packet forwarding setup:

tc filter add dev ens2f0 ingress protocol ip matchall \
   action pedit ex munge eth src set b8:ce:f6:4b:68:35 pipe \
   action pedit ex munge eth dst set ac:1f:6b:e4:ff:93 pipe \
   action mirred egress redirect dev ens2f1
tc filter add dev ens2f1 ingress protocol ip matchall \
   action pedit ex munge eth src set b8:ce:f6:4b:68:34 pipe \
   action pedit ex munge eth dst set ac:1f:6b:e4:ff:92 pipe \
   action mirred egress redirect dev ens2f0

Using TRex with a http-like profile, in our setup with a 25G NIC
and a 26 cores Intel CPU, we observe the following in perf:
   before:
    11.59%  2.30%  [kernel]  [k] tcf_pedit_act
       2.55% tcf_pedit_act
             8.38% _raw_spin_lock
                       6.43% native_queued_spin_lock_slowpath
   after:
    1.46%  1.46%  [kernel]  [k] tcf_pedit_act

tdc results for pedit after the patch:
1..69
ok 1 319a - Add pedit action that mangles IP TTL
ok 2 7e67 - Replace pedit action with invalid goto chain
ok 3 377e - Add pedit action with RAW_OP offset u32
ok 4 a0ca - Add pedit action with RAW_OP offset u32 (INVALID)
ok 5 dd8a - Add pedit action with RAW_OP offset u16 u16
ok 6 53db - Add pedit action with RAW_OP offset u16 (INVALID)
ok 7 5c7e - Add pedit action with RAW_OP offset u8 add value
ok 8 2893 - Add pedit action with RAW_OP offset u8 quad
ok 9 3a07 - Add pedit action with RAW_OP offset u8-u16-u8
ok 10 ab0f - Add pedit action with RAW_OP offset u16-u8-u8
ok 11 9d12 - Add pedit action with RAW_OP offset u32 set u16 clear u8 invert
ok 12 ebfa - Add pedit action with RAW_OP offset overflow u32 (INVALID)
ok 13 f512 - Add pedit action with RAW_OP offset u16 at offmask shift set
ok 14 c2cb - Add pedit action with RAW_OP offset u32 retain value
ok 15 1762 - Add pedit action with RAW_OP offset u8 clear value
ok 16 bcee - Add pedit action with RAW_OP offset u8 retain value
ok 17 e89f - Add pedit action with RAW_OP offset u16 retain value
ok 18 c282 - Add pedit action with RAW_OP offset u32 clear value
ok 19 c422 - Add pedit action with RAW_OP offset u16 invert value
ok 20 d3d3 - Add pedit action with RAW_OP offset u32 invert value
ok 21 57e5 - Add pedit action with RAW_OP offset u8 preserve value
ok 22 99e0 - Add pedit action with RAW_OP offset u16 preserve value
ok 23 1892 - Add pedit action with RAW_OP offset u32 preserve value
ok 24 4b60 - Add pedit action with RAW_OP negative offset u16/u32 set value
ok 25 a5a7 - Add pedit action with LAYERED_OP eth set src
ok 26 86d4 - Add pedit action with LAYERED_OP eth set src & dst
ok 27 f8a9 - Add pedit action with LAYERED_OP eth set dst
ok 28 c715 - Add pedit action with LAYERED_OP eth set src (INVALID)
ok 29 8131 - Add pedit action with LAYERED_OP eth set dst (INVALID)
ok 30 ba22 - Add pedit action with LAYERED_OP eth type set/clear sequence
ok 31 dec4 - Add pedit action with LAYERED_OP eth set type (INVALID)
ok 32 ab06 - Add pedit action with LAYERED_OP eth add type
ok 33 918d - Add pedit action with LAYERED_OP eth invert src
ok 34 a8d4 - Add pedit action with LAYERED_OP eth invert dst
ok 35 ee13 - Add pedit action with LAYERED_OP eth invert type
ok 36 7588 - Add pedit action with LAYERED_OP ip set src
ok 37 0fa7 - Add pedit action with LAYERED_OP ip set dst
ok 38 5810 - Add pedit action with LAYERED_OP ip set src & dst
ok 39 1092 - Add pedit action with LAYERED_OP ip set ihl & dsfield
ok 40 02d8 - Add pedit action with LAYERED_OP ip set ttl & protocol
ok 41 3e2d - Add pedit action with LAYERED_OP ip set ttl (INVALID)
ok 42 31ae - Add pedit action with LAYERED_OP ip ttl clear/set
ok 43 486f - Add pedit action with LAYERED_OP ip set duplicate fields
ok 44 e790 - Add pedit action with LAYERED_OP ip set ce, df, mf, firstfrag, nofrag fields
ok 45 cc8a - Add pedit action with LAYERED_OP ip set tos
ok 46 7a17 - Add pedit action with LAYERED_OP ip set precedence
ok 47 c3b6 - Add pedit action with LAYERED_OP ip add tos
ok 48 43d3 - Add pedit action with LAYERED_OP ip add precedence
ok 49 438e - Add pedit action with LAYERED_OP ip clear tos
ok 50 6b1b - Add pedit action with LAYERED_OP ip clear precedence
ok 51 824a - Add pedit action with LAYERED_OP ip invert tos
ok 52 106f - Add pedit action with LAYERED_OP ip invert precedence
ok 53 6829 - Add pedit action with LAYERED_OP beyond ip set dport & sport
ok 54 afd8 - Add pedit action with LAYERED_OP beyond ip set icmp_type & icmp_code
ok 55 3143 - Add pedit action with LAYERED_OP beyond ip set dport (INVALID)
ok 56 815c - Add pedit action with LAYERED_OP ip6 set src
ok 57 4dae - Add pedit action with LAYERED_OP ip6 set dst
ok 58 fc1f - Add pedit action with LAYERED_OP ip6 set src & dst
ok 59 6d34 - Add pedit action with LAYERED_OP ip6 dst retain value (INVALID)
ok 60 94bb - Add pedit action with LAYERED_OP ip6 traffic_class
ok 61 6f5e - Add pedit action with LAYERED_OP ip6 flow_lbl
ok 62 6795 - Add pedit action with LAYERED_OP ip6 set payload_len, nexthdr, hoplimit
ok 63 1442 - Add pedit action with LAYERED_OP tcp set dport & sport
ok 64 b7ac - Add pedit action with LAYERED_OP tcp sport set (INVALID)
ok 65 cfcc - Add pedit action with LAYERED_OP tcp flags set
ok 66 3bc4 - Add pedit action with LAYERED_OP tcp set dport, sport & flags fields
ok 67 f1c8 - Add pedit action with LAYERED_OP udp set dport & sport
ok 68 d784 - Add pedit action with mixed RAW/LAYERED_OP #1
ok 69 70ca - Add pedit action with mixed RAW/LAYERED_OP #2
====================

Link: https://lore.kernel.org/r/20230131190512.3805897-1-pctammela@mojatatu.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents a8248fc4 95b06938
......@@ -4,22 +4,29 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_pedit.h>
#include <linux/types.h>
struct tcf_pedit_key_ex {
enum pedit_header_type htype;
enum pedit_cmd cmd;
};
struct tcf_pedit {
struct tc_action common;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
u32 tcfp_off_max_hint;
struct tcf_pedit_parms {
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
u32 tcfp_off_max_hint;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
struct rcu_head rcu;
};
struct tcf_pedit {
struct tc_action common;
struct tcf_pedit_parms __rcu *parms;
};
#define to_pedit(a) ((struct tcf_pedit *)a)
#define to_pedit_parms(a) (rcu_dereference(to_pedit(a)->parms))
static inline bool is_tcf_pedit(const struct tc_action *a)
{
......@@ -32,37 +39,81 @@ static inline bool is_tcf_pedit(const struct tc_action *a)
static inline int tcf_pedit_nkeys(const struct tc_action *a)
{
return to_pedit(a)->tcfp_nkeys;
struct tcf_pedit_parms *parms;
int nkeys;
rcu_read_lock();
parms = to_pedit_parms(a);
nkeys = parms->tcfp_nkeys;
rcu_read_unlock();
return nkeys;
}
static inline u32 tcf_pedit_htype(const struct tc_action *a, int index)
{
if (to_pedit(a)->tcfp_keys_ex)
return to_pedit(a)->tcfp_keys_ex[index].htype;
u32 htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
struct tcf_pedit_parms *parms;
return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
rcu_read_lock();
parms = to_pedit_parms(a);
if (parms->tcfp_keys_ex)
htype = parms->tcfp_keys_ex[index].htype;
rcu_read_unlock();
return htype;
}
static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index)
{
if (to_pedit(a)->tcfp_keys_ex)
return to_pedit(a)->tcfp_keys_ex[index].cmd;
struct tcf_pedit_parms *parms;
u32 cmd = __PEDIT_CMD_MAX;
rcu_read_lock();
parms = to_pedit_parms(a);
if (parms->tcfp_keys_ex)
cmd = parms->tcfp_keys_ex[index].cmd;
rcu_read_unlock();
return __PEDIT_CMD_MAX;
return cmd;
}
static inline u32 tcf_pedit_mask(const struct tc_action *a, int index)
{
return to_pedit(a)->tcfp_keys[index].mask;
struct tcf_pedit_parms *parms;
u32 mask;
rcu_read_lock();
parms = to_pedit_parms(a);
mask = parms->tcfp_keys[index].mask;
rcu_read_unlock();
return mask;
}
static inline u32 tcf_pedit_val(const struct tc_action *a, int index)
{
return to_pedit(a)->tcfp_keys[index].val;
struct tcf_pedit_parms *parms;
u32 val;
rcu_read_lock();
parms = to_pedit_parms(a);
val = parms->tcfp_keys[index].val;
rcu_read_unlock();
return val;
}
static inline u32 tcf_pedit_offset(const struct tc_action *a, int index)
{
return to_pedit(a)->tcfp_keys[index].off;
struct tcf_pedit_parms *parms;
u32 off;
rcu_read_lock();
parms = to_pedit_parms(a);
off = parms->tcfp_keys[index].off;
rcu_read_unlock();
return off;
}
#endif /* __NET_TC_PED_H */
......@@ -134,6 +134,17 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
return -EINVAL;
}
static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
{
struct tcf_pedit_parms *parms =
container_of(head, struct tcf_pedit_parms, rcu);
kfree(parms->tcfp_keys_ex);
kfree(parms->tcfp_keys);
kfree(parms);
}
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
......@@ -141,10 +152,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
struct tcf_pedit_key_ex *keys_ex;
struct tcf_pedit_parms *oparms, *nparms;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tc_pedit *parm;
struct nlattr *pattr;
struct tcf_pedit *p;
......@@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
if (IS_ERR(keys_ex))
return PTR_ERR(keys_ex);
nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
if (!nparms)
return -ENOMEM;
nparms->tcfp_keys_ex =
tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
if (IS_ERR(nparms->tcfp_keys_ex)) {
ret = PTR_ERR(nparms->tcfp_keys_ex);
goto out_free;
}
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
&act_pedit_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_pedit_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
goto out_free;
goto out_free_ex;
}
ret = ACT_P_CREATED;
} else if (err > 0) {
......@@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
} else {
ret = err;
goto out_free;
goto out_free_ex;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
......@@ -212,48 +229,50 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = err;
goto out_release;
}
p = to_pedit(*a);
spin_lock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED ||
(p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
keys = kmalloc(ksize, GFP_ATOMIC);
if (!keys) {
spin_unlock_bh(&p->tcf_lock);
nparms->tcfp_off_max_hint = 0;
nparms->tcfp_flags = parm->flags;
nparms->tcfp_nkeys = parm->nkeys;
nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
if (!nparms->tcfp_keys) {
ret = -ENOMEM;
goto put_chain;
}
kfree(p->tcfp_keys);
p->tcfp_keys = keys;
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
p->tcfp_off_max_hint = 0;
for (i = 0; i < p->tcfp_nkeys; ++i) {
u32 cur = p->tcfp_keys[i].off;
memcpy(nparms->tcfp_keys, parm->keys, ksize);
for (i = 0; i < nparms->tcfp_nkeys; ++i) {
u32 cur = nparms->tcfp_keys[i].off;
/* sanitize the shift value for any later use */
p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
p->tcfp_keys[i].shift);
nparms->tcfp_keys[i].shift = min_t(size_t,
BITS_PER_TYPE(int) - 1,
nparms->tcfp_keys[i].shift);
/* The AT option can read a single byte, we can bound the actual
* value with uchar max.
*/
cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
/* Each key touches 4 bytes starting from the computed offset */
p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
nparms->tcfp_off_max_hint =
max(nparms->tcfp_off_max_hint, cur + 4);
}
p->tcfp_flags = parm->flags;
p = to_pedit(*a);
spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
oparms = rcu_replace_pointer(p->parms, nparms, 1);
spin_unlock_bh(&p->tcf_lock);
kfree(p->tcfp_keys_ex);
p->tcfp_keys_ex = keys_ex;
if (oparms)
call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
return ret;
put_chain:
......@@ -261,19 +280,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
out_free_ex:
kfree(nparms->tcfp_keys_ex);
out_free:
kfree(keys_ex);
kfree(nparms);
return ret;
}
static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
struct tc_pedit_key *keys = p->tcfp_keys;
struct tcf_pedit_parms *parms;
kfree(keys);
kfree(p->tcfp_keys_ex);
parms = rcu_dereference_protected(p->parms, 1);
if (parms)
call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
static bool offset_valid(struct sk_buff *skb, int offset)
......@@ -324,31 +346,33 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
struct tcf_pedit *p = to_pedit(a);
struct tcf_pedit_key_ex *tkey_ex;
struct tcf_pedit_parms *parms;
struct tc_pedit_key *tkey;
u32 max_offset;
int i;
spin_lock(&p->tcf_lock);
parms = rcu_dereference_bh(p->parms);
max_offset = (skb_transport_header_was_set(skb) ?
skb_transport_offset(skb) :
skb_network_offset(skb)) +
p->tcfp_off_max_hint;
parms->tcfp_off_max_hint;
if (skb_ensure_writable(skb, min(skb->len, max_offset)))
goto unlock;
goto done;
tcf_lastuse_update(&p->tcf_tm);
tcf_action_update_bstats(&p->common, skb);
if (p->tcfp_nkeys > 0) {
struct tc_pedit_key *tkey = p->tcfp_keys;
struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
enum pedit_header_type htype =
TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
tkey = parms->tcfp_keys;
tkey_ex = parms->tcfp_keys_ex;
for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
u32 *ptr, hdata;
for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
int offset = tkey->off;
u32 *ptr, hdata;
int hoffset;
u32 val;
int rc;
......@@ -417,16 +441,12 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
}
goto done;
} else {
WARN(1, "pedit BUG: index %d\n", p->tcf_index);
}
bad:
spin_lock(&p->tcf_lock);
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
unlock:
spin_unlock(&p->tcf_lock);
done:
return p->tcf_action;
}
......@@ -445,30 +465,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = to_pedit(a);
struct tcf_pedit_parms *parms;
struct tc_pedit *opt;
struct tcf_t t;
int s;
s = struct_size(opt, keys, p->tcfp_nkeys);
spin_lock_bh(&p->tcf_lock);
parms = rcu_dereference_protected(p->parms, 1);
s = struct_size(opt, keys, parms->tcfp_nkeys);
/* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
if (unlikely(!opt))
if (unlikely(!opt)) {
spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
}
spin_lock_bh(&p->tcf_lock);
memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
memcpy(opt->keys, parms->tcfp_keys,
flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
opt->nkeys = p->tcfp_nkeys;
opt->flags = p->tcfp_flags;
opt->nkeys = parms->tcfp_nkeys;
opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
if (p->tcfp_keys_ex) {
if (tcf_pedit_key_ex_dump(skb,
p->tcfp_keys_ex,
p->tcfp_nkeys))
if (parms->tcfp_keys_ex) {
if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
parms->tcfp_nkeys))
goto nla_put_failure;
if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment