Commit 1e40d75e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Extend nf_queue selftest to cover re-queueing, non-gso mode and
   delayed queueing, from Florian Westphal.

2) Clear skb->tstamp in IPVS forwarding path, from Julian Anastasov.

3) Provide netlink extended error reporting for EEXIST case.

4) Missing VLAN offload tag and proto in log target.
====================
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents fdafed45 0d9826bc
......@@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
struct sock *sk);
void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
......
......@@ -43,16 +43,31 @@ static void dump_arp_packet(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int nhoff)
{
const struct arphdr *ah;
struct arphdr _arph;
const struct arppayload *ap;
struct arppayload _arpp;
const struct arphdr *ah;
unsigned int logflags;
struct arphdr _arph;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
nf_log_buf_add(m, "TRUNCATED");
return;
}
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
else
logflags = NF_LOG_DEFAULT_MASK;
if (logflags & NF_LOG_MACDECODE) {
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
}
nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
......
......@@ -284,8 +284,10 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
......
......@@ -297,8 +297,10 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
......
......@@ -609,6 +609,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset_ct(skb);
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
}
return ret;
}
......@@ -649,6 +651,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
......@@ -669,6 +673,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
......
......@@ -171,6 +171,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
if (!skb_vlan_tag_present(skb))
return;
vid = skb_vlan_tag_get(skb);
nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
}
EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
/* bridge and netdev logging families share this code. */
void nf_log_l2packet(struct net *net, u_int8_t pf,
__be16 protocol,
......
......@@ -2103,7 +2103,8 @@ static bool nft_hook_list_equal(struct list_head *hook_list1,
}
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
u32 flags, const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
......@@ -2119,9 +2120,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
if (!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false);
if (err < 0)
......@@ -2130,6 +2132,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
......@@ -2137,6 +2140,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
} else {
......@@ -2144,6 +2148,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
}
......@@ -2156,9 +2161,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2))
if (!IS_ERR(chain2)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST;
}
}
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!nft_is_base_chain(chain))
......@@ -2200,6 +2207,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name);
goto err;
}
......@@ -2322,7 +2330,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags);
return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
extack);
}
return nf_tables_addchain(&ctx, family, genmask, policy, flags);
......
......@@ -17,9 +17,12 @@
struct options {
bool count_packets;
bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
uint32_t verdict;
uint32_t delay_ms;
};
static unsigned int queue_stats[5];
......@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
......@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
nfq_build_verdict(char *buf, int id, int queue_num, int verd)
nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
......@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
if (!opts.count_packets)
return;
total = 0;
last = queue_stats[0];
......@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
......@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
static void sleep_ms(uint32_t delay)
{
struct timespec ts = { .tv_sec = delay / 1000 };
delay %= 1000;
ts.tv_nsec = delay * 1000llu * 1000llu;
nanosleep(&ts, NULL);
}
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
......@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
if (errno == ENOBUFS)
if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
......@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
if (opts.delay_ms)
sleep_ms(opts.delay_ms);
nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
......@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
......@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
case 'Q':
opts.verdict = atoi(optarg);
if (opts.verdict > 0xffff) {
fprintf(stderr, "Expected destination queue number\n");
exit(1);
}
opts.verdict <<= 16;
opts.verdict |= NF_QUEUE;
break;
case 'd':
opts.delay_ms = atoi(optarg);
if (opts.delay_ms == 0) {
fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
exit(1);
}
break;
case 't':
opts.timeout = atoi(optarg);
break;
case 'G':
opts.gso_enabled = false;
break;
case 'v':
opts.verbose++;
break;
}
}
if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
fprintf(stderr, "Cannot use same destination and source queue\n");
exit(1);
}
}
int main(int argc, char *argv[])
{
int ret;
opts.verdict = NF_ACCEPT;
opts.gso_enabled = true;
parse_opts(argc, argv);
ret = mainloop();
......
......@@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
timeout=4
cleanup()
{
......@@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
......@@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
......@@ -83,7 +87,7 @@ load_ruleset() {
local name=$1
local prio=$2
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
......@@ -118,7 +122,7 @@ EOF
load_counter_ruleset() {
local prio=$1
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
......@@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
......@@ -184,10 +188,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
......@@ -214,8 +218,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
......@@ -250,11 +254,11 @@ test_queue()
test_tcp_forward()
{
ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
......@@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost()
{
tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
......@@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
wait 2>/dev/null
}
test_tcp_localhost_requeue()
{
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
chain output {
type filter hook output priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
chain post {
type filter hook postrouting priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
}
EOF
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
# nfqueue 1 will be called via output hook. But this time,
# re-queue the packet to nfqueue program on queue 2.
ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
sleep 1
ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
rm -f "$tmpfile"
wait
if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
echo "FAIL: lost packets during requeue?!" 1>&2
return
fi
echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
......@@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_requeue
exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment