Commit 861602b5 authored by Alexander Duyck's avatar Alexander Duyck Committed by Jakub Kicinski

tcp: Allow full IP tos/IPv6 tclass to be reflected in L3 header

An issue was recently found where DCTCP SYN/ACK packets did not have the
ECT bit set in the L3 header. A bit of code review found that the recent
change referenced below had gone though and added a mask that prevented the
ECN bits from being populated in the L3 header.

This patch addresses that by rolling back the mask so that it is only
applied to the flags coming from the incoming TCP request instead of
applying it to the socket tos/tclass field. Doing this the ECT bits were
restored in the SYN/ACK packets in my testing.

One thing that is not addressed by this patch set is the fact that
tcp_reflect_tos appears to be incompatible with ECN based congestion
avoidance algorithms. At a minimum the feature should likely be documented
which it currently isn't.

Fixes: ac8f1710 ("tcp: reflect tos value received in SYN to the socket")
Signed-off-by: default avatarAlexander Duyck <alexanderduyck@fb.com>
Acked-by: default avatarWei Wang <weiwan@google.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent d2624e70
...@@ -981,7 +981,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -981,7 +981,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
tcp_rsk(req)->syn_tos : inet_sk(sk)->tos; tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
inet_sk(sk)->tos;
if (skb) { if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
...@@ -990,7 +991,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -990,7 +991,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr, ireq->ir_rmt_addr,
rcu_dereference(ireq->ireq_opt), rcu_dereference(ireq->ireq_opt),
tos & ~INET_ECN_MASK); tos);
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }
......
...@@ -530,12 +530,12 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -530,12 +530,12 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_lock(); rcu_read_lock();
opt = ireq->ipv6_opt; opt = ireq->ipv6_opt;
tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
tcp_rsk(req)->syn_tos : np->tclass; tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
np->tclass;
if (!opt) if (!opt)
opt = rcu_dereference(np->opt); opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
tclass & ~INET_ECN_MASK, tclass, sk->sk_priority);
sk->sk_priority);
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment