Commit 50f694a6 authored by David S. Miller

Merge branch 'TCP-data-delivery-and-ECN-stats-tracking'

Yuchung Cheng says:

====================
tracking TCP data delivery and ECN stats

This patch series improves the tracking of data delivery status:
  1. minor improvement on SYN data
  2. accounting bytes delivered with CE marks
  3. exporting the delivery stats to applications

so that users can get a better sense of TCP performance at the per-host,
per-connection, and even per-application-message level.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 415787d7 feb5f2ec
...@@ -281,6 +281,7 @@ struct tcp_sock { ...@@ -281,6 +281,7 @@ struct tcp_sock {
* receiver in Recovery. */ * receiver in Recovery. */
u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */
u32 delivered; /* Total data packets delivered incl. rexmits */ u32 delivered; /* Total data packets delivered incl. rexmits */
u32 delivered_ce; /* Like the above but only ECE marked packets */
u32 lost; /* Total data packets lost incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */
u32 app_limited; /* limited until "delivered" reaches this val */ u32 app_limited; /* limited until "delivered" reaches this val */
u64 first_tx_mstamp; /* start of window send phase */ u64 first_tx_mstamp; /* start of window send phase */
......
...@@ -276,6 +276,8 @@ enum ...@@ -276,6 +276,8 @@ enum
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */ LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */ LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
LINUX_MIB_TCPDELIVERED, /* TCPDelivered */
LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */
__LINUX_MIB_MAX __LINUX_MIB_MAX
}; };
......
...@@ -224,6 +224,9 @@ struct tcp_info { ...@@ -224,6 +224,9 @@ struct tcp_info {
__u64 tcpi_busy_time; /* Time (usec) busy sending data */ __u64 tcpi_busy_time; /* Time (usec) busy sending data */
__u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */
__u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
__u32 tcpi_delivered;
__u32 tcpi_delivered_ce;
}; };
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
...@@ -244,6 +247,8 @@ enum { ...@@ -244,6 +247,8 @@ enum {
TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
TCP_NLA_CA_STATE, /* ca_state of socket */ TCP_NLA_CA_STATE, /* ca_state of socket */
TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */
TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */
}; };
......
...@@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = { ...@@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL), SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };
......
...@@ -2559,6 +2559,7 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -2559,6 +2559,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0; tp->window_clamp = 0;
tp->delivered_ce = 0;
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0; tp->is_sack_reneg = 0;
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
...@@ -3166,6 +3167,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) ...@@ -3166,6 +3167,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
rate64 = tcp_compute_delivery_rate(tp); rate64 = tcp_compute_delivery_rate(tp);
if (rate64) if (rate64)
info->tcpi_delivery_rate = rate64; info->tcpi_delivery_rate = rate64;
info->tcpi_delivered = tp->delivered;
info->tcpi_delivered_ce = tp->delivered_ce;
unlock_sock_fast(sk, slow); unlock_sock_fast(sk, slow);
} }
EXPORT_SYMBOL_GPL(tcp_get_info); EXPORT_SYMBOL_GPL(tcp_get_info);
...@@ -3179,7 +3182,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) ...@@ -3179,7 +3182,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate; u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
5 * nla_total_size(sizeof(u32)) + 7 * nla_total_size(sizeof(u32)) +
3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats) if (!stats)
return NULL; return NULL;
...@@ -3210,9 +3213,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) ...@@ -3210,9 +3213,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats; return stats;
} }
......
...@@ -3496,6 +3496,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit) ...@@ -3496,6 +3496,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
tcp_xmit_retransmit_queue(sk); tcp_xmit_retransmit_queue(sk);
} }
/*
 * Account for the packets newly acked or sacked by the current ACK.
 *
 * The count is how far tp->delivered has advanced past @prior_delivered.
 * It is always added to the TCPDelivered MIB counter. When @flag carries
 * FLAG_ECE, the same count is also added to tp->delivered_ce and to the
 * TCPDeliveredCE MIB counter.
 *
 * Returns the number of newly delivered packets.
 */
static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct net *net = sock_net(sk);
	const u32 newly = tp->delivered - prior_delivered;

	NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, newly);

	/* Without ECE on this ACK there is nothing CE-related to account. */
	if (!(flag & FLAG_ECE))
		return newly;

	tp->delivered_ce += newly;
	NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, newly);

	return newly;
}
/* This routine deals with incoming acks, but not outgoing ones. */ /* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{ {
...@@ -3619,7 +3635,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3619,7 +3635,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk); sk_dst_confirm(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ delivered = tcp_newly_delivered(sk, delivered, flag);
lost = tp->lost - lost; /* freshly marked lost */ lost = tp->lost - lost; /* freshly marked lost */
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
...@@ -3629,9 +3645,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3629,9 +3645,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue: no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */ /* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
&rexmit); &rexmit);
tcp_newly_delivered(sk, delivered, flag);
}
/* If this ack opens up a zero window, clear backoff. It was /* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than * being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission. * it needs to be for normal retransmission.
...@@ -3655,6 +3673,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3655,6 +3673,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&sack_state); &sack_state);
tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
&rexmit); &rexmit);
tcp_newly_delivered(sk, delivered, flag);
tcp_xmit_recovery(sk, rexmit); tcp_xmit_recovery(sk, rexmit);
} }
...@@ -5567,9 +5586,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, ...@@ -5567,9 +5586,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return true; return true;
} }
tp->syn_data_acked = tp->syn_data; tp->syn_data_acked = tp->syn_data;
if (tp->syn_data_acked) if (tp->syn_data_acked) {
NET_INC_STATS(sock_net(sk), NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
LINUX_MIB_TCPFASTOPENACTIVE); /* SYN-data is counted as two separate packets in tcp_ack() */
if (tp->delivered > 1)
--tp->delivered;
}
tcp_fastopen_add_skb(sk, synack); tcp_fastopen_add_skb(sk, synack);
...@@ -5901,6 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) ...@@ -5901,6 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
} }
switch (sk->sk_state) { switch (sk->sk_state) {
case TCP_SYN_RECV: case TCP_SYN_RECV:
tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
if (!tp->srtt_us) if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req); tcp_synack_rtt_meas(sk, req);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment