Commit 6c1b6c6b authored by David S. Miller's avatar David S. Miller
parents fe822240 7e87fe84
......@@ -462,6 +462,7 @@ struct dccp_ackvec;
* @dccps_hc_rx_insert_options - receiver wants to add options when acking
* @dccps_hc_tx_insert_options - sender wants to add options when sending
* @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
* @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
* @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
* @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
* @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
......@@ -503,6 +504,7 @@ struct dccp_sock {
__u8 dccps_hc_rx_insert_options:1;
__u8 dccps_hc_tx_insert_options:1;
__u8 dccps_server_timewait:1;
__u8 dccps_sync_scheduled:1;
struct tasklet_struct dccps_xmitlet;
struct timer_list dccps_xmit_timer;
};
......
This diff is collapsed.
......@@ -29,6 +29,9 @@
/* Estimated minimum average Ack Vector length - used for updating MPS */
#define DCCPAV_MIN_OPTLEN 16
/* Threshold for coping with large bursts of losses */
#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8)
enum dccp_ackvec_states {
DCCPAV_RECEIVED = 0x00,
DCCPAV_ECN_MARKED = 0x40,
......@@ -61,7 +64,6 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
* %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
* @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound
* @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously)
* @av_veclen: length of the live portion of @av_buf
*/
struct dccp_ackvec {
u8 av_buf[DCCPAV_MAX_ACKVEC_LEN];
......@@ -72,7 +74,6 @@ struct dccp_ackvec {
bool av_buf_nonce[DCCPAV_NUM_ACKVECS];
u8 av_overflow:1;
struct list_head av_records;
u16 av_vec_len;
};
/** struct dccp_ackvec_record - Records information about sent Ack Vectors
......@@ -98,29 +99,38 @@ struct dccp_ackvec_record {
u8 avr_ack_nonce:1;
};
struct sock;
struct sk_buff;
extern int dccp_ackvec_init(void);
extern void dccp_ackvec_exit(void);
extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
extern void dccp_ackvec_free(struct dccp_ackvec *av);
extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
const u64 ackno, const u8 state);
extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
struct sock *sk, const u64 ackno);
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
u64 *ackno, const u8 opt,
const u8 *value, const u8 len);
extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
{
return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
}
/**
* struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
* @vec: start of vector (offset into skb)
* @len: length of @vec
* @nonce: whether @vec had an ECN nonce of 0 or 1
* @node: FIFO - arranged in descending order of ack_ackno
* This structure is used by CCIDs to access Ack Vectors in a received skb.
*/
struct dccp_ackvec_parsed {
u8 *vec,
len,
nonce:1;
struct list_head node;
};
extern int dccp_ackvec_parsed_add(struct list_head *head,
u8 *vec, u8 len, u8 nonce);
extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
#endif /* _ACKVEC_H */
......@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
#endif
}
/* XXX Lame code duplication!
* returns -1 if none was found.
* else returns the next offset to use in the function call.
*/
static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
unsigned char **vec, unsigned char *veclen)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr;
const unsigned char *opt_end = (unsigned char *)dh +
(dh->dccph_doff * 4);
unsigned char opt, len;
unsigned char *value;
BUG_ON(offset < 0);
options += offset;
opt_ptr = options;
if (opt_ptr >= opt_end)
return -1;
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
if (len < 3)
goto out_invalid_option;
/*
* Remove the type and len fields, leaving
* just the value size
*/
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
switch (opt) {
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
*vec = value;
*veclen = len;
return offset + (opt_ptr - options);
}
}
return -1;
out_invalid_option:
DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
return -1;
}
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* This code is almost identical with TCP's tcp_rtt_estimator(), since
......@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
}
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
u8 option, u8 *optval, u8 optlen)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
switch (option) {
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
option - DCCPO_ACK_VECTOR_0);
}
return 0;
}
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
struct dccp_ackvec_parsed *avp;
u64 ackno, seqno;
struct ccid2_seq *seqp;
unsigned char *vector;
unsigned char veclen;
int offset = 0;
int done = 0;
unsigned int maxincr = 0;
......@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
/* check forward path congestion */
/* still didn't send out new data packets */
if (hc->tx_seqh == hc->tx_seqt)
if (dccp_packet_without_ack(skb))
return;
switch (DCCP_SKB_CB(skb)->dccpd_type) {
case DCCP_PKT_ACK:
case DCCP_PKT_DATAACK:
break;
default:
return;
}
/* still didn't send out new data packets */
if (hc->tx_seqh == hc->tx_seqt)
goto done;
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
if (after48(ackno, hc->tx_high_ack))
......@@ -509,15 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
/* go through all ack vectors */
while ((offset = ccid2_ackvector(sk, skb, offset,
&vector, &veclen)) != -1) {
list_for_each_entry(avp, &hc->tx_av_chunks, node) {
/* go through this ack vector */
while (veclen--) {
u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
for (; avp->len--; avp->vec++) {
u64 ackno_end_rl = SUB48(ackno,
dccp_ackvec_runlen(avp->vec));
ccid2_pr_debug("ackvec start:%llu end:%llu\n",
ccid2_pr_debug("ackvec %llu |%u,%u|\n",
(unsigned long long)ackno,
(unsigned long long)ackno_end_rl);
dccp_ackvec_state(avp->vec) >> 6,
dccp_ackvec_runlen(avp->vec));
/* if the seqno we are analyzing is larger than the
* current ackno, then move towards the tail of our
* seqnos.
......@@ -536,7 +482,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* run length
*/
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
const u8 state = dccp_ackvec_state(vector);
const u8 state = dccp_ackvec_state(avp->vec);
/* new packet received or marked */
if (state != DCCPAV_NOT_RECEIVED &&
......@@ -563,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
break;
ackno = SUB48(ackno_end_rl, 1);
vector++;
}
if (done)
break;
......@@ -631,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
sk_stop_timer(sk, &hc->tx_rtotimer);
else
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
done:
/* check if incoming Acks allow pending packets to be sent */
if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
......@@ -663,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_last_cong = ccid2_time_stamp;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
INIT_LIST_HEAD(&hc->tx_av_chunks);
return 0;
}
......@@ -696,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
struct ccid_operations ccid2_ops = {
.ccid_id = DCCPC_CCID2,
.ccid_name = "TCP-like",
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
.ccid_id = DCCPC_CCID2,
.ccid_name = "TCP-like",
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
......
......@@ -55,6 +55,7 @@ struct ccid2_seq {
* @tx_rtt_seq: to decay RTTVAR at most once per flight
* @tx_rpseq: last consecutive seqno
* @tx_rpdupack: dupacks since rpseq
* @tx_av_chunks: list of Ack Vectors received on current skb
*/
struct ccid2_hc_tx_sock {
u32 tx_cwnd;
......@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock {
int tx_rpdupack;
u32 tx_last_cong;
u64 tx_high_ack;
struct list_head tx_av_chunks;
};
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
......
......@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
}
static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
if (dp->dccps_hc_rx_ackvec != NULL)
dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
if (av == NULL)
return;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
dccp_ackvec_input(av, skb);
}
static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
......@@ -365,21 +367,13 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
if (dccp_check_seqno(sk, skb))
goto discard;
if (dccp_parse_options(sk, NULL, skb))
return 1;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
goto discard;
dccp_handle_ackvec_processing(sk, skb);
dccp_deliver_input_to_ccids(sk, skb);
return __dccp_rcv_established(sk, skb, dh, len);
......@@ -631,14 +625,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dccp_parse_options(sk, NULL, skb))
return 1;
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
goto discard;
dccp_handle_ackvec_processing(sk, skb);
dccp_deliver_input_to_ccids(sk, skb);
}
......
......@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
struct dccp_sock *dp = dccp_sk(sk);
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh +
......@@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
if (rc)
goto out_featneg_failed;
break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
break;
if (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
case DCCPO_TIMESTAMP:
if (len != 4)
goto out_invalid_option;
......@@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
pkt_type, opt, value, len))
goto out_invalid_option;
break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
break;
/*
* Ack vectors are processed by the TX CCID if it is
* interested. The RX CCID need not parse Ack Vectors,
* since it is only interested in clearing old state.
* Fall through.
*/
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
pkt_type, opt, value, len))
......@@ -429,6 +430,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const u16 buflen = dccp_ackvec_buflen(av);
/* Figure out how many options do we need to represent the ackvec */
const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
......@@ -437,10 +439,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
const unsigned char *tail, *from;
unsigned char *to;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
dccp_packet_name(dcb->dccpd_type));
return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
}
/*
* Since Ack Vectors are variable-length, we can not always predict
* their size. To catch exception cases where the space is running out
* on the skb, a separate Sync is scheduled to carry the Ack Vector.
*/
if (len > DCCPAV_MIN_OPTLEN &&
len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
"MPS=%u ==> reduce payload size?\n", len, skb->len,
dcb->dccpd_opt_len, dp->dccps_mss_cache);
dp->dccps_sync_scheduled = 1;
return 0;
}
dcb->dccpd_opt_len += len;
to = skb_push(skb, len);
len = buflen;
......@@ -481,7 +498,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
/*
* Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
*/
if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
return -ENOBUFS;
return 0;
}
......
......@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk)
* any local drop will eventually be reported via receiver feedback.
*/
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
/*
* If the CCID needs to transfer additional header options out-of-band
* (e.g. Ack Vectors or feature-negotiation options), it activates this
* flag to schedule a Sync. The Sync will automatically incorporate all
* currently pending header options, thus clearing the backlog.
*/
if (dp->dccps_sync_scheduled)
dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
}
/**
......@@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
/*
* Clear the flag in case the Sync was scheduled for out-of-band data,
* such as carrying a long Ack Vector.
*/
dccp_sk(sk)->dccps_sync_scheduled = 0;
dccp_transmit_skb(sk, skb);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment