Commit 6258f7e8 authored by Jon Grimm

[SCTP] Fix large message sends.

Fix limitation where we could not send messages that exceeded the
maximum data chunk size.   We still need partial data delivery to
accept messages with len > rwnd, but this fixes the sending.  Sends
are still limited (as per the I-D) to SNDBUF. 

Additionally, we can now 'bundle' fragmented DATA with the 
Cookie-Echo packet, not just a small message.  
parent bd8b01e5
...@@ -61,15 +61,6 @@ enum { SCTP_MAX_STREAM = 0xffff }; ...@@ -61,15 +61,6 @@ enum { SCTP_MAX_STREAM = 0xffff };
enum { SCTP_DEFAULT_OUTSTREAMS = 10 }; enum { SCTP_DEFAULT_OUTSTREAMS = 10 };
enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM };
/* Define the amount of space to reserve for SCTP, IP, LL.
* There is a little bit of waste that we are always allocating
* for ipv6 headers, but this seems worth the simplicity.
*/
#define SCTP_IP_OVERHEAD ((sizeof(struct sctphdr)\
+ sizeof(struct ipv6hdr)\
+ MAX_HEADER))
/* Define the amount of space to reserve for SCTP, IP, LL. /* Define the amount of space to reserve for SCTP, IP, LL.
* There is a little bit of waste that we are always allocating * There is a little bit of waste that we are always allocating
* for ipv6 headers, but this seems worth the simplicity. * for ipv6 headers, but this seems worth the simplicity.
...@@ -250,8 +241,13 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[]; ...@@ -250,8 +241,13 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[];
#define SCTP_ADDR_REACHABLE 2 #define SCTP_ADDR_REACHABLE 2
#define SCTP_ADDR_NOT_REACHABLE 1 #define SCTP_ADDR_NOT_REACHABLE 1
/* Maximum chunk length considering padding requirements. */
enum { SCTP_MAX_CHUNK_LEN = ((1<<16) - sizeof(__u32)) };
/* Encourage Cookie-Echo bundling by pre-fragmenting chunks a little
* harder (until reaching ESTABLISHED state).
*/
enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
/* Guess at how big to make the TSN mapping array. /* Guess at how big to make the TSN mapping array.
* We guarantee that we can handle at least this big a gap between the * We guarantee that we can handle at least this big a gap between the
...@@ -271,7 +267,8 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[]; ...@@ -271,7 +267,8 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[];
* is enough room for 131 duplicate reports. Round down to the * is enough room for 131 duplicate reports. Round down to the
* nearest power of 2. * nearest power of 2.
*/ */
#define SCTP_MAX_DUP_TSNS 128 enum { SCTP_MIN_PMTU = 576 };
enum { SCTP_MAX_DUP_TSNS = 128 };
typedef enum { typedef enum {
SCTP_COUNTER_INIT_ERROR, SCTP_COUNTER_INIT_ERROR,
...@@ -280,7 +277,6 @@ typedef enum { ...@@ -280,7 +277,6 @@ typedef enum {
/* How many counters does an association need? */ /* How many counters does an association need? */
#define SCTP_NUMBER_COUNTERS 5 #define SCTP_NUMBER_COUNTERS 5
/* Here we define the default timers. */ /* Here we define the default timers. */
/* cookie timer def = ? seconds */ /* cookie timer def = ? seconds */
......
...@@ -270,6 +270,10 @@ sctp_chunk_t *sctp_make_op_error(const sctp_association_t *, ...@@ -270,6 +270,10 @@ sctp_chunk_t *sctp_make_op_error(const sctp_association_t *,
size_t paylen); size_t paylen);
void sctp_chunk_assign_tsn(sctp_chunk_t *); void sctp_chunk_assign_tsn(sctp_chunk_t *);
void sctp_chunk_assign_ssn(sctp_chunk_t *); void sctp_chunk_assign_ssn(sctp_chunk_t *);
int sctp_datachunks_from_user(sctp_association_t *,
const struct sctp_sndrcvinfo *,
struct msghdr *, int len,
struct sk_buff_head *);
/* Prototypes for statetable processing. */ /* Prototypes for statetable processing. */
......
...@@ -566,9 +566,7 @@ struct SCTP_chunk { ...@@ -566,9 +566,7 @@ struct SCTP_chunk {
sctp_chunk_t *sctp_make_chunk(const sctp_association_t *, __u8 type, sctp_chunk_t *sctp_make_chunk(const sctp_association_t *, __u8 type,
__u8 flags, int size); __u8 flags, int size);
void sctp_free_chunk(sctp_chunk_t *); void sctp_free_chunk(sctp_chunk_t *);
sctp_chunk_t *sctp_copy_chunk(sctp_chunk_t *, int flags);
void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data); void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data);
int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data);
sctp_chunk_t *sctp_chunkify(struct sk_buff *, const sctp_association_t *, sctp_chunk_t *sctp_chunkify(struct sk_buff *, const sctp_association_t *,
struct sock *); struct sock *);
void sctp_init_addrs(sctp_chunk_t *, union sctp_addr *, union sctp_addr *); void sctp_init_addrs(sctp_chunk_t *, union sctp_addr *, union sctp_addr *);
...@@ -1114,10 +1112,7 @@ static inline sctp_endpoint_t *sctp_ep(sctp_endpoint_common_t *base) ...@@ -1114,10 +1112,7 @@ static inline sctp_endpoint_t *sctp_ep(sctp_endpoint_common_t *base)
{ {
sctp_endpoint_t *ep; sctp_endpoint_t *ep;
/* We are not really a list, but the list_entry() macro is ep = container_of(base, sctp_endpoint_t, base);
* really quite generic to find the address of an outer struct.
*/
ep = list_entry(base, sctp_endpoint_t, base);
return ep; return ep;
} }
...@@ -1591,10 +1586,7 @@ static inline sctp_association_t *sctp_assoc(sctp_endpoint_common_t *base) ...@@ -1591,10 +1586,7 @@ static inline sctp_association_t *sctp_assoc(sctp_endpoint_common_t *base)
{ {
sctp_association_t *asoc; sctp_association_t *asoc;
/* We are not really a list, but the list_entry() macro is asoc = container_of(base, sctp_association_t, base);
* really quite generic to find the address of an outer struct.
*/
asoc = list_entry(base, sctp_association_t, base);
return asoc; return asoc;
} }
......
...@@ -629,11 +629,6 @@ __u32 __sctp_association_get_tsn_block(sctp_association_t *asoc, int num) ...@@ -629,11 +629,6 @@ __u32 __sctp_association_get_tsn_block(sctp_association_t *asoc, int num)
return retval; return retval;
} }
/* Hand back the next Stream Sequence Number for outbound stream 'sid',
 * as produced by sctp_ssn_next() on the association's outgoing SSN map.
 */
__u16 __sctp_association_get_next_ssn(sctp_association_t *asoc, __u16 sid)
{
	__u16 next_ssn;

	next_ssn = sctp_ssn_next(&asoc->ssnmap->out, sid);

	return next_ssn;
}
/* Compare two addresses to see if they match. Wildcard addresses /* Compare two addresses to see if they match. Wildcard addresses
* only match themselves. * only match themselves.
......
...@@ -571,6 +571,7 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk, ...@@ -571,6 +571,7 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
sctp_chunk_t *first_frag, *frag; sctp_chunk_t *first_frag, *frag;
struct list_head *frag_list; struct list_head *frag_list;
int nfrags; int nfrags;
__u8 old_flags, flags;
/* nfrags = no. of max size fragments + any smaller last fragment. */ /* nfrags = no. of max size fragments + any smaller last fragment. */
nfrags = ((chunk_data_len / max_frag_data_len) + nfrags = ((chunk_data_len / max_frag_data_len) +
...@@ -579,9 +580,16 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk, ...@@ -579,9 +580,16 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
/* Start of the data in the chunk. */ /* Start of the data in the chunk. */
data_ptr += sizeof(sctp_datahdr_t); data_ptr += sizeof(sctp_datahdr_t);
/* Are we fragmenting an already fragmented large message? */
old_flags = chunk->chunk_hdr->flags;
if (old_flags & SCTP_DATA_FIRST_FRAG)
flags = SCTP_DATA_FIRST_FRAG;
else
flags = SCTP_DATA_MIDDLE_FRAG;
/* Make the first fragment. */ /* Make the first fragment. */
first_frag = sctp_make_datafrag(asoc, sinfo, max_frag_data_len, first_frag = sctp_make_datafrag(asoc, sinfo, max_frag_data_len,
data_ptr, SCTP_DATA_FIRST_FRAG, ssn); data_ptr, flags, ssn);
if (!first_frag) if (!first_frag)
goto err; goto err;
...@@ -609,9 +617,14 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk, ...@@ -609,9 +617,14 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
data_ptr += max_frag_data_len; data_ptr += max_frag_data_len;
} }
if (old_flags & SCTP_DATA_LAST_FRAG)
flags = SCTP_DATA_LAST_FRAG;
else
flags = SCTP_DATA_MIDDLE_FRAG;
/* Make the last fragment. */ /* Make the last fragment. */
frag = sctp_make_datafrag(asoc, sinfo, chunk_data_len, data_ptr, frag = sctp_make_datafrag(asoc, sinfo, chunk_data_len, data_ptr,
SCTP_DATA_LAST_FRAG, ssn); flags, ssn);
if (!frag) if (!frag)
goto err; goto err;
frag->has_ssn = 1; frag->has_ssn = 1;
......
...@@ -1078,53 +1078,6 @@ void sctp_free_chunk(sctp_chunk_t *chunk) ...@@ -1078,53 +1078,6 @@ void sctp_free_chunk(sctp_chunk_t *chunk)
SCTP_DBG_OBJCNT_DEC(chunk); SCTP_DBG_OBJCNT_DEC(chunk);
} }
/* Do a deep copy of a chunk.
 *
 * chunk:    the chunk to duplicate; it is only read.
 * priority: allocation flags handed to both t_new() and skb_copy().
 *
 * Returns a newly allocated chunk that owns its own copy of the skb and
 * is not linked on any list, or NULL if either allocation fails.
 */
sctp_chunk_t *sctp_copy_chunk(sctp_chunk_t *chunk, const int priority)
{
sctp_chunk_t *retval;
long offset;
retval = t_new(sctp_chunk_t, priority);
if (!retval)
goto nodata;
/* Do the shallow copy. */
*retval = *chunk;
/* Make sure that the copy does NOT think it is on any lists. */
retval->next = NULL;
retval->prev = NULL;
retval->list = NULL;
INIT_LIST_HEAD(&retval->transmitted_list);
INIT_LIST_HEAD(&retval->frag_list);
/* Now we copy the deep structure. */
retval->skb = skb_copy(chunk->skb, priority);
if (!retval->skb) {
/* The shallow copy owns nothing else yet, so dropping it is enough. */
kfree(retval);
goto nodata;
}
/* Move the copy headers to point into the new skb.
 * After the struct assignment they still point into the original
 * skb; shift each non-NULL one by the distance between the two
 * skb heads.
 * NOTE(review): the casts used as lvalues below are a GNU C
 * extension that newer compilers reject -- confirm before reuse.
 */
offset = ((__u8 *)retval->skb->head)
- ((__u8 *)chunk->skb->head);
if (retval->param_hdr.v)
retval->param_hdr.v += offset;
if (retval->subh.v)
retval->subh.v += offset;
if (retval->chunk_end)
((__u8 *) retval->chunk_end) += offset;
if (retval->chunk_hdr)
((__u8 *) retval->chunk_hdr) += offset;
if (retval->sctp_hdr)
((__u8 *) retval->sctp_hdr) += offset;
/* Debug object accounting for the newly created chunk. */
SCTP_DBG_OBJCNT_INC(chunk);
return retval;
nodata:
return NULL;
}
/* Append bytes to the end of a chunk. Will panic if chunk is not big /* Append bytes to the end of a chunk. Will panic if chunk is not big
* enough. * enough.
...@@ -1153,7 +1106,8 @@ void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data) ...@@ -1153,7 +1106,8 @@ void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data)
* chunk is not big enough. * chunk is not big enough.
* Returns a kernel err value. * Returns a kernel err value.
*/ */
int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data) static int sctp_user_addto_chunk(sctp_chunk_t *chunk, int off, int len,
struct iovec *data)
{ {
__u8 *target; __u8 *target;
int err = 0; int err = 0;
...@@ -1162,7 +1116,7 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data) ...@@ -1162,7 +1116,7 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data)
target = skb_put(chunk->skb, len); target = skb_put(chunk->skb, len);
/* Copy data (whole iovec) into chunk */ /* Copy data (whole iovec) into chunk */
if ((err = memcpy_fromiovec(target, data, len))) if ((err = memcpy_fromiovecend(target, data, off, len)))
goto out; goto out;
/* Adjust the chunk length field. */ /* Adjust the chunk length field. */
...@@ -1174,6 +1128,125 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data) ...@@ -1174,6 +1128,125 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data)
return err; return err;
} }
/* Split a user message into DATA chunks and queue them on 'chunks'.
 *
 * A data chunk can have a maximum payload of (2^16 - 20).  Break
 * down any such message into smaller chunks.  Opportunistically, fragment
 * the chunks down to the current MTU constraints.  We may get refragmented
 * later if the PMTU changes, but it is _much better_ to fragment immediately
 * with a reasonable guess than always doing our fragmentation on the
 * soft-interrupt.
 *
 * asoc:    association the message is sent on (supplies pmtu and state).
 * sinfo:   send information passed through to every fragment.
 * msg:     user message; the payload is copied from msg->msg_iov.
 * msg_len: number of payload bytes to copy.
 * chunks:  queue that receives the chunks, in message order.
 *
 * Returns 0 on success, -ENOMEM if a chunk cannot be allocated, or the
 * negative error reported while copying the user data.  On any failure
 * the chunk being built is freed and 'chunks' is drained and freed, so
 * the caller has nothing to clean up.
 */
int sctp_datachunks_from_user(sctp_association_t *asoc,
			      const struct sctp_sndrcvinfo *sinfo,
			      struct msghdr *msg, int msg_len,
			      struct sk_buff_head *chunks)
{
	int max, whole, i, offset, over, err;
	int len, first_len;
	sctp_chunk_t *chunk;
	__u8 frag;

	/* What is a reasonable fragmentation point right now? */
	max = asoc->pmtu;
	if (max < SCTP_MIN_PMTU)
		max = SCTP_MIN_PMTU;
	max -= SCTP_IP_OVERHEAD;

	/* Make sure not beyond maximum chunk size. */
	if (max > SCTP_MAX_CHUNK_LEN)
		max = SCTP_MAX_CHUNK_LEN;

	/* Subtract out the overhead of a data chunk header. */
	max -= sizeof(struct sctp_data_chunk);

	whole = 0;
	first_len = max;

	/* Encourage Cookie-ECHO bundling. */
	if (asoc->state < SCTP_STATE_ESTABLISHED) {
		whole = msg_len / (max - SCTP_ARBITRARY_COOKIE_ECHO_LEN);

		/* Account for the DATA to be bundled with the COOKIE-ECHO. */
		if (whole) {
			first_len = max - SCTP_ARBITRARY_COOKIE_ECHO_LEN;
			msg_len -= first_len;
			whole = 1;
		}
	}

	/* How many full sized?  How many bytes leftover? */
	whole += msg_len / max;
	over = msg_len % max;
	offset = 0;

	/* Create chunks for all the full sized DATA chunks. */
	for (i = 0, len = first_len; i < whole; i++) {
		frag = SCTP_DATA_MIDDLE_FRAG;

		if (0 == i)
			frag |= SCTP_DATA_FIRST_FRAG;

		if ((i == (whole - 1)) && !over)
			frag |= SCTP_DATA_LAST_FRAG;

		chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0);
		if (!chunk)
			goto nomem;

		err = sctp_user_addto_chunk(chunk, offset, len, msg->msg_iov);
		if (err < 0)
			goto errout_free_chunk;

		offset += len;

		/* Put the chunk->skb back into the form expected by send. */
		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
			   - (__u8 *)chunk->skb->data);

		__skb_queue_tail(chunks, (struct sk_buff *)chunk);

		/* The first chunk was likely short to allow bundling,
		 * so reset to full size for the remaining chunks.
		 */
		if (0 == i)
			len = max;
	}

	/* .. now the leftover bytes. */
	if (over) {
		if (!whole)
			frag = SCTP_DATA_NOT_FRAG;
		else
			frag = SCTP_DATA_LAST_FRAG;

		chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0);
		if (!chunk)
			goto nomem;

		err = sctp_user_addto_chunk(chunk, offset, over, msg->msg_iov);
		if (err < 0)
			goto errout_free_chunk;

		/* Put the chunk->skb back into the form expected by send. */
		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
			   - (__u8 *)chunk->skb->data);

		__skb_queue_tail(chunks, (struct sk_buff *)chunk);
	}

	err = 0;
	goto out;

errout_free_chunk:
	/* The chunk that failed to fill was never queued, so the drain
	 * below would miss it; release it here to avoid leaking it.
	 */
	sctp_free_chunk(chunk);
	goto errout;

nomem:
	err = -ENOMEM;

errout:
	while ((chunk = (sctp_chunk_t *)__skb_dequeue(chunks)))
		sctp_free_chunk(chunk);

out:
	return err;
}
/* Helper function to assign a TSN if needed. This assumes that both /* Helper function to assign a TSN if needed. This assumes that both
* the data_hdr and association have already been assigned. * the data_hdr and association have already been assigned.
*/ */
...@@ -1190,7 +1263,11 @@ void sctp_chunk_assign_ssn(sctp_chunk_t *chunk) ...@@ -1190,7 +1263,11 @@ void sctp_chunk_assign_ssn(sctp_chunk_t *chunk)
ssn = 0; ssn = 0;
} else { } else {
sid = htons(chunk->subh.data_hdr->stream); sid = htons(chunk->subh.data_hdr->stream);
ssn = htons(__sctp_association_get_next_ssn(chunk->asoc, sid)); if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG)
ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid);
else
ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid);
ssn = htons(ssn);
} }
chunk->subh.data_hdr->ssn = ssn; chunk->subh.data_hdr->ssn = ssn;
......
...@@ -730,6 +730,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -730,6 +730,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
sctp_scope_t scope; sctp_scope_t scope;
long timeo; long timeo;
__u16 sinfo_flags = 0; __u16 sinfo_flags = 0;
struct sk_buff_head chunks;
SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %d)\n", SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %d)\n",
sk, msg, msg_len); sk, msg, msg_len);
...@@ -946,19 +947,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -946,19 +947,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_free; goto out_free;
} }
/* FIXME: In the current implementation, a single chunk is created
* for the entire message initially, even if it has to be fragmented
* later. As the length field in the chunkhdr is used to set
* the chunk length, the maximum size of the chunk and hence the
* message is limited by its type(__u16).
* The real fix is to fragment the message before creating the chunks.
*/
if (msg_len > ((__u16)(~(__u16)0) -
WORD_ROUND(sizeof(sctp_data_chunk_t)+1))) {
err = -EMSGSIZE;
goto out_free;
}
/* If fragmentation is disabled and the message length exceeds the /* If fragmentation is disabled and the message length exceeds the
* association fragmentation point, return EMSGSIZE. The I-D * association fragmentation point, return EMSGSIZE. The I-D
* does not specify what this error is, but this looks like * does not specify what this error is, but this looks like
...@@ -991,13 +979,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -991,13 +979,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_free; goto out_free;
} }
/* Get enough memory for the whole message. */
chunk = sctp_make_data_empty(asoc, sinfo, msg_len);
if (!chunk) {
err = -ENOMEM;
goto out_free;
}
#if 0 #if 0
/* FIXME: This looks wrong so I'll comment out. /* FIXME: This looks wrong so I'll comment out.
* We should be able to use this same technique for * We should be able to use this same technique for
...@@ -1013,20 +994,13 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1013,20 +994,13 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
} }
#endif /* 0 */ #endif /* 0 */
/* Copy the message from the user. */ /* Break the message into multiple chunks of maximum size. */
err = sctp_user_addto_chunk(chunk, msg_len, msg->msg_iov); skb_queue_head_init(&chunks);
if (err < 0) err = sctp_datachunks_from_user(asoc, sinfo, msg, msg_len, &chunks);
if (err)
goto out_free; goto out_free;
SCTP_DEBUG_PRINTK("Copied message to chunk: %p.\n", chunk); /* Auto-connect, if we aren't connected already. */
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
- (__u8 *)chunk->skb->data);
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
if (SCTP_STATE_CLOSED == asoc->state) { if (SCTP_STATE_CLOSED == asoc->state) {
err = sctp_primitive_ASSOCIATE(asoc, NULL); err = sctp_primitive_ASSOCIATE(asoc, NULL);
if (err < 0) if (err < 0)
...@@ -1034,18 +1008,22 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1034,18 +1008,22 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
SCTP_DEBUG_PRINTK("We associated primitively.\n"); SCTP_DEBUG_PRINTK("We associated primitively.\n");
} }
/* Send it to the lower layers. */ /* Now send the (possibly) fragmented message. */
err = sctp_primitive_SEND(asoc, chunk); while ((chunk = (sctp_chunk_t *)__skb_dequeue(&chunks))) {
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
/* Send it to the lower layers. */
sctp_primitive_SEND(asoc, chunk);
SCTP_DEBUG_PRINTK("We sent primitively.\n"); SCTP_DEBUG_PRINTK("We sent primitively.\n");
}
/* BUG: SCTP_CHECK_TIMER(sk); */
if (!err) { if (!err) {
err = msg_len; err = msg_len;
goto out_unlock; goto out_unlock;
} }
/* If we are already past ASSOCIATE, the lower /* If we are already past ASSOCIATE, the lower
* layers are responsible for its cleanup. * layers are responsible for association cleanup.
*/ */
goto out_free_chunk; goto out_free_chunk;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment