Commit 6258f7e8 authored by Jon Grimm's avatar Jon Grimm

[SCTP] Fix large message sends.

Fix limitation where we could not send messages that exceeded the
maximum data chunk size.   We still need partial data delivery to
accept messages with len > rwnd, but this fixes the sending.  Sends
are still limited (as per the I-D) to SNDBUF. 

Additionally, we can now 'bundle' fragmented DATA with the 
Cookie-Echo packet, not just a small message.  
parent bd8b01e5
......@@ -3,33 +3,33 @@
* Copyright (c) 1999-2001 Motorola, Inc.
* Copyright (c) 2001 Intel Corp.
* Copyright (c) 2001-2002 International Business Machines Corp.
*
*
* This file is part of the SCTP kernel reference Implementation
*
*
* This file is part of the implementation of the add-IP extension,
* based on <draft-ietf-tsvwg-addip-sctp-02.txt> June 29, 2001,
* for the SCTP kernel reference Implementation.
*
* The SCTP reference implementation is free software;
* you can redistribute it and/or modify it under the terms of
*
* The SCTP reference implementation is free software;
* you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* the SCTP reference implementation is distributed in the hope that it
*
* the SCTP reference implementation is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
* ************************
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU General Public License
* along with GNU CC; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
* Boston, MA 02111-1307, USA.
*
* Please send any bug reports or fixes you make to one of the following email
* addresses:
*
*
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
* Randall Stewart <randall@stewart.chicago.il.us>
......@@ -38,14 +38,14 @@
* Xingang Guo <xingang.guo@intel.com>
* Sridhar Samudrala <samudrala@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
*
*
* Any bugs reported given to us we will try to fix... any fixes shared will
* be incorporated into the next SCTP release.
*
*
* There are still LOTS of bugs in this code... I always run on the motto
* "it is a wonder any code ever works :)"
*
*
*
*
*/
#ifndef __sctp_constants_h__
......@@ -61,15 +61,6 @@ enum { SCTP_MAX_STREAM = 0xffff };
enum { SCTP_DEFAULT_OUTSTREAMS = 10 };
enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM };
/* Define the amount of space to reserve for SCTP, IP, LL.
* There is a little bit of waste that we are always allocating
* for ipv6 headers, but this seems worth the simplicity.
*/
#define SCTP_IP_OVERHEAD ((sizeof(struct sctphdr)\
+ sizeof(struct ipv6hdr)\
+ MAX_HEADER))
/* Define the amount of space to reserve for SCTP, IP, LL.
* There is a little bit of waste that we are always allocating
* for ipv6 headers, but this seems worth the simplicity.
......@@ -250,8 +241,13 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[];
#define SCTP_ADDR_REACHABLE 2
#define SCTP_ADDR_NOT_REACHABLE 1
/* Maximum chunk length considering padding requirements. */
enum { SCTP_MAX_CHUNK_LEN = ((1<<16) - sizeof(__u32)) };
/* Encourage Cookie-Echo bundling by pre-fragmenting chunks a little
* harder (until reaching ESTABLISHED state).
*/
enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 };
/* Guess at how big to make the TSN mapping array.
* We guarantee that we can handle at least this big a gap between the
......@@ -271,7 +267,8 @@ extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[];
* is enough room for 131 duplicate reports. Round down to the
* nearest power of 2.
*/
#define SCTP_MAX_DUP_TSNS 128
enum { SCTP_MIN_PMTU = 576 };
enum { SCTP_MAX_DUP_TSNS = 128 };
typedef enum {
SCTP_COUNTER_INIT_ERROR,
......@@ -280,7 +277,6 @@ typedef enum {
/* How many counters does an association need? */
#define SCTP_NUMBER_COUNTERS 5
/* Here we define the default timers. */
/* cookie timer def = ? seconds */
......
......@@ -270,6 +270,10 @@ sctp_chunk_t *sctp_make_op_error(const sctp_association_t *,
size_t paylen);
void sctp_chunk_assign_tsn(sctp_chunk_t *);
void sctp_chunk_assign_ssn(sctp_chunk_t *);
int sctp_datachunks_from_user(sctp_association_t *,
const struct sctp_sndrcvinfo *,
struct msghdr *, int len,
struct sk_buff_head *);
/* Prototypes for statetable processing. */
......
......@@ -566,9 +566,7 @@ struct SCTP_chunk {
sctp_chunk_t *sctp_make_chunk(const sctp_association_t *, __u8 type,
__u8 flags, int size);
void sctp_free_chunk(sctp_chunk_t *);
sctp_chunk_t *sctp_copy_chunk(sctp_chunk_t *, int flags);
void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data);
int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data);
sctp_chunk_t *sctp_chunkify(struct sk_buff *, const sctp_association_t *,
struct sock *);
void sctp_init_addrs(sctp_chunk_t *, union sctp_addr *, union sctp_addr *);
......@@ -940,7 +938,7 @@ int sctp_outq_set_output_handlers(struct sctp_outq *,
sctp_outq_ohandler_t build,
sctp_outq_ohandler_force_t force);
void sctp_outq_restart(struct sctp_outq *);
void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
sctp_retransmit_reason_t);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
......@@ -1114,10 +1112,7 @@ static inline sctp_endpoint_t *sctp_ep(sctp_endpoint_common_t *base)
{
sctp_endpoint_t *ep;
/* We are not really a list, but the list_entry() macro is
* really quite generic to find the address of an outer struct.
*/
ep = list_entry(base, sctp_endpoint_t, base);
ep = container_of(base, sctp_endpoint_t, base);
return ep;
}
......@@ -1591,10 +1586,7 @@ static inline sctp_association_t *sctp_assoc(sctp_endpoint_common_t *base)
{
sctp_association_t *asoc;
/* We are not really a list, but the list_entry() macro is
* really quite generic to find the address of an outer struct.
*/
asoc = list_entry(base, sctp_association_t, base);
asoc = container_of(base, sctp_association_t, base);
return asoc;
}
......
......@@ -629,11 +629,6 @@ __u32 __sctp_association_get_tsn_block(sctp_association_t *asoc, int num)
return retval;
}
/* Return the Stream Sequence Number that sctp_ssn_next() reports for
 * outbound stream 'sid' of this association's ssnmap.
 */
__u16 __sctp_association_get_next_ssn(sctp_association_t *asoc, __u16 sid)
{
	__u16 next_ssn;

	next_ssn = sctp_ssn_next(&asoc->ssnmap->out, sid);

	return next_ssn;
}
/* Compare two addresses to see if they match. Wildcard addresses
* only match themselves.
......
......@@ -571,6 +571,7 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
sctp_chunk_t *first_frag, *frag;
struct list_head *frag_list;
int nfrags;
__u8 old_flags, flags;
/* nfrags = no. of max size fragments + any smaller last fragment. */
nfrags = ((chunk_data_len / max_frag_data_len) +
......@@ -579,9 +580,16 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
/* Start of the data in the chunk. */
data_ptr += sizeof(sctp_datahdr_t);
/* Are we fragmenting an already fragmented large message? */
old_flags = chunk->chunk_hdr->flags;
if (old_flags & SCTP_DATA_FIRST_FRAG)
flags = SCTP_DATA_FIRST_FRAG;
else
flags = SCTP_DATA_MIDDLE_FRAG;
/* Make the first fragment. */
first_frag = sctp_make_datafrag(asoc, sinfo, max_frag_data_len,
data_ptr, SCTP_DATA_FIRST_FRAG, ssn);
data_ptr, flags, ssn);
if (!first_frag)
goto err;
......@@ -609,9 +617,14 @@ sctp_chunk_t *sctp_fragment_chunk(sctp_chunk_t *chunk,
data_ptr += max_frag_data_len;
}
if (old_flags & SCTP_DATA_LAST_FRAG)
flags = SCTP_DATA_LAST_FRAG;
else
flags = SCTP_DATA_MIDDLE_FRAG;
/* Make the last fragment. */
frag = sctp_make_datafrag(asoc, sinfo, chunk_data_len, data_ptr,
SCTP_DATA_LAST_FRAG, ssn);
flags, ssn);
if (!frag)
goto err;
frag->has_ssn = 1;
......
......@@ -244,7 +244,7 @@ sctp_chunk_t *sctp_make_init_ack(const sctp_association_t *asoc,
size_t chunksize;
retval = NULL;
addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, priority);
if (!addrs.v)
goto nomem_rawaddr;
......@@ -1078,53 +1078,6 @@ void sctp_free_chunk(sctp_chunk_t *chunk)
SCTP_DBG_OBJCNT_DEC(chunk);
}
/* Do a deep copy of a chunk.
 *
 * Allocates a new sctp_chunk_t with t_new(), copies the bookkeeping
 * structure, detaches the copy from every list the original was on and
 * duplicates the underlying skb with skb_copy().  All header pointers
 * that referred into the original skb are rebased so that they refer to
 * the same positions inside the copied skb.
 *
 * 'priority' is passed through to both t_new() and skb_copy().
 *
 * Returns the new chunk, or NULL if either allocation fails.
 */
sctp_chunk_t *sctp_copy_chunk(sctp_chunk_t *chunk, const int priority)
{
	sctp_chunk_t *retval;
	long offset;

	retval = t_new(sctp_chunk_t, priority);
	if (!retval)
		goto nodata;

	/* Do the shallow copy. */
	*retval = *chunk;

	/* Make sure that the copy does NOT think it is on any lists. */
	retval->next = NULL;
	retval->prev = NULL;
	retval->list = NULL;
	INIT_LIST_HEAD(&retval->transmitted_list);
	INIT_LIST_HEAD(&retval->frag_list);

	/* Now we copy the deep structure. */
	retval->skb = skb_copy(chunk->skb, priority);
	if (!retval->skb) {
		kfree(retval);
		goto nodata;
	}

	/* Move the copy headers to point into the new skb.  The shallow
	 * copy left them pointing into the original skb, so shift each
	 * non-NULL pointer by the distance between the two buffers.
	 */
	offset = ((__u8 *)retval->skb->head)
		- ((__u8 *)chunk->skb->head);

	if (retval->param_hdr.v)
		retval->param_hdr.v += offset;
	if (retval->subh.v)
		retval->subh.v += offset;

	/* Plain assignments rather than casts used as lvalues: the
	 * cast-as-lvalue form is a removed GCC extension, not valid C.
	 */
	if (retval->chunk_end)
		retval->chunk_end =
			(void *)((__u8 *)retval->chunk_end + offset);
	if (retval->chunk_hdr)
		retval->chunk_hdr =
			(void *)((__u8 *)retval->chunk_hdr + offset);
	if (retval->sctp_hdr)
		retval->sctp_hdr =
			(void *)((__u8 *)retval->sctp_hdr + offset);

	SCTP_DBG_OBJCNT_INC(chunk);
	return retval;

nodata:
	return NULL;
}
/* Append bytes to the end of a chunk. Will panic if chunk is not big
* enough.
......@@ -1153,7 +1106,8 @@ void *sctp_addto_chunk(sctp_chunk_t *chunk, int len, const void *data)
* chunk is not big enough.
* Returns a kernel err value.
*/
int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data)
static int sctp_user_addto_chunk(sctp_chunk_t *chunk, int off, int len,
struct iovec *data)
{
__u8 *target;
int err = 0;
......@@ -1162,7 +1116,7 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data)
target = skb_put(chunk->skb, len);
/* Copy data (whole iovec) into chunk */
if ((err = memcpy_fromiovec(target, data, len)))
if ((err = memcpy_fromiovecend(target, data, off, len)))
goto out;
/* Adjust the chunk length field. */
......@@ -1174,6 +1128,125 @@ int sctp_user_addto_chunk(sctp_chunk_t *chunk, int len, struct iovec *data)
return err;
}
/* A data chunk can have a maximum payload of (2^16 - 20).  Break
 * down any such message into smaller chunks.  Opportunistically, fragment
 * the chunks down to the current MTU constraints.  We may get refragmented
 * later if the PMTU changes, but it is _much better_ to fragment immediately
 * with a reasonable guess than always doing our fragmentation on the
 * soft-interrupt.
 *
 * asoc    - association the message is sent on; its pmtu and state
 *           select the fragmentation point.
 * sinfo   - send information handed to every DATA chunk that is built.
 * msg     - user message; the payload is copied out of msg->msg_iov.
 * msg_len - number of payload bytes to copy.
 * chunks  - queue that receives the resulting chunks in order.  Chunks
 *           are linked in by casting them to struct sk_buff.
 *
 * Returns 0 on success.  On failure returns -ENOMEM (chunk allocation)
 * or the negative error from sctp_user_addto_chunk(); in that case every
 * chunk built so far, including the one being filled, is freed and
 * 'chunks' is left empty.
 */
int sctp_datachunks_from_user(sctp_association_t *asoc,
			      const struct sctp_sndrcvinfo *sinfo,
			      struct msghdr *msg, int msg_len,
			      struct sk_buff_head *chunks)
{
	int max, whole, i, offset, over, err;
	int len, first_len;
	sctp_chunk_t *chunk;
	__u8 frag;

	/* What is a reasonable fragmentation point right now? */
	max = asoc->pmtu;
	if (max < SCTP_MIN_PMTU)
		max = SCTP_MIN_PMTU;
	max -= SCTP_IP_OVERHEAD;

	/* Make sure not beyond maximum chunk size. */
	if (max > SCTP_MAX_CHUNK_LEN)
		max = SCTP_MAX_CHUNK_LEN;

	/* Subtract out the overhead of a data chunk header. */
	max -= sizeof(struct sctp_data_chunk);

	whole = 0;
	first_len = max;

	/* Encourage Cookie-ECHO bundling. */
	if (asoc->state < SCTP_STATE_ESTABLISHED) {
		whole = msg_len / (max - SCTP_ARBITRARY_COOKIE_ECHO_LEN);

		/* Account for the DATA to be bundled with the COOKIE-ECHO. */
		if (whole) {
			first_len = max - SCTP_ARBITRARY_COOKIE_ECHO_LEN;
			msg_len -= first_len;
			whole = 1;
		}
	}

	/* How many full sized?  How many bytes leftover? */
	whole += msg_len / max;
	over = msg_len % max;
	offset = 0;

	/* Create chunks for all the full sized DATA chunks. */
	for (i = 0, len = first_len; i < whole; i++) {
		frag = SCTP_DATA_MIDDLE_FRAG;

		if (0 == i)
			frag |= SCTP_DATA_FIRST_FRAG;

		if ((i == (whole - 1)) && !over)
			frag |= SCTP_DATA_LAST_FRAG;

		chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0);
		if (!chunk)
			goto nomem;

		err = sctp_user_addto_chunk(chunk, offset, len, msg->msg_iov);
		if (err < 0)
			goto errout_chunk;

		offset += len;

		/* Put the chunk->skb back into the form expected by send. */
		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
			   - (__u8 *)chunk->skb->data);

		__skb_queue_tail(chunks, (struct sk_buff *)chunk);

		/* The first chunk was likely short to allow bundling,
		 * so reset to full size for the chunks that follow.
		 */
		if (0 == i)
			len = max;
	}

	/* .. now the leftover bytes. */
	if (over) {
		if (!whole)
			frag = SCTP_DATA_NOT_FRAG;
		else
			frag = SCTP_DATA_LAST_FRAG;

		chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0);
		if (!chunk)
			goto nomem;

		err = sctp_user_addto_chunk(chunk, offset, over, msg->msg_iov);
		if (err < 0)
			goto errout_chunk;

		/* Put the chunk->skb back into the form expected by send. */
		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
			   - (__u8 *)chunk->skb->data);

		__skb_queue_tail(chunks, (struct sk_buff *)chunk);
	}

	err = 0;
	goto out;

nomem:
	err = -ENOMEM;
	goto errout;

errout_chunk:
	/* The chunk whose copy failed was never queued, so the drain
	 * below would not see it; release it here.
	 */
	sctp_free_chunk(chunk);

errout:
	while ((chunk = (sctp_chunk_t *)__skb_dequeue(chunks)))
		sctp_free_chunk(chunk);

out:
	return err;
}
/* Helper function to assign a TSN if needed. This assumes that both
* the data_hdr and association have already been assigned.
*/
......@@ -1190,7 +1263,11 @@ void sctp_chunk_assign_ssn(sctp_chunk_t *chunk)
ssn = 0;
} else {
sid = htons(chunk->subh.data_hdr->stream);
ssn = htons(__sctp_association_get_next_ssn(chunk->asoc, sid));
if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG)
ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid);
else
ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid);
ssn = htons(ssn);
}
chunk->subh.data_hdr->ssn = ssn;
......
......@@ -730,6 +730,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
sctp_scope_t scope;
long timeo;
__u16 sinfo_flags = 0;
struct sk_buff_head chunks;
SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %d)\n",
sk, msg, msg_len);
......@@ -946,19 +947,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_free;
}
/* FIXME: In the current implementation, a single chunk is created
* for the entire message initially, even if it has to be fragmented
* later. As the length field in the chunkhdr is used to set
* the chunk length, the maximum size of the chunk and hence the
* message is limited by its type(__u16).
* The real fix is to fragment the message before creating the chunks.
*/
if (msg_len > ((__u16)(~(__u16)0) -
WORD_ROUND(sizeof(sctp_data_chunk_t)+1))) {
err = -EMSGSIZE;
goto out_free;
}
/* If fragmentation is disabled and the message length exceeds the
* association fragmentation point, return EMSGSIZE. The I-D
* does not specify what this error is, but this looks like
......@@ -991,13 +979,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_free;
}
/* Get enough memory for the whole message. */
chunk = sctp_make_data_empty(asoc, sinfo, msg_len);
if (!chunk) {
err = -ENOMEM;
goto out_free;
}
#if 0
/* FIXME: This looks wrong so I'll comment out.
* We should be able to use this same technique for
......@@ -1013,20 +994,13 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
#endif /* 0 */
/* Copy the message from the user. */
err = sctp_user_addto_chunk(chunk, msg_len, msg->msg_iov);
if (err < 0)
/* Break the message into multiple chunks of maximum size. */
skb_queue_head_init(&chunks);
err = sctp_datachunks_from_user(asoc, sinfo, msg, msg_len, &chunks);
if (err)
goto out_free;
SCTP_DEBUG_PRINTK("Copied message to chunk: %p.\n", chunk);
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
- (__u8 *)chunk->skb->data);
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
/* Auto-connect, if we aren't connected already. */
if (SCTP_STATE_CLOSED == asoc->state) {
err = sctp_primitive_ASSOCIATE(asoc, NULL);
if (err < 0)
......@@ -1034,18 +1008,22 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
SCTP_DEBUG_PRINTK("We associated primitively.\n");
}
/* Send it to the lower layers. */
err = sctp_primitive_SEND(asoc, chunk);
/* Now send the (possibly) fragmented message. */
while ((chunk = (sctp_chunk_t *)__skb_dequeue(&chunks))) {
SCTP_DEBUG_PRINTK("We sent primitively.\n");
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
/* Send it to the lower layers. */
sctp_primitive_SEND(asoc, chunk);
SCTP_DEBUG_PRINTK("We sent primitively.\n");
}
/* BUG: SCTP_CHECK_TIMER(sk); */
if (!err) {
err = msg_len;
goto out_unlock;
}
/* If we are already past ASSOCIATE, the lower
* layers are responsible for its cleanup.
* layers are responsible for association cleanup.
*/
goto out_free_chunk;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment