Commit 3c1ef96d authored by Sridhar Samudrala

[SCTP] SCTP path mtu discovery support for v4 addresses.

parent f51b15cd
......@@ -96,7 +96,7 @@ extern int ip_mc_output(struct sk_buff *skb);
extern int ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*));
extern int ip_do_nat(struct sk_buff *skb);
extern void ip_send_check(struct iphdr *ip);
extern int ip_queue_xmit(struct sk_buff *skb);
extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok);
extern void ip_init(void);
extern int ip_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
......
......@@ -118,11 +118,10 @@ typedef enum {
SCTP_EVENT_TIMEOUT_HEARTBEAT,
SCTP_EVENT_TIMEOUT_SACK,
SCTP_EVENT_TIMEOUT_AUTOCLOSE,
SCTP_EVENT_TIMEOUT_PMTU_RAISE,
} sctp_event_timeout_t;
#define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_PMTU_RAISE
#define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE
#define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1)
typedef enum {
......@@ -319,10 +318,6 @@ typedef enum {
#define SCTP_DEFAULT_TIMEOUT_SACK ((200 * HZ) / 1000)
#define SCTP_DEFAULT_TIMEOUT_SACK_MAX ((500 * HZ) / 1000) /* 500 ms */
/* How long do we wait before attempting to raise the PMTU? */
#define SCTP_DEFAULT_TIMEOUT_PMTU_RAISE (10 * 60 * HZ) /* 10 Minutes */
#define SCTP_DEFAULT_TIMEOUT_PMTU_RAISE_MIN (10 * 60 * HZ) /* 10 Minutes */
/* RTO.Initial - 3 seconds
* RTO.Min - 1 second
* RTO.Max - 60 seconds
......@@ -441,6 +436,13 @@ typedef enum {
#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by
peer */
/* Reasons to retransmit. */
/* Passed to sctp_retransmit() to say why a transport's outstanding chunks
 * are being retransmitted; the reason selects how (or whether) the
 * congestion window is lowered before chunks are marked.
 */
typedef enum {
/* Retransmission timeout: cwnd is lowered with SCTP_LOWER_CWND_T3_RTX. */
SCTP_RETRANSMIT_T3_RTX,
/* Fast retransmit: cwnd is lowered with SCTP_LOWER_CWND_FAST_RTX and only
 * chunks flagged for fast retransmit are marked.
 */
SCTP_RETRANSMIT_FAST_RTX,
/* Path MTU changed (ICMP "fragmentation needed"): sctp_retransmit() does
 * not lower cwnd for this reason.
 */
SCTP_RETRANSMIT_PMTU_DISCOVERY,
} sctp_retransmit_reason_t;
/* Reasons to lower cwnd. */
typedef enum {
SCTP_LOWER_CWND_T3_RTX,
......@@ -450,4 +452,3 @@ typedef enum {
} sctp_lower_cwnd_t;
#endif /* __sctp_constants_h__ */
......@@ -166,6 +166,9 @@ extern void sctp_hash_endpoint(sctp_endpoint_t *);
extern void __sctp_hash_endpoint(sctp_endpoint_t *);
extern void sctp_unhash_endpoint(sctp_endpoint_t *);
extern void __sctp_unhash_endpoint(sctp_endpoint_t *);
extern sctp_association_t *__sctp_lookup_association(const union sctp_addr *,
const union sctp_addr *,
sctp_transport_t **);
/*
* sctp_hashdriver.c
......
......@@ -238,7 +238,8 @@ struct sctp_protocol {
* (i.e. things that depend on the address family.)
*/
struct sctp_af {
int (*queue_xmit) (struct sk_buff *skb);
int (*queue_xmit) (struct sk_buff *skb,
int ipfragok);
int (*setsockopt) (struct sock *sk,
int level,
int optname,
......@@ -619,6 +620,9 @@ struct SCTP_packet {
/* This packet contains a COOKIE-ECHO chunk. */
int has_cookie_echo;
/* SCTP cannot fragment this packet. So let ip fragment it. */
int ipfragok;
int malloced;
};
......@@ -936,7 +940,8 @@ int sctp_outq_set_output_handlers(struct sctp_outq *,
sctp_outq_ohandler_t build,
sctp_outq_ohandler_force_t force);
void sctp_outq_restart(struct sctp_outq *);
void sctp_retransmit(struct sctp_outq *, sctp_transport_t *, __u8);
void sctp_retransmit(struct sctp_outq *, sctp_transport_t *,
sctp_retransmit_reason_t);
void sctp_retransmit_mark(struct sctp_outq *, sctp_transport_t *, __u8);
......@@ -1626,6 +1631,8 @@ __u32 __sctp_association_get_next_tsn(sctp_association_t *);
__u32 __sctp_association_get_tsn_block(sctp_association_t *, int);
__u16 __sctp_association_get_next_ssn(sctp_association_t *, __u16 sid);
void sctp_assoc_sync_pmtu(sctp_association_t *);
int sctp_cmp_addr_exact(const union sctp_addr *ss1,
const union sctp_addr *ss2);
sctp_chunk_t *sctp_get_ecne_prepend(sctp_association_t *asoc);
......
......@@ -566,7 +566,8 @@ static inline void tcp_openreq_free(struct open_request *req)
*/
struct tcp_func {
int (*queue_xmit) (struct sk_buff *skb);
int (*queue_xmit) (struct sk_buff *skb,
int ipfragok);
void (*send_check) (struct sock *sk,
struct tcphdr *th,
......
......@@ -280,7 +280,7 @@ int ip_output(struct sk_buff *skb)
return ip_finish_output(skb);
}
int ip_queue_xmit(struct sk_buff *skb)
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
struct sock *sk = skb->sk;
struct inet_opt *inet = inet_sk(sk);
......@@ -337,7 +337,7 @@ int ip_queue_xmit(struct sk_buff *skb)
iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
*((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
iph->tot_len = htons(skb->len);
if (ip_dont_fragment(sk, &rt->u.dst))
if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
iph->frag_off = htons(IP_DF);
else
iph->frag_off = 0;
......
......@@ -276,7 +276,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
TCP_INC_STATS(TcpOutSegs);
err = tp->af_specific->queue_xmit(skb);
err = tp->af_specific->queue_xmit(skb, 0);
if (err <= 0)
return err;
......
......@@ -60,7 +60,7 @@ static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int tcp_v6_xmit(struct sk_buff *skb);
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
......@@ -1746,7 +1746,7 @@ static int tcp_v6_rebuild_header(struct sock *sk)
return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb)
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
struct sock *sk = skb->sk;
struct inet_opt *inet = inet_sk(sk);
......
......@@ -248,6 +248,8 @@ EXPORT_SYMBOL(inet_unregister_protosw);
EXPORT_SYMBOL(ip_route_output_key);
EXPORT_SYMBOL(ip_route_input);
EXPORT_SYMBOL(icmp_send);
EXPORT_SYMBOL(icmp_statistics);
EXPORT_SYMBOL(icmp_err_convert);
EXPORT_SYMBOL(ip_options_compile);
EXPORT_SYMBOL(ip_options_undo);
EXPORT_SYMBOL(arp_send);
......
......@@ -948,3 +948,32 @@ sctp_transport_t *sctp_assoc_choose_shutdown_transport(sctp_association_t *asoc)
return t;
}
/* Update the association's pmtu and frag_point by going through all the
* transports. This routine is called when a transport's PMTU has changed.
*/
/* Recompute the association-wide path MTU and fragmentation point.
 *
 * Walks asoc->peer.transport_addr_list and picks the smallest transport
 * pmtu.  If a non-zero value results, it is stored in asoc->pmtu and
 * asoc->frag_point is set to that value minus SCTP_IP_OVERHEAD and
 * sizeof(sctp_data_chunk_t).  A NULL asoc is tolerated and ignored.
 */
void sctp_assoc_sync_pmtu(sctp_association_t *asoc)
{
	struct list_head *entry;
	sctp_transport_t *peer;
	__u32 lowest = 0;

	if (!asoc)
		return;

	/* lowest == 0 means "no candidate recorded yet", so the first
	 * transport visited always seeds the minimum.
	 */
	list_for_each(entry, &asoc->peer.transport_addr_list) {
		peer = list_entry(entry, sctp_transport_t, transports);
		if (lowest && (peer->pmtu >= lowest))
			continue;
		lowest = peer->pmtu;
	}

	/* Leave the association untouched when no usable value was found. */
	if (lowest) {
		asoc->pmtu = lowest;
		asoc->frag_point = lowest - (SCTP_IP_OVERHEAD +
					     sizeof(sctp_data_chunk_t));
	}

	SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n",
			  __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point);
}
......@@ -202,7 +202,6 @@ static const char *sctp_timer_tbl[] = {
"TIMEOUT_HEARTBEAT",
"TIMEOUT_SACK",
"TIMEOUT_AUTOCLOSE",
"TIMEOUT_PMTU_RAISE",
};
/* Lookup timer debug name. */
......
......@@ -152,8 +152,6 @@ sctp_endpoint_t *sctp_endpoint_init(sctp_endpoint_t *ep, sctp_protocol_t *proto,
SCTP_DEFAULT_TIMEOUT_SACK;
ep->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
sp->autoclose * HZ;
ep->timeouts[SCTP_EVENT_TIMEOUT_PMTU_RAISE] =
SCTP_DEFAULT_TIMEOUT_PMTU_RAISE;
/* Set up the default send/receive buffer space. */
......
......@@ -53,6 +53,9 @@
#include <linux/socket.h>
#include <linux/ip.h>
#include <linux/time.h> /* For struct timeval */
#include <net/ip.h>
#include <net/icmp.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/sctp/sctp.h>
......@@ -253,6 +256,20 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
/* Handle icmp frag needed error. */
/* Apply a path MTU reported by an ICMP "fragmentation needed" error.
 *
 * Nothing is done when the socket is owned by user context, when no
 * transport was supplied, or when the transport already has this pmtu.
 * Otherwise the transport's pmtu is updated, the association's pmtu is
 * resynchronised with sctp_assoc_sync_pmtu(), and sctp_retransmit() is
 * called on the association's outqueue with
 * SCTP_RETRANSMIT_PMTU_DISCOVERY.
 */
static inline void sctp_icmp_frag_needed(struct sock *sk,
					 sctp_association_t *asoc,
					 sctp_transport_t *transport,
					 __u32 pmtu)
{
	if (sock_owned_by_user(sk))
		return;

	if (!transport)
		return;

	if (transport->pmtu == pmtu)
		return;

	transport->pmtu = pmtu;
	sctp_assoc_sync_pmtu(asoc);
	sctp_retransmit(&asoc->outqueue, transport,
			SCTP_RETRANSMIT_PMTU_DISCOVERY);
}
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
......@@ -268,9 +285,103 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
* is probably better.
*
*/
void sctp_v4_err(struct sk_buff *skb, u32 info)
void sctp_v4_err(struct sk_buff *skb, __u32 info)
{
/* This should probably involve a call to SCTPhandleICMP(). */
struct iphdr *iph = (struct iphdr *)skb->data;
struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
union sctp_addr saddr, daddr;
struct inet_opt *inet;
struct sock *sk = NULL;
sctp_endpoint_t *ep = NULL;
sctp_association_t *asoc = NULL;
sctp_transport_t *transport;
int err;
if (skb->len < ((iph->ihl << 2) + 8)) {
ICMP_INC_STATS_BH(IcmpInErrors);
return;
}
saddr.v4.sin_family = AF_INET;
saddr.v4.sin_port = ntohs(sh->source);
memcpy(&saddr.v4.sin_addr.s_addr, &iph->saddr, sizeof(struct in_addr));
daddr.v4.sin_family = AF_INET;
daddr.v4.sin_port = ntohs(sh->dest);
memcpy(&daddr.v4.sin_addr.s_addr, &iph->daddr, sizeof(struct in_addr));
/* Look for an association that matches the incoming ICMP error
* packet.
*/
asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
if (!asoc) {
/* If there is no matching association, see if it matches any
* endpoint. This may happen for an ICMP error generated in
* response to an INIT_ACK.
*/
ep = __sctp_rcv_lookup_endpoint(&daddr);
if (!ep) {
ICMP_INC_STATS_BH(IcmpInErrors);
return;
}
}
if (asoc) {
if (ntohl(sh->vtag) != asoc->c.peer_vtag) {
ICMP_INC_STATS_BH(IcmpInErrors);
goto out;
}
sk = asoc->base.sk;
} else
sk = ep->base.sk;
sctp_bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
NET_INC_STATS_BH(LockDroppedIcmps);
switch (type) {
case ICMP_PARAMETERPROB:
err = EPROTO;
break;
case ICMP_DEST_UNREACH:
if (code > NR_ICMP_UNREACH)
goto out_unlock;
/* PMTU discovery (RFC1191) */
if (code == ICMP_FRAG_NEEDED) {
sctp_icmp_frag_needed(sk, asoc, transport, info);
goto out_unlock;
}
err = icmp_err_convert[code].errno;
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
default:
goto out_unlock;
}
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->err = err;
sk->error_report(sk);
} else { /* Only an error on timeout */
sk->err_soft = err;
}
out_unlock:
sctp_bh_unlock_sock(sk);
out:
sock_put(sk);
if (asoc)
sctp_association_put(asoc);
if (ep)
sctp_endpoint_put(ep);
}
/*
......
......@@ -98,7 +98,7 @@ static inline void sctp_v6_err(struct sk_buff *skb,
}
/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
static inline int sctp_v6_xmit(struct sk_buff *skb)
static inline int sctp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
......
......@@ -80,6 +80,7 @@ sctp_packet_t *sctp_packet_config(sctp_packet_t *packet,
packet->ecn_capable = ecn_capable;
packet->get_prepend_chunk = prepend_handler;
packet->has_cookie_echo = 0;
packet->ipfragok = 0;
/* We might need to call the prepend_handler right away. */
if (packet_empty)
......@@ -101,6 +102,7 @@ sctp_packet_t *sctp_packet_init(sctp_packet_t *packet,
packet->ecn_capable = 0;
packet->get_prepend_chunk = NULL;
packet->has_cookie_echo = 0;
packet->ipfragok = 0;
packet->malloced = 0;
sctp_packet_reset(packet);
return packet;
......@@ -192,6 +194,7 @@ sctp_xmit_t sctp_packet_append_chunk(sctp_packet_t *packet, sctp_chunk_t *chunk)
* transmit and rely on IP
* fragmentation.
*/
packet->ipfragok = 1;
goto append;
}
} else { /* !packet_empty */
......@@ -425,6 +428,7 @@ int sctp_packet_transmit(sctp_packet_t *packet)
dst = transport->dst;
if (!dst || dst->obsolete) {
sctp_transport_route(transport, NULL, sctp_sk(sk));
sctp_assoc_sync_pmtu(asoc);
}
nskb->dst = dst_clone(transport->dst);
......@@ -433,7 +437,7 @@ int sctp_packet_transmit(sctp_packet_t *packet)
SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb length %d\n",
nskb->len);
(*transport->af_specific->queue_xmit)(nskb);
(*transport->af_specific->queue_xmit)(nskb, packet->ipfragok);
out:
packet->size = SCTP_IP_OVERHEAD;
return err;
......
......@@ -238,8 +238,9 @@ void sctp_retransmit_mark(struct sctp_outq *q, sctp_transport_t *transport,
/* If we are doing retransmission due to a fast retransmit,
* only the chunks that are marked for fast retransmit
* should be added to the retransmit queue. If we are doing
* retransmission due to a timeout, only the chunks that are
* not yet acked should be added to the retransmit queue.
* retransmission due to a timeout or pmtu discovery, only the
* chunks that are not yet acked should be added to the
* retransmit queue.
*/
if ((fast_retransmit && chunk->fast_retransmit) ||
(!fast_retransmit && !chunk->tsn_gap_acked)) {
......@@ -295,14 +296,21 @@ void sctp_retransmit_mark(struct sctp_outq *q, sctp_transport_t *transport,
* one packet out.
*/
void sctp_retransmit(struct sctp_outq *q, sctp_transport_t *transport,
__u8 fast_retransmit)
sctp_retransmit_reason_t reason)
{
int error = 0;
__u8 fast_retransmit = 0;
if (fast_retransmit) {
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
} else {
switch(reason) {
case SCTP_RETRANSMIT_T3_RTX:
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
break;
case SCTP_RETRANSMIT_FAST_RTX:
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
fast_retransmit = 1;
break;
default:
break;
}
sctp_retransmit_mark(q, transport, fast_retransmit);
......@@ -1501,7 +1509,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
if (transport) {
if (do_fast_retransmit)
sctp_retransmit(q, transport, do_fast_retransmit);
sctp_retransmit(q, transport, SCTP_RETRANSMIT_FAST_RTX);
SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, "
"ssthresh: %d, flight_size: %d, pba: %d\n",
......
......@@ -427,7 +427,8 @@ int sctp_cmd_interpreter(sctp_event_t event_type, sctp_subtype_t subtype,
case SCTP_CMD_RETRAN:
/* Mark a transport for retransmission. */
sctp_retransmit(&asoc->outqueue,
command->obj.transport, 0);
command->obj.transport,
SCTP_RETRANSMIT_T3_RTX);
break;
case SCTP_CMD_TRANSMIT:
......@@ -957,12 +958,6 @@ void sctp_generate_sack_event(unsigned long data)
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
}
void sctp_generate_pmtu_raise_event(unsigned long data)
{
sctp_association_t *asoc = (sctp_association_t *) data;
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_PMTU_RAISE);
}
sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
NULL,
sctp_generate_t1_cookie_event,
......@@ -974,7 +969,6 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
sctp_generate_heartbeat_event,
sctp_generate_sack_event,
sctp_generate_autoclose_event,
sctp_generate_pmtu_raise_event,
};
/********************************************************************
......
......@@ -1139,27 +1139,6 @@ sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STA
{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
}
#define TYPE_SCTP_EVENT_TIMEOUT_PMTU_RAISE { \
/* SCTP_STATE_EMPTY */ \
{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
/* SCTP_STATE_CLOSED */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_COOKIE_WAIT */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_COOKIE_ECHOED */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_ESTABLISHED */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_SHUTDOWN_SENT */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
}
sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
......@@ -1171,7 +1150,6 @@ sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM
TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
TYPE_SCTP_EVENT_TIMEOUT_SACK,
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
TYPE_SCTP_EVENT_TIMEOUT_PMTU_RAISE,
};
sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, sctp_state_t state)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment