Commit cf8ababc authored by Sridhar Samudrala's avatar Sridhar Samudrala

[SCTP] sctp v6 source address selection support.

parent 06fbb458
......@@ -237,7 +237,8 @@ struct sctp_protocol {
* (i.e. things that depend on the address family.)
*/
struct sctp_af {
int (*queue_xmit) (struct sk_buff *skb,
int (*sctp_xmit) (struct sk_buff *skb,
struct sctp_transport *,
int ipfragok);
int (*setsockopt) (struct sock *sk,
int level,
......@@ -249,8 +250,13 @@ struct sctp_af {
int optname,
char *optval,
int *optlen);
struct dst_entry *(*get_dst) (union sctp_addr *daddr,
struct dst_entry *(*get_dst) (sctp_association_t *asoc,
union sctp_addr *daddr,
union sctp_addr *saddr);
void (*get_saddr) (sctp_association_t *asoc,
struct dst_entry *dst,
union sctp_addr *daddr,
union sctp_addr *saddr);
void (*copy_addrlist) (struct list_head *,
struct net_device *);
void (*dst_saddr) (union sctp_addr *saddr,
......@@ -740,6 +746,8 @@ struct sctp_transport {
/* Destination */
struct dst_entry *dst;
/* Source address. */
union sctp_addr saddr;
/* When was the last time(in jiffies) that a data packet was sent on
* this transport? This is used to adjust the cwnd when the transport
......@@ -825,6 +833,7 @@ struct sctp_transport *sctp_transport_init(struct sctp_transport *,
void sctp_transport_set_owner(struct sctp_transport *, sctp_association_t *);
void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
struct sctp_opt *);
void sctp_transport_pmtu(struct sctp_transport *);
void sctp_transport_free(struct sctp_transport *);
void sctp_transport_destroy(struct sctp_transport *);
void sctp_transport_reset_timers(struct sctp_transport *);
......
......@@ -390,8 +390,8 @@ struct sctp_transport *sctp_assoc_add_peer(sctp_association_t *asoc,
sctp_transport_set_owner(peer, asoc);
/* Cache a route for the transport. */
sctp_transport_route(peer, NULL, sctp_sk(asoc->base.sk));
/* Initialize the pmtu of the transport. */
sctp_transport_pmtu(peer);
/* If this is the first transport addr on this association,
* initialize the association PMTU to the peer's PMTU.
......
......@@ -98,42 +98,22 @@ static inline void sctp_v6_err(struct sk_buff *skb,
}
/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
static inline int sctp_v6_xmit(struct sk_buff *skb, int ipfragok)
static inline int sctp_v6_xmit(struct sk_buff *skb,
struct sctp_transport *transport, int ipfragok)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi fl;
struct dst_entry *dst = skb->dst;
struct rt6_info *rt6 = (struct rt6_info *)dst;
struct in6_addr saddr;
int err;
fl.proto = sk->protocol;
fl.fl6_dst = &rt6->rt6i_dst.addr;
/* FIXME: Currently, ip6_route_output() doesn't fill in the source
* address in the returned route entry. So we call ipv6_get_saddr()
* to get an appropriate source address. It is possible that this
* address may not be part of the bind address list of the association.
* Once ip6_route_ouput() is fixed so that it returns a route entry
* with an appropriate source address, the following if condition can
* be removed. With ip6_route_output() returning a source address
* filled route entry, sctp_transport_route() can do real source
* address selection for v6.
*/
if (ipv6_addr_any(&rt6->rt6i_src.addr)) {
err = ipv6_get_saddr(dst, fl.fl6_dst, &saddr);
if (err) {
printk(KERN_ERR "%s: No saddr available for "
"DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
__FUNCTION__, NIP6(fl.fl6_src));
return err;
}
fl.fl6_src = &saddr;
} else {
fl.fl6_src = &rt6->rt6i_src.addr;
}
/* Fill in the dest address from the route entry passed with the skb
* and the source address from the transport.
*/
fl.fl6_dst = &rt6->rt6i_dst.addr;
fl.fl6_src = &transport->saddr.v6.sin6_addr;
fl.fl6_flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
......@@ -146,20 +126,26 @@ static inline int sctp_v6_xmit(struct sk_buff *skb, int ipfragok)
fl.nl_u.ip6_u.daddr = rt0->addr;
}
SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
"src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
"dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
__FUNCTION__, skb, skb->len, NIP6(fl.fl6_src),
NIP6(fl.fl6_dst));
return ip6_xmit(sk, skb, &fl, np->opt);
}
/* Returns the dst cache entry for the given source and destination ip
* addresses.
*/
struct dst_entry *sctp_v6_get_dst(union sctp_addr *daddr,
struct dst_entry *sctp_v6_get_dst(sctp_association_t *asoc,
union sctp_addr *daddr,
union sctp_addr *saddr)
{
struct dst_entry *dst;
struct flowi fl = {
.nl_u = { .ip6_u = { .daddr = &daddr->v6.sin6_addr, } } };
SCTP_DEBUG_PRINTK("%s: DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
__FUNCTION__, NIP6(fl.fl6_dst));
......@@ -180,12 +166,96 @@ struct dst_entry *sctp_v6_get_dst(union sctp_addr *daddr,
NIP6(&rt->rt6i_dst.addr), NIP6(&rt->rt6i_src.addr));
} else {
SCTP_DEBUG_PRINTK("NO ROUTE\n");
return NULL;
}
return dst;
}
/* Returns the number of consecutive initial bits that match in the 2 ipv6
* addresses.
*/
static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
union sctp_addr *s2)
{
struct in6_addr *a1 = &s1->v6.sin6_addr;
struct in6_addr *a2 = &s2->v6.sin6_addr;
int i, j;
for (i = 0; i < 4 ; i++) {
__u32 a1xora2;
a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
if ((j = fls(ntohl(a1xora2))))
return (i * 32 + 32 - j);
}
return (i*32);
}
/* Fills in the source address(saddr) based on the destination address(daddr)
* and asoc's bind address list.
*/
void sctp_v6_get_saddr(sctp_association_t *asoc, struct dst_entry *dst,
union sctp_addr *daddr, union sctp_addr *saddr)
{
sctp_bind_addr_t *bp;
rwlock_t *addr_lock;
struct sockaddr_storage_list *laddr;
struct list_head *pos;
sctp_scope_t scope;
union sctp_addr *baddr = NULL;
__u8 matchlen = 0;
__u8 bmatchlen;
SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p "
"daddr:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
__FUNCTION__, asoc, dst, NIP6(&daddr->v6.sin6_addr));
if (!asoc) {
ipv6_get_saddr(dst, &daddr->v6.sin6_addr, &saddr->v6.sin6_addr);
SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: "
"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
NIP6(&saddr->v6.sin6_addr));
return;
}
scope = sctp_scope(daddr);
bp = &asoc->base.bind_addr;
addr_lock = &asoc->base.addr_lock;
/* Go through the bind address list and find the best source address
* that matches the scope of the destination address.
*/
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sockaddr_storage_list, list);
if ((laddr->a.sa.sa_family == AF_INET6) &&
(scope <= sctp_scope(&laddr->a))) {
bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
if (!baddr || (matchlen < bmatchlen)) {
baddr = &laddr->a;
matchlen = bmatchlen;
}
}
}
if (baddr) {
memcpy(saddr, baddr, sizeof(union sctp_addr));
SCTP_DEBUG_PRINTK("saddr: "
"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
NIP6(&saddr->v6.sin6_addr));
} else {
printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
"address for the "
"dest:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
__FUNCTION__, asoc, NIP6(&daddr->v6.sin6_addr));
}
sctp_read_unlock(addr_lock);
}
/* Make a copy of all potential local addresses. */
static void sctp_v6_copy_addrlist(struct list_head *addrlist,
struct net_device *dev)
......@@ -528,10 +598,11 @@ static struct inet6_protocol sctpv6_protocol = {
};
static struct sctp_af sctp_ipv6_specific = {
.queue_xmit = sctp_v6_xmit,
.setsockopt = ipv6_setsockopt,
.sctp_xmit = sctp_v6_xmit,
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.get_dst = sctp_v6_get_dst,
.get_saddr = sctp_v6_get_saddr,
.copy_addrlist = sctp_v6_copy_addrlist,
.from_skb = sctp_v6_from_skb,
.from_sk = sctp_v6_from_sk,
......
......@@ -368,15 +368,6 @@ int sctp_packet_transmit(sctp_packet_t *packet)
*/
sh->checksum = htonl(crc32);
/* FIXME: Delete the rest of this switch statement once phase 2
* of address selection (ipv6 support) drops in.
*/
switch (transport->ipaddr.sa.sa_family) {
case AF_INET6:
SCTP_V6(inet6_sk(sk)->daddr = transport->ipaddr.v6.sin6_addr;)
break;
};
/* IP layer ECN support
* From RFC 2481
* "The ECN-Capable Transport (ECT) bit would be set by the
......@@ -426,7 +417,8 @@ int sctp_packet_transmit(sctp_packet_t *packet)
}
dst = transport->dst;
if (!dst || dst->obsolete) {
/* The 'obsolete' field of dst is set to 2 when a dst is freed. */
if (!dst || (dst->obsolete > 1)) {
sctp_transport_route(transport, NULL, sctp_sk(sk));
sctp_assoc_sync_pmtu(asoc);
}
......@@ -437,14 +429,22 @@ int sctp_packet_transmit(sctp_packet_t *packet)
SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb length %d\n",
nskb->len);
(*transport->af_specific->queue_xmit)(nskb, packet->ipfragok);
(*transport->af_specific->sctp_xmit)(nskb, transport, packet->ipfragok);
out:
packet->size = SCTP_IP_OVERHEAD;
return err;
no_route:
kfree_skb(nskb);
IP_INC_STATS_BH(IpOutNoRoutes);
err = -EHOSTUNREACH;
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
* association is unreachable.
* The real failure of a transport or association can be passed on
* to the user via notifications. So setting this error may not be
* required.
*/
/* err = -EHOSTUNREACH; */
goto out;
}
......
......@@ -223,37 +223,6 @@ int sctp_copy_local_addr_list(sctp_protocol_t *proto, sctp_bind_addr_t *bp,
return error;
}
/* Returns the dst cache entry for the given source and destination ip
* addresses.
*/
struct dst_entry *sctp_v4_get_dst(union sctp_addr *daddr,
union sctp_addr *saddr)
{
struct rtable *rt;
struct flowi fl;
memset(&fl, 0x0, sizeof(struct flowi));
fl.fl4_dst = daddr->v4.sin_addr.s_addr;
fl.proto = IPPROTO_SCTP;
if (saddr)
fl.fl4_src = saddr->v4.sin_addr.s_addr;
SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ",
__FUNCTION__, NIPQUAD(fl.fl4_dst),
NIPQUAD(fl.fl4_src));
if (ip_route_output_key(&rt, &fl)) {
SCTP_DEBUG_PRINTK("NO ROUTE\n");
return NULL;
}
SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
NIPQUAD(rt->rt_src), NIPQUAD(rt->rt_dst));
return &rt->u.dst;
}
/* Initialize a sctp_addr from in incoming skb. */
static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
......@@ -396,6 +365,108 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
return retval;
}
/* Returns a valid dst cache entry for the given source and destination ip
* addresses. If an association is passed, trys to get a dst entry with a
* source adddress that matches an address in the bind address list.
*/
struct dst_entry *sctp_v4_get_dst(sctp_association_t *asoc,
union sctp_addr *daddr,
union sctp_addr *saddr)
{
struct rtable *rt;
struct flowi fl;
sctp_bind_addr_t *bp;
rwlock_t *addr_lock;
struct sockaddr_storage_list *laddr;
struct list_head *pos;
struct dst_entry *dst = NULL;
union sctp_addr dst_saddr;
memset(&fl, 0x0, sizeof(struct flowi));
fl.fl4_dst = daddr->v4.sin_addr.s_addr;
fl.proto = IPPROTO_SCTP;
if (saddr)
fl.fl4_src = saddr->v4.sin_addr.s_addr;
SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ",
__FUNCTION__, NIPQUAD(fl.fl4_dst),
NIPQUAD(fl.fl4_src));
if (!ip_route_output_key(&rt, &fl)) {
dst = &rt->u.dst;
}
/* If there is no association or if a source address is passed, no
* more validation is required.
*/
if (!asoc || saddr)
goto out;
bp = &asoc->base.bind_addr;
addr_lock = &asoc->base.addr_lock;
if (dst) {
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sockaddr_storage_list,
list);
sctp_v4_dst_saddr(&dst_saddr, dst, bp->port);
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
sctp_read_unlock(addr_lock);
/* None of the bound addresses match the source address of the
* dst. So release it.
*/
dst_release(dst);
dst = NULL;
}
/* Walk through the bind address list and try to get a dst that
* matches a bind address as the source address.
*/
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sockaddr_storage_list, list);
if (AF_INET == laddr->a.sa.sa_family) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
dst = sctp_v4_get_dst(asoc, daddr, &laddr->a);
if (!ip_route_output_key(&rt, &fl)) {
dst = &rt->u.dst;
goto out_unlock;
}
}
}
out_unlock:
sctp_read_unlock(addr_lock);
out:
if (dst)
SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_src));
else
SCTP_DEBUG_PRINTK("NO ROUTE\n");
return dst;
}
/* For v4, the source address is cached in the route entry(dst). So no need
* to cache it separately and hence this is an empty routine.
*/
void sctp_v4_get_saddr(sctp_association_t *asoc,
struct dst_entry *dst,
union sctp_addr *daddr,
union sctp_addr *saddr)
{
}
/* Event handler for inet address addition/deletion events.
* Basically, whenever there is an event, we re-build our local address list.
*/
......@@ -547,6 +618,19 @@ static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr)
return sctp_v4_available(addr);
}
/* Wrapper routine that calls the ip transmit routine. */
static inline int sctp_v4_xmit(struct sk_buff *skb,
struct sctp_transport *transport, int ipfragok)
{
SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
"src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n",
__FUNCTION__, skb, skb->len,
NIPQUAD(((struct rtable *)skb->dst)->rt_src),
NIPQUAD(((struct rtable *)skb->dst)->rt_dst));
return ip_queue_xmit(skb, ipfragok);
}
struct sctp_af sctp_ipv4_specific;
static struct sctp_pf sctp_pf_inet = {
......@@ -603,10 +687,11 @@ static struct inet_protocol sctp_protocol = {
/* IPv4 address related functions. */
struct sctp_af sctp_ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.sctp_xmit = sctp_v4_xmit,
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
.get_dst = sctp_v4_get_dst,
.get_saddr = sctp_v4_get_saddr,
.copy_addrlist = sctp_v4_copy_addrlist,
.from_skb = sctp_v4_from_skb,
.from_sk = sctp_v4_from_sk,
......
......@@ -90,6 +90,9 @@ struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
peer->asoc = NULL;
peer->dst = NULL;
memset(&peer->saddr, 0, sizeof(union sctp_addr));
/* From 6.3.1 RTO Calculation:
*
* C1) Until an RTT measurement has been made for a packet sent to the
......@@ -202,8 +205,22 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
sctp_association_hold(asoc);
}
/* Caches the dst entry for a transport's destination address and an optional
* souce address.
/* Initialize the pmtu of a transport. */
void sctp_transport_pmtu(struct sctp_transport *transport)
{
struct dst_entry *dst;
dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
if (dst) {
transport->pmtu = dst_pmtu(dst);
dst_release(dst);
} else
transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
}
/* Caches the dst entry and source address for a transport's destination
* address.
*/
void sctp_transport_route(struct sctp_transport *transport,
union sctp_addr *saddr, struct sctp_opt *opt)
......@@ -211,64 +228,15 @@ void sctp_transport_route(struct sctp_transport *transport,
sctp_association_t *asoc = transport->asoc;
struct sctp_af *af = transport->af_specific;
union sctp_addr *daddr = &transport->ipaddr;
sctp_bind_addr_t *bp;
rwlock_t *addr_lock;
struct sockaddr_storage_list *laddr;
struct list_head *pos;
struct dst_entry *dst;
union sctp_addr dst_saddr;
dst = af->get_dst(daddr, saddr);
/* If there is no association or if a source address is passed,
* no more validation is required.
*/
if (!asoc || saddr)
goto out;
if (SCTP_STATE_ESTABLISHED == asoc->state) {
bp = &asoc->base.bind_addr;
addr_lock = &asoc->base.addr_lock;
} else {
bp = &asoc->ep->base.bind_addr;
addr_lock = &asoc->ep->base.addr_lock;
}
if (dst) {
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sockaddr_storage_list,
list);
af->dst_saddr(&dst_saddr, dst, bp->port);
if (opt->pf->cmp_addr(&dst_saddr, &laddr->a, opt))
goto out_unlock;
}
sctp_read_unlock(addr_lock);
/* None of the bound addresses match the source address of the
* dst. So release it.
*/
dst_release(dst);
}
dst = af->get_dst(asoc, daddr, saddr);
/* Walk through the bind address list and try to get a dst that
* matches a bind address as the source address.
*/
sctp_read_lock(addr_lock);
list_for_each(pos, &bp->address_list) {
laddr = list_entry(pos, struct sockaddr_storage_list, list);
dst = af->get_dst(daddr, &laddr->a);
if (dst)
goto out_unlock;
}
if (saddr)
memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
else
af->get_saddr(asoc, dst, daddr, &transport->saddr);
out_unlock:
sctp_read_unlock(addr_lock);
out:
transport->dst = dst;
if (dst)
transport->pmtu = dst_pmtu(dst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment