Commit 67879274 authored by Tung Nguyen, committed by David S. Miller

tipc: buffer overflow handling in listener socket

The default socket receive buffer size for a listener socket is 2 MB. For
each arriving empty SYN, the Linux kernel allocates a 768-byte buffer.
This means that a listener socket can serve at most about 2700 simultaneous
empty connection setup requests before it hits a receive buffer
overflow, and far fewer if the SYN is carrying any significant
amount of data.

When this happens the setup request is rejected, and the client
receives an ECONNREFUSED error.

This commit mitigates this problem by letting the client socket try to
retransmit the SYN message multiple times when it sees it rejected with
the code TIPC_ERR_OVERLOAD. Retransmission is done at random intervals
in the range of [100 ms, setup_timeout / 4], as many times as there is
room for within the setup timeout limit.
Signed-off-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 25b9221b
...@@ -525,6 +525,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) ...@@ -525,6 +525,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
if (hlen == SHORT_H_SIZE) if (hlen == SHORT_H_SIZE)
hlen = BASIC_H_SIZE; hlen = BASIC_H_SIZE;
/* Don't return data along with SYN+, - sender has a clone */
if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD)
dlen = 0;
/* Allocate new buffer to return */ /* Allocate new buffer to return */
*skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC); *skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC);
if (!*skb) if (!*skb)
...@@ -552,6 +556,22 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) ...@@ -552,6 +556,22 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
return false; return false;
} }
/**
 * tipc_msg_skb_clone(): clone every buffer of a chain onto another queue
 * @msg: queue holding the buffers to be cloned; it is only walked here
 * @cpy: queue that each clone is appended to, in the same order as in @msg
 *
 * Clones are allocated with GFP_ATOMIC. If any clone fails, an error is
 * logged (rate limited), @cpy is purged and the walk stops.
 *
 * Returns true if all buffers were cloned, otherwise false
 */
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
{
	struct sk_buff *orig, *clone;

	skb_queue_walk(msg, orig) {
		clone = skb_clone(orig, GFP_ATOMIC);
		if (clone) {
			__skb_queue_tail(cpy, clone);
			continue;
		}
		/* Give up and drop whatever is queued on the copy list */
		pr_err_ratelimited("Failed to clone buffer chain\n");
		__skb_queue_purge(cpy);
		return false;
	}
	return true;
}
/** /**
* tipc_msg_lookup_dest(): try to find new destination for named message * tipc_msg_lookup_dest(): try to find new destination for named message
* @skb: the buffer containing the message. * @skb: the buffer containing the message.
......
...@@ -980,6 +980,7 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, ...@@ -980,6 +980,7 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy); struct sk_buff_head *cpy);
void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb); struct sk_buff *skb);
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
static inline u16 buf_seqno(struct sk_buff *skb) static inline u16 buf_seqno(struct sk_buff *skb)
{ {
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
#include "netlink.h" #include "netlink.h"
#include "group.h" #include "group.h"
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1 #define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff #define TIPC_MAX_PORT 0xffffffff
...@@ -80,7 +80,6 @@ struct sockaddr_pair { ...@@ -80,7 +80,6 @@ struct sockaddr_pair {
* @publications: list of publications for port * @publications: list of publications for port
* @blocking_link: address of the congested link we are currently sleeping on * @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime * @pub_count: total # of publications port has made during its lifetime
* @probing_state:
* @conn_timeout: the time we can wait for an unresponded setup request * @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links * @cong_link_cnt: number of congested links
...@@ -102,8 +101,8 @@ struct tipc_sock { ...@@ -102,8 +101,8 @@ struct tipc_sock {
struct list_head cong_links; struct list_head cong_links;
struct list_head publications; struct list_head publications;
u32 pub_count; u32 pub_count;
uint conn_timeout;
atomic_t dupl_rcvcnt; atomic_t dupl_rcvcnt;
u16 conn_timeout;
bool probe_unacked; bool probe_unacked;
u16 cong_link_cnt; u16 cong_link_cnt;
u16 snt_unacked; u16 snt_unacked;
...@@ -507,6 +506,9 @@ static void __tipc_shutdown(struct socket *sock, int error) ...@@ -507,6 +506,9 @@ static void __tipc_shutdown(struct socket *sock, int error)
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk))); !tsk_conn_cong(tsk)));
/* Remove any pending SYN message */
__skb_queue_purge(&sk->sk_write_queue);
/* Reject all unreceived messages, except on an active connection /* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer). * (which disconnects locally & sends a 'FIN+' to peer).
*/ */
...@@ -1362,6 +1364,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) ...@@ -1362,6 +1364,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen)) if (unlikely(rc != dlen))
return rc; return rc;
if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
return -ENOMEM;
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) { if (unlikely(rc == -ELINKCONG)) {
...@@ -1491,6 +1495,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, ...@@ -1491,6 +1495,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
__skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return; return;
...@@ -1977,6 +1982,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) ...@@ -1977,6 +1982,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
u32 oport = msg_origport(hdr); u32 oport = msg_origport(hdr);
u32 onode = msg_orignode(hdr); u32 onode = msg_orignode(hdr);
int err = msg_errcode(hdr); int err = msg_errcode(hdr);
unsigned long delay;
if (unlikely(msg_mcast(hdr))) if (unlikely(msg_mcast(hdr)))
return false; return false;
...@@ -2001,8 +2007,18 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) ...@@ -2001,8 +2007,18 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
if (oport != pport || onode != pnode) if (oport != pport || onode != pnode)
return false; return false;
/* Rejected SYN - abort */ /* Rejected SYN */
break; if (err != TIPC_ERR_OVERLOAD)
break;
/* Prepare for new setup attempt if we have a SYN clone */
if (skb_queue_empty(&sk->sk_write_queue))
break;
get_random_bytes(&delay, 2);
delay %= (tsk->conn_timeout / 4);
delay = msecs_to_jiffies(delay + 100);
sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
return false;
case TIPC_OPEN: case TIPC_OPEN:
case TIPC_DISCONNECTING: case TIPC_DISCONNECTING:
return false; return false;
...@@ -2561,12 +2577,26 @@ static void tipc_sk_check_probing_state(struct sock *sk, ...@@ -2561,12 +2577,26 @@ static void tipc_sk_check_probing_state(struct sock *sk,
sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
} }
static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
{
struct tipc_sock *tsk = tipc_sk(sk);
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
tipc_msg_skb_clone(&sk->sk_write_queue, list);
}
static void tipc_sk_timeout(struct timer_list *t) static void tipc_sk_timeout(struct timer_list *t)
{ {
struct sock *sk = from_timer(sk, t, sk_timer); struct sock *sk = from_timer(sk, t, sk_timer);
struct tipc_sock *tsk = tipc_sk(sk); struct tipc_sock *tsk = tipc_sk(sk);
u32 pnode = tsk_peer_node(tsk); u32 pnode = tsk_peer_node(tsk);
struct sk_buff_head list; struct sk_buff_head list;
int rc = 0;
skb_queue_head_init(&list); skb_queue_head_init(&list);
bh_lock_sock(sk); bh_lock_sock(sk);
...@@ -2580,12 +2610,19 @@ static void tipc_sk_timeout(struct timer_list *t) ...@@ -2580,12 +2610,19 @@ static void tipc_sk_timeout(struct timer_list *t)
if (sk->sk_state == TIPC_ESTABLISHED) if (sk->sk_state == TIPC_ESTABLISHED)
tipc_sk_check_probing_state(sk, &list); tipc_sk_check_probing_state(sk, &list);
else if (sk->sk_state == TIPC_CONNECTING)
tipc_sk_retry_connect(sk, &list);
bh_unlock_sock(sk); bh_unlock_sock(sk);
if (!skb_queue_empty(&list)) if (!skb_queue_empty(&list))
tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid); rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
/* SYN messages may cause link congestion */
if (rc == -ELINKCONG) {
tipc_dest_push(&tsk->cong_links, pnode, 0);
tsk->cong_link_cnt = 1;
}
sock_put(sk); sock_put(sk);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment