Commit ca7ae891, authored by Dmytro Shytyi, committed by Jakub Kicinski

selftests: mptcp: mptfo Initiator/Listener

This patch first adds TFO support in mptcp_connect.c.

This can be enabled via a new option: -o MPTFO.

Once enabled, the TCP_FASTOPEN socket option is enabled for the server
side and a sendto() with MSG_FASTOPEN is used instead of a connect() for
the client side.

Note that the first SYN can only carry a limited number of bytes. In other
words, sendto() is allowed to send less data than was provided. We then
need to track more status info so that the next sendmsg() can resume
from the first unsent byte and send the rest.

Also in TFO scenarios, we need to completely spool the partially xmitted
buffer -- and account for that -- before starting sendfile/mmap xmit,
otherwise the relevant tests will fail.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent cb99816c
...@@ -83,6 +83,7 @@ struct cfg_cmsg_types { ...@@ -83,6 +83,7 @@ struct cfg_cmsg_types {
struct cfg_sockopt_types { struct cfg_sockopt_types {
unsigned int transparent:1; unsigned int transparent:1;
unsigned int mptfo:1;
}; };
struct tcp_inq_state { struct tcp_inq_state {
...@@ -90,6 +91,13 @@ struct tcp_inq_state { ...@@ -90,6 +91,13 @@ struct tcp_inq_state {
bool expect_eof; bool expect_eof;
}; };
/*
 * Pending-write state handed from the connect path to the copy routines.
 *
 * With MPTFO the first chunk of input is read and (possibly only partly)
 * sent by the connecting sendto(); whatever is left over is carried here so
 * the transfer code can continue from the right position.
 */
struct wstate {
	char buf[8192];		/* staging buffer filled by read() from the input fd */
	unsigned int len;	/* bytes in buf that still have to be written */
	unsigned int off;	/* offset in buf of the next byte to write */
	unsigned int total_len;	/* size of the first chunk read for the TFO SYN;
				 * do_mmap()/do_sendfile() skip this many input bytes
				 */
};
static struct tcp_inq_state tcp_inq; static struct tcp_inq_state tcp_inq;
static struct cfg_cmsg_types cfg_cmsg_types; static struct cfg_cmsg_types cfg_cmsg_types;
...@@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) ...@@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf)
} }
} }
/*
 * Request TCP Fast Open on a socket via the TCP_FASTOPEN socket option.
 *
 * @fd: socket to configure
 * @pf: address family; not used here, the parameter only mirrors
 *      set_transparent(fd, pf)
 *
 * A failing setsockopt() is reported with perror() and otherwise ignored,
 * so the caller carries on without Fast Open.
 */
static void set_mptfo(int fd, int pf)
{
	/* Option value: max number of pending Fast Open requests, see tcp(7). */
	const int tfo_qlen = 25;
	int rc;

	(void)pf;

	rc = setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &tfo_qlen, sizeof(tfo_qlen));
	if (rc == -1)
		perror("TCP_FASTOPEN");
}
static int do_ulp_so(int sock, const char *name) static int do_ulp_so(int sock, const char *name)
{ {
return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
...@@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, ...@@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
if (cfg_sockopt_types.transparent) if (cfg_sockopt_types.transparent)
set_transparent(sock, pf); set_transparent(sock, pf);
if (cfg_sockopt_types.mptfo)
set_mptfo(sock, pf);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */ break; /* success */
...@@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, ...@@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr,
static int sock_connect_mptcp(const char * const remoteaddr, static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto, const char * const port, int proto,
struct addrinfo **peer) struct addrinfo **peer,
int infd, struct wstate *winfo)
{ {
struct addrinfo hints = { struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP, .ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM, .ai_socktype = SOCK_STREAM,
}; };
struct addrinfo *a, *addr; struct addrinfo *a, *addr;
int syn_copied = 0;
int sock = -1; int sock = -1;
hints.ai_family = pf; hints.ai_family = pf;
...@@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr, ...@@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr,
if (cfg_mark) if (cfg_mark)
set_mark(sock, cfg_mark); set_mark(sock, cfg_mark);
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { if (cfg_sockopt_types.mptfo) {
*peer = a; if (!winfo->total_len)
break; /* success */ winfo->total_len = winfo->len = read(infd, winfo->buf,
sizeof(winfo->buf));
syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
a->ai_addr, a->ai_addrlen);
if (syn_copied >= 0) {
winfo->off = syn_copied;
winfo->len -= syn_copied;
*peer = a;
break; /* success */
}
} else {
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a;
break; /* success */
}
}
if (cfg_sockopt_types.mptfo) {
perror("sendto()");
close(sock);
sock = -1;
} else {
perror("connect()");
close(sock);
sock = -1;
} }
perror("connect()");
close(sock);
sock = -1;
} }
freeaddrinfo(addr); freeaddrinfo(addr);
...@@ -571,14 +612,14 @@ static void shut_wr(int fd) ...@@ -571,14 +612,14 @@ static void shut_wr(int fd)
shutdown(fd, SHUT_WR); shutdown(fd, SHUT_WR);
} }
static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) static int copyfd_io_poll(int infd, int peerfd, int outfd,
bool *in_closed_after_out, struct wstate *winfo)
{ {
struct pollfd fds = { struct pollfd fds = {
.fd = peerfd, .fd = peerfd,
.events = POLLIN | POLLOUT, .events = POLLIN | POLLOUT,
}; };
unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; unsigned int total_wlen = 0, total_rlen = 0;
char wbuf[8192];
set_nonblock(peerfd, true); set_nonblock(peerfd, true);
...@@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after ...@@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
} }
if (fds.revents & POLLOUT) { if (fds.revents & POLLOUT) {
if (wlen == 0) { if (winfo->len == 0) {
woff = 0; winfo->off = 0;
wlen = read(infd, wbuf, sizeof(wbuf)); winfo->len = read(infd, winfo->buf, sizeof(winfo->buf));
} }
if (wlen > 0) { if (winfo->len > 0) {
ssize_t bw; ssize_t bw;
/* limit the total amount of written data to the trunc value */ /* limit the total amount of written data to the trunc value */
if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
wlen = cfg_truncate - total_wlen; winfo->len = cfg_truncate - total_wlen;
bw = do_rnd_write(peerfd, wbuf + woff, wlen); bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) { if (bw < 0) {
if (cfg_rcv_trunc) if (cfg_rcv_trunc)
return 0; return 0;
...@@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after ...@@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
return 111; return 111;
} }
woff += bw; winfo->off += bw;
wlen -= bw; winfo->len -= bw;
total_wlen += bw; total_wlen += bw;
} else if (wlen == 0) { } else if (winfo->len == 0) {
/* We have no more data to send. */ /* We have no more data to send. */
fds.events &= ~POLLOUT; fds.events &= ~POLLOUT;
...@@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) ...@@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd)
return (int)r; return (int)r;
} }
static int do_mmap(int infd, int outfd, unsigned int size) static int spool_buf(int fd, struct wstate *winfo)
{
while (winfo->len) {
int ret = write(fd, winfo->buf + winfo->off, winfo->len);
if (ret < 0) {
perror("write");
return 4;
}
winfo->off += ret;
winfo->len -= ret;
}
return 0;
}
static int do_mmap(int infd, int outfd, unsigned int size,
struct wstate *winfo)
{ {
char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
ssize_t ret = 0, off = 0; ssize_t ret = 0, off = winfo->total_len;
size_t rem; size_t rem;
if (inbuf == MAP_FAILED) { if (inbuf == MAP_FAILED) {
...@@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) ...@@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size)
return 1; return 1;
} }
rem = size; ret = spool_buf(outfd, winfo);
if (ret < 0)
return ret;
rem = size - winfo->total_len;
while (rem > 0) { while (rem > 0) {
ret = write(outfd, inbuf + off, rem); ret = write(outfd, inbuf + off, rem);
...@@ -772,8 +833,16 @@ static int get_infd_size(int fd) ...@@ -772,8 +833,16 @@ static int get_infd_size(int fd)
return (int)count; return (int)count;
} }
static int do_sendfile(int infd, int outfd, unsigned int count) static int do_sendfile(int infd, int outfd, unsigned int count,
struct wstate *winfo)
{ {
int ret = spool_buf(outfd, winfo);
if (ret < 0)
return ret;
count -= winfo->total_len;
while (count > 0) { while (count > 0) {
ssize_t r; ssize_t r;
...@@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) ...@@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
} }
static int copyfd_io_mmap(int infd, int peerfd, int outfd, static int copyfd_io_mmap(int infd, int peerfd, int outfd,
unsigned int size, bool *in_closed_after_out) unsigned int size, bool *in_closed_after_out,
struct wstate *winfo)
{ {
int err; int err;
...@@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, ...@@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
if (err) if (err)
return err; return err;
err = do_mmap(infd, peerfd, size); err = do_mmap(infd, peerfd, size, winfo);
} else { } else {
err = do_mmap(infd, peerfd, size); err = do_mmap(infd, peerfd, size, winfo);
if (err) if (err)
return err; return err;
...@@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, ...@@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
} }
static int copyfd_io_sendfile(int infd, int peerfd, int outfd, static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
unsigned int size, bool *in_closed_after_out) unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
{ {
int err; int err;
...@@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, ...@@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
if (err) if (err)
return err; return err;
err = do_sendfile(infd, peerfd, size); err = do_sendfile(infd, peerfd, size, winfo);
} else { } else {
err = do_sendfile(infd, peerfd, size); err = do_sendfile(infd, peerfd, size, winfo);
if (err) if (err)
return err; return err;
...@@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, ...@@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err; return err;
} }
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{ {
bool in_closed_after_out = false; bool in_closed_after_out = false;
struct timespec start, end; struct timespec start, end;
...@@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) ...@@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
switch (cfg_mode) { switch (cfg_mode) {
case CFG_MODE_POLL: case CFG_MODE_POLL:
ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
winfo);
break; break;
case CFG_MODE_MMAP: case CFG_MODE_MMAP:
file_size = get_infd_size(infd); file_size = get_infd_size(infd);
if (file_size < 0) if (file_size < 0)
return file_size; return file_size;
ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
&in_closed_after_out, winfo);
break; break;
case CFG_MODE_SENDFILE: case CFG_MODE_SENDFILE:
file_size = get_infd_size(infd); file_size = get_infd_size(infd);
if (file_size < 0) if (file_size < 0)
return file_size; return file_size;
ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
&in_closed_after_out, winfo);
break; break;
default: default:
...@@ -999,6 +1072,7 @@ static void maybe_close(int fd) ...@@ -999,6 +1072,7 @@ static void maybe_close(int fd)
int main_loop_s(int listensock) int main_loop_s(int listensock)
{ {
struct sockaddr_storage ss; struct sockaddr_storage ss;
struct wstate winfo;
struct pollfd polls; struct pollfd polls;
socklen_t salen; socklen_t salen;
int remotesock; int remotesock;
...@@ -1033,7 +1107,8 @@ int main_loop_s(int listensock) ...@@ -1033,7 +1107,8 @@ int main_loop_s(int listensock)
SOCK_TEST_TCPULP(remotesock, 0); SOCK_TEST_TCPULP(remotesock, 0);
copyfd_io(fd, remotesock, 1, true); memset(&winfo, 0, sizeof(winfo));
copyfd_io(fd, remotesock, 1, true, &winfo);
} else { } else {
perror("accept"); perror("accept");
return 1; return 1;
...@@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) ...@@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name)
return; return;
} }
if (strncmp(name, "MPTFO", len) == 0) {
cfg_sockopt_types.mptfo = 1;
return;
}
fprintf(stderr, "Unrecognized setsockopt option %s\n", name); fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
exit(1); exit(1);
} }
...@@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) ...@@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen)
int main_loop(void) int main_loop(void)
{ {
int fd, ret, fd_in = 0; int fd = 0, ret, fd_in = 0;
struct addrinfo *peer; struct addrinfo *peer;
struct wstate winfo;
if (cfg_input && cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
/* listener is ready. */ memset(&winfo, 0, sizeof(winfo));
fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo);
if (fd < 0) if (fd < 0)
return 2; return 2;
...@@ -1186,14 +1273,13 @@ int main_loop(void) ...@@ -1186,14 +1273,13 @@ int main_loop(void)
if (cfg_cmsg_types.cmsg_enabled) if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types); apply_cmsg_types(fd, &cfg_cmsg_types);
if (cfg_input) { if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY); fd_in = open(cfg_input, O_RDONLY);
if (fd < 0) if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno); xerror("can't open %s:%d", cfg_input, errno);
} }
/* close the client socket open only if we are not going to reconnect */ ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
ret = copyfd_io(fd_in, fd, 1, 0);
if (ret) if (ret)
return ret; return ret;
...@@ -1210,6 +1296,7 @@ int main_loop(void) ...@@ -1210,6 +1296,7 @@ int main_loop(void)
xerror("can't reconnect: %d", errno); xerror("can't reconnect: %d", errno);
if (cfg_input) if (cfg_input)
close(fd_in); close(fd_in);
memset(&winfo, 0, sizeof(winfo));
goto again; goto again;
} else { } else {
close(fd); close(fd);
......
...@@ -762,6 +762,23 @@ run_tests_peekmode() ...@@ -762,6 +762,23 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
} }
# Run the loopback transfer tests with the "-o MPTFO" option.
# net.ipv4.tcp_fastopen is switched on in both namespaces for the duration of
# the tests and reset to 0 afterwards. Per the kernel's ip-sysctl
# documentation, bit 0x1 enables client support and bit 0x2 server support.
run_tests_mptfo()
{
	local mptfo_addr

	echo "INFO: with MPTFO start"
	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1

	# Every address is exercised twice in a row -- presumably so that the
	# second run can reuse the TFO cookie obtained by the first (TODO confirm).
	for mptfo_addr in 10.0.1.1 10.0.1.1 dead:beef:1::1 dead:beef:1::1; do
		run_tests_lo "$ns1" "$ns2" "$mptfo_addr" 0 "-o MPTFO"
	done

	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
	echo "INFO: with MPTFO end"
}
run_tests_disconnect() run_tests_disconnect()
{ {
local peekmode="$1" local peekmode="$1"
...@@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek" ...@@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek" run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed" stop_if_error "Tests with peek mode have failed"
# MPTFO (MultiPath TCP Fast Open) tests
run_tests_mptfo
stop_if_error "Tests with MPTFO have failed"
# connect to ns4 ip address, ns2 should intercept/proxy # connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6" run_test_transparent dead:beef:3::1 "tproxy ipv6"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment