Commit f1bcd486 authored by Jakub Kicinski

Merge branch 'selftests-net-packetdrill-netns-and-two-imports'

Willem de Bruijn says:

====================
selftests/net: packetdrill: netns and two imports

From: Willem de Bruijn <willemb@google.com>

1/3: run in netns, as discussed, and add missing CONFIGs
2/3: import tcp/zerocopy
3/3: import tcp/slow_start
====================

Link: https://patch.msgid.link/20240912005317.1253001-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents eda5891f e874be27
......@@ -2,6 +2,7 @@
 TEST_INCLUDES := ksft_runner.sh \
 defaults.sh \
+set_sysctls.py \
 ../../kselftest/ktap_helpers.sh
 TEST_PROGS := $(wildcard *.pkt)
......
CONFIG_IPV6=y
CONFIG_HZ_1000=y
CONFIG_HZ=1000
CONFIG_NET_NS=y
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_FQ=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYN_COOKIES=y
CONFIG_TCP_CONG_CUBIC=y
CONFIG_TCP_MD5SIG=y
CONFIG_TUN=y
......@@ -33,9 +33,9 @@ fi
 ktap_print_header
 ktap_set_plan 2
-packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \
+unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \
 && ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
-packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \
+unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \
 && ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
 ktap_finished
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""Sets sysctl values and writes a file that restores them.
The arguments are of the form "<proc-file>=<val>" separated by spaces.
The program first reads the current value of the proc-file and creates
a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which
restores the values when executed. It then sets the new values.
PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt
file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh`
at the end.
"""
import os
import subprocess
import sys
# Path of the generated restore script. PACKETDRILL_PID must be present in
# the environment; a missing variable raises KeyError.
filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID']

# Parse every "<proc-file>=<val>" argument before touching anything, so a
# malformed argument is reported without leaving sysctls half-modified.
settings = []
for arg in sys.argv[1:]:
    # Split on the first '=' only, so the value itself may contain '='.
    proc_file, sep, new_val = arg.partition('=')
    if not sep:
        sys.exit('%s: expected "<proc-file>=<val>", got %r'
                 % (sys.argv[0], arg))
    settings.append((proc_file, new_val))

# Open file for restoring sysctl values; the context manager guarantees the
# script is flushed and closed before it is made executable.
with open(filename, 'w') as restore_file:
    print('#!/bin/bash', file=restore_file)
    for proc_file, new_val in settings:
        # read current value and add restore command to file
        with open(proc_file) as proc:
            cur_val = proc.read()
        print('echo "%s" > %s' % (cur_val.strip(), proc_file),
              file=restore_file)

        # set new value; a rejected write is reported but does not abort,
        # matching the best-effort behaviour of the previous shell "echo".
        try:
            with open(proc_file, 'w') as proc:
                proc.write(new_val + '\n')
        except OSError as err:
            print('%s: cannot set %s: %s' % (sys.argv[0], proc_file, err),
                  file=sys.stderr)

# Equivalent of "chmod u+x": add the owner-execute bit to the current mode.
os.chmod(filename, os.stat(filename).st_mode | 0o100)
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when not application-limited, so that
// the cwnd continues to grow.
// In this variant, the receiver ACKs every packet.
// Set up config. To keep things simple, disable the
// mechanism that defers sending in order to send bigger TSO packets.
`./defaults.sh
sysctl -q net.ipv4.tcp_tso_win_divisor=100`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 30000) = 30000
+0 > P. 1:10001(10000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.105 < . 1:1(0) ack 1001 win 257
+0 > P. 10001:12001(2000) ack 1
+0 < . 1:1(0) ack 2001 win 257
+0 > P. 12001:14001(2000) ack 1
+.005 < . 1:1(0) ack 3001 win 257
+0 > P. 14001:16001(2000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 > P. 16001:18001(2000) ack 1
+.005 < . 1:1(0) ack 5001 win 257
+0 > P. 18001:20001(2000) ack 1
+0 < . 1:1(0) ack 6001 win 257
+0 > P. 20001:22001(2000) ack 1
+.005 < . 1:1(0) ack 7001 win 257
+0 > P. 22001:24001(2000) ack 1
+0 < . 1:1(0) ack 8001 win 257
+0 > P. 24001:26001(2000) ack 1
+.005 < . 1:1(0) ack 9001 win 257
+0 > P. 26001:28001(2000) ack 1
+0 < . 1:1(0) ack 10001 win 257
+0 > P. 28001:30001(2000) ack 1
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when an outstanding flight of packets is
// less than the current cwnd, and not big enough to bump up cwnd.
//
// In this variant, the receiver ACKs every other packet,
// approximating standard delayed ACKs.
// Set up config.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
// Only send 5 packets.
+0 write(4, ..., 5000) = 5000
+0 > P. 1:5001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 2001 win 257
+0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 4001 win 257
+0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 5001 win 257
+0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when an outstanding flight of packets is
// less than the current cwnd, but still big enough that in slow
// start we want to increase our cwnd a little.
//
// In this variant, the receiver ACKs every other packet,
// approximating standard delayed ACKs.
// Set up config.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
// Only send 6 packets.
+0 write(4, ..., 6000) = 6000
+0 > P. 1:6001(6000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 2001 win 257
+0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 4001 win 257
+0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+0 < . 1:1(0) ack 6001 win 257
+0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when not application-limited, so that
// the cwnd continues to grow.
// In this variant, the receiver ACKs every other packet,
// approximating standard delayed ACKs.
// Set up config. To keep things simple, disable the
// mechanism that defers sending in order to send bigger TSO packets.
`./defaults.sh
sysctl -q net.ipv4.tcp_tso_win_divisor=100`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 30000) = 30000
+0 > P. 1:10001(10000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.105 < . 1:1(0) ack 2001 win 257
+0 > P. 10001:14001(4000) ack 1
+.005 < . 1:1(0) ack 4001 win 257
+0 > P. 14001:18001(4000) ack 1
+.005 < . 1:1(0) ack 6001 win 257
+0 > P. 18001:22001(4000) ack 1
+.005 < . 1:1(0) ack 8001 win 257
+0 > P. 22001:26001(4000) ack 1
+.005 < . 1:1(0) ack 10001 win 257
+0 > P. 26001:30001(4000) ack 1
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when not application-limited, so that
// the cwnd continues to grow.
// In this variant, the receiver sends one ACK per 4 packets.
// Set up config. To keep things simple, disable the
// mechanism that defers sending in order to send bigger TSO packets.
`./defaults.sh
sysctl -q net.ipv4.tcp_tso_win_divisor=100`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 30000) = 30000
+0 > P. 1:10001(10000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.11 < . 1:1(0) ack 4001 win 257
+0 > P. 10001:18001(8000) ack 1
+.01 < . 1:1(0) ack 8001 win 257
+0 > P. 18001:26001(8000) ack 1
+.005 < . 1:1(0) ack 10001 win 257
+0 > P. 26001:30001(4000) ack 1
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start after idle
// This test expects tso size to be at least initial cwnd * mss
`./defaults.sh
./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
/proc/sys/net/ipv4/tcp_min_tso_segs=10`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 511
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 26000) = 26000
+0 > P. 1:5001(5000) ack 1
+0 > P. 5001:10001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.1 < . 1:1(0) ack 10001 win 511
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0 > P. 10001:20001(10000) ack 1
+0 > P. 20001:26001(6000) ack 1
+.1 < . 1:1(0) ack 26001 win 511
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+2 write(4, ..., 20000) = 20000
// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+0 > P. 26001:31001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
// Reset sysctls
`/tmp/sysctl_restore_${PPID}.sh`
// SPDX-License-Identifier: GPL-2.0
// Test of slow start after window update
// This test expects tso size to be at least initial cwnd * mss
`./defaults.sh
./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
/proc/sys/net/ipv4/tcp_min_tso_segs=10`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 511
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 26000) = 26000
+0 > P. 1:5001(5000) ack 1
+0 > P. 5001:10001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.1 < . 1:1(0) ack 10001 win 511
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0 > P. 10001:20001(10000) ack 1
+0 > P. 20001:26001(6000) ack 1
+.1 < . 1:1(0) ack 26001 win 0
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+0 write(4, ..., 20000) = 20000
// 1st win0 probe
+.3~+.310 > . 26000:26000(0) ack 1
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
// 2nd win0 probe
+.6~+.620 > . 26000:26000(0) ack 1
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
// 3rd win0 probe
+1.2~+1.240 > . 26000:26000(0) ack 1
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+.9 < . 1:1(0) ack 26001 win 511
+0 > P. 26001:31001(5000) ack 1
// Reset sysctls
`/tmp/sysctl_restore_${PPID}.sh`
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when application-limited: in this case,
// with IW10, if we don't fully use our cwnd but instead
// send just 9 packets, then cwnd should grow to twice that
// value, or 18 packets.
// Set up config.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 9000) = 9000
+0 > P. 1:9001(9000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.105 < . 1:1(0) ack 2001 win 257
+0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 4001 win 257
+0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 6001 win 257
+0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 8001 win 257
+0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 9001 win 257
+0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when application-limited: in this case,
// with IW10, if we send exactly 10 packets then cwnd should grow to 20.
// Set up config.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 10000) = 10000
+0 > P. 1:10001(10000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.105 < . 1:1(0) ack 2001 win 257
+0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 4001 win 257
+0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 6001 win 257
+0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 8001 win 257
+0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+.005 < . 1:1(0) ack 10001 win 257
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// Test of slow start when not application-limited, so that
// the cwnd continues to grow, even if TSQ triggers.
// In this variant, the receiver ACKs every other packet,
// approximating standard delayed ACKs.
// Note: we use FQ/pacing to check that TCP Small Queues is not hurting
`./defaults.sh
tc qdisc replace dev tun0 root fq
sysctl -q net/ipv4/tcp_pacing_ss_ratio=200
sysctl -e -q net.ipv4.tcp_min_tso_segs=2`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.1 < . 1:1(0) ack 1 win 500
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 40000) = 40000
// This might change if we cook the initial packet with 10 MSS.
+0 > P. 1:2921(2920) ack 1
+0 > P. 2921:5841(2920) ack 1
+0 > P. 5841:8761(2920) ack 1
+0 > P. 8761:11681(2920) ack 1
+0 > P. 11681:14601(2920) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+.105 < . 1:1(0) ack 2921 win 500
+0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts"
// FQ notices that this packet missed the 'time to send next packet' computed
// when prior packet (11681:14601(2920)) was sent.
// So FQ will allow following packet to be sent a bit earlier (quantum/2)
// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit)
+0 > P. 14601:17521(2920) ack 1
+.003 < . 1:1(0) ack 5841 win 500
+0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+.001 > P. 17521:20441(2920) ack 1
+.001 < . 1:1(0) ack 8761 win 500
+0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
// remaining packets are delivered at a constant rate.
+.007 > P. 20441:23361(2920) ack 1
+.002 < . 1:1(0) ack 11681 win 500
+0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+.001 < . 1:1(0) ack 14601 win 500
+.004 > P. 23361:26281(2920) ack 1
+.007 > P. 26281:29201(2920) ack 1
+0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }%
// SPDX-License-Identifier: GPL-2.0
// basic zerocopy test:
//
// send a packet with MSG_ZEROCOPY and receive the notification ID
// repeat and verify IDs are consecutive
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 4001:8001(4000) ack 1
+0 < . 1:1(0) ack 8001 win 257
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=1,
ee_data=1}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// batch zerocopy test:
//
// send multiple packets, then read one range of all notifications.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 4001:8001(4000) ack 1
+0 < . 1:1(0) ack 8001 win 257
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=1}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// Minimal client-side zerocopy test
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0...0 connect(4, ..., ...) = 0
+0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > . 1:1(0) ack 1
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// send with MSG_ZEROCOPY on a non-established socket
//
// verify that a send in state TCP_CLOSE correctly aborts the zerocopy
// operation, specifically it does not increment the zerocopy counter.
//
// First send on a closed socket and wait for (absent) notification.
// Then connect and send and verify that notification nr. is zero.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe)
+0.1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+0...0 connect(4, ..., ...) = 0
+0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > . 1:1(0) ack 1
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// epoll zerocopy test:
//
// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
// it is not level-triggered either.
//
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 epoll_create(1) = 5
+0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0
+0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 4001:8001(4000) ack 1
+0 < . 1:1(0) ack 8001 win 257
// receive only one EPOLLERR for the two sends above.
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 8001:12001(4000) ack 1
+0 < . 1:1(0) ack 12001 win 257
// receive only one EPOLLERR for the third send above.
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=2}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// epoll zerocopy test:
//
// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
// it is not level-triggered either. this test verifies that the same behavior is
// maintained when we have EPOLLEXCLUSIVE.
//
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 epoll_create(1) = 5
+0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
{events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0
+0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 4001:8001(4000) ack 1
+0 < . 1:1(0) ack 8001 win 257
// receive only one EPOLLERR for the two sends above.
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 8001:12001(4000) ack 1
+0 < . 1:1(0) ack 12001 win 257
// receive only one EPOLLERR for the third send above.
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=2}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// epoll zerocopy test:
//
// This is a test to confirm that EPOLLERR is only fired once for an FD when
// EPOLLONESHOT is set.
//
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLONESHOT is set. send another packet
// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
// confirm that EPOLLERR is correctly set.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 epoll_create(1) = 5
+0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
{events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 1:4001(4000) ack 1
+0 < . 1:1(0) ack 4001 win 257
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 4001:8001(4000) ack 1
+0 < . 1:1(0) ack 8001 win 257
// receive only one EPOLLERR for the two sends above.
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+0 > P. 8001:12001(4000) ack 1
+0 < . 1:1(0) ack 12001 win 257
// receive no EPOLLERR for the third send above.
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
// rearm the FD and verify the EPOLLERR is fired again.
+0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0
+0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+0 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=2}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// Fastopen client zerocopy test:
//
// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
// kernel returns the notification ID.
//
// Fastopen requires a stored cookie. Create two sockets. The first
// one will have no data in the initial send. On return 0 the
// zerocopy notification counter is not incremented. Verify this too.
`./defaults.sh`
// Send a FastOpen request, no cookie yet so no data in SYN
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop>
+.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop>
+0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000>
// Read from error queue: no zerocopy notification
+1 recvmsg(3, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+.01 close(3) = 0
+0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000>
+.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002>
+0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001>
// Send another Fastopen request, now SYN will have data
+.07 `sysctl -q net.ipv4.tcp_timestamps=0`
+.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500
+0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop>
+.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+0 > . 501:501(0) ack 1
// Read from error queue: now has first zerocopy notification
+0.5 recvmsg(5, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// Fastopen server zerocopy test:
//
// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
// kernel returns the notification ID.
`./defaults.sh
./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
// Set up a TFO server listening socket.
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
// Client sends a SYN with data.
+.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
// Server accepts and replies with data.
+.005 accept(3, ..., ...) = 4
+0 read(4, ..., 1024) = 1000
+0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000
+0 > P. 1:1001(1000) ack 1001
+.05 < . 1001:1001(0) ack 1001 win 32792
// Read from error queue: now has first zerocopy notification
+0.1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
`/tmp/sysctl_restore_${PPID}.sh`
// SPDX-License-Identifier: GPL-2.0
// tcp_MAX_SKB_FRAGS test
//
// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will
// 1) fit in a single packet without zerocopy
// 2) spill over into a second packet with zerocopy,
// because each iovec element becomes a frag
// 3) the PSH bit is set on an skb when it runs out of fragments
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
// Each pinned zerocopy page is fully accounted to skb->truesize.
// This test generates a worst case packet with each frag storing
// one byte, but increasing truesize with a page (64KB on PPC).
+0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
// send an iov of 18 elements: just becomes a linear skb
+0 sendmsg(4, {msg_name(...)=...,
msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}],
msg_flags=0}, 0) = 18
+0 > P. 1:19(18) ack 1
+0 < . 1:1(0) ack 19 win 257
// send a zerocopy iov of 18 elements:
+1 sendmsg(4, {msg_name(...)=...,
msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}],
msg_flags=0}, MSG_ZEROCOPY) = 18
// verify that it is split in one skb of 17 frags + 1 of 1 frag
// verify that both have the PSH bit set
+0 > P. 19:36(17) ack 1
+0 < . 1:1(0) ack 36 win 257
+0 > P. 36:37(1) ack 1
+0 < . 1:1(0) ack 37 win 257
+1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
// send a zerocopy iov of 64 elements:
+0 sendmsg(4, {msg_name(...)=...,
msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1},
{..., 1}, {..., 1}, {..., 1}, {..., 1}],
msg_flags=0}, MSG_ZEROCOPY) = 64
// verify that it is split in skbs with 17 frags
+0 > P. 37:54(17) ack 1
+0 < . 1:1(0) ack 54 win 257
+0 > P. 54:71(17) ack 1
+0 < . 1:1(0) ack 71 win 257
+0 > P. 71:88(17) ack 1
+0 < . 1:1(0) ack 88 win 257
+0 > P. 88:101(13) ack 1
+0 < . 1:1(0) ack 101 win 257
+1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=1,
ee_data=1}}
]}, MSG_ERRQUEUE) = 0
// SPDX-License-Identifier: GPL-2.0
// small packet zerocopy test:
//
// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
// packets of all sizes, including the smallest payload, 1B.
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
// send 1B
+0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+0 > P. 1:2(1) ack 1
+0 < . 1:1(0) ack 2 win 257
+1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=0,
ee_data=0}}
]}, MSG_ERRQUEUE) = 0
// send 1B again
+0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+0 > P. 2:3(1) ack 1
+0 < . 1:1(0) ack 3 win 257
+1 recvmsg(4, {msg_name(...)=...,
msg_iov(1)=[{...,0}],
msg_flags=MSG_ERRQUEUE,
msg_control=[
{cmsg_level=CMSG_LEVEL_IP,
cmsg_type=CMSG_TYPE_RECVERR,
cmsg_data={ee_errno=0,
ee_origin=SO_EE_ORIGIN_ZEROCOPY,
ee_type=0,
ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
ee_info=1,
ee_data=1}}
]}, MSG_ERRQUEUE) = 0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment