selftests: rds: add testing infrastructure

This adds some basic self-testing infrastructure for RDS-TCP. Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Allison Henderson <allison.henderson@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>

selftests: rds: add testing infrastructure
This adds some basic self-testing infrastructure for RDS-TCP. Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Allison Henderson <allison.henderson@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
3ade6ce1 · Vegard Nossum · David S. Miller · bc75dcc3 · 3ade6ce1 · 3ade6ce1
Commit 3ade6ce1 authored Aug 06, 2024 by Vegard Nossum Committed by David S. Miller Aug 09, 2024
8 changed files
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -75,6 +75,17 @@ Only files which are linked to the main kernel image or are compiled as
 kernel modules are supported by this mechanism.
+Module specific configs
+-----------------------
+Gcov kernel configs for specific modules are described below:
+CONFIG_GCOV_PROFILE_RDS:
+        Enables GCOV profiling on RDS for checking which functions or
+        lines are executed. This config is used by the rds selftest to
+        generate coverage reports. If left unset the report is omitted.
 Files
 -----

--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19197,6 +19197,7 @@ S:	Supported
 W:	https://oss.oracle.com/projects/rds/
 F:	Documentation/networking/rds.rst
 F:	net/rds/
+F:	tools/testing/selftests/net/rds/
 RDT - RESOURCE ALLOCATION
 M:	Fenghua Yu <fenghua.yu@intel.com>

--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -68,6 +68,7 @@ TARGETS += net/mptcp
 TARGETS += net/openvswitch
 TARGETS += net/tcp_ao
 TARGETS += net/netfilter
+TARGETS += net/rds
 TARGETS += nsfs
 TARGETS += perf_events
 TARGETS += pidfd

--- a/tools/testing/selftests/net/rds/Makefile
+++ b/tools/testing/selftests/net/rds/Makefile
+# SPDX-License-Identifier: GPL-2.0
+all:
+	@echo mk_build_dir="$(shell pwd)" > include.sh
+TEST_PROGS := run.sh \
+	include.sh \
+	test.py
+EXTRA_CLEAN := /tmp/rds_logs
+include ../../lib.mk
--- a/tools/testing/selftests/net/rds/README.txt
+++ b/tools/testing/selftests/net/rds/README.txt
+RDS self-tests
+==============
+These scripts provide a coverage test for RDS-TCP by creating two
+network namespaces and running rds packets between them. A loopback
+network is provisioned with optional probability of packet loss or
+corruption. A workload of 50000 hashes, each 64 characters in size,
+are passed over an RDS socket on this test network. A passing test means
+the RDS-TCP stack was able to recover properly.  The provided config.sh
+can be used to compile the kernel with the necessary gcov options.  The
+kernel may optionally be configured to omit the coverage report as well.
+USAGE:
+	run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]
+	       [-u packet_duplcate]
+OPTIONS:
+	-d	Log directory.  Defaults to tools/testing/selftests/net/rds/rds_logs
+	-l	Simulates a percentage of packet loss
+	-c	Simulates a percentage of packet corruption
+	-u	Simulates a percentage of packet duplication.
+EXAMPLE:
+    # Create a suitable gcov enabled .config
+    tools/testing/selftests/net/rds/config.sh -g
+    # Alternatly create a gcov disabled .config
+    tools/testing/selftests/net/rds/config.sh
+    # build the kernel
+    vng --build  --config tools/testing/selftests/net/config
+    # launch the tests in a VM
+    vng -v --rwdir ./ --run . --user root --cpus 4 -- \
+        "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh"
+An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
--- a/tools/testing/selftests/net/rds/config.sh
+++ b/tools/testing/selftests/net/rds/config.sh
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+set -e
+set -u
+set -x
+unset KBUILD_OUTPUT
+GENERATE_GCOV_REPORT=0
+while getopts "g" opt; do
+  case ${opt} in
+    g)
+      GENERATE_GCOV_REPORT=1
+      ;;
+    :)
+      echo "USAGE: config.sh [-g]"
+      exit 1
+      ;;
+    ?)
+      echo "Invalid option: -${OPTARG}."
+      exit 1
+      ;;
+  esac
+done
+CONF_FILE="tools/testing/selftests/net/config"
+# no modules
+scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+# enable RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+	# instrument RDS and only RDS
+	scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+	scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+else
+	scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+fi
+# need network namespaces to run tests with veth network interfaces
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
+scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+# simulate packet loss
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
--- a/tools/testing/selftests/net/rds/run.sh
+++ b/tools/testing/selftests/net/rds/run.sh
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+set -e
+set -u
+unset KBUILD_OUTPUT
+current_dir="$(realpath "$(dirname "$0")")"
+build_dir="$current_dir"
+build_include="$current_dir/include.sh"
+if test -f "$build_include"; then
+	# this include will define "$mk_build_dir" as the location the test was
+	# built.  We will need this if the tests are installed in a location
+	# other than the kernel source
+	source "$build_include"
+	build_dir="$mk_build_dir"
+fi
+# This test requires kernel source and the *.gcda data therein
+# Locate the top level of the kernel source, and the net/rds
+# subfolder with the appropriate *.gcno object files
+ksrc_dir="$(realpath "$build_dir"/../../../../../)"
+kconfig="$ksrc_dir/.config"
+obj_dir="$ksrc_dir/net/rds"
+GCOV_CMD=gcov
+#check to see if the host has the required packages to generate a gcov report
+check_gcov_env()
+{
+	if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+		echo "Warning: Could not find gcov. "
+		GENERATE_GCOV_REPORT=0
+		return
+	fi
+	# the gcov version must match the gcc version
+	GCC_VER=$(gcc -dumpfullversion)
+	GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}')
+	if [ "$GCOV_VER" != "$GCC_VER" ]; then
+		#attempt to find a matching gcov version
+		GCOV_CMD=gcov-$(gcc -dumpversion)
+		if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+			echo "Warning: Could not find an appropriate gcov installation. \
+				gcov version must match gcc version"
+			GENERATE_GCOV_REPORT=0
+			return
+		fi
+		#recheck version number of found gcov executable
+		GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \
+			awk 'BEGIN {FS="-"}{print $1}')
+		if [ "$GCOV_VER" != "$GCC_VER" ]; then
+			echo "Warning: Could not find an appropriate gcov installation. \
+				gcov version must match gcc version"
+			GENERATE_GCOV_REPORT=0
+		else
+			echo "Warning: Mismatched gcc and gcov detected.  Using $GCOV_CMD"
+		fi
+	fi
+}
+# Check to see if the kconfig has the required configs to generate a coverage report
+check_gcov_conf()
+{
+	if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then
+		echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+	if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then
+		echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+	if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then
+		echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+	if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then
+		echo "To enable gcov reports, please run "\
+			"\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel"
+	else
+		# if we have the required kernel configs, proceed to check the environment to
+		# ensure we have the required gcov packages
+		check_gcov_env
+	fi
+}
+# Kselftest framework requirement - SKIP code is 4.
+check_conf_enabled() {
+	if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+		echo "selftests: [SKIP] This test requires $1 enabled"
+		echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+		exit 4
+	fi
+}
+check_conf_disabled() {
+	if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+		echo "selftests: [SKIP] This test requires $1 disabled"
+		echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+		exit 4
+	fi
+}
+check_conf() {
+	check_conf_enabled CONFIG_NET_SCH_NETEM
+	check_conf_enabled CONFIG_VETH
+	check_conf_enabled CONFIG_NET_NS
+	check_conf_enabled CONFIG_RDS_TCP
+	check_conf_enabled CONFIG_RDS
+	check_conf_disabled CONFIG_MODULES
+}
+check_env()
+{
+	if ! test -d "$obj_dir"; then
+		echo "selftests: [SKIP] This test requires a kernel source tree"
+		exit 4
+	fi
+	if ! test -e "$kconfig"; then
+		echo "selftests: [SKIP] This test requires a configured kernel source tree"
+		exit 4
+	fi
+	if ! which strace > /dev/null 2>&1; then
+		echo "selftests: [SKIP] Could not run test without strace"
+		exit 4
+	fi
+	if ! which tcpdump > /dev/null 2>&1; then
+		echo "selftests: [SKIP] Could not run test without tcpdump"
+		exit 4
+	fi
+	if ! which python3 > /dev/null 2>&1; then
+		echo "selftests: [SKIP] Could not run test without python3"
+		exit 4
+	fi
+	python_major=$(python3 -c "import sys; print(sys.version_info[0])")
+	python_minor=$(python3 -c "import sys; print(sys.version_info[1])")
+	if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then
+		echo "selftests: [SKIP] Could not run test without at least python3.9"
+		python3 -V
+		exit 4
+	fi
+}
+LOG_DIR="$current_dir"/rds_logs
+PLOSS=0
+PCORRUPT=0
+PDUP=0
+GENERATE_GCOV_REPORT=1
+while getopts "d:l:c:u:" opt; do
+  case ${opt} in
+    d)
+      LOG_DIR=${OPTARG}
+      ;;
+    l)
+      PLOSS=${OPTARG}
+      ;;
+    c)
+      PCORRUPT=${OPTARG}
+      ;;
+    u)
+      PDUP=${OPTARG}
+      ;;
+    :)
+      echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \
+           "[-u packet_duplcate] [-g]"
+      exit 1
+      ;;
+    ?)
+      echo "Invalid option: -${OPTARG}."
+      exit 1
+      ;;
+  esac
+done
+check_env
+check_conf
+check_gcov_conf
+rm -fr "$LOG_DIR"
+TRACE_FILE="${LOG_DIR}/rds-strace.txt"
+COVR_DIR="${LOG_DIR}/coverage/"
+mkdir -p  "$LOG_DIR"
+mkdir -p "$COVR_DIR"
+set +e
+echo running RDS tests...
+echo Traces will be logged to "$TRACE_FILE"
+rm -f "$TRACE_FILE"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
+       -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+test_rc=$?
+dmesg > "${LOG_DIR}/dmesg.out"
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+       echo saving coverage data...
+       (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \
+       while read -r f
+       do
+               cat < "/sys/kernel/debug/gcov/$f" > "/$f"
+       done)
+       echo running gcovr...
+       gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \
+             -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/"
+else
+       echo "Coverage report will be skipped"
+fi
+if [ "$test_rc" -eq 0 ]; then
+	echo "PASS: Test completed successfully"
+else
+	echo "FAIL: Test failed"
+fi
+exit "$test_rc"
--- a/tools/testing/selftests/net/rds/test.py
+++ b/tools/testing/selftests/net/rds/test.py
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+import argparse
+import ctypes
+import errno
+import hashlib
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import atexit
+from pwd import getpwuid
+from os import stat
+from lib.py import ip
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+setns = libc.setns
+net0 = 'net0'
+net1 = 'net1'
+veth0 = 'veth0'
+veth1 = 'veth1'
+# Helper function for creating a socket inside a network namespace.
+# We need this because otherwise RDS will detect that the two TCP
+# sockets are on the same interface and use the loop transport instead
+# of the TCP transport.
+def netns_socket(netns, *args):
+    u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+    child = os.fork()
+    if child == 0:
+        # change network namespace
+        with open(f'/var/run/netns/{netns}') as f:
+            try:
+                ret = setns(f.fileno(), 0)
+            except IOError as e:
+                print(e.errno)
+                print(e)
+        # create socket in target namespace
+        s = socket.socket(*args)
+        # send resulting socket to parent
+        socket.send_fds(u0, [], [s.fileno()])
+        sys.exit(0)
+    # receive socket from child
+    _, s, _, _ = socket.recv_fds(u1, 0, 1)
+    os.waitpid(child, 0)
+    u0.close()
+    u1.close()
+    return socket.fromfd(s[0], *args)
+def signal_handler(sig, frame):
+    print('Test timed out')
+    sys.exit(1)
+#Parse out command line arguments.  We take an optional
+# timeout parameter and an optional log output folder
+parser = argparse.ArgumentParser(description="init script args",
+                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-d", "--logdir", action="store",
+                    help="directory to store logs", default="/tmp")
+parser.add_argument('--timeout', help="timeout to terminate hung test",
+                    type=int, default=0)
+parser.add_argument('-l', '--loss', help="Simulate tcp packet loss",
+                    type=int, default=0)
+parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption",
+                    type=int, default=0)
+parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
+                    type=int, default=0)
+args = parser.parse_args()
+logdir=args.logdir
+packet_loss=str(args.loss)+'%'
+packet_corruption=str(args.corruption)+'%'
+packet_duplicate=str(args.duplicate)+'%'
+ip(f"netns add {net0}")
+ip(f"netns add {net1}")
+ip(f"link add type veth")
+addrs = [
+    # we technically don't need different port numbers, but this will
+    # help identify traffic in the network analyzer
+    ('10.0.0.1', 10000),
+    ('10.0.0.2', 20000),
+]
+# move interfaces to separate namespaces so they can no longer be
+# bound directly; this prevents rds from switching over from the tcp
+# transport to the loop transport.
+ip(f"link set {veth0} netns {net0} up")
+ip(f"link set {veth1} netns {net1} up")
+# add addresses
+ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
+ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+# add routes
+ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
+ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+# sanity check that our two interfaces/addresses are correctly set up
+# and communicating by doing a single ping
+ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+# Start a packet capture on each network
+for net in [net0, net1]:
+    tcpdump_pid = os.fork()
+    if tcpdump_pid == 0:
+        pcap = logdir+'/'+net+'.pcap'
+        subprocess.check_call(['touch', pcap])
+        user = getpwuid(stat(pcap).st_uid).pw_name
+        ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
+        sys.exit(0)
+# simulate packet loss, duplication and corruption
+for net, iface in [(net0, veth0), (net1, veth1)]:
+    ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem  \
+         corrupt {packet_corruption} loss {packet_loss} duplicate  \
+         {packet_duplicate}")
+# add a timeout
+if args.timeout > 0:
+    signal.alarm(args.timeout)
+    signal.signal(signal.SIGALRM, signal_handler)
+sockets = [
+    netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+    netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+]
+for s, addr in zip(sockets, addrs):
+    s.bind(addr)
+    s.setblocking(0)
+fileno_to_socket = {
+    s.fileno(): s for s in sockets
+}
+addr_to_socket = {
+    addr: s for addr, s in zip(addrs, sockets)
+}
+socket_to_addr = {
+    s: addr for addr, s in zip(addrs, sockets)
+}
+send_hashes = {}
+recv_hashes = {}
+ep = select.epoll()
+for s in sockets:
+    ep.register(s, select.EPOLLRDNORM)
+n = 50000
+nr_send = 0
+nr_recv = 0
+while nr_send < n:
+    # Send as much as we can without blocking
+    print("sending...", nr_send, nr_recv)
+    while nr_send < n:
+        send_data = hashlib.sha256(
+            f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
+        # pseudo-random send/receive pattern
+        sender = sockets[nr_send % 2]
+        receiver = sockets[1 - (nr_send % 3) % 2]
+        try:
+            sender.sendto(send_data, socket_to_addr[receiver])
+            send_hashes.setdefault((sender.fileno(), receiver.fileno()),
+                    hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8'))
+            nr_send = nr_send + 1
+        except BlockingIOError as e:
+            break
+        except OSError as e:
+            if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]:
+                break
+            raise
+    # Receive as much as we can without blocking
+    print("receiving...", nr_send, nr_recv)
+    while nr_recv < nr_send:
+        for fileno, eventmask in ep.poll():
+            receiver = fileno_to_socket[fileno]
+            if eventmask & select.EPOLLRDNORM:
+                while True:
+                    try:
+                        recv_data, address = receiver.recvfrom(1024)
+                        sender = addr_to_socket[address]
+                        recv_hashes.setdefault((sender.fileno(),
+                            receiver.fileno()), hashlib.sha256()).update(
+                                    f'<{recv_data}>'.encode('utf-8'))
+                        nr_recv = nr_recv + 1
+                    except BlockingIOError as e:
+                        break
+    # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
+    for net in [net0, net1]:
+        ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
+        ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
+print("done", nr_send, nr_recv)
+# the Python socket module doesn't know these
+RDS_INFO_FIRST = 10000
+RDS_INFO_LAST = 10017
+nr_success = 0
+nr_error = 0
+for s in sockets:
+    for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1):
+        # Sigh, the Python socket module doesn't allow us to pass
+        # buffer lengths greater than 1024 for some reason. RDS
+        # wants multiple pages.
+        try:
+            s.getsockopt(socket.SOL_RDS, optname, 1024)
+            nr_success = nr_success + 1
+        except OSError as e:
+            nr_error = nr_error + 1
+            if e.errno == errno.ENOSPC:
+                # ignore
+                pass
+print(f"getsockopt(): {nr_success}/{nr_error}")
+print("Stopping network packet captures")
+subprocess.check_call(['killall', '-q', 'tcpdump'])
+# We're done sending and receiving stuff, now let's check if what
+# we received is what we sent.
+for (sender, receiver), send_hash in send_hashes.items():
+    recv_hash = recv_hashes.get((sender, receiver))
+    if recv_hash is None:
+        print("FAIL: No data received")
+        sys.exit(1)
+    if send_hash.hexdigest() != recv_hash.hexdigest():
+        print("FAIL: Send/recv mismatch")
+        print("hash expected:", send_hash.hexdigest())
+        print("hash received:", recv_hash.hexdigest())
+        sys.exit(1)
+    print(f"{sender}/{receiver}: ok")
+print("Success")
+sys.exit(0)