Commit d48ecb40 authored by David S. Miller's avatar David S. Miller

Merge branch 'add-TFO-backup-key'

Jason Baron says:

====================
add TFO backup key

Christoph, Igor, and I have worked on an API that facilitates TFO key
rotation. This is a follow up to the series that Christoph previously
posted, with an API that meets both of our use-cases. Here's a
link to the previous work:
https://patchwork.ozlabs.org/cover/1013753/

Changes in v2:
  -spelling fixes in ip-sysctl.txt (Jeremy Sowden)
  -re-base to latest net-next
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 5b5d331a 10fbcdd1
...@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER ...@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
0 to disable the blackhole detection. 0 to disable the blackhole detection.
By default, it is set to 1hr. By default, it is set to 1hr.
tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
The list consists of a primary key and an optional backup key. The
primary key is used for both creating and validating cookies, while the
optional backup key is only used for validating cookies. The purpose of
the backup key is to maximize TFO validation when keys are rotated.
A randomly chosen primary key may be configured by the kernel if
the tcp_fastopen sysctl is set to 0x400 (see above), or if the
TCP_FASTOPEN setsockopt() optname is set and a key has not been
previously configured via sysctl. If keys are configured via
setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
per-socket keys will be used instead of any keys that are specified via
sysctl.
A key is specified as 4 8-digit hexadecimal integers which are separated
by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
omitted. A primary and a backup key may be specified by separating them
by a comma. If only one key is specified, it becomes the primary key and
any previously configured backup keys are removed.
tcp_syn_retries - INTEGER tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 127. Default value will be retransmitted. Should not be higher than 127. Default value
......
...@@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); ...@@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net); void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
void *key, unsigned int len); void *primary_key, void *backup_key,
unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, struct request_sock *req,
...@@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, ...@@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie); struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err); bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
#define TCP_FASTOPEN_KEY_LENGTH 16 #define TCP_FASTOPEN_KEY_LENGTH 16
#define TCP_FASTOPEN_KEY_MAX 2
#define TCP_FASTOPEN_KEY_BUF_LENGTH \
(TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
/* Fastopen key context */ /* Fastopen key context */
struct tcp_fastopen_context { struct tcp_fastopen_context {
struct crypto_cipher *tfm; struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX];
__u8 key[TCP_FASTOPEN_KEY_LENGTH]; __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk); ...@@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Caller needs to wrap with rcu_read_(un)lock() */
static inline
struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
{
struct tcp_fastopen_context *ctx;
ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
if (!ctx)
ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
return ctx;
}
static inline
bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
const struct tcp_fastopen_cookie *orig)
{
if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
orig->len == foc->len &&
!memcmp(orig->val, foc->val, foc->len))
return true;
return false;
}
static inline
int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
{
if (ctx->tfm[1])
return 2;
return 1;
}
/* Latencies incurred by various limits for a sender. They are /* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive. * chronograph-like stats that are mutually exclusive.
*/ */
......
...@@ -283,6 +283,7 @@ enum ...@@ -283,6 +283,7 @@ enum
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */
__LINUX_MIB_MAX __LINUX_MIB_MAX
}; };
......
...@@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = { ...@@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };
......
...@@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, ...@@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret; return ret;
} }
static int sscanf_key(char *buf, __le32 *key)
{
u32 user_key[4];
int i, ret = 0;
if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
user_key + 2, user_key + 3) != 4) {
ret = -EINVAL;
} else {
for (i = 0; i < ARRAY_SIZE(user_key); i++)
key[i] = cpu_to_le32(user_key[i]);
}
pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);
return ret;
}
static int proc_tcp_fastopen_key(struct ctl_table *table, int write, static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, void __user *buffer, size_t *lenp,
loff_t *ppos) loff_t *ppos)
{ {
struct net *net = container_of(table->data, struct net, struct net *net = container_of(table->data, struct net,
ipv4.sysctl_tcp_fastopen); ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; /* maxlen to print the list of keys in hex (*2), with dashes
struct tcp_fastopen_context *ctxt; * separating doublewords and a comma in between keys.
u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ */
__le32 key[4]; struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
int ret, i; 2 * TCP_FASTOPEN_KEY_MAX) +
(TCP_FASTOPEN_KEY_MAX * 5)) };
struct tcp_fastopen_context *ctx;
u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
__le32 key[TCP_FASTOPEN_KEY_MAX * 4];
char *backup_data;
int ret, i = 0, off = 0, n_keys = 0;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data) if (!tbl.data)
return -ENOMEM; return -ENOMEM;
rcu_read_lock(); rcu_read_lock();
ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt) if (ctx) {
memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); n_keys = tcp_fastopen_context_len(ctx);
else memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
memset(key, 0, sizeof(key)); }
rcu_read_unlock(); rcu_read_unlock();
for (i = 0; i < ARRAY_SIZE(key); i++) if (!n_keys) {
memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
n_keys = 1;
}
for (i = 0; i < n_keys * 4; i++)
user_key[i] = le32_to_cpu(key[i]); user_key[i] = le32_to_cpu(key[i]);
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", for (i = 0; i < n_keys; i++) {
user_key[0], user_key[1], user_key[2], user_key[3]); off += snprintf(tbl.data + off, tbl.maxlen - off,
"%08x-%08x-%08x-%08x",
user_key[i * 4],
user_key[i * 4 + 1],
user_key[i * 4 + 2],
user_key[i * 4 + 3]);
if (i + 1 < n_keys)
off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
}
ret = proc_dostring(&tbl, write, buffer, lenp, ppos); ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0) { if (write && ret == 0) {
if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, backup_data = strchr(tbl.data, ',');
user_key + 2, user_key + 3) != 4) { if (backup_data) {
*backup_data = '\0';
backup_data++;
}
if (sscanf_key(tbl.data, key)) {
ret = -EINVAL; ret = -EINVAL;
goto bad_key; goto bad_key;
} }
if (backup_data) {
for (i = 0; i < ARRAY_SIZE(user_key); i++) if (sscanf_key(backup_data, key + 4)) {
key[i] = cpu_to_le32(user_key[i]); ret = -EINVAL;
goto bad_key;
}
}
tcp_fastopen_reset_cipher(net, NULL, key, tcp_fastopen_reset_cipher(net, NULL, key,
backup_data ? key + 4 : NULL,
TCP_FASTOPEN_KEY_LENGTH); TCP_FASTOPEN_KEY_LENGTH);
} }
bad_key: bad_key:
pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
user_key[0], user_key[1], user_key[2], user_key[3],
(char *)tbl.data, ret);
kfree(tbl.data); kfree(tbl.data);
return ret; return ret;
} }
...@@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = {
.procname = "tcp_fastopen_key", .procname = "tcp_fastopen_key",
.mode = 0600, .mode = 0600,
.data = &init_net.ipv4.sysctl_tcp_fastopen, .data = &init_net.ipv4.sysctl_tcp_fastopen,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), /* maxlen to print the list of keys in hex (*2), with dashes
* separating doublewords and a comma in between keys.
*/
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
2 * TCP_FASTOPEN_KEY_MAX) +
(TCP_FASTOPEN_KEY_MAX * 5)),
.proc_handler = proc_tcp_fastopen_key, .proc_handler = proc_tcp_fastopen_key,
}, },
{ {
......
...@@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level, ...@@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return err; return err;
} }
case TCP_FASTOPEN_KEY: { case TCP_FASTOPEN_KEY: {
__u8 key[TCP_FASTOPEN_KEY_LENGTH]; __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
__u8 *backup_key = NULL;
if (optlen != sizeof(key)) /* Allow a backup key as well to facilitate key rotation
* First key is the active one.
*/
if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
return -EINVAL; return -EINVAL;
if (copy_from_user(key, optval, optlen)) if (copy_from_user(key, optval, optlen))
return -EFAULT; return -EFAULT;
return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key)); if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
return tcp_fastopen_reset_cipher(net, sk, key, backup_key,
TCP_FASTOPEN_KEY_LENGTH);
} }
default: default:
/* fallthru */ /* fallthru */
...@@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level, ...@@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0; return 0;
case TCP_FASTOPEN_KEY: { case TCP_FASTOPEN_KEY: {
__u8 key[TCP_FASTOPEN_KEY_LENGTH]; __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct tcp_fastopen_context *ctx; struct tcp_fastopen_context *ctx;
unsigned int key_len = 0;
if (get_user(len, optlen)) if (get_user(len, optlen))
return -EFAULT; return -EFAULT;
rcu_read_lock(); rcu_read_lock();
ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
if (ctx) if (ctx) {
memcpy(key, ctx->key, sizeof(key)); key_len = tcp_fastopen_context_len(ctx) *
else TCP_FASTOPEN_KEY_LENGTH;
len = 0; memcpy(&key[0], &ctx->key[0], key_len);
}
rcu_read_unlock(); rcu_read_unlock();
len = min_t(unsigned int, len, sizeof(key)); len = min_t(unsigned int, len, key_len);
if (put_user(len, optlen)) if (put_user(len, optlen))
return -EFAULT; return -EFAULT;
if (copy_to_user(optval, key, len)) if (copy_to_user(optval, key, len))
......
...@@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net) ...@@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net)
* for a valid cookie, so this is an acceptable risk. * for a valid cookie, so this is an acceptable risk.
*/ */
get_random_bytes(key, sizeof(key)); get_random_bytes(key, sizeof(key));
tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key)); tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key));
} }
static void tcp_fastopen_ctx_free(struct rcu_head *head) static void tcp_fastopen_ctx_free(struct rcu_head *head)
{ {
struct tcp_fastopen_context *ctx = struct tcp_fastopen_context *ctx =
container_of(head, struct tcp_fastopen_context, rcu); container_of(head, struct tcp_fastopen_context, rcu);
crypto_free_cipher(ctx->tfm); int i;
/* We own ctx, thus no need to hold the Fastopen-lock */
for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) {
if (ctx->tfm[i])
crypto_free_cipher(ctx->tfm[i]);
}
kfree(ctx); kfree(ctx);
} }
...@@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net) ...@@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
} }
struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key,
void *backup_key,
unsigned int len)
{
struct tcp_fastopen_context *new_ctx;
void *key = primary_key;
int err, i;
new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
if (!new_ctx)
return ERR_PTR(-ENOMEM);
for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++)
new_ctx->tfm[i] = NULL;
for (i = 0; i < (backup_key ? 2 : 1); i++) {
new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(new_ctx->tfm[i])) {
err = PTR_ERR(new_ctx->tfm[i]);
new_ctx->tfm[i] = NULL;
pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
goto out;
}
err = crypto_cipher_setkey(new_ctx->tfm[i], key, len);
if (err) {
pr_err("TCP: TFO cipher key error: %d\n", err);
goto out;
}
memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len);
key = backup_key;
}
return new_ctx;
out:
tcp_fastopen_ctx_free(&new_ctx->rcu);
return ERR_PTR(err);
}
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
void *key, unsigned int len) void *primary_key, void *backup_key,
unsigned int len)
{ {
struct tcp_fastopen_context *ctx, *octx; struct tcp_fastopen_context *ctx, *octx;
struct fastopen_queue *q; struct fastopen_queue *q;
int err; int err = 0;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len);
if (!ctx) if (IS_ERR(ctx)) {
return -ENOMEM; err = PTR_ERR(ctx);
ctx->tfm = crypto_alloc_cipher("aes", 0, 0); goto out;
if (IS_ERR(ctx->tfm)) {
err = PTR_ERR(ctx->tfm);
error: kfree(ctx);
pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
return err;
} }
err = crypto_cipher_setkey(ctx->tfm, key, len);
if (err) {
pr_err("TCP: TFO cipher key error: %d\n", err);
crypto_free_cipher(ctx->tfm);
goto error;
}
memcpy(ctx->key, key, len);
spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) { if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq; q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
...@@ -108,28 +135,42 @@ error: kfree(ctx); ...@@ -108,28 +135,42 @@ error: kfree(ctx);
if (octx) if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free); call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
out:
return err; return err;
} }
static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
struct tcp_fastopen_cookie *foc) struct sk_buff *syn,
struct crypto_cipher *tfm,
struct tcp_fastopen_cookie *foc)
{ {
struct tcp_fastopen_context *ctx; if (req->rsk_ops->family == AF_INET) {
bool ok = false; const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
rcu_read_lock();
ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); crypto_cipher_encrypt_one(tfm, foc->val, (void *)path);
if (!ctx) foc->len = TCP_FASTOPEN_COOKIE_SIZE;
ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); return true;
}
if (ctx) { #if IS_ENABLED(CONFIG_IPV6)
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); if (req->rsk_ops->family == AF_INET6) {
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
struct in6_addr *buf;
int i;
crypto_cipher_encrypt_one(tfm, tmp.val,
(void *)&ip6h->saddr);
buf = &tmp.addr;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf);
foc->len = TCP_FASTOPEN_COOKIE_SIZE; foc->len = TCP_FASTOPEN_COOKIE_SIZE;
ok = true; return true;
} }
rcu_read_unlock(); #endif
return ok; return false;
} }
/* Generate the fastopen cookie by doing aes128 encryption on both /* Generate the fastopen cookie by doing aes128 encryption on both
...@@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, ...@@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
* *
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/ */
static bool tcp_fastopen_cookie_gen(struct sock *sk, static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req, struct request_sock *req,
struct sk_buff *syn, struct sk_buff *syn,
struct tcp_fastopen_cookie *foc) struct tcp_fastopen_cookie *foc)
{ {
if (req->rsk_ops->family == AF_INET) { struct tcp_fastopen_context *ctx;
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
return __tcp_fastopen_cookie_gen(sk, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
if (req->rsk_ops->family == AF_INET6) {
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++) rcu_read_lock();
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; ctx = tcp_fastopen_get_ctx(sk);
return __tcp_fastopen_cookie_gen(sk, buf, foc); if (ctx)
} __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc);
} rcu_read_unlock();
#endif
return false;
} }
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN, /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN. * queue this additional data / FIN.
*/ */
...@@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) ...@@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tcp_fin(sk); tcp_fin(sk);
} }
/* returns 0 - no key match, 1 for primary, 2 for backup */
static int tcp_fastopen_cookie_gen_check(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *orig,
struct tcp_fastopen_cookie *valid_foc)
{
struct tcp_fastopen_cookie search_foc = { .len = -1 };
struct tcp_fastopen_cookie *foc = valid_foc;
struct tcp_fastopen_context *ctx;
int i, ret = 0;
rcu_read_lock();
ctx = tcp_fastopen_get_ctx(sk);
if (!ctx)
goto out;
for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
__tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc);
if (tcp_fastopen_cookie_match(foc, orig)) {
ret = i + 1;
goto out;
}
foc = &search_foc;
}
out:
rcu_read_unlock();
return ret;
}
static struct sock *tcp_fastopen_create_child(struct sock *sk, static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct request_sock *req) struct request_sock *req)
...@@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, ...@@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child; struct sock *child;
int ret = 0;
if (foc->len == 0) /* Client requests a cookie */ if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
...@@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, ...@@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen; goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */ if (foc->len == 0) {
tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) && /* Client requests a cookie. */
foc->len == TCP_FASTOPEN_COOKIE_SIZE && tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
foc->len == valid_foc.len && } else if (foc->len > 0) {
!memcmp(foc->val, valid_foc.val, foc->len)) { ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
/* Cookie is valid. Create a (full) child socket to accept &valid_foc);
* the data in SYN before returning a SYN-ACK to ack the if (!ret) {
* data. If we fail to create the socket, fall back and NET_INC_STATS(sock_net(sk),
* ack the ISN only but includes the same cookie. LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
* } else {
* Note: Data-less SYN with valid cookie is allowed to send /* Cookie is valid. Create a (full) child socket to
* data in SYN_RECV state. * accept the data in SYN before returning a SYN-ACK to
*/ * ack the data. If we fail to create the socket, fall
* back and ack the ISN only but includes the same
* cookie.
*
* Note: Data-less SYN with valid cookie is allowed to
* send data in SYN_RECV state.
*/
fastopen: fastopen:
child = tcp_fastopen_create_child(sk, skb, req); child = tcp_fastopen_create_child(sk, skb, req);
if (child) { if (child) {
foc->len = -1; if (ret == 2) {
valid_foc.exp = foc->exp;
*foc = valid_foc;
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
} else {
foc->len = -1;
}
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENPASSIVE);
return child;
}
NET_INC_STATS(sock_net(sk), NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENPASSIVE); LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
return child;
} }
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); }
} else if (foc->len > 0) /* Client presents an invalid cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
valid_foc.exp = foc->exp; valid_foc.exp = foc->exp;
*foc = valid_foc; *foc = valid_foc;
return NULL; return NULL;
......
...@@ -20,3 +20,4 @@ ip_defrag ...@@ -20,3 +20,4 @@ ip_defrag
so_txtime so_txtime
flowlabel flowlabel
flowlabel_mgr flowlabel_mgr
tcp_fastopen_backup_key
...@@ -10,12 +10,14 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh ...@@ -10,12 +10,14 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh
TEST_PROGS_EXTENDED := in_netns.sh TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
......
// SPDX-License-Identifier: GPL-2.0
/*
* Test key rotation for TFO.
* New keys are 'rotated' in two steps:
* 1) Add new key as the 'backup' key 'behind' the primary key
* 2) Make new key the primary by swapping the backup and primary keys
*
* The rotation is done in stages using multiple sockets bound
* to the same port via SO_REUSEPORT. This simulates key rotation
* behind say a load balancer. We verify that across the rotation
* there are no cases in which a cookie is not accepted by verifying
* that TcpExtTCPFastOpenPassiveFail remains 0.
*/
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#include <time.h>
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
#endif
#define N_LISTEN 10
#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
#define KEY_LENGTH 16
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
static bool do_ipv6;
static bool do_sockopt;
static bool do_rotate;
static int key_len = KEY_LENGTH;
static int rcv_fds[N_LISTEN];
static int proc_fd;
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const int PORT = 8891;
static void get_keys(int fd, uint32_t *keys)
{
char buf[128];
int len = KEY_LENGTH * 2;
if (do_sockopt) {
if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
error(1, errno, "Unable to get key");
return;
}
lseek(proc_fd, 0, SEEK_SET);
if (read(proc_fd, buf, sizeof(buf)) <= 0)
error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
}
static void set_keys(int fd, uint32_t *keys)
{
char buf[128];
if (do_sockopt) {
if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
key_len))
error(1, errno, "Unable to set key");
return;
}
if (do_rotate)
snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
keys[6], keys[7]);
else
snprintf(buf, 128, "%08x-%08x-%08x-%08x",
keys[0], keys[1], keys[2], keys[3]);
lseek(proc_fd, 0, SEEK_SET);
if (write(proc_fd, buf, sizeof(buf)) <= 0)
error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
}
static void build_rcv_fd(int family, int proto, int *rcv_fds)
{
struct sockaddr_in addr4 = {0};
struct sockaddr_in6 addr6 = {0};
struct sockaddr *addr;
int opt = 1, i, sz;
int qlen = 100;
uint32_t keys[8];
switch (family) {
case AF_INET:
addr4.sin_family = family;
addr4.sin_addr.s_addr = htonl(INADDR_ANY);
addr4.sin_port = htons(PORT);
sz = sizeof(addr4);
addr = (struct sockaddr *)&addr4;
break;
case AF_INET6:
addr6.sin6_family = AF_INET6;
addr6.sin6_addr = in6addr_any;
addr6.sin6_port = htons(PORT);
sz = sizeof(addr6);
addr = (struct sockaddr *)&addr6;
break;
default:
error(1, 0, "Unsupported family %d", family);
/* clang does not recognize error() above as terminating
* the program, so it complains that saddr, sz are
* not initialized when this code path is taken. Silence it.
*/
return;
}
for (i = 0; i < ARRAY_SIZE(keys); i++)
keys[i] = rand();
for (i = 0; i < N_LISTEN; i++) {
rcv_fds[i] = socket(family, proto, 0);
if (rcv_fds[i] < 0)
error(1, errno, "failed to create receive socket");
if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
sizeof(opt)))
error(1, errno, "failed to set SO_REUSEPORT");
if (bind(rcv_fds[i], addr, sz))
error(1, errno, "failed to bind receive socket");
if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
sizeof(qlen)))
error(1, errno, "failed to set TCP_FASTOPEN");
set_keys(rcv_fds[i], keys);
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "failed to listen on receive port");
}
}
static int connect_and_send(int family, int proto)
{
struct sockaddr_in saddr4 = {0};
struct sockaddr_in daddr4 = {0};
struct sockaddr_in6 saddr6 = {0};
struct sockaddr_in6 daddr6 = {0};
struct sockaddr *saddr, *daddr;
int fd, sz, ret;
char data[1];
switch (family) {
case AF_INET:
saddr4.sin_family = AF_INET;
saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
saddr4.sin_port = 0;
daddr4.sin_family = AF_INET;
if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
error(1, errno, "inet_pton failed: %s", IP4_ADDR);
daddr4.sin_port = htons(PORT);
sz = sizeof(saddr4);
saddr = (struct sockaddr *)&saddr4;
daddr = (struct sockaddr *)&daddr4;
break;
case AF_INET6:
saddr6.sin6_family = AF_INET6;
saddr6.sin6_addr = in6addr_any;
daddr6.sin6_family = AF_INET6;
if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
error(1, errno, "inet_pton failed: %s", IP6_ADDR);
daddr6.sin6_port = htons(PORT);
sz = sizeof(saddr6);
saddr = (struct sockaddr *)&saddr6;
daddr = (struct sockaddr *)&daddr6;
break;
default:
error(1, 0, "Unsupported family %d", family);
/* clang does not recognize error() above as terminating
* the program, so it complains that saddr, daddr, sz are
* not initialized when this code path is taken. Silence it.
*/
return -1;
}
fd = socket(family, proto, 0);
if (fd < 0)
error(1, errno, "failed to create send socket");
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
data[0] = 'a';
ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
if (ret != 1)
error(1, errno, "failed to sendto");
return fd;
}
static bool is_listen_fd(int fd)
{
int i;
for (i = 0; i < N_LISTEN; i++) {
if (rcv_fds[i] == fd)
return true;
}
return false;
}
static int rotate_key(int fd)
{
static int iter;
static uint32_t new_key[4];
uint32_t keys[8];
uint32_t tmp_key[4];
int i;
int len = KEY_LENGTH * 2;
if (iter < N_LISTEN) {
/* first set new key as backups */
if (iter == 0) {
for (i = 0; i < ARRAY_SIZE(new_key); i++)
new_key[i] = rand();
}
get_keys(fd, keys);
memcpy(keys + 4, new_key, KEY_LENGTH);
set_keys(fd, keys);
} else {
/* swap the keys */
get_keys(fd, keys);
memcpy(tmp_key, keys + 4, KEY_LENGTH);
memcpy(keys + 4, keys, KEY_LENGTH);
memcpy(keys, tmp_key, KEY_LENGTH);
set_keys(fd, keys);
}
if (++iter >= (N_LISTEN * 2))
iter = 0;
}
static void run_one_test(int family)
{
struct epoll_event ev;
int i, send_fd;
int n_loops = 10000;
int rotate_key_fd = 0;
int key_rotate_interval = 50;
int fd, epfd;
char buf[1];
build_rcv_fd(family, SOCK_STREAM, rcv_fds);
epfd = epoll_create(1);
if (epfd < 0)
error(1, errno, "failed to create epoll");
ev.events = EPOLLIN;
for (i = 0; i < N_LISTEN; i++) {
ev.data.fd = rcv_fds[i];
if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
error(1, errno, "failed to register sock epoll");
}
while (n_loops--) {
send_fd = connect_and_send(family, SOCK_STREAM);
if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
rotate_key(rcv_fds[rotate_key_fd]);
if (++rotate_key_fd >= N_LISTEN)
rotate_key_fd = 0;
}
while (1) {
i = epoll_wait(epfd, &ev, 1, -1);
if (i < 0)
error(1, errno, "epoll_wait failed");
if (is_listen_fd(ev.data.fd)) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
ev.data.fd = fd;
if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
error(1, errno, "failed epoll add");
continue;
}
i = recv(ev.data.fd, buf, sizeof(buf), 0);
if (i != 1)
error(1, errno, "failed recv data");
if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
error(1, errno, "failed epoll del");
close(ev.data.fd);
break;
}
close(send_fd);
}
for (i = 0; i < N_LISTEN; i++)
close(rcv_fds[i]);
}
static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "46sr")) != -1) {
switch (c) {
case '4':
do_ipv6 = false;
break;
case '6':
do_ipv6 = true;
break;
case 's':
do_sockopt = true;
break;
case 'r':
do_rotate = true;
key_len = KEY_LENGTH * 2;
break;
default:
error(1, 0, "%s: parse error", argv[0]);
}
}
}
int main(int argc, char **argv)
{
parse_opts(argc, argv);
proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
if (proc_fd < 0)
error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
srand(time(NULL));
if (do_ipv6)
run_one_test(AF_INET6);
else
run_one_test(AF_INET);
close(proc_fd);
fprintf(stderr, "PASS\n");
return 0;
}
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# rotate TFO keys for ipv4/ipv6 and verify that the client does
# not present an invalid cookie.
set +x
set -e
readonly NETNS="ns-$(mktemp -u XXXXXX)"
setup() {
ip netns add "${NETNS}"
ip -netns "${NETNS}" link set lo up
ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
>/dev/null 2>&1
}
cleanup() {
ip netns del "${NETNS}"
}
trap cleanup EXIT
setup
do_test() {
# flush routes before each run, otherwise successive runs can
# initially present an old TFO cookie
ip netns exec "${NETNS}" ip tcp_metrics flush
ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
val=$(ip netns exec "${NETNS}" nstat -az | \
grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
if [ $val -ne 0 ]; then
echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
return 1
fi
}
do_test "-4"
do_test "-6"
do_test "-4"
do_test "-6"
do_test "-4s"
do_test "-6s"
do_test "-4s"
do_test "-6s"
do_test "-4r"
do_test "-6r"
do_test "-4r"
do_test "-6r"
do_test "-4sr"
do_test "-6sr"
do_test "-4sr"
do_test "-6sr"
echo "all tests done"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment