Commit 4314175a authored by David S. Miller

Merge branch 'net-smc-IPPROTO_SMC'

D. Wythe says:

====================
Introduce IPPROTO_SMC

This patch allows creating SMC sockets via AF_INET and AF_INET6,
as in the following code:

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. SMC sockets actually use IPv4 (AF_INET) or IPv6 (AF_INET6)
addresses. There is no AF_SMC address at all.

2. Creating SMC sockets in the AF_INET(6) path allows us to reuse the
infrastructure of the AF_INET(6) path, such as the common eBPF hooks.
Otherwise, SMC would have to implement it again in the AF_SMC path.
Examples of what these hooks make possible:
  1. Replace IPPROTO_TCP with IPPROTO_SMC in the socket() syscall
     initiated by the user, without the use of LD_PRELOAD.
  2. Select whether immediate fallback is required based on the peer's
     port/IP before connect().

A very significant result is that we can now use eBPF to implement smc_run
instead of LD_PRELOAD, which is completely ineffective in scenarios of static
linking.

Another potential value is that we are attempting to optimize the
performance of fallback socks, where merging socks is an important part,
and it relies on the creation of SMC sockets under the AF_INET path.
(More information :
https://lore.kernel.org/netdev/1699442703-25015-1-git-send-email-alibuda@linux.alibaba.com/T/)

v2 -> v1:

- Code formatting, mainly including alignment and annotation repair.
- move inet_smc proto ops to inet_smc.c, avoiding af_smc.c becoming too bulky.
- Fix the issue where refactoring affects the initialization order.
- Fix compile warning (unused out_inet_prot) while CONFIG_IPV6 was not set.

v3 -> v2:

- Add Alibaba's copyright information to the new file

v4 -> v3:

- Fix some spelling errors
- Align function naming style with smc_sock_init() to smc_sk_init()
- Reversing the order of the conditional checks on clcsock to make the code more intuitive

v5 -> v4:

- Fix some spelling errors
- Added comment, "/* CONFIG_IPV6 */", after the final #endif directive.
- Rename inet_smc.h and inet_smc.c to smc_inet.h and smc_inet.c
- Encapsulate the initialization and destruction of inet_smc in inet_smc.c,
  rather than implementing it directly in af_smc.c.
- Remove useless header files in smc_inet.h
- Make smc_inet_prot_xxx and smc_inet_sock_init() to be static, since it's
  only used in smc_inet.c

v6 -> v5:

- Wrapping lines to not exceed 80 characters
- Combine initialization and error handling of smc_inet6 into the same #if
  macro block.

v7 -> v6:

- Modify the value of IPPROTO_SMC to 256 so that it does not affect IPPROTO_MAX

v8 -> v7:

- Remove useless declarations.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f22b4b55 d25a92cc
......@@ -81,6 +81,8 @@ enum {
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_SMC = 256, /* Shared Memory Communications */
#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
......
......@@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o
smc-y += smc_tracepoint.o smc_inet.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
smc-$(CONFIG_SMC_LO) += smc_loopback.o
This diff is collapsed.
......@@ -34,6 +34,44 @@
/* Protocol descriptors and hash tables defined outside this header. */
extern struct proto smc_proto;
extern struct proto smc_proto6;
extern struct smc_hashinfo smc_v4_hashinfo;
extern struct smc_hashinfo smc_v6_hashinfo;
/* struct proto callbacks; smc_inet.c plugs these into smc_inet_prot and
 * smc_inet6_prot as .hash, .unhash and .release_cb.
 */
int smc_hash_sk(struct sock *sk);
void smc_unhash_sk(struct sock *sk);
void smc_release_cb(struct sock *sk);
/* struct proto_ops handlers; smc_inet.c plugs these into
 * smc_inet_stream_ops and smc_inet6_stream_ops.
 */
int smc_release(struct socket *sock);
int smc_bind(struct socket *sock, struct sockaddr *uaddr,
int addr_len);
int smc_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags);
int smc_accept(struct socket *sock, struct socket *new_sock,
struct proto_accept_arg *arg);
int smc_getname(struct socket *sock, struct sockaddr *addr,
int peer);
__poll_t smc_poll(struct file *file, struct socket *sock,
poll_table *wait);
int smc_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg);
int smc_listen(struct socket *sock, int backlog);
int smc_shutdown(struct socket *sock, int how);
int smc_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen);
int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen);
int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
/* smc sock initialization; called first by smc_inet_init_sock() */
void smc_sk_init(struct net *net, struct sock *sk, int protocol);
/* clcsock initialization; its return value is what smc_inet_init_sock()
 * returns
 */
int smc_create_clcsk(struct net *net, struct sock *sk, int family);
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Definitions for the IPPROTO_SMC (socket related)
*
* Copyright IBM Corp. 2016, 2018
* Copyright (c) 2024, Alibaba Inc.
*
* Author: D. Wythe <alibuda@linux.alibaba.com>
*/
#include <net/protocol.h>
#include <net/sock.h>
#include "smc_inet.h"
#include "smc.h"
static int smc_inet_init_sock(struct sock *sk);
/* Protocol descriptor for IPPROTO_SMC sockets created through AF_INET.
 * Handed to proto_register() by smc_inet_init() and referenced from
 * smc_inet_protosw.
 */
static struct proto smc_inet_prot = {
	/* identity and object allocation */
	.name		= "INET_SMC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct smc_sock),
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,

	/* per-socket callbacks */
	.init		= smc_inet_init_sock,
	.release_cb	= smc_release_cb,

	/* hashing, backed by the IPv4 SMC hash table */
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.h.smc_hash	= &smc_v4_hashinfo,
};
static const struct proto_ops smc_inet_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = smc_release,
.bind = smc_bind,
.connect = smc_connect,
.socketpair = sock_no_socketpair,
.accept = smc_accept,
.getname = smc_getname,
.poll = smc_poll,
.ioctl = smc_ioctl,
.listen = smc_listen,
.shutdown = smc_shutdown,
.setsockopt = smc_setsockopt,
.getsockopt = smc_getsockopt,
.sendmsg = smc_sendmsg,
.recvmsg = smc_recvmsg,
.mmap = sock_no_mmap,
.splice_read = smc_splice_read,
};
/* Protocol switch entry tying (SOCK_STREAM, IPPROTO_SMC) to the IPv4 SMC
 * proto and proto_ops; registered with inet_register_protosw() in
 * smc_inet_init().
 */
static struct inet_protosw smc_inet_protosw = {
	/* lookup key */
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_SMC,

	/* what a matching socket() call gets */
	.ops		= &smc_inet_stream_ops,
	.prot		= &smc_inet_prot,
	.flags		= INET_PROTOSW_ICSK,
};
#if IS_ENABLED(CONFIG_IPV6)
static struct proto smc_inet6_prot = {
.name = "INET6_SMC",
.owner = THIS_MODULE,
.init = smc_inet_init_sock,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
.release_cb = smc_release_cb,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
};
static const struct proto_ops smc_inet6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = smc_release,
.bind = smc_bind,
.connect = smc_connect,
.socketpair = sock_no_socketpair,
.accept = smc_accept,
.getname = smc_getname,
.poll = smc_poll,
.ioctl = smc_ioctl,
.listen = smc_listen,
.shutdown = smc_shutdown,
.setsockopt = smc_setsockopt,
.getsockopt = smc_getsockopt,
.sendmsg = smc_sendmsg,
.recvmsg = smc_recvmsg,
.mmap = sock_no_mmap,
.splice_read = smc_splice_read,
};
/* Protocol switch entry tying (SOCK_STREAM, IPPROTO_SMC) to the IPv6 SMC
 * proto and proto_ops; registered with inet6_register_protosw() in
 * smc_inet_init().
 */
static struct inet_protosw smc_inet6_protosw = {
	/* lookup key */
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_SMC,

	/* what a matching socket() call gets */
	.ops		= &smc_inet6_stream_ops,
	.prot		= &smc_inet6_prot,
	.flags		= INET_PROTOSW_ICSK,
};
#endif /* CONFIG_IPV6 */
/* .init callback shared by smc_inet_prot and smc_inet6_prot.
 *
 * Runs the common SMC socket setup for @sk and then creates its clcsock
 * in the socket's own address family.
 *
 * Returns whatever smc_create_clcsk() returns.
 */
static int smc_inet_init_sock(struct sock *sk)
{
	struct net *netns = sock_net(sk);

	smc_sk_init(netns, sk, IPPROTO_SMC);

	return smc_create_clcsk(netns, sk, sk->sk_family);
}
/* Register IPPROTO_SMC with the inet layer: first the IPv4 proto and
 * protosw, then (only when CONFIG_IPV6 is enabled) the IPv6 pair.
 *
 * If an IPv6 step fails, everything registered so far is rolled back in
 * reverse order before the error is returned.
 *
 * Returns 0 on success, otherwise the code of the failing registration.
 */
int __init smc_inet_init(void)
{
	int err = proto_register(&smc_inet_prot, 1);

	if (err) {
		pr_err("%s: proto_register smc_inet_prot fails with %d\n",
		       __func__, err);
		return err;
	}

	/* inet_register_protosw() has no return value to check */
	inet_register_protosw(&smc_inet_protosw);

#if IS_ENABLED(CONFIG_IPV6)
	err = proto_register(&smc_inet6_prot, 1);
	if (err) {
		pr_err("%s: proto_register smc_inet6_prot fails with %d\n",
		       __func__, err);
		goto undo_inet4;
	}

	err = inet6_register_protosw(&smc_inet6_protosw);
	if (!err)
		return 0;

	pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n",
	       __func__, err);
	proto_unregister(&smc_inet6_prot);

undo_inet4:
	inet_unregister_protosw(&smc_inet_protosw);
	proto_unregister(&smc_inet_prot);
#endif /* CONFIG_IPV6 */

	return err;
}
/* Undo smc_inet_init(): drop the IPv6 registrations first (when
 * CONFIG_IPV6 is enabled), then the IPv4 ones. Within each family the
 * protosw entry is removed before its proto is unregistered, i.e. the
 * reverse of the registration order.
 */
void smc_inet_exit(void)
{
#if IS_ENABLED(CONFIG_IPV6)
inet6_unregister_protosw(&smc_inet6_protosw);
proto_unregister(&smc_inet6_prot);
#endif /* CONFIG_IPV6 */
inet_unregister_protosw(&smc_inet_protosw);
proto_unregister(&smc_inet_prot);
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Definitions for the IPPROTO_SMC (socket related)
* Copyright IBM Corp. 2016
* Copyright (c) 2024, Alibaba Inc.
*
* Author: D. Wythe <alibuda@linux.alibaba.com>
*/
#ifndef __INET_SMC
#define __INET_SMC
/* Register the IPPROTO_SMC protocol with the IPv4 inet layer and, when
 * CONFIG_IPV6 is enabled, with the IPv6 one as well.
 * @return 0 on success, otherwise the error code of the registration step
 *         that failed (earlier steps are rolled back)
 */
int smc_inet_init(void);
/* Remove every registration made by smc_inet_init(). */
void smc_inet_exit(void);
#endif /* __INET_SMC */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment