Commit c0cfa2d8 authored by Stefano Garzarella, committed by David S. Miller

vsock: add multi-transports support

This patch adds support for multiple transports in the VSOCK core.

With multi-transports support, vsock can be used with nested VMs
(even with different hypervisors), loading both the guest->host and
host->guest transports at the same time.

Major changes:
- vsock core module can be loaded regardless of the transports
- vsock_core_init() and vsock_core_exit() are renamed to
  vsock_core_register() and vsock_core_unregister()
- vsock_core_register() has a features parameter (H2G, G2H, DGRAM)
  to identify which directions the transport can handle and whether
  it supports DGRAM (only vmci)
- each stream socket is assigned to a transport when the remote CID
  is set (during connect() or when we receive a connection request
  on a listener socket).
  The remote CID is used to decide which transport to use (see the
  sketch after this list):
  - remote CID <= VMADDR_CID_HOST will use the guest->host transport;
  - remote CID == local_cid (the CID of the guest->host transport) will
    use the guest->host transport for loopback (host->guest transports
    don't support loopback);
  - remote CID > VMADDR_CID_HOST will use the host->guest transport;
- listener sockets are not bound to any transport, since no transport
  operations are done on them. This way we can create a listener
  socket even if no transport is loaded, or bind it to VMADDR_CID_ANY
  to listen on all transports.
- DGRAM sockets are handled as before, since only the vmci_transport
  provides this feature.
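
The CID rules above map to the following sketch. This is an illustration
only, not code from this patch: the actual selection is done by the new
vsock_assign_transport() in the vsock core, and the transport_g2h/
transport_h2g pointers are assumed placeholders for the transports
registered with VSOCK_TRANSPORT_F_G2H/VSOCK_TRANSPORT_F_H2G:

    /* Sketch (not the patch's code): pick a transport from the remote CID.
     * transport_g2h/transport_h2g are assumed to hold the transports
     * registered with VSOCK_TRANSPORT_F_G2H/VSOCK_TRANSPORT_F_H2G.
     */
    static const struct vsock_transport *
    select_transport(unsigned int remote_cid, unsigned int local_cid)
    {
            if (remote_cid <= VMADDR_CID_HOST)
                    return transport_g2h;  /* destination is the host */
            if (remote_cid == local_cid)
                    return transport_g2h;  /* loopback stays on G2H */
            return transport_h2g;          /* remote CID > VMADDR_CID_HOST */
    }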
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 03964257
drivers/vhost/vsock.c
@@ -831,7 +831,8 @@ static int __init vhost_vsock_init(void)
 {
         int ret;
 
-        ret = vsock_core_init(&vhost_transport.transport);
+        ret = vsock_core_register(&vhost_transport.transport,
+                                  VSOCK_TRANSPORT_F_H2G);
         if (ret < 0)
                 return ret;
         return misc_register(&vhost_vsock_misc);
@@ -840,7 +841,7 @@ static int __init vhost_vsock_init(void)
 static void __exit vhost_vsock_exit(void)
 {
         misc_deregister(&vhost_vsock_misc);
-        vsock_core_exit();
+        vsock_core_unregister(&vhost_transport.transport);
 };
 
 module_init(vhost_vsock_init);
......
include/net/af_vsock.h
@@ -91,6 +91,14 @@ struct vsock_transport_send_notify_data {
         u64 data2; /* Transport-defined. */
 };
 
+/* Transport features flags */
+/* Transport provides host->guest communication */
+#define VSOCK_TRANSPORT_F_H2G           0x00000001
+/* Transport provides guest->host communication */
+#define VSOCK_TRANSPORT_F_G2H           0x00000002
+/* Transport provides DGRAM communication */
+#define VSOCK_TRANSPORT_F_DGRAM         0x00000004
+
 struct vsock_transport {
         /* Initialize/tear-down socket. */
         int (*init)(struct vsock_sock *, struct vsock_sock *);
@@ -154,12 +162,8 @@ struct vsock_transport {
 
 /**** CORE ****/
 
-int __vsock_core_init(const struct vsock_transport *t, struct module *owner);
-static inline int vsock_core_init(const struct vsock_transport *t)
-{
-        return __vsock_core_init(t, THIS_MODULE);
-}
-void vsock_core_exit(void);
+int vsock_core_register(const struct vsock_transport *t, int features);
+void vsock_core_unregister(const struct vsock_transport *t);
 
 /* The transport may downcast this to access transport-specific functions */
 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);
@@ -190,6 +194,8 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
                                          struct sockaddr_vm *dst);
 void vsock_remove_sock(struct vsock_sock *vsk);
 void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
+bool vsock_find_cid(unsigned int cid);
 
 /**** TAP ****/
......
This diff is collapsed.
net/vmw_vsock/hyperv_transport.c
@@ -165,6 +165,8 @@ static const guid_t srv_id_template =
         GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
                   0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
 
+static bool hvs_check_transport(struct vsock_sock *vsk);
+
 static bool is_valid_srv_id(const guid_t *id)
 {
         return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
@@ -367,6 +369,18 @@ static void hvs_open_connection(struct vmbus_channel *chan)
                 new->sk_state = TCP_SYN_SENT;
                 vnew = vsock_sk(new);
                 hvs_addr_init(&vnew->local_addr, if_type);
+                hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
+
+                ret = vsock_assign_transport(vnew, vsock_sk(sk));
+
+                /* Transport assigned (looking at remote_addr) must be the
+                 * same where we received the request.
+                 */
+                if (ret || !hvs_check_transport(vnew)) {
+                        sock_put(new);
+                        goto out;
+                }
+
                 hvs_new = vnew->trans;
                 hvs_new->chan = chan;
         } else {
@@ -430,9 +444,6 @@ static void hvs_open_connection(struct vmbus_channel *chan)
         new->sk_state = TCP_ESTABLISHED;
         sk_acceptq_added(sk);
 
-        hvs_addr_init(&vnew->local_addr, if_type);
-        hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
-
         hvs_new->vm_srv_id = *if_type;
         hvs_new->host_srv_id = *if_instance;
@@ -880,6 +891,11 @@ static struct vsock_transport hvs_transport = {
 };
 
+static bool hvs_check_transport(struct vsock_sock *vsk)
+{
+        return vsk->transport == &hvs_transport;
+}
+
 static int hvs_probe(struct hv_device *hdev,
                      const struct hv_vmbus_device_id *dev_id)
 {
@@ -928,7 +944,7 @@ static int __init hvs_init(void)
         if (ret != 0)
                 return ret;
 
-        ret = vsock_core_init(&hvs_transport);
+        ret = vsock_core_register(&hvs_transport, VSOCK_TRANSPORT_F_G2H);
         if (ret) {
                 vmbus_driver_unregister(&hvs_drv);
                 return ret;
@@ -939,7 +955,7 @@ static int __init hvs_init(void)
 
 static void __exit hvs_exit(void)
 {
-        vsock_core_exit();
+        vsock_core_unregister(&hvs_transport);
         vmbus_driver_unregister(&hvs_drv);
 }
......
net/vmw_vsock/virtio_transport.c
@@ -770,7 +770,8 @@ static int __init virtio_vsock_init(void)
         if (!virtio_vsock_workqueue)
                 return -ENOMEM;
 
-        ret = vsock_core_init(&virtio_transport.transport);
+        ret = vsock_core_register(&virtio_transport.transport,
+                                  VSOCK_TRANSPORT_F_G2H);
         if (ret)
                 goto out_wq;
@@ -781,7 +782,7 @@ static int __init virtio_vsock_init(void)
         return 0;
 
 out_vci:
-        vsock_core_exit();
+        vsock_core_unregister(&virtio_transport.transport);
 out_wq:
         destroy_workqueue(virtio_vsock_workqueue);
         return ret;
@@ -790,7 +791,7 @@ static int __init virtio_vsock_init(void)
 static void __exit virtio_vsock_exit(void)
 {
         unregister_virtio_driver(&virtio_vsock_driver);
-        vsock_core_exit();
+        vsock_core_unregister(&virtio_transport.transport);
         destroy_workqueue(virtio_vsock_workqueue);
 }
......
net/vmw_vsock/virtio_transport_common.c
@@ -453,7 +453,7 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
         vsk->trans = vvs;
         vvs->vsk = vsk;
 
-        if (psk) {
+        if (psk && psk->trans) {
                 struct virtio_vsock_sock *ptrans = psk->trans;
 
                 vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
@@ -986,13 +986,39 @@ virtio_transport_send_response(struct vsock_sock *vsk,
         return virtio_transport_send_pkt_info(vsk, &info);
 }
 
+static bool virtio_transport_space_update(struct sock *sk,
+                                          struct virtio_vsock_pkt *pkt)
+{
+        struct vsock_sock *vsk = vsock_sk(sk);
+        struct virtio_vsock_sock *vvs = vsk->trans;
+        bool space_available;
+
+        /* Listener sockets are not associated with any transport, so we are
+         * not able to take the state to see if there is space available in the
+         * remote peer, but since they are only used to receive requests, we
+         * can assume that there is always space available in the other peer.
+         */
+        if (!vvs)
+                return true;
+
+        /* buf_alloc and fwd_cnt is always included in the hdr */
+        spin_lock_bh(&vvs->tx_lock);
+        vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
+        vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
+        space_available = virtio_transport_has_space(vsk);
+        spin_unlock_bh(&vvs->tx_lock);
+
+        return space_available;
+}
+
 /* Handle server socket */
 static int
-virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
+virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
+                             struct virtio_transport *t)
 {
         struct vsock_sock *vsk = vsock_sk(sk);
         struct vsock_sock *vchild;
         struct sock *child;
+        int ret;
 
         if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
                 virtio_transport_reset(vsk, pkt);
@@ -1022,6 +1048,20 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
         vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
                         le32_to_cpu(pkt->hdr.src_port));
 
+        ret = vsock_assign_transport(vchild, vsk);
+        /* Transport assigned (looking at remote_addr) must be the same
+         * where we received the request.
+         */
+        if (ret || vchild->transport != &t->transport) {
+                release_sock(child);
+                virtio_transport_reset(vsk, pkt);
+                sock_put(child);
+                return ret;
+        }
+
+        if (virtio_transport_space_update(child, pkt))
+                child->sk_write_space(child);
+
         vsock_insert_connected(vchild);
         vsock_enqueue_accept(sk, child);
         virtio_transport_send_response(vchild, pkt);
@@ -1032,22 +1072,6 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
         return 0;
 }
 
-static bool virtio_transport_space_update(struct sock *sk,
-                                          struct virtio_vsock_pkt *pkt)
-{
-        struct vsock_sock *vsk = vsock_sk(sk);
-        struct virtio_vsock_sock *vvs = vsk->trans;
-        bool space_available;
-
-        /* buf_alloc and fwd_cnt is always included in the hdr */
-        spin_lock_bh(&vvs->tx_lock);
-        vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
-        vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
-        space_available = virtio_transport_has_space(vsk);
-        spin_unlock_bh(&vvs->tx_lock);
-
-        return space_available;
-}
-
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
  * lock.
  */
@@ -1104,7 +1128,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 
         switch (sk->sk_state) {
         case TCP_LISTEN:
-                virtio_transport_recv_listen(sk, pkt);
+                virtio_transport_recv_listen(sk, pkt, t);
                 virtio_transport_free_pkt(pkt);
                 break;
         case TCP_SYN_SENT:
@@ -1122,6 +1146,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
                 virtio_transport_free_pkt(pkt);
                 break;
         }
+
         release_sock(sk);
 
         /* Release refcnt obtained when we fetched this socket out of the
......
net/vmw_vsock/vmci_transport.c
@@ -57,6 +57,7 @@ static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
 static u16 vmci_transport_new_proto_supported_versions(void);
 static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
                                                   bool old_pkt_proto);
+static bool vmci_check_transport(struct vsock_sock *vsk);
 
 struct vmci_transport_recv_pkt_info {
         struct work_struct work;
@@ -1017,6 +1018,16 @@ static int vmci_transport_recv_listen(struct sock *sk,
         vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
                         pkt->src_port);
 
+        err = vsock_assign_transport(vpending, vsock_sk(sk));
+        /* Transport assigned (looking at remote_addr) must be the same
+         * where we received the request.
+         */
+        if (err || !vmci_check_transport(vpending)) {
+                vmci_transport_send_reset(sk, pkt);
+                sock_put(pending);
+                return err;
+        }
+
         /* If the proposed size fits within our min/max, accept it. Otherwise
          * propose our own size.
          */
@@ -2008,7 +2019,7 @@ static u32 vmci_transport_get_local_cid(void)
         return vmci_get_context_id();
 }
 
-static const struct vsock_transport vmci_transport = {
+static struct vsock_transport vmci_transport = {
         .init = vmci_transport_socket_init,
         .destruct = vmci_transport_destruct,
         .release = vmci_transport_release,
@@ -2038,10 +2049,25 @@ static const struct vsock_transport vmci_transport = {
         .get_local_cid = vmci_transport_get_local_cid,
 };
 
+static bool vmci_check_transport(struct vsock_sock *vsk)
+{
+        return vsk->transport == &vmci_transport;
+}
+
 static int __init vmci_transport_init(void)
 {
+        int features = VSOCK_TRANSPORT_F_DGRAM | VSOCK_TRANSPORT_F_H2G;
+        int cid;
         int err;
 
+        cid = vmci_get_context_id();
+        if (cid == VMCI_INVALID_ID)
+                return -EINVAL;
+
+        if (cid != VMCI_HOST_CONTEXT_ID)
+                features |= VSOCK_TRANSPORT_F_G2H;
+
         /* Create the datagram handle that we will use to send and receive all
          * VSocket control messages for this context.
          */
@@ -2065,7 +2091,7 @@ static int __init vmci_transport_init(void)
                 goto err_destroy_stream_handle;
         }
 
-        err = vsock_core_init(&vmci_transport);
+        err = vsock_core_register(&vmci_transport, features);
         if (err < 0)
                 goto err_unsubscribe;
@@ -2096,7 +2122,7 @@ static void __exit vmci_transport_exit(void)
                 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
         }
 
-        vsock_core_exit();
+        vsock_core_unregister(&vmci_transport);
 }
 
 module_exit(vmci_transport_exit);
......