Commit e1a8fde7 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block

Pull io_uring 'more data in socket' support from Jens Axboe:
 "To be able to fully utilize the 'poll first' support in the core
  io_uring branch, it's advantageous knowing if the socket was empty
  after a receive. This adds support for that"

* tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block:
  io_uring: return hint on whether more data is available after receive
  tcp: pass back data left in socket after receive
parents 368da430 f548a12e
......@@ -6115,6 +6115,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
struct io_async_msghdr iomsg, *kmsg;
struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
......@@ -6154,6 +6155,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
kmsg->msg.msg_get_inq = 1;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
......@@ -6178,7 +6180,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
cflags = io_put_kbuf(req, issue_flags);
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
__io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
......@@ -6188,6 +6193,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
struct msghdr msg;
struct socket *sock;
struct iovec iov;
unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
......@@ -6214,11 +6220,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
goto out_free;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_get_inq = 1;
msg.msg_flags = 0;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
msg.msg_iocb = NULL;
msg.msg_flags = 0;
flags = sr->msg_flags;
if (force_nonblock)
......@@ -6249,7 +6256,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
cflags = io_put_kbuf(req, issue_flags);
if (msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
__io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
......
......@@ -50,6 +50,9 @@ struct linger {
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
int msg_inq; /* output, data left in socket */
struct iov_iter msg_iter; /* data */
/*
......@@ -62,8 +65,9 @@ struct msghdr {
void __user *msg_control_user;
};
bool msg_control_is_user : 1;
__kernel_size_t msg_controllen; /* ancillary data buffer length */
bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
......
......@@ -258,9 +258,11 @@ struct io_uring_cqe {
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
......
......@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
if (tp->recvmsg_inq)
if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ;
msg->msg_get_inq = 1;
}
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
......@@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len)
{
int cmsg_flags = 0, ret, inq;
int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE))
......@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
release_sock(sk);
sk_defer_free_flush(sk);
if (cmsg_flags && ret >= 0) {
if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
if (cmsg_flags & TCP_CMSG_INQ) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
if (msg->msg_get_inq) {
msg->msg_inq = tcp_inq_hint(sk);
if (cmsg_flags & TCP_CMSG_INQ)
put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
sizeof(msg->msg_inq), &msg->msg_inq);
}
}
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment