Commit 16e57262 authored by Eric Dumazet, committed by David S. Miller

af_unix: dont send SCM_CREDENTIALS by default

Since commit 7361c36c (af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.

This is because we now take a reference on pid and cred in each write(),
and release them in read(), which is usually done from another process,
possibly on another CPU. This triggers false sharing.

# Events: 154K cycles
#
# Overhead  Command       Shared Object        Symbol
# ........  .......  ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]   [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]   [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]   [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]   [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]   [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]   [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]   [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]   [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]   [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]   [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]   [k] __mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]   [k] sock_alloc_send_pskb

This patch includes SCM_CREDENTIALS information in an af_unix message/skb
only if requested by the sender (see man 7 unix for details on how to
include ancillary data using the sendmsg() system call).

Note: This might break buggy applications that expected SCM_CREDENTIALS
from a plain write() system call while the receiver does not set the
SO_PASSCRED socket option.

If SOCK_PASSCRED is set on source or destination socket, we still
include credentials for mere write() syscalls.

Performance boost in hackbench: more than 50% gain on a 16-thread
machine (2 quad-core CPUs, 2 threads per core)

hackbench 20 thread 2000

4.228 sec instead of 9.102 sec
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a9e9fd71
...@@ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, ...@@ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm,
struct pid *pid, const struct cred *cred) struct pid *pid, const struct cred *cred)
{ {
scm->pid = get_pid(pid); scm->pid = get_pid(pid);
scm->cred = get_cred(cred); scm->cred = cred ? get_cred(cred) : NULL;
cred_to_ucred(pid, cred, &scm->creds); cred_to_ucred(pid, cred, &scm->creds);
} }
...@@ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm) ...@@ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
struct scm_cookie *scm) struct scm_cookie *scm)
{ {
scm_set_cred(scm, task_tgid(current), current_cred()); memset(scm, 0, sizeof(*scm));
scm->fp = NULL;
unix_get_peersec_dgram(sock, scm); unix_get_peersec_dgram(sock, scm);
if (msg->msg_controllen <= 0) if (msg->msg_controllen <= 0)
return 0; return 0;
......
...@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) ...@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
if (err) if (err)
goto error; goto error;
if (pid_vnr(p->pid) != p->creds.pid) { if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
struct pid *pid; struct pid *pid;
err = -ESRCH; err = -ESRCH;
pid = find_get_pid(p->creds.pid); pid = find_get_pid(p->creds.pid);
...@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) ...@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
p->pid = pid; p->pid = pid;
} }
if ((p->cred->euid != p->creds.uid) || if (!p->cred ||
(p->cred->egid != p->creds.gid)) { (p->cred->euid != p->creds.uid) ||
(p->cred->egid != p->creds.gid)) {
struct cred *cred; struct cred *cred;
err = -ENOMEM; err = -ENOMEM;
cred = prepare_creds(); cred = prepare_creds();
...@@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) ...@@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
cred->uid = cred->euid = p->creds.uid; cred->uid = cred->euid = p->creds.uid;
cred->gid = cred->egid = p->creds.gid; cred->gid = cred->egid = p->creds.gid;
put_cred(p->cred); if (p->cred)
put_cred(p->cred);
p->cred = cred; p->cred = cred;
} }
break; break;
......
...@@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, ...@@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (msg->msg_flags&MSG_OOB) if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (NULL == siocb->scm) { if (NULL == siocb->scm)
siocb->scm = &scm; siocb->scm = &scm;
memset(&scm, 0, sizeof(scm));
}
err = scm_send(sock, msg, siocb->scm); err = scm_send(sock, msg, siocb->scm);
if (err < 0) if (err < 0)
return err; return err;
......
...@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) ...@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{ {
int err = 0; int err = 0;
UNIXCB(skb).pid = get_pid(scm->pid); UNIXCB(skb).pid = get_pid(scm->pid);
UNIXCB(skb).cred = get_cred(scm->cred); if (scm->cred)
UNIXCB(skb).cred = get_cred(scm->cred);
UNIXCB(skb).fp = NULL; UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds) if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb); err = unix_attach_fds(scm, skb);
...@@ -1391,6 +1393,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen ...@@ -1391,6 +1393,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
return err; return err;
} }
/*
 * Attach the sending task's credentials to an skb that carries none.
 *
 * Some applications rely on a plain write() delivering SCM_CREDENTIALS,
 * so credentials are still added when either the sending socket or the
 * receiving socket has SOCK_PASSCRED set, or when the receiver has no
 * sk_socket attached.
 *
 * @skb:   buffer about to be queued; left untouched if UNIXCB(skb).cred
 *         is already set
 * @sock:  socket the message is being sent from
 * @other: receiving sock
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	/* Credentials were already supplied for this skb. */
	if (UNIXCB(skb).cred)
		return;

	/*
	 * Neither end asked for credentials and the receiver does have a
	 * socket attached: leave the skb without pid/cred.
	 */
	if (!test_bit(SOCK_PASSCRED, &sock->flags) &&
	    other->sk_socket &&
	    !test_bit(SOCK_PASSCRED, &other->sk_socket->flags))
		return;

	/* Record the current task's tgid pid and cred in the skb. */
	UNIXCB(skb).pid = get_pid(task_tgid(current));
	UNIXCB(skb).cred = get_current_cred();
}
/* /*
* Send AF_UNIX data. * Send AF_UNIX data.
*/ */
...@@ -1538,6 +1558,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, ...@@ -1538,6 +1558,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (sock_flag(other, SOCK_RCVTSTAMP)) if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb); __net_timestamp(skb);
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb); skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level) if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level; unix_sk(other)->recursion_level = max_level;
...@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, ...@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
(other->sk_shutdown & RCV_SHUTDOWN)) (other->sk_shutdown & RCV_SHUTDOWN))
goto pipe_err_free; goto pipe_err_free;
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb); skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level) if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level; unix_sk(other)->recursion_level = max_level;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment