// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespace are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         trashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
 */

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>
#include <linux/nospec.h>
#include <linux/rhashtable.h>

#include <linux/uaccess.h>
#include "util.h"

94 95 96 97 98 99 100 101 102 103
/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
104
	struct pid *sempid;
105 106 107 108 109
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore*/
110
	time64_t	 sem_otime;	/* candidate for sem_otime */
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	struct list_head	pending_alter;	/* pending operations */
						/* that alter the array */
	struct list_head	pending_const;	/* pending complex operations */
						/* that do not alter semvals */
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	int			complex_count;	/* pending complex operations */
	unsigned int		use_global_lock;/* >0: global lock required */

	/* Flexible array member: sem_nsems entries, allocated together
	 * with the struct itself (see sem_alloc()). Must remain last. */
	struct sem		sems[];
} __randomize_layout;
128 129 130 131 132 133

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
134
	struct pid		*pid;	 /* process id of requesting process */
135 136
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
137
	struct sembuf		*blocking; /* the operation that blocked */
138
	int			nsops;	 /* number of operations */
139 140
	bool			alter;	 /* does *sops alter the array? */
	bool                    dupsop;	 /* sops on more than one sem_num */
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 * (see the SEM_UNDO handling in perform_atomic_semop() and exit_sem)
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
163
	refcount_t		refcnt;
164 165 166 167 168
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

224
void sem_init_ns(struct ipc_namespace *ns)
Kirill Korotaev's avatar
Kirill Korotaev committed
225 226 227 228 229 230
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
231
	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
Kirill Korotaev's avatar
Kirill Korotaev committed
232 233
}

#ifdef CONFIG_IPC_NS
/* Tear down all semaphore sets of a dying ipc namespace. */
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif
Linus Torvalds's avatar
Linus Torvalds committed
242

243
void __init sem_init(void)
Linus Torvalds's avatar
Linus Torvalds committed
244
{
245
	sem_init_ns(&init_ipc_ns);
246 247
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
Kirill Korotaev's avatar
Kirill Korotaev committed
248
				IPC_SEM_IDS, sysvipc_sem_proc_show);
Linus Torvalds's avatar
Linus Torvalds committed
249 250
}

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operation back into the per-semaphore
	 * queues.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
272
		curr = &sma->sems[q->sops[0].sem_num];
273 274 275 276 277 278 279

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
280
 * merge_queues - merge single semop queues into global queue
281 282 283 284 285 286 287 288 289 290 291
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
292
		struct sem *sem = &sma->sems[i];
293 294 295 296 297

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

Davidlohr Bueso's avatar
Davidlohr Bueso committed
298 299
static void sem_rcu_free(struct rcu_head *head)
{
300 301
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
Davidlohr Bueso's avatar
Davidlohr Bueso committed
302

303
	security_sem_free(&sma->sem_perm);
Kees Cook's avatar
Kees Cook committed
304
	kvfree(sma);
Davidlohr Bueso's avatar
Davidlohr Bueso committed
305 306
}

307
/*
308
 * Enter the mode suitable for non-simple operations:
309 310
 * Caller must own sem_perm.lock.
 */
311
static void complexmode_enter(struct sem_array *sma)
312 313 314 315
{
	int i;
	struct sem *sem;

Manfred Spraul's avatar
Manfred Spraul committed
316 317 318 319 320 321 322
	if (sma->use_global_lock > 0)  {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
323 324
		return;
	}
Manfred Spraul's avatar
Manfred Spraul committed
325
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
326

327
	for (i = 0; i < sma->sem_nsems; i++) {
328
		sem = &sma->sems[i];
329 330
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
331
	}
332 333 334 335 336 337 338 339 340 341 342 343 344 345
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count)  {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {
		/*
		 * Immediately after setting use_global_lock to 0,
		 * a simple op can start. Thus: all memory writes
		 * performed by the current operation must be visible
		 * before we set use_global_lock to 0.
		 */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		sma->use_global_lock--;
	}
}

359
#define SEM_GLOBAL_LOCK	(-1)
360 361 362 363 364 365 366 367 368 369
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
370
	struct sem *sem;
371
	int idx;
372

373 374 375
	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);
376

377 378 379
		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
380 381 382 383
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
384 385 386
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
Manfred Spraul's avatar
Manfred Spraul committed
387
	 * Both facts are tracked by use_global_mode.
388
	 */
389 390
	idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	sem = &sma->sems[idx];
391

392
	/*
Manfred Spraul's avatar
Manfred Spraul committed
393
	 * Initial check for use_global_lock. Just an optimization,
394 395
	 * no locking, no memory barrier.
	 */
Manfred Spraul's avatar
Manfred Spraul committed
396
	if (!sma->use_global_lock) {
397
		/*
398 399
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
400
		 */
401 402
		spin_lock(&sem->lock);

Manfred Spraul's avatar
Manfred Spraul committed
403 404
		/* pairs with smp_store_release() */
		if (!smp_load_acquire(&sma->use_global_lock)) {
405 406
			/* fast path successful! */
			return sops->sem_num;
407
		}
408 409 410 411 412
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);
413

Manfred Spraul's avatar
Manfred Spraul committed
414 415 416 417 418 419 420 421 422
	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
423 424
		 */
		spin_lock(&sem->lock);
Manfred Spraul's avatar
Manfred Spraul committed
425

426 427
		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
428
	} else {
Manfred Spraul's avatar
Manfred Spraul committed
429 430 431 432
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_mode to non-zero.
433
		 */
434
		return SEM_GLOBAL_LOCK;
435 436 437 438 439
	}
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
440
	if (locknum == SEM_GLOBAL_LOCK) {
441
		unmerge_queues(sma);
442
		complexmode_tryleave(sma);
443
		ipc_unlock_object(&sma->sem_perm);
444
	} else {
445
		struct sem *sem = &sma->sems[locknum];
446 447 448 449
		spin_unlock(&sem->lock);
	}
}

Nadia Derbey's avatar
Nadia Derbey committed
450
/*
Davidlohr Bueso's avatar
Davidlohr Bueso committed
451
 * sem_lock_(check_) routines are called in the paths where the rwsem
Nadia Derbey's avatar
Nadia Derbey committed
452
 * is not held.
453 454
 *
 * The caller holds the RCU read lock.
Nadia Derbey's avatar
Nadia Derbey committed
455
 */
456 457
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
458
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
459 460 461 462 463 464 465 466 467 468 469 470 471 472

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);
473

Nadia Derbey's avatar
Nadia Derbey committed
474
	return container_of(ipcp, struct sem_array, sem_perm);
475 476
}

477 478
static inline void sem_lock_and_putref(struct sem_array *sma)
{
479
	sem_lock(sma, NULL, -1);
480
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
481 482
}

Nadia Derbey's avatar
Nadia Derbey committed
483 484 485 486 487
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

Kees Cook's avatar
Kees Cook committed
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
static struct sem_array *sem_alloc(size_t nsems)
{
	struct sem_array *sma;
	size_t size;

	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
		return NULL;

	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
	sma = kvmalloc(size, GFP_KERNEL);
	if (unlikely(!sma))
		return NULL;

	memset(sma, 0, size);

	return sma;
}

Nadia Derbey's avatar
Nadia Derbey committed
506 507 508 509 510
/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
Davidlohr Bueso's avatar
Davidlohr Bueso committed
511
 * Called with sem_ids.rwsem held (as a writer)
Nadia Derbey's avatar
Nadia Derbey committed
512
 */
Nadia Derbey's avatar
Nadia Derbey committed
513
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
Linus Torvalds's avatar
Linus Torvalds committed
514 515 516
{
	int retval;
	struct sem_array *sma;
Nadia Derbey's avatar
Nadia Derbey committed
517 518 519
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
520
	int i;
Linus Torvalds's avatar
Linus Torvalds committed
521 522 523

	if (!nsems)
		return -EINVAL;
Kirill Korotaev's avatar
Kirill Korotaev committed
524
	if (ns->used_sems + nsems > ns->sc_semmns)
Linus Torvalds's avatar
Linus Torvalds committed
525 526
		return -ENOSPC;

Kees Cook's avatar
Kees Cook committed
527
	sma = sem_alloc(nsems);
528
	if (!sma)
Linus Torvalds's avatar
Linus Torvalds committed
529
		return -ENOMEM;
530

Linus Torvalds's avatar
Linus Torvalds committed
531 532 533 534
	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
535
	retval = security_sem_alloc(&sma->sem_perm);
Linus Torvalds's avatar
Linus Torvalds committed
536
	if (retval) {
Kees Cook's avatar
Kees Cook committed
537
		kvfree(sma);
Linus Torvalds's avatar
Linus Torvalds committed
538 539 540
		return retval;
	}

541
	for (i = 0; i < nsems; i++) {
542 543 544
		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
		INIT_LIST_HEAD(&sma->sems[i].pending_const);
		spin_lock_init(&sma->sems[i].lock);
545
	}
546 547

	sma->complex_count = 0;
Manfred Spraul's avatar
Manfred Spraul committed
548
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
549 550
	INIT_LIST_HEAD(&sma->pending_alter);
	INIT_LIST_HEAD(&sma->pending_const);
551
	INIT_LIST_HEAD(&sma->list_id);
Linus Torvalds's avatar
Linus Torvalds committed
552
	sma->sem_nsems = nsems;
553
	sma->sem_ctime = ktime_get_real_seconds();
554

555
	/* ipc_addid() locks sma upon success. */
556 557
	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (retval < 0) {
558
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
559
		return retval;
560 561 562
	}
	ns->used_sems += nsems;

563
	sem_unlock(sma, -1);
564
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
565

Nadia Derbey's avatar
Nadia Derbey committed
566
	return sma->sem_perm.id;
Linus Torvalds's avatar
Linus Torvalds committed
567 568
}

Nadia Derbey's avatar
Nadia Derbey committed
569

Nadia Derbey's avatar
Nadia Derbey committed
570
/*
Davidlohr Bueso's avatar
Davidlohr Bueso committed
571
 * Called with sem_ids.rwsem and ipcp locked.
Nadia Derbey's avatar
Nadia Derbey committed
572
 */
Nadia Derbey's avatar
Nadia Derbey committed
573 574
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
Nadia Derbey's avatar
Nadia Derbey committed
575
{
Nadia Derbey's avatar
Nadia Derbey committed
576 577 578 579
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
Nadia Derbey's avatar
Nadia Derbey committed
580 581 582 583 584
		return -EINVAL;

	return 0;
}

585
long ksys_semget(key_t key, int nsems, int semflg)
Linus Torvalds's avatar
Linus Torvalds committed
586
{
Kirill Korotaev's avatar
Kirill Korotaev committed
587
	struct ipc_namespace *ns;
Mathias Krause's avatar
Mathias Krause committed
588 589
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
590
		.associate = security_sem_associate,
Mathias Krause's avatar
Mathias Krause committed
591 592
		.more_checks = sem_more_checks,
	};
Nadia Derbey's avatar
Nadia Derbey committed
593
	struct ipc_params sem_params;
Kirill Korotaev's avatar
Kirill Korotaev committed
594 595

	ns = current->nsproxy->ipc_ns;
Linus Torvalds's avatar
Linus Torvalds committed
596

Kirill Korotaev's avatar
Kirill Korotaev committed
597
	if (nsems < 0 || nsems > ns->sc_semmsl)
Linus Torvalds's avatar
Linus Torvalds committed
598
		return -EINVAL;
Nadia Derbey's avatar
Nadia Derbey committed
599

Nadia Derbey's avatar
Nadia Derbey committed
600 601 602
	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;
Linus Torvalds's avatar
Linus Torvalds committed
603

Nadia Derbey's avatar
Nadia Derbey committed
604
	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
Linus Torvalds's avatar
Linus Torvalds committed
605 606
}

607 608 609 610 611
SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	return ksys_semget(key, nsems, semflg);
}

612
/**
613 614
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
615
 * @sma: semaphore array
616
 * @q: struct sem_queue that describes the operation
617
 *
618 619 620 621 622 623 624
 * Caller blocking are as follows, based the value
 * indicated by the semaphore operation (sem_op):
 *
 *  (1) >0 never blocks.
 *  (2)  0 (wait-for-zero operation): semval is non-zero.
 *  (3) <0 attempting to decrement semval to a value smaller than zero.
 *
625 626
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
627
 * Returns <0 for error codes.
Linus Torvalds's avatar
Linus Torvalds committed
628
 */
629
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
630
{
631 632
	int result, sem_op, nsops;
	struct pid *pid;
Linus Torvalds's avatar
Linus Torvalds committed
633
	struct sembuf *sop;
Manfred Spraul's avatar
Manfred Spraul committed
634
	struct sem *curr;
635 636 637 638 639 640
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;
Linus Torvalds's avatar
Linus Torvalds committed
641 642

	for (sop = sops; sop < sops + nsops; sop++) {
643 644
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];
Linus Torvalds's avatar
Linus Torvalds committed
645 646
		sem_op = sop->sem_op;
		result = curr->semval;
647

Linus Torvalds's avatar
Linus Torvalds committed
648 649 650 651 652 653 654 655
		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
656

Linus Torvalds's avatar
Linus Torvalds committed
657 658
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
659
			/* Exceeding the undo range is an error. */
Linus Torvalds's avatar
Linus Torvalds committed
660 661
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
662
			un->semadj[sop->sem_num] = undo;
Linus Torvalds's avatar
Linus Torvalds committed
663
		}
664

Linus Torvalds's avatar
Linus Torvalds committed
665 666 667 668
		curr->semval = result;
	}

	sop--;
669
	pid = q->pid;
Linus Torvalds's avatar
Linus Torvalds committed
670
	while (sop >= sops) {
671
		ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
Linus Torvalds's avatar
Linus Torvalds committed
672 673
		sop--;
	}
674

Linus Torvalds's avatar
Linus Torvalds committed
675 676 677 678 679 680 681
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
682 683
	q->blocking = sop;

Linus Torvalds's avatar
Linus Torvalds committed
684 685 686 687 688 689 690 691
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
692
		sem_op = sop->sem_op;
693
		sma->sems[sop->sem_num].semval -= sem_op;
694 695
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sem_op;
Linus Torvalds's avatar
Linus Torvalds committed
696 697 698 699 700 701
		sop--;
	}

	return result;
}

702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	if (unlikely(q->dupsop))
		return perform_atomic_semop_slow(sma, q);

	/*
	 * We scan the semaphore set twice, first to ensure that the entire
	 * operation can succeed, therefore avoiding any pointless writes
	 * to shared memory and having to undo such changes in order to block
	 * until the operations can go through.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
724 725 726
		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);

		curr = &sma->sems[idx];
727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block; /* wait-for-zero */

		result += sem_op;
		if (result < 0)
			goto would_block;

		if (result > SEMVMX)
			return -ERANGE;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				return -ERANGE;
		}
	}

	for (sop = sops; sop < sops + nsops; sop++) {
750
		curr = &sma->sems[sop->sem_num];
751 752 753 754 755 756 757 758 759
		sem_op = sop->sem_op;
		result = curr->semval;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			un->semadj[sop->sem_num] = undo;
		}
		curr->semval += sem_op;
760
		ipc_update_pid(&curr->sempid, q->pid);
761 762 763 764 765 766 767 768 769
	}

	return 0;

would_block:
	q->blocking = sop;
	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}

770 771
static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
					     struct wake_q_head *wake_q)
772
{
773 774 775 776 777 778 779 780 781
	wake_q_add(wake_q, q->sleeper);
	/*
	 * Rely on the above implicit barrier, such that we can
	 * ensure that we hold reference to the task before setting
	 * q->status. Otherwise we could race with do_exit if the
	 * task is awoken by an external event before calling
	 * wake_up_process().
	 */
	WRITE_ONCE(q->status, error);
Nick Piggin's avatar
Nick Piggin committed
782 783
}

784 785 786
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
787
	if (q->nsops > 1)
788 789 790
		sma->complex_count--;
}

791 792 793 794 795 796 797
/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
798 799
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
800
 */
801
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
802
{
803 804
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
805 806 807 808 809 810
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

811 812 813 814 815 816 817 818 819 820 821 822 823
	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled seperately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented to value - thus they won't proceed either.
	 */
	return 0;
}
824

825
/**
826
 * wake_const_ops - wake up non-alter tasks
827 828
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
829
 * @wake_q: lockless wake-queue head.
830 831 832 833 834
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
835
 * The tasks that must be woken up are added to @wake_q. The return code
836 837 838 839
 * is stored in q->pid.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
840
			  struct wake_q_head *wake_q)
841
{
842
	struct sem_queue *q, *tmp;
843 844 845 846 847 848
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
849
		pending_list = &sma->sems[semnum].pending_const;
850

851 852
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);
853

854 855 856 857
		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);
858

859 860 861
		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
862
	}
863

864 865 866 867
	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		/* only the semaphores touched by @sops can have reached 0 */
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}

917 918

/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->pid.
 * The function internally checks if const operations can now succeed.
 *
 * The function return 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	/* semnum == -1 selects the global queue of complex operations */
	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the  per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		/* operation is done (completed or failed) - dequeue it */
		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			/* q altered the array: wait-for-zero ops may succeed */
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		/* rescan from the start if q's completion may unblock others */
		if (restart)
			goto again;
	}
	return semop_completed;
}

982
/**
983
 * set_semotime - set sem_otime
984 985 986 987 988 989 990 991 992
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line trashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
993
		sma->sems[0].sem_otime = ktime_get_real_seconds();
994
	} else {
995
		sma->sems[sops[0].sem_num].sem_otime =
996
						ktime_get_real_seconds();
997 998 999
	}
}

1000
/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	/* wait-for-zero queues first; also forces otime if anything completed */
	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrease.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

1054
/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 * @sma: semaphore array
 * @semnum: semaphore number of interest
 * @q: queued operation to test
 * @count_zero: true when counting wait-for-zero sleepers (semzcnt),
 *              false when counting decrement sleepers (semncnt)
 *
 * Returns 1 if @q's blocking sop sleeps on @semnum in the requested way.
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	/* q->blocking is the first sop that could not proceed */
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
1084 1085 1086
/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operation would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	/* complex wait-for-zero ops only matter for semzcnt */
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

Davidlohr Bueso's avatar
Davidlohr Bueso committed
1124 1125
/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	int i;
	DEFINE_WAKE_Q(wake_q);

	/* Free the existing undo structures for this semaphore set.  */
	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
		/* mark the undo entry dead before unhooking it from the task */
		un->semid = -1;
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
		kfree_rcu(un, rcu);
	}

	/* Wake up all pending processes and let them fail with EIDRM. */
	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}

	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	}
	/* same for the per-semaphore queues */
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];
		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
		}
		ipc_update_pid(&sem->sempid, NULL);
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma, -1);
	rcu_read_unlock();

	/* wake the EIDRM sleepers only after all locks are dropped */
	wake_up_q(&wake_q);
	ns->used_sems -= sma->sem_nsems;
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}

/*
 * copy_semid_to_user - copy a semid64_ds to userspace in the requested ABI
 * @buf: destination user buffer
 * @in: kernel-internal semid64_ds
 * @version: IPC_64 for the native layout, IPC_OLD for the legacy semid_ds
 *
 * Returns the number of bytes that could not be copied (copy_to_user
 * semantics), or -EINVAL for an unknown version.
 */
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	if (version == IPC_64)
		return copy_to_user(buf, in, sizeof(*in));

	if (version == IPC_OLD) {
		struct semid_ds out;

		/* memset (not an initializer) so struct padding is zeroed too */
		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	}

	return -EINVAL;
}

1204
static time64_t get_semotime(struct sem_array *sma)
1205 1206
{
	int i;
1207
	time64_t res;
1208

1209
	res = sma->sems[0].sem_otime;
1210
	for (i = 1; i < sma->sem_nsems; i++) {
1211
		time64_t to = sma->sems[i].sem_otime;
1212 1213 1214 1215 1216 1217 1218

		if (to > res)
			res = to;
	}
	return res;
}

1219 1220
/*
 * semctl_stat - handle IPC_STAT, SEM_STAT and SEM_STAT_ANY
 * @ns: ipc namespace
 * @semid: id (IPC_STAT) or index (SEM_STAT/SEM_STAT_ANY) of the set
 * @cmd: IPC_STAT, SEM_STAT or SEM_STAT_ANY
 * @semid64: out parameter, filled with the set's metadata
 *
 * Returns 0 (IPC_STAT) or the full ipc id (SEM_STAT/SEM_STAT_ANY) on
 * success, negative errno on failure.
 */
static int semctl_stat(struct ipc_namespace *ns, int semid,
			 int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	time64_t semotime;
	int err;

	memset(semid64, 0, sizeof(*semid64));

	rcu_read_lock();
	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
		/* SEM_STAT takes an index, not an id - no id check */
		sma = sem_obtain_object(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	} else { /* IPC_STAT */
		sma = sem_obtain_object_check(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
	}

	/* see comment for SHM_STAT_ANY */
	if (cmd == SEM_STAT_ANY)
		audit_ipc_obj(&sma->sem_perm);
	else {
		err = -EACCES;
		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
			goto out_unlock;
	}

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock;

	ipc_lock_object(&sma->sem_perm);

	/* the set may have been removed while we only held the RCU lock */
	if (!ipc_valid_object(&sma->sem_perm)) {
		ipc_unlock_object(&sma->sem_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	semotime = get_semotime(sma);
	semid64->sem_otime = semotime;
	semid64->sem_ctime = sma->sem_ctime;
#ifndef CONFIG_64BIT
	/* 32-bit ABI splits the 64-bit times into low/high halves */
	semid64->sem_otime_high = semotime >> 32;
	semid64->sem_ctime_high = sma->sem_ctime >> 32;
#endif
	semid64->sem_nsems = sma->sem_nsems;

	if (cmd == IPC_STAT) {
		/*
		 * As defined in SUS:
		 * Return 0 on success
		 */
		err = 0;
	} else {
		/*
		 * SEM_STAT and SEM_STAT_ANY (both Linux specific)
		 * Return the full id, including the sequence number
		 */
		err = sma->sem_perm.id;
	}
	ipc_unlock_object(&sma->sem_perm);
out_unlock:
	rcu_read_unlock();
	return err;
}

1293 1294 1295 1296
/*
 * semctl_info - handle IPC_INFO and SEM_INFO
 * @ns: ipc namespace
 * @semid: unused by these commands
 * @cmd: IPC_INFO or SEM_INFO
 * @p: user buffer receiving a struct seminfo
 *
 * Returns the highest in-use index (>= 0) on success, negative errno on
 * failure.
 */
static int semctl_info(struct ipc_namespace *ns, int semid,
			 int cmd, void __user *p)
{
	struct seminfo info;
	int hi_idx;
	int err;

	err = security_sem_semctl(NULL, cmd);
	if (err)
		return err;

	memset(&info, 0, sizeof(info));
	/* per-namespace tunables */
	info.semmni = ns->sc_semmni;
	info.semmns = ns->sc_semmns;
	info.semmsl = ns->sc_semmsl;
	info.semopm = ns->sc_semopm;
	/* compile-time constants */
	info.semvmx = SEMVMX;
	info.semmnu = SEMMNU;
	info.semmap = SEMMAP;
	info.semume = SEMUME;
	down_read(&sem_ids(ns).rwsem);
	if (cmd == SEM_INFO) {
		/* SEM_INFO reports live usage instead of the static limits */
		info.semusz = sem_ids(ns).in_use;
		info.semaem = ns->used_sems;
	} else {
		info.semusz = SEMUSZ;
		info.semaem = SEMAEM;
	}
	hi_idx = ipc_get_maxidx(&sem_ids(ns));
	up_read(&sem_ids(ns).rwsem);
	if (copy_to_user(p, &info, sizeof(struct seminfo)))
		return -EFAULT;
	return (hi_idx < 0) ? 0 : hi_idx;
}

1328
/*
 * semctl_setval - handle SETVAL
 * @ns: ipc namespace
 * @semid: id of the semaphore set
 * @semnum: index of the semaphore to set
 * @val: new semaphore value, must be within 0..SEMVMX
 *
 * Sets one semaphore's value, clears all undo adjustments for it and
 * wakes any operations that can now proceed. Returns 0 or negative errno.
 */
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
		int val)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem *curr;
	int err;
	DEFINE_WAKE_Q(wake_q);

	if (val > SEMVMX || val < 0)
		return -ERANGE;

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}

	err = security_sem_semctl(&sma->sem_perm, SETVAL);
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}

	sem_lock(sma, NULL, -1);

	/* the set may have been removed between lookup and locking */
	if (!ipc_valid_object(&sma->sem_perm)) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}

	/* prevent speculative out-of-bounds access (Spectre v1) */
	semnum = array_index_nospec(semnum, sma->sem_nsems);
	curr = &sma->sems[semnum];

	ipc_assert_locked_object(&sma->sem_perm);
	/* SETVAL discards any pending undo adjustments for this semaphore */
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	ipc_update_pid(&curr->sempid, task_tgid(current));
	sma->sem_ctime = ktime_get_real_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &wake_q);
	sem_unlock(sma, -1);
	rcu_read_unlock();
	/* wake the tasks only after all locks are dropped */
	wake_up_q(&wake_q);
	return 0;
}

Kirill Korotaev's avatar
Kirill Korotaev committed
1390
/*
 * semctl_main - handle GETALL, SETALL, GETVAL, GETPID, GETNCNT and GETZCNT
 * @ns: ipc namespace
 * @semid: id of the semaphore set
 * @semnum: semaphore index (single-semaphore commands only)
 * @cmd: the semctl command
 * @p: user buffer (array of ushort for GETALL/SETALL)
 *
 * Returns the requested value (single-semaphore reads), 0 on success for
 * GETALL/SETALL, or negative errno.
 */
static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem *curr;
	int err, nsems;
	/* small arrays avoid an allocation by using this on-stack buffer */
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	DEFINE_WAKE_Q(wake_q);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}
		if (nsems > SEMMSL_FAST) {
			/*
			 * Too large for the stack buffer: pin the object
			 * with a reference, drop the locks for the
			 * (possibly sleeping) allocation, then relock and
			 * re-validate.
			 */
			if (!ipc_rcu_getref(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
		}
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sems[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		/* copy to userspace only after dropping the locks */
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		/* pin the object; the copy below may sleep on a page fault */
		if (!ipc_rcu_getref(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_rcu_wakeup;
		}
		rcu_read_unlock();

		if (nsems > SEMMSL_FAST) {
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
			err = -EFAULT;
			goto out_free;
		}

		/* validate all values before touching the array */
		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}

		for (i = 0; i < nsems; i++) {
			sma->sems[i].semval = sem_io[i];
			ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
		}

		ipc_assert_locked_object(&sma->sem_perm);
		/* SETALL discards all pending undo adjustments */
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = ktime_get_real_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &wake_q);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	if (!ipc_valid_object(&sma->sem_perm)) {
		err = -EIDRM;
		goto out_unlock;
	}

	/* prevent speculative out-of-bounds access (Spectre v1) */
	semnum = array_index_nospec(semnum, nsems);
	curr = &sma->sems[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = pid_vnr(curr->sempid);
		goto out_unlock;
	case GETNCNT:
		err = count_semcnt(sma, semnum, 0);
		goto out_unlock;
	case GETZCNT:
		err = count_semcnt(sma, semnum, 1);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_q(&wake_q);
out_free:
	if (sem_io != fast_sem_io)
		kvfree(sem_io);
	return err;
}

1556 1557
/*
 * copy_semid_from_user - read a semid64_ds from userspace in either ABI
 * @out: kernel-internal destination
 * @buf: source user buffer
 * @version: IPC_64 for the native layout, IPC_OLD for the legacy semid_ds
 *
 * For IPC_OLD only the permission fields are meaningful; the rest of
 * @out is left untouched. Returns 0, -EFAULT or -EINVAL.
 */
static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	if (version == IPC_64) {
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	}

	if (version == IPC_OLD) {
		struct semid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->sem_perm.uid	= tbuf_old.sem_perm.uid;
		out->sem_perm.gid	= tbuf_old.sem_perm.gid;
		out->sem_perm.mode	= tbuf_old.sem_perm.mode;

		return 0;
	}

	return -EINVAL;
}

1582
/*
 * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 *
 * @ns: ipc namespace
 * @semid: id of the semaphore set
 * @cmd: IPC_RMID or IPC_SET
 * @semid64: new metadata for IPC_SET (ignored for IPC_RMID)
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
		       int cmd, struct semid64_ds *semid64)
{
	struct sem_array *sma;
	int err;
	struct kern_ipc_perm *ipcp;

	down_write(&sem_ids(ns).rwsem);
	rcu_read_lock();

	/* looks up the object and checks ownership/capabilities for @cmd */
	ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
				      &semid64->sem_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	sma = container_of(ipcp, struct sem_array, sem_perm);

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		sem_lock(sma, NULL, -1);
		/* freeary unlocks the ipc object and rcu */
		freeary(ns, ipcp);
		goto out_up;
	case IPC_SET:
		sem_lock(sma, NULL, -1);
		err = ipc_update_perm(&semid64->sem_perm, ipcp);
		if (err)
			goto out_unlock0;
		sma->sem_ctime = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	sem_unlock(sma, -1);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&sem_ids(ns).rwsem);
	return err;
}

1637
/*
 * ksys_semctl - common implementation of the semctl(2) syscall
 * @semid: semaphore set id (or index for the *_STAT commands)
 * @semnum: semaphore index for single-semaphore commands
 * @cmd: the semctl command, possibly with IPC_64 or'ed in
 * @arg: command-specific argument (pointer or value)
 *
 * Dispatches to the per-command helpers. Returns the command's result
 * or negative errno.
 */
long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	/* strips IPC_64 from cmd and reports which ABI layout to use */
	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
	case SEM_STAT_ANY:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fall through */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

1692 1693 1694 1695 1696
/* semctl(2) syscall entry point: forwards directly to ksys_semctl(). */
SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	return ksys_semctl(semid, semnum, cmd, arg);
}

Al Viro's avatar
Al Viro committed
1697 1698 1699 1700
#ifdef CONFIG_COMPAT

/*
 * 32-bit compat layout of the legacy (IPC_OLD) semid_ds structure.
 * Only sem_perm is consumed by the copy helpers below; the remaining
 * fields exist to match the historical userspace ABI layout.
 */
struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;
	old_time32_t sem_otime;
	old_time32_t sem_ctime;
	compat_uptr_t sem_base;
	compat_uptr_t sem_pending;
	compat_uptr_t sem_pending_last;
	compat_uptr_t undo;
	unsigned short sem_nsems;
};

/*
 * copy_compat_semid_from_user - read a compat semid_ds from userspace
 * @out: kernel-internal destination, zeroed before use
 * @buf: source user buffer in compat layout
 * @version: IPC_64 or IPC_OLD, selecting the compat structure variant
 *
 * Only the permission fields are read from userspace.
 */
static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));

	if (version != IPC_64) {
		struct compat_semid_ds __user *p = buf;

		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}

	{
		struct compat_semid64_ds __user *p = buf;

		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	}
}

/*
 * Copy a kernel semid64_ds out to userspace in the requested compat layout.
 * The on-stack temporary is memset() first so struct padding cannot leak
 * kernel stack contents to userspace.
 * Returns 0 on success, non-zero if the user copy faults.
 */
static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;

		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		/* 64-bit times are split across low/high 32-bit fields */
		v.sem_otime	 = lower_32_bits(in->sem_otime);
		v.sem_otime_high = upper_32_bits(in->sem_otime);
		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;

		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		/* old ABI: times truncate to 32 bits */
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

1747
long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
Al Viro's avatar
Al Viro committed
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
1766
	case SEM_STAT_ANY:
Al Viro's avatar
Al Viro committed
1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
Linus Torvalds's avatar
Linus Torvalds committed
1778
	case SETALL:
1779 1780 1781
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
Linus Torvalds's avatar
Linus Torvalds committed
1782
	case IPC_SET:
Al Viro's avatar
Al Viro committed
1783 1784 1785 1786 1787
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
Linus Torvalds's avatar
Linus Torvalds committed
1788 1789 1790 1791
	default:
		return -EINVAL;
	}
}
1792 1793 1794 1795 1796

/*
 * 32-bit compat entry point for semctl(2); decoding of @arg (int vs.
 * compat pointer, depending on @cmd) happens in compat_ksys_semctl().
 */
COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	return compat_ksys_semctl(semid, semnum, cmd, arg);
}
Al Viro's avatar
Al Viro committed
1797
#endif
Linus Torvalds's avatar
Linus Torvalds committed
1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815

/* If the task doesn't already have a undo_list, then allocate one
 * here.  We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
1816
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
Linus Torvalds's avatar
Linus Torvalds committed
1817 1818
		if (undo_list == NULL)
			return -ENOMEM;
Ingo Molnar's avatar
Ingo Molnar committed
1819
		spin_lock_init(&undo_list->lock);
1820
		refcount_set(&undo_list->refcnt, 1);
1821 1822
		INIT_LIST_HEAD(&undo_list->list_proc);

Linus Torvalds's avatar
Linus Torvalds committed
1823 1824 1825 1826 1827 1828
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

1829
static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
Linus Torvalds's avatar
Linus Torvalds committed
1830
{
1831
	struct sem_undo *un;
1832

1833 1834 1835
	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
Linus Torvalds's avatar
Linus Torvalds committed
1836
	}
1837
	return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1838 1839
}

1840 1841 1842 1843
/*
 * Locked lookup: like __lookup_undo(), but on a hit the entry is moved to
 * the front of the list so that repeated lookups of the same semid stay
 * cheap. Caller must hold ulp->lock.
 */
static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		/* move-to-front for temporal locality */
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

1854
/**
1855
 * find_alloc_undo - lookup (and if not present create) undo array
1856 1857 1858 1859 1860 1861
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
1862 1863
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
1864 1865
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
Linus Torvalds's avatar
Linus Torvalds committed
1866 1867 1868 1869
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
1870
	int nsems, error;
Linus Torvalds's avatar
Linus Torvalds committed
1871 1872 1873 1874 1875

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

1876
	rcu_read_lock();
1877
	spin_lock(&ulp->lock);
Linus Torvalds's avatar
Linus Torvalds committed
1878
	un = lookup_undo(ulp, semid);
1879
	spin_unlock(&ulp->lock);
Manfred Spraul's avatar
Manfred Spraul committed
1880
	if (likely(un != NULL))
Linus Torvalds's avatar
Linus Torvalds committed
1881 1882 1883
		goto out;

	/* no undo structure around - allocate one. */
1884
	/* step 1: figure out the size of the semaphore array */
1885 1886 1887
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
Julia Lawall's avatar
Julia Lawall committed
1888
		return ERR_CAST(sma);
1889
	}
1890

Linus Torvalds's avatar
Linus Torvalds committed
1891
	nsems = sma->sem_nsems;
1892
	if (!ipc_rcu_getref(&sma->sem_perm)) {
1893 1894 1895 1896
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
1897
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1898

1899
	/* step 2: allocate new undo structure */
1900
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
Linus Torvalds's avatar
Linus Torvalds committed
1901
	if (!new) {
1902
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
Linus Torvalds's avatar
Linus Torvalds committed
1903 1904 1905
		return ERR_PTR(-ENOMEM);
	}

1906
	/* step 3: Acquire the lock on semaphore array */
1907
	rcu_read_lock();
1908
	sem_lock_and_putref(sma);
1909
	if (!ipc_valid_object(&sma->sem_perm)) {
1910
		sem_unlock(sma, -1);
1911
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
1912 1913 1914 1915
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
1916 1917 1918 1919 1920 1921 1922 1923 1924 1925
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
1926 1927
	/* step 5: initialize & link new undo structure */
	new->semadj = (short *) &new[1];
1928
	new->ulp = ulp;
1929 1930
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
1931
	list_add_rcu(&new->list_proc, &ulp->list_proc);
1932
	ipc_assert_locked_object(&sma->sem_perm);
1933
	list_add(&new->list_id, &sma->list_id);
1934
	un = new;
1935

1936
success:
1937
	spin_unlock(&ulp->lock);
1938
	sem_unlock(sma, -1);
Linus Torvalds's avatar
Linus Torvalds committed
1939 1940 1941 1942
out:
	return un;
}

Al Viro's avatar
Al Viro committed
1943
/*
 * Common implementation of semop(2)/semtimedop(2).
 * Copies the sop array in, optionally sets up SEM_UNDO state, attempts the
 * operation atomically, and sleeps on the appropriate pending queue if it
 * cannot complete immediately. @timeout is a kernel-space timespec (already
 * copied in by the syscall wrappers) or NULL for an infinite wait.
 */
static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	/* small op vectors use the on-stack buffer, larger ones are heap-allocated */
	if (nsops > SEMOPM_FAST) {
		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error = -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	/*
	 * One pass over the ops: find the highest semaphore index, note
	 * whether any op uses SEM_UNDO or alters a semaphore, and detect
	 * (conservatively, modulo BITS_PER_LONG) duplicate targets.
	 */
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}

	if (undos) {
		/* On success, find_alloc_undo takes the rcu_read_lock */
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else {
		un = NULL;
		rcu_read_lock();
	}

	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out_free;
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out_free;
	}

	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
	if (error) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	/*
	 * We eventually might perform the following check in a lockless
	 * fashion, considering ipc_valid_object() locking constraints.
	 * If nsops == 1 and there is no contention for sem_perm.lock, then
	 * only a per-semaphore lock is held and it's OK to proceed with the
	 * check below. More details on the fine grained locking scheme
	 * entangled here and why it's RMID race safe on comments at sem_lock()
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array with received the same id. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
		curr = &sma->sems[idx];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {

				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		/* status is re-armed each iteration; a waker overwrites it */
		WRITE_ONCE(queue.status, -EINTR);
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not, from the syscall pov, is quite irrelevant to us at this
		 * point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously.  The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			goto out_free;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
		 * Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}

2196
/*
 * Kernel-internal semtimedop(2): copies the (optional) timeout from
 * userspace, then hands off to do_semtimedop().
 */
long ksys_semtimedop(int semid, struct sembuf __user *tsops,
		     unsigned int nsops, const struct __kernel_timespec __user *timeout)
{
	struct timespec64 ts;

	if (!timeout)
		return do_semtimedop(semid, tsops, nsops, NULL);

	if (get_timespec64(&ts, timeout))
		return -EFAULT;
	return do_semtimedop(semid, tsops, nsops, &ts);
}

2208
/* Native semtimedop(2) entry point. */
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
{
	return ksys_semtimedop(semid, tsops, nsops, timeout);
}

2214
#ifdef CONFIG_COMPAT_32BIT_TIME
2215 2216
/*
 * 32-bit-time semtimedop(2): converts the old 32-bit timespec to
 * timespec64 before delegating to do_semtimedop().
 */
long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
			    unsigned int nsops,
			    const struct old_timespec32 __user *timeout)
{
	struct timespec64 ts;

	if (!timeout)
		return do_semtimedop(semid, tsems, nsops, NULL);

	if (get_old_timespec32(&ts, timeout))
		return -EFAULT;
	return do_semtimedop(semid, tsems, nsops, &ts);
}
2227 2228 2229

/* Compat (32-bit time) semtimedop(2) entry point. */
COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned int, nsops,
		       const struct old_timespec32 __user *, timeout)
{
	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
}
Al Viro's avatar
Al Viro committed
2234 2235
#endif

2236 2237
/* semop(2) is simply semtimedop(2) with no timeout. */
SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
2255
		refcount_inc(&undo_list->refcnt);
Linus Torvalds's avatar
Linus Torvalds committed
2256
		tsk->sysvsem.undo_list = undo_list;
Paul McQuade's avatar
Paul McQuade committed
2257
	} else
Linus Torvalds's avatar
Linus Torvalds committed
2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
2277
	struct sem_undo_list *ulp;
Linus Torvalds's avatar
Linus Torvalds committed
2278

2279 2280
	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
Linus Torvalds's avatar
Linus Torvalds committed
2281
		return;
2282
	tsk->sysvsem.undo_list = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
2283

2284
	if (!refcount_dec_and_test(&ulp->refcnt))
Linus Torvalds's avatar
Linus Torvalds committed
2285 2286
		return;

2287
	for (;;) {
Linus Torvalds's avatar
Linus Torvalds committed
2288
		struct sem_array *sma;
2289
		struct sem_undo *un;
2290
		int semid, i;
2291
		DEFINE_WAKE_Q(wake_q);
2292

2293 2294
		cond_resched();

2295
		rcu_read_lock();
2296 2297
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
2298 2299 2300 2301 2302 2303 2304
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with last sem_undo. There is a small
			 * possibility where we exit while freeary() didn't
			 * finish unlocking sem_undo_list.
			 */
2305 2306
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
2307 2308 2309 2310 2311 2312
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);
2313

2314
		/* exit_sem raced with IPC_RMID, nothing to do */
2315 2316
		if (semid == -1) {
			rcu_read_unlock();
2317
			continue;
2318
		}
Linus Torvalds's avatar
Linus Torvalds committed
2319

2320
		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
2321
		/* exit_sem raced with IPC_RMID, nothing to do */
2322 2323
		if (IS_ERR(sma)) {
			rcu_read_unlock();
2324
			continue;
2325
		}
Linus Torvalds's avatar
Linus Torvalds committed
2326

2327
		sem_lock(sma, NULL, -1);
2328
		/* exit_sem raced with IPC_RMID, nothing to do */
2329
		if (!ipc_valid_object(&sma->sem_perm)) {
2330 2331 2332 2333
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
2334
		un = __lookup_undo(ulp, semid);
2335 2336 2337 2338
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
2339
			sem_unlock(sma, -1);
2340
			rcu_read_unlock();
2341 2342 2343 2344
			continue;
		}

		/* remove un from the linked lists */
2345
		ipc_assert_locked_object(&sma->sem_perm);
2346 2347
		list_del(&un->list_id);

2348 2349 2350 2351
		/* we are the last process using this ulp, acquiring ulp->lock
		 * isn't required. Besides that, we are also protected against
		 * IPC_RMID as we hold sma->sem_perm lock now
		 */
2352 2353
		list_del_rcu(&un->list_proc);

2354 2355
		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
2356
			struct sem *semaphore = &sma->sems[i];
2357 2358
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
Linus Torvalds's avatar
Linus Torvalds committed
2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369
				/*
				 * Range checks of the new semaphore value,
				 * not defined by sus:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
Manfred Spraul's avatar
Manfred Spraul committed
2370
				 *	Manfred <manfred@colorfullife.com>
Linus Torvalds's avatar
Linus Torvalds committed
2371
				 */
Ingo Molnar's avatar
Ingo Molnar committed
2372 2373 2374 2375
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
2376
				ipc_update_pid(&semaphore->sempid, task_tgid(current));
Linus Torvalds's avatar
Linus Torvalds committed
2377 2378 2379
			}
		}
		/* maybe some queued-up processes were waiting for this */
2380
		do_smart_update(sma, NULL, 0, 1, &wake_q);
2381
		sem_unlock(sma, -1);
2382
		rcu_read_unlock();
2383
		wake_up_q(&wake_q);
2384

2385
		kfree_rcu(un, rcu);
Linus Torvalds's avatar
Linus Torvalds committed
2386
	}
2387
	kfree(ulp);
Linus Torvalds's avatar
Linus Torvalds committed
2388 2389 2390
}

#ifdef CONFIG_PROC_FS
2391
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
Linus Torvalds's avatar
Linus Torvalds committed
2392
{
2393
	struct user_namespace *user_ns = seq_user_ns(s);
2394 2395
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
2396
	time64_t sem_otime;
2397

2398 2399 2400
	/*
	 * The proc interface isn't aware of sem_lock(), it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
2401 2402
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
2403
	 */
2404
	complexmode_enter(sma);
2405

2406
	sem_otime = get_semotime(sma);
2407

2408
	seq_printf(s,
2409
		   "%10d %10d  %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

2421 2422
	complexmode_tryleave(sma);

2423
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
2424 2425
}
#endif