ipc/sem.c: always use only one queue for alter operations

There are two places that can contain alter operations: - the global queue: sma->pending_alter - the per-semaphore queues: sma->sem_base[].pending_alter. Since one of the queues must be processed first, this causes an odd priorization of the wakeups: complex operations have priority over simple ops. The patch restores the behavior of linux <=3.0.9: The longest waiting operation has the highest priority. This is done by using only one queue: - if there are complex ops, then sma->pending_alter is used. - otherwise, the per-semaphore queues are used. As a side effect, do_smart_update_queue() becomes much simpler: no more goto logic. Signed-off-by: Manfred Spraul <manfred@colorfullife.com> Cc: Rik van Riel <riel@redhat.com> Cc: Davidlohr Bueso <davidlohr.bueso@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

ipc/sem.c: always use only one queue for alter operations
There are two places that can contain alter operations: - the global queue: sma->pending_alter - the per-semaphore queues: sma->sem_base[].pending_alter. Since one of the queues must be processed first, this causes an odd priorization of the wakeups: complex operations have priority over simple ops. The patch restores the behavior of linux <=3.0.9: The longest waiting operation has the highest priority. This is done by using only one queue: - if there are complex ops, then sma->pending_alter is used. - otherwise, the per-semaphore queues are used. As a side effect, do_smart_update_queue() becomes much simpler: no more goto logic. Signed-off-by: Manfred Spraul <manfred@colorfullife.com> Cc: Rik van Riel <riel@redhat.com> Cc: Davidlohr Bueso <davidlohr.bueso@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
f269f40a · Manfred Spraul · Linus Torvalds · 1a82e9e1 · f269f40a
Commit f269f40a authored Jul 08, 2013 by Manfred Spraul Committed by Linus Torvalds Jul 09, 2013
Hide whitespace changes
Inline Side-by-side

Showing with 88 additions and 40 deletions

ipc/sem.c ipc/sem.c +88 -40

No files found.
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -192,6 +192,53 @@ void __init sem_init (void)
 				IPC_SEM_IDS, sysvipc_sem_proc_show);
 }
+/**
+ * unmerge_queues - unmerge queues, if possible.
+ * @sma: semaphore array
+ *
+ * The function unmerges the wait queues if complex_count is 0.
+ * It must be called prior to dropping the global semaphore array lock.
+ */
+static void unmerge_queues(struct sem_array *sma)
+{
+	struct sem_queue *q, *tq;
+	/* complex operations still around? */
+	if (sma->complex_count)
+		return;
+	/*
+	 * We will switch back to simple mode.
+	 * Move all pending operation back into the per-semaphore
+	 * queues.
+	 */
+	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
+		struct sem *curr;
+		curr = &sma->sem_base[q->sops[0].sem_num];
+		list_add_tail(&q->list, &curr->pending_alter);
+	}
+	INIT_LIST_HEAD(&sma->pending_alter);
+}
+/**
+ * merge_queues - Merge single semop queues into global queue
+ * @sma: semaphore array
+ *
+ * This function merges all per-semaphore queues into the global queue.
+ * It is necessary to achieve FIFO ordering for the pending single-sop
+ * operations when a multi-semop operation must sleep.
+ * Only the alter operations must be moved, the const operations can stay.
+ */
+static void merge_queues(struct sem_array *sma)
+{
+	int i;
+	for (i = 0; i < sma->sem_nsems; i++) {
+		struct sem *sem = sma->sem_base + i;
+		list_splice_init(&sem->pending_alter, &sma->pending_alter);
+	}
+}
 /*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
@@ -262,6 +309,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 static inline void sem_unlock(struct sem_array *sma, int locknum)
 {
 	if (locknum == -1) {
+		unmerge_queues(sma);
 		ipc_unlock_object(&sma->sem_perm);
 	} else {
 		struct sem *sem = sma->sem_base + locknum;
@@ -831,49 +879,38 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			int otime, struct list_head *pt)
 {
 	int i;
-	int progress;
 	otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
-	progress = 1;
+	if (!list_empty(&sma->pending_alter)) {
-retry_global:
+		/* semaphore array uses the global queue - just process it. */
-	if (sma->complex_count) {
+		otime |= update_queue(sma, -1, pt);
-		if (update_queue(sma, -1, pt)) {
+	} else {
-			progress = 1;
+		if (!sops) {
-			otime = 1;
+			/*
-			sops = NULL;
+			 * No sops, thus the modified semaphores are not
-		}
+			 * known. Check all.
-	}
+			 */
-	if (!progress)
+			for (i = 0; i < sma->sem_nsems; i++)
-		goto done;
+				otime |= update_queue(sma, i, pt);
+		} else {
-	if (!sops) {
+			/*
-		/* No semops; something special is going on. */
+			 * Check the semaphores that were increased:
-		for (i = 0; i < sma->sem_nsems; i++) {
+			 * - No complex ops, thus all sleeping ops are
-			if (update_queue(sma, i, pt)) {
+			 *   decrease.
-				otime = 1;
+			 * - if we decreased the value, then any sleeping
-				progress = 1;
+			 *   semaphore ops wont be able to run: If the
+			 *   previous value was too small, then the new
+			 *   value will be too small, too.
+			 */
+			for (i = 0; i < nsops; i++) {
+				if (sops[i].sem_op > 0) {
+					otime |= update_queue(sma,
+							sops[i].sem_num, pt);
+				}
 			}
 		}
-		goto done_checkretry;
-	}
-	/* Check the semaphores that were modified. */
-	for (i = 0; i < nsops; i++) {
-		if (sops[i].sem_op > 0 ||
-			(sops[i].sem_op < 0 &&
-				sma->sem_base[sops[i].sem_num].semval == 0))
-			if (update_queue(sma, sops[i].sem_num, pt)) {
-				otime = 1;
-				progress = 1;
-			}
-	}
-done_checkretry:
-	if (progress) {
-		progress = 0;
-		goto retry_global;
 	}
-done:
 	if (otime)
 		sma->sem_otime = get_seconds();
 }
@@ -1747,11 +1784,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		struct sem *curr;
 		curr = &sma->sem_base[sops->sem_num];
-		if (alter)
+		if (alter) {
-			list_add_tail(&queue.list, &curr->pending_alter);
+			if (sma->complex_count) {
-		else
+				list_add_tail(&queue.list,
+						&sma->pending_alter);
+			} else {
+				list_add_tail(&queue.list,
+						&curr->pending_alter);
+			}
+		} else {
 			list_add_tail(&queue.list, &curr->pending_const);
+		}
 	} else {
+		if (!sma->complex_count)
+			merge_queues(sma);
 		if (alter)
 			list_add_tail(&queue.list, &sma->pending_alter);
 		else