audit: fix a race condition with the auditd tracking code

Originally reported by Adam and Dusty, it appears we have a small race window in kauditd_thread(), as documented in the Fedora BZ: * https://bugzilla.redhat.com/show_bug.cgi?id=1459326#c35 "This issue is partly due to the read-copy nature of RCU, and partly due to how we sync the auditd_connection state across kauditd_thread and the audit control channel. The kauditd_thread thread is always running so it can service the record queues and emit the multicast messages, if it happens to be just past the "main_queue" label, but before the "if (sk == NULL || ...)" if-statement which calls auditd_reset() when the new auditd connection is registered it could end up resetting the auditd connection, regardless of if it is valid or not. This is a rather small window and the variable nature of multi-core scheduling explains why this is proving rather difficult to reproduce." The fix is to have functions only call auditd_reset() when they believe that the kernel/auditd connection is still valid, e.g. non-NULL, and to have these callers pass their local copy of the auditd_connection pointer to auditd_reset() where it can be compared with the current connection state before resetting. If the caller has a stale state tracking pointer then the reset is ignored. We also make a small change to kauditd_thread() so that if the kernel/auditd connection is dead we skip the retry queue and send the records straight to the hold queue. This is necessary as we used to rely on auditd_reset() to occasionally purge the retry queue but we are going to be calling the reset function much less now and we want to make sure the retry queue doesn't grow unbounded. Reported-by: Adam Williamson <awilliam@redhat.com> Reported-by: Dusty Mabe <dustymabe@redhat.com> Reviewed-by: Richard Guy Briggs <rgb@redhat.com> Signed-off-by: Paul Moore <paul@paul-moore.com>

audit: fix a race condition with the auditd tracking code
Originally reported by Adam and Dusty, it appears we have a small race window in kauditd_thread(), as documented in the Fedora BZ: * https://bugzilla.redhat.com/show_bug.cgi?id=1459326#c35 "This issue is partly due to the read-copy nature of RCU, and partly due to how we sync the auditd_connection state across kauditd_thread and the audit control channel. The kauditd_thread thread is always running so it can service the record queues and emit the multicast messages, if it happens to be just past the "main_queue" label, but before the "if (sk == NULL || ...)" if-statement which calls auditd_reset() when the new auditd connection is registered it could end up resetting the auditd connection, regardless of if it is valid or not. This is a rather small window and the variable nature of multi-core scheduling explains why this is proving rather difficult to reproduce." The fix is to have functions only call auditd_reset() when they believe that the kernel/auditd connection is still valid, e.g. non-NULL, and to have these callers pass their local copy of the auditd_connection pointer to auditd_reset() where it can be compared with the current connection state before resetting. If the caller has a stale state tracking pointer then the reset is ignored. We also make a small change to kauditd_thread() so that if the kernel/auditd connection is dead we skip the retry queue and send the records straight to the hold queue. This is necessary as we used to rely on auditd_reset() to occasionally purge the retry queue but we are going to be calling the reset function much less now and we want to make sure the retry queue doesn't grow unbounded. Reported-by: Adam Williamson <awilliam@redhat.com> Reported-by: Dusty Mabe <dustymabe@redhat.com> Reviewed-by: Richard Guy Briggs <rgb@redhat.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
c81be52a · Paul Moore · e4c1a0d1 · c81be52a
Commit c81be52a authored Jun 12, 2017 by Paul Moore
Show whitespace changes
Inline Side-by-side

Showing with 23 additions and 13 deletions

kernel/audit.c kernel/audit.c +23 -13

No files found.
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -575,12 +575,16 @@ static void kauditd_retry_skb(struct sk_buff *skb)
 /**
 * auditd_reset - Disconnect the auditd connection
+ * @ac: auditd connection state
 *
 * Description:
 * Break the auditd/kauditd connection and move all the queued records into the
- * hold queue in case auditd reconnects.
+ * hold queue in case auditd reconnects.  It is important to note that the @ac
+ * pointer should never be dereferenced inside this function as it may be NULL
+ * or invalid, you can only compare the memory address!  If @ac is NULL then
+ * the connection will always be reset.
 */
-static void auditd_reset(void)
+static void auditd_reset(const struct auditd_connection *ac)
 {
 	unsigned long flags;
 	struct sk_buff *skb;
@@ -590,6 +594,11 @@ static void auditd_reset(void)
 	spin_lock_irqsave(&auditd_conn_lock, flags);
 	ac_old = rcu_dereference_protected(auditd_conn,
 					   lockdep_is_held(&auditd_conn_lock));
+	if (ac && ac != ac_old) {
+		/* someone already registered a new auditd connection */
+		spin_unlock_irqrestore(&auditd_conn_lock, flags);
+		return;
+	}
 	rcu_assign_pointer(auditd_conn, NULL);
 	spin_unlock_irqrestore(&auditd_conn_lock, flags);
@@ -649,8 +658,8 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
 	return rc;
 err:
-	if (rc == -ECONNREFUSED)
+	if (ac && rc == -ECONNREFUSED)
-		auditd_reset();
+		auditd_reset(ac);
 	return rc;
 }
@@ -795,9 +804,9 @@ static int kauditd_thread(void *dummy)
 		rc = kauditd_send_queue(sk, portid,
 					&audit_hold_queue, UNICAST_RETRIES,
 					NULL, kauditd_rehold_skb);
-		if (rc < 0) {
+		if (ac && rc < 0) {
 			sk = NULL;
-			auditd_reset();
+			auditd_reset(ac);
 			goto main_queue;
 		}
@@ -805,9 +814,9 @@ static int kauditd_thread(void *dummy)
 		rc = kauditd_send_queue(sk, portid,
 					&audit_retry_queue, UNICAST_RETRIES,
 					NULL, kauditd_hold_skb);
-		if (rc < 0) {
+		if (ac && rc < 0) {
 			sk = NULL;
-			auditd_reset();
+			auditd_reset(ac);
 			goto main_queue;
 		}
@@ -815,12 +824,13 @@ static int kauditd_thread(void *dummy)
 		/* process the main queue - do the multicast send and attempt
 		 * unicast, dump failed record sends to the retry queue; if
 		 * sk == NULL due to previous failures we will just do the
-		 * multicast send and move the record to the retry queue */
+		 * multicast send and move the record to the hold queue */
 		rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
 					kauditd_send_multicast_skb,
-					kauditd_retry_skb);
+					(sk ?
-		if (sk == NULL || rc < 0)
+					 kauditd_retry_skb : kauditd_hold_skb));
-			auditd_reset();
+		if (ac && rc < 0)
+			auditd_reset(ac);
 		sk = NULL;
 		/* drop our netns reference, no auditd sends past this line */
@@ -1230,7 +1240,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 								auditd_pid, 1);
 				/* unregister the auditd connection */
-				auditd_reset();
+				auditd_reset(NULL);
 			}
 		}
 		if (s.mask & AUDIT_STATUS_RATE_LIMIT) {