Commit b0546776 authored by Linus Torvalds

Merge tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux

Pull printk updates from Petr Mladek:
 "Improve the behavior during panic. The issues were found when testing
  the ongoing changes introducing atomic consoles and printk kthreads:

   - pr_flush() has to wait for the last reserved record instead of the
     last finalized one. Note that records are finalized in random order
     when generated by multiple CPUs in parallel.

   - Ignore non-finalized records during panic(). Messages printed on
     panic-CPU are always finalized. Messages printed by other CPUs
     might never be finalized when the CPUs get stopped.

   - Block new printk() calls on non-panic CPUs completely. Backtraces
     are printed before entering panic mode. Later messages would only
     garble the information printed by the panic CPU.

   - Do not take console_lock in console_flush_on_panic() at all. The
     original code did try_lock()/console_unlock(). The unlock part
     might cause a deadlock when panic() happened in scheduler code.

   - Fix conversion of 64-bit sequence number for 32-bit atomic
     operations"

* tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux:
  dump_stack: Do not get cpu_sync for panic CPU
  panic: Flush kernel log buffer at the end
  printk: Avoid non-panic CPUs writing to ringbuffer
  printk: Disable passing console lock owner completely during panic()
  printk: ringbuffer: Skip non-finalized records in panic
  printk: Wait for all reserved records with pr_flush()
  printk: ringbuffer: Cleanup reader terminology
  printk: Add this_cpu_in_panic()
  printk: For @suppress_panic_printk check for other CPU in panic
  printk: ringbuffer: Clarify special lpos values
  printk: ringbuffer: Do not skip non-finalized records with prb_next_seq()
  printk: Use prb_first_seq() as base for 32bit seq macros
  printk: Adjust mapping for 32bit seq macros
  printk: nbcon: Relocate 32bit seq macros
parents f88c3fb8 7412dc6d
@@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void)
 }
 #endif
+bool this_cpu_in_panic(void);
 #ifdef CONFIG_SMP
 extern int __printk_cpu_sync_try_get(void);
 extern void __printk_cpu_sync_wait(void);
...
@@ -446,6 +446,14 @@ void panic(const char *fmt, ...)
         /* Do not scroll important messages printed above */
         suppress_printk = 1;
+        /*
+         * The final messages may not have been printed if in a context that
+         * defers printing (such as NMI) and irq_work is not available.
+         * Explicitly flush the kernel log buffer one last time.
+         */
+        console_flush_on_panic(CONSOLE_FLUSH_PENDING);
         local_irq_enable();
         for (i = 0; ; i += PANIC_TIMER_STEP) {
                 touch_softlockup_watchdog();
...
@@ -140,39 +140,6 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta
         return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
 }
-#ifdef CONFIG_64BIT
-#define __seq_to_nbcon_seq(seq) (seq)
-#define __nbcon_seq_to_seq(seq) (seq)
-#else /* CONFIG_64BIT */
-#define __seq_to_nbcon_seq(seq) ((u32)seq)
-static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq)
-{
-        u64 seq;
-        u64 rb_next_seq;
-        /*
-         * The provided sequence is only the lower 32 bits of the ringbuffer
-         * sequence. It needs to be expanded to 64bit. Get the next sequence
-         * number from the ringbuffer and fold it.
-         *
-         * Having a 32bit representation in the console is sufficient.
-         * If a console ever gets more than 2^31 records behind
-         * the ringbuffer then this is the least of the problems.
-         *
-         * Also the access to the ring buffer is always safe.
-         */
-        rb_next_seq = prb_next_seq(prb);
-        seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq);
-        return seq;
-}
-#endif /* CONFIG_64BIT */
 /**
  * nbcon_seq_read - Read the current console sequence
  * @con: Console to read the sequence of
@@ -183,7 +150,7 @@ u64 nbcon_seq_read(struct console *con)
 {
         unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
-        return __nbcon_seq_to_seq(nbcon_seq);
+        return __ulseq_to_u64seq(prb, nbcon_seq);
 }
 /**
@@ -204,7 +171,7 @@ void nbcon_seq_force(struct console *con, u64 seq)
          */
         u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
-        atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq));
+        atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
         /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
         con->seq = 0;
@@ -223,11 +190,11 @@ void nbcon_seq_force(struct console *con, u64 seq)
  */
 static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
 {
-        unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq);
+        unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
         struct console *con = ctxt->console;
         if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
-                                    __seq_to_nbcon_seq(new_seq))) {
+                                    __u64seq_to_ulseq(new_seq))) {
                 ctxt->seq = new_seq;
         } else {
                 ctxt->seq = nbcon_seq_read(con);
...
@@ -347,6 +347,29 @@ static bool panic_in_progress(void)
         return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
 }
+/* Return true if a panic is in progress on the current CPU. */
+bool this_cpu_in_panic(void)
+{
+        /*
+         * We can use raw_smp_processor_id() here because it is impossible for
+         * the task to be migrated to the panic_cpu, or away from it. If
+         * panic_cpu has already been set, and we're not currently executing on
+         * that CPU, then we never will be.
+         */
+        return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
+}
+/*
+ * Return true if a panic is in progress on a remote CPU.
+ *
+ * On true, the local CPU should immediately release any printing resources
+ * that may be needed by the panic CPU.
+ */
+bool other_cpu_in_panic(void)
+{
+        return (panic_in_progress() && !this_cpu_in_panic());
+}
 /*
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
@@ -439,12 +462,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
 static DEFINE_MUTEX(syslog_lock);
 #ifdef CONFIG_PRINTK
-/*
- * During panic, heavy printk by other CPUs can delay the
- * panic and risk deadlock on console resources.
- */
-static int __read_mostly suppress_panic_printk;
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* All 3 protected by @syslog_lock. */
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -1835,10 +1852,23 @@ static bool console_waiter;
  */
 static void console_lock_spinning_enable(void)
 {
+        /*
+         * Do not use spinning in panic(). The panic CPU wants to keep the lock.
+         * Non-panic CPUs abandon the flush anyway.
+         *
+         * Just keep the lockdep annotation. The panic-CPU should avoid
+         * taking console_owner_lock because it might cause a deadlock.
+         * This looks like the easiest way how to prevent false lockdep
+         * reports without handling races a lockless way.
+         */
+        if (panic_in_progress())
+                goto lockdep;
         raw_spin_lock(&console_owner_lock);
         console_owner = current;
         raw_spin_unlock(&console_owner_lock);
+lockdep:
         /* The waiter may spin on us after setting console_owner */
         spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
 }
@@ -1863,6 +1893,22 @@ static int console_lock_spinning_disable_and_check(int cookie)
 {
         int waiter;
+        /*
+         * Ignore spinning waiters during panic() because they might get stopped
+         * or blocked at any time,
+         *
+         * It is safe because nobody is allowed to start spinning during panic
+         * in the first place. If there has been a waiter then non panic CPUs
+         * might stay spinning. They would get stopped anyway. The panic context
+         * will never start spinning and an interrupted spin on panic CPU will
+         * never continue.
+         */
+        if (panic_in_progress()) {
+                /* Keep lockdep happy. */
+                spin_release(&console_owner_dep_map, _THIS_IP_);
+                return 0;
+        }
         raw_spin_lock(&console_owner_lock);
         waiter = READ_ONCE(console_waiter);
         console_owner = NULL;
@@ -2259,8 +2305,12 @@ asmlinkage int vprintk_emit(int facility, int level,
         if (unlikely(suppress_printk))
                 return 0;
-        if (unlikely(suppress_panic_printk) &&
-            atomic_read(&panic_cpu) != raw_smp_processor_id())
+        /*
+         * The messages on the panic CPU are the most important. If
+         * non-panic CPUs are generating any messages, they will be
+         * silently dropped.
+         */
+        if (other_cpu_in_panic())
                 return 0;
         if (level == LOGLEVEL_SCHED) {
@@ -2590,26 +2640,6 @@ static int console_cpu_notify(unsigned int cpu)
         return 0;
 }
-/*
- * Return true if a panic is in progress on a remote CPU.
- *
- * On true, the local CPU should immediately release any printing resources
- * that may be needed by the panic CPU.
- */
-bool other_cpu_in_panic(void)
-{
-        if (!panic_in_progress())
-                return false;
-        /*
-         * We can use raw_smp_processor_id() here because it is impossible for
-         * the task to be migrated to the panic_cpu, or away from it. If
-         * panic_cpu has already been set, and we're not currently executing on
-         * that CPU, then we never will be.
-         */
-        return atomic_read(&panic_cpu) != raw_smp_processor_id();
-}
 /**
  * console_lock - block the console subsystem from printing
  *
@@ -2765,8 +2795,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
 bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
                              bool is_extended, bool may_suppress)
 {
-        static int panic_console_dropped;
         struct printk_buffers *pbufs = pmsg->pbufs;
         const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
         const size_t outbuf_sz = sizeof(pbufs->outbuf);
@@ -2794,17 +2822,6 @@ bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
         pmsg->seq = r.info->seq;
         pmsg->dropped = r.info->seq - seq;
-        /*
-         * Check for dropped messages in panic here so that printk
-         * suppression can occur as early as possible if necessary.
-         */
-        if (pmsg->dropped &&
-            panic_in_progress() &&
-            panic_console_dropped++ > 10) {
-                suppress_panic_printk = 1;
-                pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
-        }
         /* Skip record that has level above the console loglevel. */
         if (may_suppress && suppress_message_printing(r.info->level))
                 goto out;
@@ -3750,7 +3767,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
         might_sleep();
-        seq = prb_next_seq(prb);
+        seq = prb_next_reserve_seq(prb);
         /* Flush the consoles so that records up to @seq are printed. */
         console_lock();
...
This diff is collapsed.
@@ -75,7 +75,7 @@ struct prb_desc_ring {
         struct printk_info *infos;
         atomic_long_t head_id;
         atomic_long_t tail_id;
-        atomic_long_t last_finalized_id;
+        atomic_long_t last_finalized_seq;
 };
 /*
@@ -127,8 +127,22 @@ enum desc_state {
 #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id)
 #define DESC_ID_MASK (~DESC_FLAGS_MASK)
 #define DESC_ID(sv) ((sv) & DESC_ID_MASK)
+/*
+ * Special data block logical position values (for fields of
+ * @prb_desc.text_blk_lpos).
+ *
+ * - Bit0 is used to identify if the record has no data block. (Implemented in
+ *   the LPOS_DATALESS() macro.)
+ *
+ * - Bit1 specifies the reason for not having a data block.
+ *
+ * These special values could never be real lpos values because of the
+ * meta data and alignment padding of data blocks. (See to_blk_size() for
+ * details.)
+ */
 #define FAILED_LPOS 0x1
-#define NO_LPOS 0x3
+#define EMPTY_LINE_LPOS 0x3
 #define FAILED_BLK_LPOS \
 { \
@@ -259,7 +273,7 @@ static struct printk_ringbuffer name = { \
                 .infos = &_##name##_infos[0], \
                 .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \
                 .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \
-                .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \
+                .last_finalized_seq = ATOMIC_INIT(0), \
         }, \
         .text_data_ring = { \
                 .size_bits = (avgtextbits) + (descbits), \
@@ -378,7 +392,41 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
 bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
                          struct printk_info *info, unsigned int *line_count);
+u64 prb_first_seq(struct printk_ringbuffer *rb);
 u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
 u64 prb_next_seq(struct printk_ringbuffer *rb);
+u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
+#ifdef CONFIG_64BIT
+#define __u64seq_to_ulseq(u64seq) (u64seq)
+#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
+#else /* CONFIG_64BIT */
+#define __u64seq_to_ulseq(u64seq) ((u32)u64seq)
+static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
+{
+        u64 rb_first_seq = prb_first_seq(rb);
+        u64 seq;
+        /*
+         * The provided sequence is only the lower 32 bits of the ringbuffer
+         * sequence. It needs to be expanded to 64bit. Get the first sequence
+         * number from the ringbuffer and fold it.
+         *
+         * Having a 32bit representation in the console is sufficient.
+         * If a console ever gets more than 2^31 records behind
+         * the ringbuffer then this is the least of the problems.
+         *
+         * Also the access to the ring buffer is always safe.
+         */
+        seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
+        return seq;
+}
+#endif /* CONFIG_64BIT */
 #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
@@ -96,15 +96,25 @@ static void __dump_stack(const char *log_lvl)
  */
 asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
 {
+        bool in_panic = this_cpu_in_panic();
         unsigned long flags;
         /*
          * Permit this cpu to perform nested stack dumps while serialising
-         * against other CPUs
+         * against other CPUs, unless this CPU is in panic.
+         *
+         * When in panic, non-panic CPUs are not permitted to store new
+         * printk messages so there is no need to synchronize the output.
+         * This avoids potential deadlock in panic() if another CPU is
+         * holding and unable to release the printk_cpu_sync.
          */
-        printk_cpu_sync_get_irqsave(flags);
+        if (!in_panic)
+                printk_cpu_sync_get_irqsave(flags);
         __dump_stack(log_lvl);
-        printk_cpu_sync_put_irqrestore(flags);
+        if (!in_panic)
+                printk_cpu_sync_put_irqrestore(flags);
 }
 EXPORT_SYMBOL(dump_stack_lvl);
...