Commit a20257e3 authored by Mathias Nyman's avatar Mathias Nyman Committed by Greg Kroah-Hartman

xhci: Fix handling timeouted commands on hosts in weird states.

commit 3425aa03 upstream.

If commands timeout we mark them for abortion, then stop the command
ring, and turn the commands to no-ops and finally restart the command
ring.

If the host is working properly the no-op commands will finish and
pending completions are called.
If we notice the host is failing, driver clears the command ring and
completes, deletes and frees all pending commands.

There are two separate cases reported where host is believed to work
properly but is not. In the first case we successfully stop the ring
but no abort or stop command ring event is ever sent and host locks up.

The second case is if a host is removed, command times out and driver
believes the ring is stopped, and assumes it will be restarted, but
actually ends up timing out on the same command forever.
If one of the pending commands has the xhci->mutex held it will block
xhci_stop() in the remove codepath which otherwise would cleanup pending
commands.

Add a check that clears all pending commands in case host is removed,
or we are stuck timing out on the same command. Also restart the
command timeout timer when stopping the command ring to ensure we
recive an ring stop/abort event.
Tested-by: default avatarJoe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: default avatarMathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 4582ddf7
...@@ -289,6 +289,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) ...@@ -289,6 +289,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
/*
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
* however on some host hw the CMD_RING_RUNNING bit is correctly cleared
* but the completion event in never sent. Use the cmd timeout timer to
* handle those cases. Use twice the time to cover the bit polling retry
*/
mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
&xhci->op_regs->cmd_ring); &xhci->op_regs->cmd_ring);
...@@ -313,6 +321,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) ...@@ -313,6 +321,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
xhci_err(xhci, "Stopped the command ring failed, " xhci_err(xhci, "Stopped the command ring failed, "
"maybe the host is dead\n"); "maybe the host is dead\n");
del_timer(&xhci->cmd_timer);
xhci->xhc_state |= XHCI_STATE_DYING; xhci->xhc_state |= XHCI_STATE_DYING;
xhci_quiesce(xhci); xhci_quiesce(xhci);
xhci_halt(xhci); xhci_halt(xhci);
...@@ -1252,22 +1261,21 @@ void xhci_handle_command_timeout(unsigned long data) ...@@ -1252,22 +1261,21 @@ void xhci_handle_command_timeout(unsigned long data)
int ret; int ret;
unsigned long flags; unsigned long flags;
u64 hw_ring_state; u64 hw_ring_state;
struct xhci_command *cur_cmd = NULL; bool second_timeout = false;
xhci = (struct xhci_hcd *) data; xhci = (struct xhci_hcd *) data;
/* mark this command to be cancelled */ /* mark this command to be cancelled */
spin_lock_irqsave(&xhci->lock, flags); spin_lock_irqsave(&xhci->lock, flags);
if (xhci->current_cmd) { if (xhci->current_cmd) {
cur_cmd = xhci->current_cmd; if (xhci->current_cmd->status == COMP_CMD_ABORT)
cur_cmd->status = COMP_CMD_ABORT; second_timeout = true;
xhci->current_cmd->status = COMP_CMD_ABORT;
} }
/* Make sure command ring is running before aborting it */ /* Make sure command ring is running before aborting it */
hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
(hw_ring_state & CMD_RING_RUNNING)) { (hw_ring_state & CMD_RING_RUNNING)) {
spin_unlock_irqrestore(&xhci->lock, flags); spin_unlock_irqrestore(&xhci->lock, flags);
xhci_dbg(xhci, "Command timeout\n"); xhci_dbg(xhci, "Command timeout\n");
ret = xhci_abort_cmd_ring(xhci); ret = xhci_abort_cmd_ring(xhci);
...@@ -1279,6 +1287,15 @@ void xhci_handle_command_timeout(unsigned long data) ...@@ -1279,6 +1287,15 @@ void xhci_handle_command_timeout(unsigned long data)
} }
return; return;
} }
/* command ring failed to restart, or host removed. Bail out */
if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
spin_unlock_irqrestore(&xhci->lock, flags);
xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
xhci_cleanup_command_queue(xhci);
return;
}
/* command timeout on stopped ring, ring can't be aborted */ /* command timeout on stopped ring, ring can't be aborted */
xhci_dbg(xhci, "Command timeout on stopped ring\n"); xhci_dbg(xhci, "Command timeout on stopped ring\n");
xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd); xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment