xhci: Fix race related to abort operation
The current abort operation has a race.

    xhci_handle_command_timeout()              (event handler)
      xhci_abort_cmd_ring()
        xhci_write_64(CMD_RING_ABORT)
        xhci_handshake(5s)
          do {
            check CMD_RING_RUNNING
            udelay(1)
                                               ...
                                               COMP_CMD_ABORT event
                                               COMP_CMD_STOP event
                                               xhci_handle_stopped_cmd_ring()
                                                 restart cmd_ring
            CMD_RING_RUNNING becomes 1 again
          } while ()
          return -ETIMEDOUT
        xhci_write_64(CMD_RING_ABORT)
        /* can abort random command */

To perform the abort operation correctly, we have to wait for both the COMP_CMD_STOP event and the negation of CMD_RING_RUNNING.

But as shown above, while the timeout handler is waiting for the negation of CMD_RING_RUNNING, the event handler can restart the cmd_ring. So the timeout handler never notices the negation of CMD_RING_RUNNING, and the retry of CMD_RING_ABORT can abort a random command. (BTW, I guess the retry of CMD_RING_ABORT was a workaround for this race.)

To fix this race, this moves xhci_handle_stopped_cmd_ring() into xhci_abort_cmd_ring(), and the timeout handler waits for the COMP_CMD_STOP event instead. At that point the timeout handler is the owner of the cmd_ring and can safely restart it by using xhci_handle_stopped_cmd_ring().

[FWIW, as a bonus, this approach could easily be extended to add a CMD_RING_PAUSE operation.]

[locks edited as patch is rebased on other locking fixes -Mathias]

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
commit 1c111b6c38
parent cb4d5ce588
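The heart of the fix is a completion that the timeout handler sleeps on and the event handler signals. The snippet below is not the xhci code itself, only a minimal sketch of that pattern built from the same kernel primitives (struct completion, reinit_completion(), wait_for_completion_timeout(), complete_all()); all identifiers such as demo_ctrl, demo_abort() and demo_handle_stop_event() are invented for illustration.

/*
 * Illustrative sketch only -- not the xhci driver code.  It shows the
 * pattern introduced by this patch: the waiter re-arms a completion,
 * kicks the hardware, drops its lock while sleeping on the stop event,
 * and the event handler signals the completion.  All names here
 * (demo_ctrl, demo_abort, demo_handle_stop_event) are invented.
 */
#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

struct demo_ctrl {
	spinlock_t lock;
	struct completion stop_completion;	/* like cmd_ring_stop_completion */
};

/* Called with ctrl->lock held; releases and re-acquires it, as the new
 * xhci_abort_cmd_ring() does. */
static int demo_abort(struct demo_ctrl *ctrl, unsigned long flags)
{
	unsigned long timeleft;

	reinit_completion(&ctrl->stop_completion);

	/* ... set the hardware abort bit and poll for "ring not running" ... */

	spin_unlock_irqrestore(&ctrl->lock, flags);
	timeleft = wait_for_completion_timeout(&ctrl->stop_completion,
					       msecs_to_jiffies(2000));
	spin_lock_irqsave(&ctrl->lock, flags);

	if (!timeleft)
		return -ETIMEDOUT;	/* no stop event: give up, clean the queue */

	/*
	 * Stop event seen: the waiter now owns the ring and can safely turn
	 * aborted commands into no-ops and restart it, which is what
	 * xhci_handle_stopped_cmd_ring() does in the real driver.
	 */
	return 0;
}

/* Event-handler side, called when the controller reports the stop event. */
static void demo_handle_stop_event(struct demo_ctrl *ctrl)
{
	complete_all(&ctrl->stop_completion);
}

complete_all() leaves the completion in the done state until the next reinit_completion(), so the waiter does not miss a stop event that arrives just before it starts waiting.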
drivers/usb/host/xhci-mem.c

@@ -2378,6 +2378,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 
 	/* init command timeout work */
 	INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout);
+	init_completion(&xhci->cmd_ring_stop_completion);
 
 	page_size = readl(&xhci->op_regs->page_size);
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init,
drivers/usb/host/xhci-ring.c

@@ -284,23 +284,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
 	return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
 }
 
-static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
+static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
+{
+	return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command,
+					cmd_list);
+}
+
+/*
+ * Turn all commands on command ring with status set to "aborted" to no-op trbs.
+ * If there are other commands waiting then restart the ring and kick the timer.
+ * This must be called with command ring stopped and xhci->lock held.
+ */
+static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
+					 struct xhci_command *cur_cmd)
+{
+	struct xhci_command *i_cmd;
+	u32 cycle_state;
+
+	/* Turn all aborted commands in list to no-ops, then restart */
+	list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) {
+
+		if (i_cmd->status != COMP_CMD_ABORT)
+			continue;
+
+		i_cmd->status = COMP_CMD_STOP;
+
+		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
+			 i_cmd->command_trb);
+		/* get cycle state from the original cmd trb */
+		cycle_state = le32_to_cpu(
+			i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
+		/* modify the command trb to no-op command */
+		i_cmd->command_trb->generic.field[0] = 0;
+		i_cmd->command_trb->generic.field[1] = 0;
+		i_cmd->command_trb->generic.field[2] = 0;
+		i_cmd->command_trb->generic.field[3] = cpu_to_le32(
+			TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
+
+		/*
+		 * caller waiting for completion is called when command
+		 * completion event is received for these no-op commands
+		 */
+	}
+
+	xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
+
+	/* ring command ring doorbell to restart the command ring */
+	if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
+	    !(xhci->xhc_state & XHCI_STATE_DYING)) {
+		xhci->current_cmd = cur_cmd;
+		xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
+		xhci_ring_cmd_db(xhci);
+	}
+}
+
+/* Must be called with xhci->lock held, releases and aquires lock back */
+static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
 {
 	u64 temp_64;
 	int ret;
 
 	xhci_dbg(xhci, "Abort command ring\n");
 
-	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
-	xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+	reinit_completion(&xhci->cmd_ring_stop_completion);
 
-	/*
-	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
-	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
-	 * but the completion event in never sent. Use the cmd timeout timer to
-	 * handle those cases. Use twice the time to cover the bit polling retry
-	 */
-	xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT);
+	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
 			&xhci->op_regs->cmd_ring);
 
@@ -320,17 +368,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 		udelay(1000);
 		ret = xhci_handshake(&xhci->op_regs->cmd_ring,
 				     CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
-		if (ret == 0)
-			return 0;
-
-		xhci_err(xhci, "Stopped the command ring failed, "
-				"maybe the host is dead\n");
-		cancel_delayed_work(&xhci->cmd_timer);
-		xhci->xhc_state |= XHCI_STATE_DYING;
-		xhci_halt(xhci);
-		return -ESHUTDOWN;
+		if (ret < 0) {
+			xhci_err(xhci, "Stopped the command ring failed, "
+				 "maybe the host is dead\n");
+			xhci->xhc_state |= XHCI_STATE_DYING;
+			xhci_halt(xhci);
+			return -ESHUTDOWN;
+		}
 	}
-
+	/*
+	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+	 * but the completion event in never sent. Wait 2 secs (arbitrary
+	 * number) to handle those cases after negation of CMD_RING_RUNNING.
+	 */
+	spin_unlock_irqrestore(&xhci->lock, flags);
+	ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion,
+					  msecs_to_jiffies(2000));
+	spin_lock_irqsave(&xhci->lock, flags);
+	if (!ret) {
+		xhci_dbg(xhci, "No stop event for abort, ring start fail?\n");
+		xhci_cleanup_command_queue(xhci);
+	} else {
+		xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci));
+	}
 	return 0;
 }
 
@@ -1212,64 +1273,12 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
 		xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT);
 }
 
-/*
- * Turn all commands on command ring with status set to "aborted" to no-op trbs.
- * If there are other commands waiting then restart the ring and kick the timer.
- * This must be called with command ring stopped and xhci->lock held.
- */
-static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
-					 struct xhci_command *cur_cmd)
-{
-	struct xhci_command *i_cmd, *tmp_cmd;
-	u32 cycle_state;
-
-	/* Turn all aborted commands in list to no-ops, then restart */
-	list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list,
-				 cmd_list) {
-
-		if (i_cmd->status != COMP_CMD_ABORT)
-			continue;
-
-		i_cmd->status = COMP_CMD_STOP;
-
-		xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
-			 i_cmd->command_trb);
-		/* get cycle state from the original cmd trb */
-		cycle_state = le32_to_cpu(
-			i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
-		/* modify the command trb to no-op command */
-		i_cmd->command_trb->generic.field[0] = 0;
-		i_cmd->command_trb->generic.field[1] = 0;
-		i_cmd->command_trb->generic.field[2] = 0;
-		i_cmd->command_trb->generic.field[3] = cpu_to_le32(
-			TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
-
-		/*
-		 * caller waiting for completion is called when command
-		 * completion event is received for these no-op commands
-		 */
-	}
-
-	xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
-
-	/* ring command ring doorbell to restart the command ring */
-	if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
-	    !(xhci->xhc_state & XHCI_STATE_DYING)) {
-		xhci->current_cmd = cur_cmd;
-		xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
-		xhci_ring_cmd_db(xhci);
-	}
-	return;
-}
-
-
 void xhci_handle_command_timeout(struct work_struct *work)
 {
 	struct xhci_hcd *xhci;
 	int ret;
 	unsigned long flags;
 	u64 hw_ring_state;
-	bool second_timeout = false;
 
 	xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer);
 
@@ -1283,18 +1292,17 @@ void xhci_handle_command_timeout(struct work_struct *work)
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		return;
 	}
 
 	/* mark this command to be cancelled */
-	if (xhci->current_cmd->status == COMP_CMD_ABORT)
-		second_timeout = true;
 	xhci->current_cmd->status = COMP_CMD_ABORT;
 
 	/* Make sure command ring is running before aborting it */
 	hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
 	    (hw_ring_state & CMD_RING_RUNNING)) {
 		/* Prevent new doorbell, and start command abort */
 		xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
 		xhci_dbg(xhci, "Command timeout\n");
-		ret = xhci_abort_cmd_ring(xhci);
+		ret = xhci_abort_cmd_ring(xhci, flags);
 		if (unlikely(ret == -ESHUTDOWN)) {
 			xhci_err(xhci, "Abort command ring failed\n");
 			xhci_cleanup_command_queue(xhci);
@@ -1308,9 +1316,9 @@ void xhci_handle_command_timeout(struct work_struct *work)
 		goto time_out_completed;
 	}
 
-	/* command ring failed to restart, or host removed. Bail out */
-	if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
-		xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+	/* host removed. Bail out */
+	if (xhci->xhc_state & XHCI_STATE_REMOVING) {
+		xhci_dbg(xhci, "host removed, ring start fail?\n");
 		xhci_cleanup_command_queue(xhci);
 
 		goto time_out_completed;
@@ -1360,7 +1368,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 
 	/* If CMD ring stopped we own the trbs between enqueue and dequeue */
 	if (cmd_comp_code == COMP_CMD_STOP) {
-		xhci_handle_stopped_cmd_ring(xhci, cmd);
+		complete_all(&xhci->cmd_ring_stop_completion);
 		return;
 	}
 
drivers/usb/host/xhci.h

@@ -1569,6 +1569,7 @@ struct xhci_hcd {
 	struct list_head	cmd_list;
 	unsigned int		cmd_ring_reserved_trbs;
 	struct delayed_work	cmd_timer;
+	struct completion	cmd_ring_stop_completion;
 	struct xhci_command	*current_cmd;
 	struct xhci_ring	*event_ring;
 	struct xhci_erst	erst;
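A small point that makes the new xhci_abort_cmd_ring() easier to read: wait_for_completion_timeout() returns 0 when the timeout expires and the number of remaining jiffies (at least 1) when the completion was signalled, so the "if (!ret)" branch is the "no COMP_CMD_STOP event arrived" case. Below is a stand-alone illustration of that convention; the completion and function names are invented, not driver code.

#include <linux/completion.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

/* Invented example showing the wait_for_completion_timeout() return
 * convention the patch relies on. */
static DECLARE_COMPLETION(demo_done);

static int demo_wait(void)
{
	unsigned long timeleft;

	timeleft = wait_for_completion_timeout(&demo_done,
					       msecs_to_jiffies(2000));
	if (!timeleft)
		return -ETIMEDOUT;	/* 0: the two-second timeout expired */

	return 0;			/* >0: signalled, jiffies were left over */
}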