Commit 2905be5d authored by David Brownell's avatar David Brownell Committed by Greg Kroah-Hartman

[PATCH] ehci-hcd, use dummy td when queueing

What it does is give up on catching all the "race with HC" cases
when appending to a live QH, by switching to using a disabled
"dummy" TD at the end of all hardware queues.  The HC won't cache
disabled TDs, so it just sees "always good" pointers: no races.

As a side benefit this also makes it safe to not irq on completion
of most TDs that are queued using the scatterlist calls, so it'll
be typical for one 64 KByte usb-storage request to mean just one
irq (or less!) even without tuning ehci irq latency (for the DATA
stage, that is).
parent 79cda97f
......@@ -272,6 +272,7 @@ static inline void remove_debug_files (struct ehci_hcd *bus) { }
static void qh_lines (struct ehci_qh *qh, char **nextp, unsigned *sizep)
{
u32 scratch;
u32 hw_curr;
struct list_head *entry;
struct ehci_qtd *td;
unsigned temp;
......@@ -279,20 +280,22 @@ static void qh_lines (struct ehci_qh *qh, char **nextp, unsigned *sizep)
char *next = *nextp;
scratch = cpu_to_le32p (&qh->hw_info1);
temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x",
hw_curr = cpu_to_le32p (&qh->hw_current);
temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x (%08x %08x)",
qh, scratch & 0x007f,
speed_char (scratch),
(scratch >> 8) & 0x000f,
scratch, cpu_to_le32p (&qh->hw_info2));
scratch, cpu_to_le32p (&qh->hw_info2),
hw_curr, cpu_to_le32p (&qh->hw_token));
size -= temp;
next += temp;
list_for_each (entry, &qh->qtd_list) {
td = list_entry (entry, struct ehci_qtd,
qtd_list);
td = list_entry (entry, struct ehci_qtd, qtd_list);
scratch = cpu_to_le32p (&td->hw_token);
temp = snprintf (next, size,
"\n\ttd/%p %s len=%d %08x urb %p",
"\n\t%std/%p %s len=%d %08x urb %p",
(hw_curr == td->qtd_dma) ? "*" : "",
td, ({ char *tmp;
switch ((scratch>>8)&0x03) {
case 0: tmp = "out"; break;
......@@ -552,8 +555,8 @@ show_registers (struct device *dev, char *buf, size_t count, loff_t off)
size -= temp;
next += temp;
temp = snprintf (next, size, "complete %ld unlink %ld qpatch %ld\n",
ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
temp = snprintf (next, size, "complete %ld unlink %ld\n",
ehci->stats.complete, ehci->stats.unlink);
size -= temp;
next += temp;
#endif
......
......@@ -494,8 +494,8 @@ static void ehci_stop (struct usb_hcd *hcd)
#ifdef EHCI_STATS
dbg ("irq normal %ld err %ld reclaim %ld",
ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim);
dbg ("complete %ld unlink %ld qpatch %ld",
ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
dbg ("complete %ld unlink %ld",
ehci->stats.complete, ehci->stats.unlink);
#endif
dbg_status (ehci, "ehci_stop completed", readl (&ehci->regs->status));
......
......@@ -58,19 +58,23 @@ static void ehci_hcd_free (struct usb_hcd *hcd)
/* Allocate the key transfer structures from the previously allocated pool */
static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags)
static void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma)
{
struct ehci_qtd *qtd;
dma_addr_t dma;
qtd = pci_pool_alloc (ehci->qtd_pool, flags, &dma);
if (qtd != 0) {
memset (qtd, 0, sizeof *qtd);
qtd->qtd_dma = dma;
qtd->hw_next = EHCI_LIST_END;
qtd->hw_alt_next = EHCI_LIST_END;
INIT_LIST_HEAD (&qtd->qtd_list);
}
}
static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags)
{
struct ehci_qtd *qtd;
dma_addr_t dma;
qtd = pci_pool_alloc (ehci->qtd_pool, flags, &dma);
if (qtd != 0)
ehci_qtd_init (qtd, dma);
return qtd;
}
......@@ -87,12 +91,21 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, int flags)
qh = (struct ehci_qh *)
pci_pool_alloc (ehci->qh_pool, flags, &dma);
if (qh) {
if (!qh)
return qh;
memset (qh, 0, sizeof *qh);
atomic_set (&qh->refcount, 1);
qh->qh_dma = dma;
// INIT_LIST_HEAD (&qh->qh_list);
INIT_LIST_HEAD (&qh->qtd_list);
/* dummy td enables safe urb queuing */
qh->dummy = ehci_qtd_alloc (ehci, flags);
if (qh->dummy == 0) {
dbg ("no dummy td");
pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
qh = 0;
}
return qh;
}
......@@ -115,6 +128,8 @@ static void qh_put (struct ehci_hcd *ehci, struct ehci_qh *qh)
dbg ("unused qh not empty!");
BUG ();
}
if (qh->dummy)
ehci_qtd_free (ehci, qh->dummy);
pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
}
......
......@@ -85,7 +85,7 @@ qtd_fill (struct ehci_qtd *qtd, dma_addr_t buf, size_t len, int token)
/* update halted (but potentially linked) qh */
static inline void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
static void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
{
qh->hw_current = 0;
qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
......@@ -221,17 +221,6 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
struct urb *urb = qtd->urb;
u32 token = 0;
/* hc's on-chip qh overlay cache can overwrite our idea of
* next qtd ptrs, if we appended a qtd while the queue was
* advancing. (because we don't use dummy qtds.)
*/
if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current
&& qtd->hw_next != qh->hw_qtd_next) {
qh->hw_alt_next = qtd->hw_alt_next;
qh->hw_qtd_next = qtd->hw_next;
COUNT (ehci->stats.qpatch);
}
/* clean up any state from previous QTD ...*/
if (last) {
if (likely (last->urb != urb)) {
......@@ -495,8 +484,7 @@ qh_urb_transaction (
}
/* by default, enable interrupt on urb completion */
// ... do it always, unless we switch over to dummy qtds
// if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT)))
if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT)))
qtd->hw_token |= __constant_cpu_to_le32 (QTD_IOC);
return head;
......@@ -661,8 +649,15 @@ ehci_qh_make (
/* initialize sw and hw queues with these qtds */
if (!list_empty (qtd_list)) {
struct ehci_qtd *qtd;
/* hc's list view ends with dummy td; we might update it */
qtd = list_entry (qtd_list->prev, struct ehci_qtd, qtd_list);
qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
list_splice (qtd_list, &qh->qtd_list);
qh_update (qh, list_entry (qtd_list->next, struct ehci_qtd, qtd_list));
qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list);
qh_update (qh, qtd);
} else {
qh->hw_qtd_next = qh->hw_alt_next = EHCI_LIST_END;
}
......@@ -767,39 +762,48 @@ static struct ehci_qh *qh_append_tds (
/* append to tds already queued to this qh? */
if (unlikely (!list_empty (&qh->qtd_list) && qtd)) {
struct ehci_qtd *last_qtd;
u32 hw_next;
struct ehci_qtd *dummy;
dma_addr_t dma;
u32 token;
/* update the last qtd's "next" pointer */
// dbg_qh ("non-empty qh", ehci, qh);
last_qtd = list_entry (qh->qtd_list.prev,
/* to avoid racing the HC, use the dummy td instead of
* the first td of our list (becomes new dummy). both
* tds stay deactivated until we're done, when the
* HC is allowed to fetch the old dummy (4.10.2).
*/
token = qtd->hw_token;
qtd->hw_token = 0;
dummy = qh->dummy;
// dbg ("swap td %p with dummy %p", qtd, dummy);
dma = dummy->qtd_dma;
*dummy = *qtd;
dummy->qtd_dma = dma;
list_del (&qtd->qtd_list);
list_add (&dummy->qtd_list, qtd_list);
ehci_qtd_init (qtd, qtd->qtd_dma);
qh->dummy = qtd;
/* hc must see the new dummy at list end */
qtd = list_entry (qh->qtd_list.prev,
struct ehci_qtd, qtd_list);
hw_next = QTD_NEXT (qtd->qtd_dma);
last_qtd->hw_next = hw_next;
/* previous urb allows short rx? maybe optimize. */
if (!(last_qtd->urb->transfer_flags & URB_SHORT_NOT_OK)
&& (epnum & 0x10)) {
// only the last QTD for now
last_qtd->hw_alt_next = hw_next;
}
qtd->hw_next = QTD_NEXT (dma);
/* qh_completions() may need to patch the qh overlay if
* the hc was advancing this queue while we appended.
* we know it can: last_qtd->hw_token has IOC set.
*
* or: use a dummy td (so the overlay gets the next td
* only when we set its active bit); fewer irqs.
*/
/* let the hc process these next qtds */
wmb ();
dummy->hw_token = token;
/* no URB queued */
} else {
// dbg_qh ("empty qh", ehci, qh);
struct ehci_qtd *last_qtd;
/* NOTE: we already canceled any queued URBs
* when the endpoint halted.
*/
/* make sure hc sees current dummy at the end */
last_qtd = list_entry (qtd_list->prev,
struct ehci_qtd, qtd_list);
last_qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
// dbg_qh ("empty qh", ehci, qh);
/* usb_clear_halt() means qh data toggle gets reset */
if (unlikely (!usb_gettoggle (urb->dev,
......
......@@ -31,11 +31,6 @@ struct ehci_stats {
/* termination of urbs from core */
unsigned long complete;
unsigned long unlink;
/* qhs patched to recover from td queueing race
* (can avoid by using 'dummy td', allowing fewer irqs)
*/
unsigned long qpatch;
};
/* ehci_hcd->lock guards shared data against other CPUs:
......@@ -311,6 +306,7 @@ struct ehci_qh {
dma_addr_t qh_dma; /* address of qh */
union ehci_shadow qh_next; /* ptr to qh; or periodic */
struct list_head qtd_list; /* sw qtd list */
struct ehci_qtd *dummy;
atomic_t refcount;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment