Commit 65f0b417 authored by Ben Hutchings

sfc: Use write-combining to reduce TX latency

Based on work by Neil Turton <nturton@solarflare.com> and
Kieran Mansley <kmansley@solarflare.com>.

The BIU has now been verified to handle 3- and 4-dword writes within a
single 128-bit register correctly.  This means we can enable write-
combining and only insert write barriers between writes to distinct
registers.

This has been observed to save about 0.5 us when pushing a TX
descriptor to an empty TX queue.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
parent 6d84b986
...@@ -1104,7 +1104,7 @@ static int efx_init_io(struct efx_nic *efx) ...@@ -1104,7 +1104,7 @@ static int efx_init_io(struct efx_nic *efx)
rc = -EIO; rc = -EIO;
goto fail3; goto fail3;
} }
efx->membase = ioremap_nocache(efx->membase_phys, efx->membase = ioremap_wc(efx->membase_phys,
efx->type->mem_map_size); efx->type->mem_map_size);
if (!efx->membase) { if (!efx->membase) {
netif_err(efx, probe, efx->net_dev, netif_err(efx, probe, efx->net_dev,
......
...@@ -48,9 +48,9 @@ ...@@ -48,9 +48,9 @@
* replacing the low 96 bits with zero does not affect functionality. * replacing the low 96 bits with zero does not affect functionality.
* - If the host writes to the last dword address of such a register * - If the host writes to the last dword address of such a register
* (i.e. the high 32 bits) the underlying register will always be * (i.e. the high 32 bits) the underlying register will always be
* written. If the collector does not hold values for the low 96 * written. If the collector and the current write together do not
* bits of the register, they will be written as zero. Writing to * provide values for all 128 bits of the register, the low 96 bits
* the last qword does not have this effect and must not be done. * will be written as zero.
* - If the host writes to the address of any other part of such a * - If the host writes to the address of any other part of such a
* register while the collector already holds values for some other * register while the collector already holds values for some other
* register, the write is discarded and the collector maintains its * register, the write is discarded and the collector maintains its
...@@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, ...@@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
_efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[2], reg + 8);
_efx_writed(efx, value->u32[3], reg + 12); _efx_writed(efx, value->u32[3], reg + 12);
#endif #endif
wmb();
mmiowb(); mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags); spin_unlock_irqrestore(&efx->biu_lock, flags);
} }
...@@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, ...@@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
__raw_writel((__force u32)value->u32[0], membase + addr); __raw_writel((__force u32)value->u32[0], membase + addr);
__raw_writel((__force u32)value->u32[1], membase + addr + 4); __raw_writel((__force u32)value->u32[1], membase + addr + 4);
#endif #endif
wmb();
mmiowb(); mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags); spin_unlock_irqrestore(&efx->biu_lock, flags);
} }
...@@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value, ...@@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
/* No lock required */ /* No lock required */
_efx_writed(efx, value->u32[0], reg); _efx_writed(efx, value->u32[0], reg);
wmb();
} }
/* Read a 128-bit CSR, locking as appropriate. */ /* Read a 128-bit CSR, locking as appropriate. */
...@@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, ...@@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
#ifdef EFX_USE_QWORD_IO #ifdef EFX_USE_QWORD_IO
_efx_writeq(efx, value->u64[0], reg + 0); _efx_writeq(efx, value->u64[0], reg + 0);
_efx_writeq(efx, value->u64[1], reg + 8);
#else #else
_efx_writed(efx, value->u32[0], reg + 0); _efx_writed(efx, value->u32[0], reg + 0);
_efx_writed(efx, value->u32[1], reg + 4); _efx_writed(efx, value->u32[1], reg + 4);
#endif
_efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[2], reg + 8);
_efx_writed(efx, value->u32[3], reg + 12); _efx_writed(efx, value->u32[3], reg + 12);
#endif
wmb();
} }
#define efx_writeo_page(efx, value, reg, page) \ #define efx_writeo_page(efx, value, reg, page) \
_efx_writeo_page(efx, value, \ _efx_writeo_page(efx, value, \
......
...@@ -94,14 +94,15 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, ...@@ -94,14 +94,15 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
efx_writed(efx, &hdr, pdu); efx_writed(efx, &hdr, pdu);
for (i = 0; i < inlen; i += 4) for (i = 0; i < inlen; i += 4) {
_efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
/* use wmb() within loop to inhibit write combining */
/* Ensure the payload is written out before the header */
wmb(); wmb();
}
/* ring the doorbell with a distinctive value */ /* ring the doorbell with a distinctive value */
_efx_writed(efx, (__force __le32) 0x45789abc, doorbell); _efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
wmb();
} }
static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment