Commit 1ac12fc6 authored by Christopher Goldfarb, committed by Jeff Garzik

e1000 net drvr update 4/13:

Updated transmit path.  Breaks the transmit path up to make it
more understandable.  Aggressively reclaims resources by checking
for completed transmits before queuing each new frame, to avoid
stalling the driver by delaying interrupts too long.
parent 28318676
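For orientation, here is a minimal standalone sketch (not driver code) of the ring-occupancy arithmetic this patch introduces: E1000_DESC_UNUSED derives the number of free descriptors purely from the producer and consumer indices, replacing the old atomic "unused" counter, and TXD_USE_COUNT computes how many descriptors a buffer needs when each descriptor carries at most max_data_per_txd bytes (4096 on 82544/PCI-X per the probe change below, MAX_JUMBO_FRAME_SIZE otherwise). The struct fields mirror e1000_desc_ring; the example values are made up.

#include <stdio.h>

struct ring {
	unsigned int count;          /* number of descriptors in the ring */
	unsigned int next_to_use;    /* producer index (driver queues here) */
	unsigned int next_to_clean;  /* consumer index (advanced on DD status) */
};

/* Free descriptors, computed from the indices alone (as in the patch). */
#define E1000_DESC_UNUSED(R) \
	((((R)->next_to_clean + (R)->count) - ((R)->next_to_use + 1)) % ((R)->count))

/* Descriptors needed for S bytes when each descriptor holds at most X bytes. */
#define TXD_USE_COUNT(S, X) (((S) / (X)) + (((S) % (X)) ? 1 : 0))

int main(void)
{
	struct ring tx = { .count = 256, .next_to_use = 10, .next_to_clean = 200 };

	/* (200 + 256 - 11) % 256 = 189 descriptors still free */
	printf("unused descriptors: %u\n", E1000_DESC_UNUSED(&tx));

	/* a 9018-byte jumbo frame at 4096 bytes per descriptor needs 3 descriptors */
	printf("descriptors needed: %u\n", TXD_USE_COUNT(9018u, 4096u));
	return 0;
}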
@@ -161,10 +161,6 @@ struct e1000_desc_ring {
unsigned int size;
/* number of descriptors in the ring */
unsigned int count;
/* (atomic) number of desc with no buffer */
atomic_t unused;
/* number of desc with no buffer */
unsigned int unused_count;
/* next descriptor to associate a buffer with */
unsigned int next_to_use;
/* next descriptor to check for DD status bit */
@@ -173,14 +169,13 @@ struct e1000_desc_ring {
struct e1000_buffer *buffer_info;
};
#define E1000_RX_DESC(ring, i) \
(&(((struct e1000_rx_desc *)((ring).desc))[i]))
#define E1000_DESC_UNUSED(R) \
((((R)->next_to_clean + (R)->count) - ((R)->next_to_use + 1)) % ((R)->count))
#define E1000_TX_DESC(ring, i) \
(&(((struct e1000_tx_desc *)((ring).desc))[i]))
#define E1000_CONTEXT_DESC(ring, i) \
(&(((struct e1000_context_desc *)((ring).desc))[i]))
#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i]))
#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc)
#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc)
#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc)
/* board specific private data structure */
@@ -204,8 +199,9 @@ struct e1000_adapter {
/* TX */
struct e1000_desc_ring tx_ring;
unsigned long trans_finish;
uint32_t tx_int_delay;
spinlock_t tx_lock;
uint32_t txd_cmd;
int max_data_per_txd;
/* RX */
struct e1000_desc_ring rx_ring;
......
@@ -400,8 +400,17 @@ e1000_probe(struct pci_dev *pdev,
goto err_eeprom;
e1000_read_part_num(&adapter->hw, &(adapter->part_num));
e1000_get_bus_info(&adapter->hw);
if((adapter->hw.mac_type == e1000_82544) &&
(adapter->hw.bus_type == e1000_bus_type_pcix))
adapter->max_data_per_txd = 4096;
else
adapter->max_data_per_txd = MAX_JUMBO_FRAME_SIZE;
init_timer(&adapter->watchdog_timer);
adapter->watchdog_timer.function = &e1000_watchdog;
adapter->watchdog_timer.data = (unsigned long) adapter;
@@ -569,6 +578,7 @@ e1000_sw_init(struct e1000_adapter *adapter)
hw->tbi_compatibility_en = TRUE;
atomic_set(&adapter->irq_sem, 1);
spin_lock_init(&adapter->tx_lock);
spin_lock_init(&adapter->stats_lock);
}
@@ -676,7 +686,6 @@ e1000_setup_tx_resources(struct e1000_adapter *adapter)
}
memset(txdr->desc, 0, txdr->size);
atomic_set(&txdr->unused, txdr->count);
txdr->next_to_use = 0;
txdr->next_to_clean = 0;
@@ -728,26 +737,23 @@ e1000_configure_tx(struct e1000_adapter *adapter)
/* Set the Tx Interrupt Delay register */
E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay);
E1000_WRITE_REG(&adapter->hw, TIDV, 64);
/* Program the Transmit Control Register */
tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
tctl = E1000_READ_REG(&adapter->hw, TCTL);
if(adapter->link_duplex == FULL_DUPLEX) {
tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
} else {
tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
}
tctl &= ~E1000_TCTL_CT;
tctl |= E1000_TCTL_EN | E1000_TCTL_PSP |
(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
e1000_config_collision_dist(&adapter->hw);
/* Setup Transmit Descriptor Settings for this adapter */
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_IDE;
if(adapter->tx_int_delay > 0)
adapter->txd_cmd |= E1000_TXD_CMD_IDE;
if(adapter->hw.report_tx_early == 1)
adapter->txd_cmd |= E1000_TXD_CMD_RS;
else
@@ -794,7 +800,6 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter)
memset(rxdr->desc, 0, rxdr->size);
rxdr->next_to_clean = 0;
rxdr->unused_count = rxdr->count;
rxdr->next_to_use = 0;
return 0;
@@ -959,7 +964,6 @@ e1000_clean_tx_ring(struct e1000_adapter *adapter)
memset(adapter->tx_ring.desc, 0, adapter->tx_ring.size);
atomic_set(&adapter->tx_ring.unused, adapter->tx_ring.count);
adapter->tx_ring.next_to_use = 0;
adapter->tx_ring.next_to_clean = 0;
@@ -1029,7 +1033,6 @@ e1000_clean_rx_ring(struct e1000_adapter *adapter)
memset(adapter->rx_ring.desc, 0, adapter->rx_ring.size);
adapter->rx_ring.unused_count = adapter->rx_ring.count;
adapter->rx_ring.next_to_clean = 0;
adapter->rx_ring.next_to_use = 0;
@@ -1248,164 +1251,179 @@ e1000_watchdog(unsigned long data)
/* Reset the timer */
mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
return;
}
/**
* e1000_xmit_frame - Transmit entry point
* @skb: buffer with frame data to transmit
* @netdev: network interface device structure
*
* Returns 0 on success, 1 on error
*
* e1000_xmit_frame is called by the stack to initiate a transmit.
* The out of resource condition is checked after each successful Tx
* so that the stack can be notified, preventing the driver from
* ever needing to drop a frame. The atomic operations on
tx_ring.unused are used to synchronize with the transmit
* interrupt processing code without the need for a spinlock.
**/
#define E1000_TX_FLAGS_CSUM 0x00000001
#define TXD_USE_COUNT(x) (((x) >> 12) + ((x) & 0x0fff ? 1 : 0))
static inline boolean_t
e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
{
struct e1000_context_desc *context_desc;
int i;
uint8_t css, cso;
#define SETUP_TXD_PAGE(L, P, O) do { \
tx_ring->buffer_info[i].length = (L); \
tx_ring->buffer_info[i].dma = \
pci_map_page(pdev, (P), (O), (L), PCI_DMA_TODEVICE); \
tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma); \
tx_desc->lower.data = cpu_to_le32(txd_lower | (L)); \
tx_desc->upper.data = cpu_to_le32(txd_upper); \
} while (0)
if(skb->ip_summed == CHECKSUM_HW) {
css = skb->h.raw - skb->data;
cso = (skb->h.raw + skb->csum) - skb->data;
#define SETUP_TXD_PTR(L, P) \
SETUP_TXD_PAGE((L), virt_to_page(P), (unsigned long)(P) & ~PAGE_MASK)
i = adapter->tx_ring.next_to_use;
context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
#define QUEUE_TXD() do { i = (i + 1) % tx_ring->count; \
atomic_dec(&tx_ring->unused); } while (0)
context_desc->upper_setup.tcp_fields.tucss = css;
context_desc->upper_setup.tcp_fields.tucso = cso;
context_desc->upper_setup.tcp_fields.tucse = 0;
context_desc->tcp_seg_setup.data = 0;
context_desc->cmd_and_length =
cpu_to_le32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
i = (i + 1) % adapter->tx_ring.count;
adapter->tx_ring.next_to_use = i;
static int
e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return TRUE;
}
return FALSE;
}
static inline int
e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb)
{
struct e1000_adapter *adapter = netdev->priv;
struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
struct pci_dev *pdev = adapter->pdev;
struct e1000_tx_desc *tx_desc;
int f, len, offset, txd_needed;
skb_frag_t *frag;
int len, offset, size, count, i;
int i = tx_ring->next_to_use;
uint32_t txd_upper = 0;
uint32_t txd_lower = adapter->txd_cmd;
int f;
len = skb->len - skb->data_len;
i = (tx_ring->next_to_use + tx_ring->count - 1) % tx_ring->count;
count = 0;
/* If controller appears hung, force transmit timeout */
offset = 0;
if (time_after(netdev->trans_start, adapter->trans_finish + HZ) &&
/* If transmitting XOFFs, we're not really hung */
!(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF)) {
adapter->trans_finish = jiffies;
netif_stop_queue(netdev);
return 1;
while(len) {
i = (i + 1) % tx_ring->count;
size = min(len, adapter->max_data_per_txd);
tx_ring->buffer_info[i].length = size;
tx_ring->buffer_info[i].dma =
pci_map_single(adapter->pdev,
skb->data + offset,
size,
PCI_DMA_TODEVICE);
len -= size;
offset += size;
count++;
}
txd_needed = TXD_USE_COUNT(skb->len - skb->data_len);
for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
txd_needed += TXD_USE_COUNT(frag->size);
}
if(skb->ip_summed == CHECKSUM_HW)
txd_needed += 1;
struct skb_frag_struct *frag;
/* make sure there are enough Tx descriptors available in the ring */
frag = &skb_shinfo(skb)->frags[f];
len = frag->size;
offset = 0;
if(atomic_read(&tx_ring->unused) <= (txd_needed + 1)) {
adapter->net_stats.tx_dropped++;
netif_stop_queue(netdev);
return 1;
while(len) {
i = (i + 1) % tx_ring->count;
size = min(len, adapter->max_data_per_txd);
tx_ring->buffer_info[i].length = size;
tx_ring->buffer_info[i].dma =
pci_map_page(adapter->pdev,
frag->page,
frag->page_offset + offset,
size,
PCI_DMA_TODEVICE);
len -= size;
offset += size;
count++;
}
}
tx_ring->buffer_info[i].skb = skb;
if(skb->ip_summed == CHECKSUM_HW) {
struct e1000_context_desc *context_desc;
uint8_t css = skb->h.raw - skb->data;
uint8_t cso = (skb->h.raw + skb->csum) - skb->data;
context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
return count;
}
context_desc->upper_setup.tcp_fields.tucss = css;
context_desc->upper_setup.tcp_fields.tucso = cso;
context_desc->upper_setup.tcp_fields.tucse = 0;
context_desc->tcp_seg_setup.data = 0;
context_desc->cmd_and_length =
cpu_to_le32(txd_lower | E1000_TXD_CMD_DEXT);
static inline void
e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
{
struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
struct e1000_tx_desc *tx_desc = NULL;
uint32_t txd_upper, txd_lower;
int i;
QUEUE_TXD();
txd_upper = 0;
txd_lower = adapter->txd_cmd;
txd_upper |= E1000_TXD_POPTS_TXSM << 8;
if(tx_flags & E1000_TX_FLAGS_CSUM) {
txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
txd_upper |= E1000_TXD_POPTS_TXSM << 8;
}
tx_desc = E1000_TX_DESC(*tx_ring, i);
len = skb->len - skb->data_len;
offset = 0;
while(len > 4096) {
SETUP_TXD_PTR(4096, skb->data + offset);
QUEUE_TXD();
i = tx_ring->next_to_use;
while(count--) {
tx_desc = E1000_TX_DESC(*tx_ring, i);
len -= 4096;
offset += 4096;
tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma);
tx_desc->lower.data =
cpu_to_le32(txd_lower | tx_ring->buffer_info[i].length);
tx_desc->upper.data = cpu_to_le32(txd_upper);
i = (i + 1) % tx_ring->count;
}
SETUP_TXD_PTR(len, skb->data + offset);
tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP);
for(f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
tx_ring->next_to_use = i;
E1000_WRITE_REG(&adapter->hw, TDT, i);
}
QUEUE_TXD();
#define TXD_USE_COUNT(S, X) (((S) / (X)) + (((S) % (X)) ? 1 : 0))
tx_desc = E1000_TX_DESC(*tx_ring, i);
len = frag->size;
offset = 0;
static int
e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev->priv;
unsigned long flags;
int tx_flags = 0, count;
while(len > 4096) {
SETUP_TXD_PAGE(4096, frag->page,
frag->page_offset + offset);
QUEUE_TXD();
int f;
tx_desc = E1000_TX_DESC(*tx_ring, i);
len -= 4096;
offset += 4096;
}
SETUP_TXD_PAGE(len, frag->page, frag->page_offset + offset);
}
/* EOP and SKB pointer go with the last fragment */
if(time_after(netdev->trans_start, adapter->trans_finish + HZ) &&
!(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF)) {
tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP);
tx_ring->buffer_info[i].skb = skb;
adapter->trans_finish = jiffies;
netif_stop_queue(netdev);
return 1;
}
QUEUE_TXD();
count = TXD_USE_COUNT(skb->len - skb->data_len,
adapter->max_data_per_txd);
for(f = 0; f < skb_shinfo(skb)->nr_frags; f++)
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
adapter->max_data_per_txd);
if(skb->ip_summed == CHECKSUM_HW)
count++;
tx_ring->next_to_use = i;
spin_lock_irqsave(&adapter->tx_lock, flags);
e1000_clean_tx_irq(adapter);
if(E1000_DESC_UNUSED(&adapter->tx_ring) < count) {
netif_stop_queue(netdev);
spin_unlock_irqrestore(&adapter->tx_lock, flags);
return 1;
}
spin_unlock_irqrestore(&adapter->tx_lock, flags);
/* Move the HW Tx Tail Pointer */
if(e1000_tx_csum(adapter, skb))
tx_flags |= E1000_TX_FLAGS_CSUM;
E1000_WRITE_REG(&adapter->hw, TDT, i);
count = e1000_tx_map(adapter, skb);
e1000_tx_queue(adapter, count, tx_flags);
netdev->trans_start = jiffies;
return 0;
}
#undef TXD_USE_COUNT
#undef SETUP_TXD
#undef QUEUE_TXD
/**
* e1000_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
@@ -1672,11 +1690,15 @@ e1000_intr(int irq, void *data, struct pt_regs *regs)
}
e1000_clean_rx_irq(adapter);
e1000_clean_tx_irq(adapter);
if((icr & E1000_ICR_TXDW) && spin_trylock(&adapter->tx_lock)) {
e1000_clean_tx_irq(adapter);
spin_unlock(&adapter->tx_lock);
}
i--;
}
return;
}
}
/**
@@ -1710,7 +1732,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
if(tx_ring->buffer_info[i].skb) {
dev_kfree_skb_irq(tx_ring->buffer_info[i].skb);
dev_kfree_skb_any(tx_ring->buffer_info[i].skb);
tx_ring->buffer_info[i].skb = NULL;
}
@@ -1718,7 +1740,6 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
memset(tx_desc, 0, sizeof(struct e1000_tx_desc));
mb();
atomic_inc(&tx_ring->unused);
i = (i + 1) % tx_ring->count;
tx_desc = E1000_TX_DESC(*tx_ring, i);
@@ -1728,7 +1749,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
tx_ring->next_to_clean = i;
if(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) &&
(atomic_read(&tx_ring->unused) > E1000_TX_QUEUE_WAKE)) {
(E1000_DESC_UNUSED(tx_ring) > E1000_TX_QUEUE_WAKE)) {
netif_wake_queue(netdev);
}
@@ -1777,8 +1798,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter)
mb();
rx_ring->buffer_info[i].skb = NULL;
rx_ring->unused_count++;
i = (i + 1) % rx_ring->count;
rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -1808,7 +1827,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter)
mb();
rx_ring->buffer_info[i].skb = NULL;
rx_ring->unused_count++;
i = (i + 1) % rx_ring->count;
rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -1829,8 +1847,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter)
mb();
rx_ring->buffer_info[i].skb = NULL;
rx_ring->unused_count++;
i = (i + 1) % rx_ring->count;
rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -1898,8 +1914,6 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
/* move tail */
E1000_WRITE_REG(&adapter->hw, RDT, i);
atomic_dec(&rx_ring->unused);
i = (i + 1) % rx_ring->count;
}
......
@@ -192,15 +192,6 @@ E1000_PARAM(FlowControl, "Flow Control setting");
E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload");
/* Transmit Interrupt Delay in units of 1.024 microseconds
*
* Valid Range: 0-65535
*
* Default Value: 64
*/
E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay");
/* Receive Interrupt Delay in units of 1.024 microseconds
*
* Valid Range: 0-65535
@@ -251,10 +242,6 @@ E1000_PARAM(DisablePolarityCorrection,
#define MIN_RXD 80
#define MAX_82544_RXD 4096
#define DEFAULT_TIDV 64
#define MAX_TIDV 0xFFFF
#define MIN_TIDV 0
#define DEFAULT_RDTR 64
#define DEFAULT_RADV 128
#define MAX_RXDELAY 0xFFFF
@@ -423,18 +410,6 @@ e1000_check_options(struct e1000_adapter *adapter)
e1000_validate_option(&fc, &opt);
adapter->hw.fc = adapter->hw.original_fc = fc;
}
{ /* Transmit Interrupt Delay */
struct e1000_option opt = {
type: range_option,
name: "Transmit Interrupt Delay",
err: "using default of " __MODULE_STRING(DEFAULT_TIDV),
def: DEFAULT_TIDV,
arg: { r: { min: MIN_TIDV, max: MAX_TIDV }}
};
adapter->tx_int_delay = TxIntDelay[bd];
e1000_validate_option(&adapter->tx_int_delay, &opt);
}
{ /* Receive Interrupt Delay */
char *rdtr = "using default of " __MODULE_STRING(DEFAULT_RDTR);
char *radv = "using default of " __MODULE_STRING(DEFAULT_RADV);
......