author	David S. Miller <davem@davemloft.net>	2014-11-21 15:19:15 -0500
committer	David S. Miller <davem@davemloft.net>	2014-11-21 15:19:15 -0500
commit	7d7a10792d729e7a0e3627b1b85b9312c4785542 (patch)
tree	252e22443ecfa3b53e6439b228c4f44d875046fe /drivers
parent	91ecee6846704cbd0ec14509b86e99c291cd9bcd (diff)
parent	16958a2b05def4ed214ae681b7ee4ce8537b00fb (diff)
download	linux-7d7a10792d729e7a0e3627b1b85b9312c4785542.tar.gz
Merge branch 'amd-xgbe-next'
Tom Lendacky says:

====================
amd-xgbe: AMD XGBE driver updates 2014-11-20

The following series of patches includes functional updates to the
driver as well as some trivial changes.

- Add a read memory barrier in the Tx and Rx path after checking the
  descriptor ownership bit
- Wait for the Tx engine to stop/suspend before issuing a stop command
- Implement a smatch tool suggestion to simplify an if statement
- Separate out Tx and Rx ring data fields into their own structures
- Add BQL support
- Remove an unused variable
- Change Tx coalescing support to operate on a packet basis instead of
  a descriptor basis
- Add support for the skb->xmit_more flag

This patch series is based on net-next.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
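
The headline change in this series is the xmit_more support: the MMIO doorbell (Tx tail-pointer) write is deferred while the stack signals that more packets are coming, and flushed on the last packet of the batch or when the queue stops. A minimal sketch of the pattern, with hypothetical my_* types and helpers; only skb->xmit_more, netif_xmit_stopped() and netdev_get_tx_queue() are real kernel APIs here:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct my_ring {
	unsigned int deferred_db;	/* doorbell write still pending */
};

struct my_priv {
	struct my_ring tx_ring[16];
};

static void my_fill_descriptors(struct my_ring *ring, struct sk_buff *skb)
{
	/* map the skb and write Tx descriptors (omitted) */
}

static void my_ring_db_write(struct my_ring *ring)
{
	/* MMIO write of the ring tail pointer (omitted) */
}

static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct my_priv *priv = netdev_priv(netdev);
	struct my_ring *ring = &priv->tx_ring[skb->queue_mapping];
	struct netdev_queue *txq = netdev_get_tx_queue(netdev,
						       skb->queue_mapping);

	my_fill_descriptors(ring, skb);

	/* Ring the doorbell only when the stack has no further packets
	 * queued for us, or when the queue is stopped (in which case no
	 * later transmit would arrive to flush a deferred write).
	 */
	if (!skb->xmit_more || netif_xmit_stopped(txq)) {
		my_ring_db_write(ring);
		ring->deferred_db = 0;
	} else {
		ring->deferred_db = 1;
	}

	return NETDEV_TX_OK;
}

The stopped-queue check is the important subtlety: once a queue is stopped no later transmit will flush a deferred doorbell, which is also why xgbe_maybe_stop_tx_queue() in the xgbe-drv.c diff below flushes before returning NETDEV_TX_BUSY.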
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/net/ethernet/amd/xgbe/xgbe-common.h	21
-rw-r--r--	drivers/net/ethernet/amd/xgbe/xgbe-desc.c	40
-rw-r--r--	drivers/net/ethernet/amd/xgbe/xgbe-dev.c	170
-rw-r--r--	drivers/net/ethernet/amd/xgbe/xgbe-drv.c	112
-rw-r--r--	drivers/net/ethernet/amd/xgbe/xgbe.h	42
5 files changed, 289 insertions, 96 deletions
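
Before the per-file diffs, a note on the first item in the series, the read memory barrier after the descriptor ownership check. The device writes the OWN bit last, after filling the rest of the descriptor, so the CPU must not load the other fields until it has observed OWN clear. A reduced sketch of the consumer side, with a hypothetical descriptor layout:

#include <linux/types.h>
#include <linux/errno.h>
#include <asm/barrier.h>

struct my_desc {
	u32 len;
	u32 status;
	u32 own;		/* nonzero while the device owns the entry */
};

static int my_process(u32 len, u32 status)
{
	return 0;		/* handle the completed buffer (omitted) */
}

static int my_poll_one(struct my_desc *desc)
{
	if (desc->own)
		return -EBUSY;	/* still owned by the hardware */

	/* Make sure len/status are loaded only after the OWN bit was seen
	 * clear; without the barrier these reads could be reordered ahead
	 * of the ownership check and observe stale data.
	 */
	rmb();

	return my_process(desc->len, desc->status);
}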
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 2fe8fc71fe01..75b08c63d39f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -125,9 +125,6 @@
 #define DMA_AXIAWCR			0x3018
 #define DMA_DSR0			0x3020
 #define DMA_DSR1			0x3024
-#define DMA_DSR2			0x3028
-#define DMA_DSR3			0x302c
-#define DMA_DSR4			0x3030
 
 /* DMA register entry bit positions and sizes */
 #define DMA_AXIARCR_DRC_INDEX		0
@@ -158,10 +155,6 @@
 #define DMA_AXIAWCR_TDC_WIDTH		4
 #define DMA_AXIAWCR_TDD_INDEX		28
 #define DMA_AXIAWCR_TDD_WIDTH		2
-#define DMA_DSR0_RPS_INDEX		8
-#define DMA_DSR0_RPS_WIDTH		4
-#define DMA_DSR0_TPS_INDEX		12
-#define DMA_DSR0_TPS_WIDTH		4
 #define DMA_ISR_MACIS_INDEX		17
 #define DMA_ISR_MACIS_WIDTH		1
 #define DMA_ISR_MTLIS_INDEX		16
@@ -175,6 +168,20 @@
 #define DMA_SBMR_UNDEF_INDEX		0
 #define DMA_SBMR_UNDEF_WIDTH		1
 
+/* DMA register values */
+#define DMA_DSR_RPS_WIDTH		4
+#define DMA_DSR_TPS_WIDTH		4
+#define DMA_DSR_Q_WIDTH			(DMA_DSR_RPS_WIDTH + DMA_DSR_TPS_WIDTH)
+#define DMA_DSR0_RPS_START		8
+#define DMA_DSR0_TPS_START		12
+#define DMA_DSRX_FIRST_QUEUE		3
+#define DMA_DSRX_INC			4
+#define DMA_DSRX_QPR			4
+#define DMA_DSRX_RPS_START		0
+#define DMA_DSRX_TPS_START		4
+#define DMA_TPS_STOPPED			0x00
+#define DMA_TPS_SUSPENDED		0x06
+
 /* DMA channel register offsets
  *   Multiple channels can be active.  The first channel has registers
  *   that begin at 0x3100.  Each subsequent channel has registers that
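
The new DMA_DSR* values above encode where each channel's Tx process state (TPS) lives: queues 0-2 sit in DMA_DSR0 starting at bit 12, and each later DSR register packs four queues in 8-bit RPS+TPS slots. The standalone sketch below (values copied from the hunk above, printf for demonstration only) mirrors the lookup that xgbe_prepare_tx_stop() performs later in this series:

#include <stdio.h>

#define DMA_DSR0		0x3020
#define DMA_DSR1		0x3024
#define DMA_DSR_TPS_WIDTH	4
#define DMA_DSR_Q_WIDTH		8	/* 4-bit RPS + 4-bit TPS */
#define DMA_DSR0_TPS_START	12
#define DMA_DSRX_FIRST_QUEUE	3
#define DMA_DSRX_INC		4
#define DMA_DSRX_QPR		4	/* queues per DSR1..DSR4 register */
#define DMA_DSRX_TPS_START	4

static void tps_location(unsigned int q, unsigned int *reg, unsigned int *pos)
{
	if (q < DMA_DSRX_FIRST_QUEUE) {
		/* Queues 0-2 live in DMA_DSR0 */
		*reg = DMA_DSR0;
		*pos = (q * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START;
	} else {
		/* Queues 3+ live in DMA_DSR1..DMA_DSR4, four per register */
		unsigned int idx = q - DMA_DSRX_FIRST_QUEUE;

		*reg = DMA_DSR1 + ((idx / DMA_DSRX_QPR) * DMA_DSRX_INC);
		*pos = ((idx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) +
		       DMA_DSRX_TPS_START;
	}
}

int main(void)
{
	unsigned int reg, pos, q;

	for (q = 0; q < 8; q++) {
		tps_location(q, &reg, &pos);
		printf("queue %u: DSR 0x%04x, TPS bits [%u:%u]\n",
		       q, reg, pos + DMA_DSR_TPS_WIDTH - 1, pos);
	}
	return 0;
}

For queue 3 this yields register 0x3024 (DMA_DSR1), bits [7:4], which is consistent with the DMA_DSR2-DSR4 defines removed above (0x3028 onward) now being computed instead of enumerated.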
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index e6b9f54b9697..51b68d1299fe 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -335,11 +335,11 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 	}
 
 	/* Set up the header page info */
-	xgbe_set_buffer_data(&rdata->rx_hdr, &ring->rx_hdr_pa,
+	xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa,
 			     XGBE_SKB_ALLOC_SIZE);
 
 	/* Set up the buffer page info */
-	xgbe_set_buffer_data(&rdata->rx_buf, &ring->rx_buf_pa,
+	xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa,
 			     pdata->rx_buf_size);
 
 	return 0;
@@ -378,7 +378,7 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
 
 		ring->cur = 0;
 		ring->dirty = 0;
-		ring->tx.queue_stopped = 0;
+		memset(&ring->tx, 0, sizeof(ring->tx));
 
 		hw_if->tx_desc_init(channel);
 	}
@@ -422,8 +422,7 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
 
 		ring->cur = 0;
 		ring->dirty = 0;
-		ring->rx.realloc_index = 0;
-		ring->rx.realloc_threshold = 0;
+		memset(&ring->rx, 0, sizeof(ring->rx));
 
 		hw_if->rx_desc_init(channel);
 	}
@@ -451,31 +450,29 @@ static void xgbe_unmap_rdata(struct xgbe_prv_data *pdata,
 		rdata->skb = NULL;
 	}
 
-	if (rdata->rx_hdr.pa.pages)
-		put_page(rdata->rx_hdr.pa.pages);
+	if (rdata->rx.hdr.pa.pages)
+		put_page(rdata->rx.hdr.pa.pages);
 
-	if (rdata->rx_hdr.pa_unmap.pages) {
-		dma_unmap_page(pdata->dev, rdata->rx_hdr.pa_unmap.pages_dma,
-			       rdata->rx_hdr.pa_unmap.pages_len,
+	if (rdata->rx.hdr.pa_unmap.pages) {
+		dma_unmap_page(pdata->dev, rdata->rx.hdr.pa_unmap.pages_dma,
+			       rdata->rx.hdr.pa_unmap.pages_len,
 			       DMA_FROM_DEVICE);
-		put_page(rdata->rx_hdr.pa_unmap.pages);
+		put_page(rdata->rx.hdr.pa_unmap.pages);
 	}
 
-	if (rdata->rx_buf.pa.pages)
-		put_page(rdata->rx_buf.pa.pages);
+	if (rdata->rx.buf.pa.pages)
+		put_page(rdata->rx.buf.pa.pages);
 
-	if (rdata->rx_buf.pa_unmap.pages) {
-		dma_unmap_page(pdata->dev, rdata->rx_buf.pa_unmap.pages_dma,
-			       rdata->rx_buf.pa_unmap.pages_len,
+	if (rdata->rx.buf.pa_unmap.pages) {
+		dma_unmap_page(pdata->dev, rdata->rx.buf.pa_unmap.pages_dma,
+			       rdata->rx.buf.pa_unmap.pages_len,
 			       DMA_FROM_DEVICE);
-		put_page(rdata->rx_buf.pa_unmap.pages);
+		put_page(rdata->rx.buf.pa_unmap.pages);
 	}
 
-	memset(&rdata->rx_hdr, 0, sizeof(rdata->rx_hdr));
-	memset(&rdata->rx_buf, 0, sizeof(rdata->rx_buf));
+	memset(&rdata->tx, 0, sizeof(rdata->tx));
+	memset(&rdata->rx, 0, sizeof(rdata->rx));
 
-	rdata->tso_header = 0;
-	rdata->len = 0;
 	rdata->interrupt = 0;
 	rdata->mapped_as_page = 0;
 
@@ -534,7 +531,6 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb)
 		}
 		rdata->skb_dma = skb_dma;
 		rdata->skb_dma_len = packet->header_len;
-		rdata->tso_header = 1;
 
 		offset = packet->header_len;
 
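
One motivation for the rx_hdr/rx_buf to rx.hdr/rx.buf renaming running through this file: once related fields live in a sub-structure, per-field resets collapse into a single memset that stays correct as fields are added. A reduced stand-in illustration (field names abbreviated; the real layout is in the xgbe.h diff at the end of this message):

#include <string.h>

struct rx_ring_state {
	unsigned int realloc_index;
	unsigned int realloc_threshold;
	/* ... further Rx bookkeeping ... */
};

struct ring {
	struct rx_ring_state rx;
};

static void ring_reset(struct ring *ring)
{
	/* Replaces per-field zeroing such as:
	 *   ring->rx.realloc_index = 0;
	 *   ring->rx.realloc_threshold = 0;
	 */
	memset(&ring->rx, 0, sizeof(ring->rx));
}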
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index f6014d330b2e..53f5f66ec2ee 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1085,10 +1085,10 @@ static void xgbe_rx_desc_reset(struct xgbe_ring_data *rdata)
 	 *   Set buffer 2 (hi) address to buffer dma address (hi) and
 	 *     set control bits OWN and INTE
 	 */
-	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx_hdr.dma));
-	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx_hdr.dma));
-	rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx_buf.dma));
-	rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx_buf.dma));
+	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx.hdr.dma));
+	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx.hdr.dma));
+	rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx.buf.dma));
+	rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx.buf.dma));
 
 	XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE,
 			  rdata->interrupt ? 1 : 0);
@@ -1325,6 +1325,29 @@ static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
 	xgbe_config_flow_control(pdata);
 }
 
+static void xgbe_tx_start_xmit(struct xgbe_channel *channel,
+			       struct xgbe_ring *ring)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+	struct xgbe_ring_data *rdata;
+
+	/* Issue a poll command to Tx DMA by writing address
+	 * of next immediate free descriptor */
+	rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
+			  lower_32_bits(rdata->rdesc_dma));
+
+	/* Start the Tx coalescing timer */
+	if (pdata->tx_usecs && !channel->tx_timer_active) {
+		channel->tx_timer_active = 1;
+		hrtimer_start(&channel->tx_timer,
+			      ktime_set(0, pdata->tx_usecs * NSEC_PER_USEC),
+			      HRTIMER_MODE_REL);
+	}
+
+	ring->tx.xmit_more = 0;
+}
+
 static void xgbe_dev_xmit(struct xgbe_channel *channel)
 {
 	struct xgbe_prv_data *pdata = channel->pdata;
@@ -1334,7 +1357,7 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 	struct xgbe_packet_data *packet = &ring->packet_data;
 	unsigned int csum, tso, vlan;
 	unsigned int tso_context, vlan_context;
-	unsigned int tx_coalesce, tx_frames;
+	unsigned int tx_set_ic;
 	int start_index = ring->cur;
 	int i;
 
@@ -1357,10 +1380,26 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 	else
 		vlan_context = 0;
 
-	tx_coalesce = (pdata->tx_usecs || pdata->tx_frames) ? 1 : 0;
-	tx_frames = pdata->tx_frames;
-	if (tx_coalesce && !channel->tx_timer_active)
-		ring->coalesce_count = 0;
+	/* Determine if an interrupt should be generated for this Tx:
+	 *   Interrupt:
+	 *     - Tx frame count exceeds the frame count setting
+	 *     - Addition of Tx frame count to the frame count since the
+	 *       last interrupt was set exceeds the frame count setting
+	 *   No interrupt:
+	 *     - No frame count setting specified (ethtool -C ethX tx-frames 0)
+	 *     - Addition of Tx frame count to the frame count since the
+	 *       last interrupt was set does not exceed the frame count setting
+	 */
+	ring->coalesce_count += packet->tx_packets;
+	if (!pdata->tx_frames)
+		tx_set_ic = 0;
+	else if (packet->tx_packets > pdata->tx_frames)
+		tx_set_ic = 1;
+	else if ((ring->coalesce_count % pdata->tx_frames) <
+		 packet->tx_packets)
+		tx_set_ic = 1;
+	else
+		tx_set_ic = 0;
 
 	rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
 	rdesc = rdata->rdesc;
@@ -1427,13 +1466,6 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 	if (XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, PTP))
 		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, TTSE, 1);
 
-	/* Set IC bit based on Tx coalescing settings */
-	XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
-	if (tx_coalesce && (!tx_frames ||
-			    (++ring->coalesce_count % tx_frames)))
-		/* Clear IC bit */
-		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 0);
-
 	/* Mark it as First Descriptor */
 	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, FD, 1);
 
@@ -1478,13 +1510,6 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, HL_B1L,
 				  rdata->skb_dma_len);
 
-		/* Set IC bit based on Tx coalescing settings */
-		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
-		if (tx_coalesce && (!tx_frames ||
-				    (++ring->coalesce_count % tx_frames)))
-			/* Clear IC bit */
-			XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 0);
-
 		/* Set OWN bit */
 		XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1);
 
@@ -1500,6 +1525,14 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 	/* Set LAST bit for the last descriptor */
 	XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, LD, 1);
 
+	/* Set IC bit based on Tx coalescing settings */
+	if (tx_set_ic)
+		XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
+
+	/* Save the Tx info to report back during cleanup */
+	rdata->tx.packets = packet->tx_packets;
+	rdata->tx.bytes = packet->tx_bytes;
+
 	/* In case the Tx DMA engine is running, make sure everything
 	 * is written to the descriptor(s) before setting the OWN bit
 	 * for the first descriptor
@@ -1518,20 +1551,13 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
 	/* Make sure ownership is written to the descriptor */
 	wmb();
 
-	/* Issue a poll command to Tx DMA by writing address
-	 * of next immediate free descriptor */
 	ring->cur++;
-	rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
-	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
-			  lower_32_bits(rdata->rdesc_dma));
-
-	/* Start the Tx coalescing timer */
-	if (tx_coalesce && !channel->tx_timer_active) {
-		channel->tx_timer_active = 1;
-		hrtimer_start(&channel->tx_timer,
-			      ktime_set(0, pdata->tx_usecs * NSEC_PER_USEC),
-			      HRTIMER_MODE_REL);
-	}
+	if (!packet->skb->xmit_more ||
+	    netif_xmit_stopped(netdev_get_tx_queue(pdata->netdev,
+						   channel->queue_index)))
+		xgbe_tx_start_xmit(channel, ring);
+	else
+		ring->tx.xmit_more = 1;
 
 	DBGPR("  %s: descriptors %u to %u written\n",
 	      channel->name, start_index & (ring->rdesc_count - 1),
@@ -1558,6 +1584,9 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN))
 		return 1;
 
+	/* Make sure descriptor fields are read after reading the OWN bit */
+	rmb();
+
 #ifdef XGMAC_ENABLE_RX_DESC_DUMP
 	xgbe_dump_rx_desc(ring, rdesc, ring->cur);
 #endif
@@ -1583,8 +1612,8 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 
 	/* Get the header length */
 	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD))
-		rdata->hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
-						   RX_NORMAL_DESC2, HL);
+		rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
+						      RX_NORMAL_DESC2, HL);
 
 	/* Get the RSS hash */
 	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, RSV)) {
@@ -1607,7 +1636,7 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 	}
 
 	/* Get the packet length */
-	rdata->len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
+	rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
 
 	if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) {
 		/* Not all the data has been transferred for this packet */
@@ -1630,7 +1659,8 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 	etlt = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ETLT);
 	DBGPR("  err=%u, etlt=%#x\n", err, etlt);
 
-	if (!err || (err && !etlt)) {
+	if (!err || !etlt) {
+		/* No error if err is 0 or etlt is 0 */
 		if ((etlt == 0x09) &&
 		    (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 			XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
@@ -2450,6 +2480,47 @@ static void xgbe_config_mmc(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1);
 }
 
+static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
+				 struct xgbe_channel *channel)
+{
+	unsigned int tx_dsr, tx_pos, tx_qidx;
+	unsigned int tx_status;
+	unsigned long tx_timeout;
+
+	/* Calculate the status register to read and the position within */
+	if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) {
+		tx_dsr = DMA_DSR0;
+		tx_pos = (channel->queue_index * DMA_DSR_Q_WIDTH) +
+			 DMA_DSR0_TPS_START;
+	} else {
+		tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE;
+
+		tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC);
+		tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) +
+			 DMA_DSRX_TPS_START;
+	}
+
+	/* The Tx engine cannot be stopped if it is actively processing
+	 * descriptors. Wait for the Tx engine to enter the stopped or
+	 * suspended state.  Don't wait forever though...
+	 */
+	tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ);
+	while (time_before(jiffies, tx_timeout)) {
+		tx_status = XGMAC_IOREAD(pdata, tx_dsr);
+		tx_status = GET_BITS(tx_status, tx_pos, DMA_DSR_TPS_WIDTH);
+		if ((tx_status == DMA_TPS_STOPPED) ||
+		    (tx_status == DMA_TPS_SUSPENDED))
+			break;
+
+		usleep_range(500, 1000);
+	}
+
+	if (!time_before(jiffies, tx_timeout))
+		netdev_info(pdata->netdev,
+			    "timed out waiting for Tx DMA channel %u to stop\n",
+			    channel->queue_index);
+}
+
 static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_channel *channel;
@@ -2478,6 +2549,15 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
 	struct xgbe_channel *channel;
 	unsigned int i;
 
+	/* Prepare for Tx DMA channel stop */
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		if (!channel->tx_ring)
+			break;
+
+		xgbe_prepare_tx_stop(pdata, channel);
+	}
+
 	/* Disable MAC Tx */
 	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
 
@@ -2569,6 +2649,15 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
 	struct xgbe_channel *channel;
 	unsigned int i;
 
+	/* Prepare for Tx DMA channel stop */
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		if (!channel->tx_ring)
+			break;
+
+		xgbe_prepare_tx_stop(pdata, channel);
+	}
+
 	/* Disable MAC Tx */
 	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
 
@@ -2729,6 +2818,7 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	hw_if->rx_desc_reset = xgbe_rx_desc_reset;
 	hw_if->is_last_desc = xgbe_is_last_desc;
 	hw_if->is_context_desc = xgbe_is_context_desc;
+	hw_if->tx_start_xmit = xgbe_tx_start_xmit;
 
 	/* For FLOW ctrl */
 	hw_if->config_tx_flow_control = xgbe_config_tx_flow_control;
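
The tx_set_ic logic added in this file replaces the old per-descriptor IC bookkeeping with a per-packet rule: request a Tx interrupt whenever the running packet count crosses a multiple of the ethtool tx-frames setting, or when a single skb's GSO segment count alone exceeds it. A standalone trace of that rule, with made-up per-skb segment counts:

#include <stdio.h>

static int tx_set_ic(unsigned int *coalesce_count, unsigned int tx_packets,
		     unsigned int tx_frames)
{
	*coalesce_count += tx_packets;

	if (!tx_frames)
		return 0;	/* coalescing disabled, never force IC */
	if (tx_packets > tx_frames)
		return 1;	/* this skb alone exceeds the threshold */

	/* True when the count just crossed a multiple of tx_frames */
	return (*coalesce_count % tx_frames) < tx_packets;
}

int main(void)
{
	unsigned int count = 0;
	unsigned int pkts[] = { 3, 3, 3, 3 };	/* e.g. GSO segs per skb */
	unsigned int i;

	/* With tx-frames = 8, only the third skb sets IC: its packets
	 * push the running count from 6 to 9, past the first multiple of 8.
	 */
	for (i = 0; i < 4; i++) {
		int ic = tx_set_ic(&count, pkts[i], 8);

		printf("skb %u: count=%u IC=%d\n", i, count, ic);
	}
	return 0;
}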
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 0544931329d1..02c104dc2aa4 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -225,6 +225,28 @@ static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
 	return (ring->rdesc_count - (ring->cur - ring->dirty));
 }
 
+static int xgbe_maybe_stop_tx_queue(struct xgbe_channel *channel,
+				    struct xgbe_ring *ring, unsigned int count)
+{
+	struct xgbe_prv_data *pdata = channel->pdata;
+
+	if (count > xgbe_tx_avail_desc(ring)) {
+		DBGPR("  Tx queue stopped, not enough descriptors available\n");
+		netif_stop_subqueue(pdata->netdev, channel->queue_index);
+		ring->tx.queue_stopped = 1;
+
+		/* If we haven't notified the hardware because of xmit_more
+		 * support, tell it now
+		 */
+		if (ring->tx.xmit_more)
+			pdata->hw_if.tx_start_xmit(channel, ring);
+
+		return NETDEV_TX_BUSY;
+	}
+
+	return 0;
+}
+
 static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
 {
 	unsigned int rx_buf_size;
@@ -876,7 +898,10 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 static void xgbe_stop(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	struct xgbe_channel *channel;
 	struct net_device *netdev = pdata->netdev;
+	struct netdev_queue *txq;
+	unsigned int i;
 
 	DBGPR("-->xgbe_stop\n");
 
@@ -890,6 +915,15 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 	hw_if->disable_tx(pdata);
 	hw_if->disable_rx(pdata);
 
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		if (!channel->tx_ring)
+			continue;
+
+		txq = netdev_get_tx_queue(netdev, channel->queue_index);
+		netdev_tx_reset_queue(txq);
+	}
+
 	DBGPR("<--xgbe_stop\n");
 }
 
@@ -1156,6 +1190,12 @@ static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet)
 	      packet->tcp_header_len, packet->tcp_payload_len);
 	DBGPR("  packet->mss=%u\n", packet->mss);
 
+	/* Update the number of packets that will ultimately be transmitted
+	 * along with the extra bytes for each extra packet
+	 */
+	packet->tx_packets = skb_shinfo(skb)->gso_segs;
+	packet->tx_bytes += (packet->tx_packets - 1) * packet->header_len;
+
 	return 0;
 }
 
@@ -1181,9 +1221,14 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata,
 	unsigned int len;
 	unsigned int i;
 
+	packet->skb = skb;
+
 	context_desc = 0;
 	packet->rdesc_count = 0;
 
+	packet->tx_packets = 1;
+	packet->tx_bytes = skb->len;
+
 	if (xgbe_is_tso(skb)) {
 		/* TSO requires an extra descriptor if mss is different */
 		if (skb_shinfo(skb)->gso_size != ring->tx.cur_mss) {
@@ -1400,12 +1445,14 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 	struct xgbe_channel *channel;
 	struct xgbe_ring *ring;
 	struct xgbe_packet_data *packet;
+	struct netdev_queue *txq;
 	unsigned long flags;
 	int ret;
 
 	DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
 
 	channel = pdata->channel + skb->queue_mapping;
+	txq = netdev_get_tx_queue(netdev, channel->queue_index);
 	ring = channel->tx_ring;
 	packet = &ring->packet_data;
 
@@ -1424,13 +1471,9 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 	xgbe_packet_info(pdata, ring, skb, packet);
 
 	/* Check that there are enough descriptors available */
-	if (packet->rdesc_count > xgbe_tx_avail_desc(ring)) {
-		DBGPR("  Tx queue stopped, not enough descriptors available\n");
-		netif_stop_subqueue(netdev, channel->queue_index);
-		ring->tx.queue_stopped = 1;
-		ret = NETDEV_TX_BUSY;
+	ret = xgbe_maybe_stop_tx_queue(channel, ring, packet->rdesc_count);
+	if (ret)
 		goto tx_netdev_return;
-	}
 
 	ret = xgbe_prep_tso(skb, packet);
 	if (ret) {
@@ -1447,6 +1490,9 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	xgbe_prep_tx_tstamp(pdata, skb, packet);
 
+	/* Report on the actual number of bytes (to be) sent */
+	netdev_tx_sent_queue(txq, packet->tx_bytes);
+
 	/* Configure required descriptor fields for transmission */
 	hw_if->dev_xmit(channel);
 
@@ -1454,6 +1500,11 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 	xgbe_print_pkt(netdev, skb, true);
 #endif
 
+	/* Stop the queue in advance if there may not be enough descriptors */
+	xgbe_maybe_stop_tx_queue(channel, ring, XGBE_TX_MAX_DESCS);
+
+	ret = NETDEV_TX_OK;
+
 tx_netdev_return:
 	spin_unlock_irqrestore(&ring->lock, flags);
 
@@ -1747,14 +1798,14 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
 	u8 *packet;
 	unsigned int copy_len;
 
-	skb = netdev_alloc_skb_ip_align(netdev, rdata->rx_hdr.dma_len);
+	skb = netdev_alloc_skb_ip_align(netdev, rdata->rx.hdr.dma_len);
 	if (!skb)
 		return NULL;
 
-	packet = page_address(rdata->rx_hdr.pa.pages) +
-		 rdata->rx_hdr.pa.pages_offset;
-	copy_len = (rdata->hdr_len) ? rdata->hdr_len : *len;
-	copy_len = min(rdata->rx_hdr.dma_len, copy_len);
+	packet = page_address(rdata->rx.hdr.pa.pages) +
+		 rdata->rx.hdr.pa.pages_offset;
+	copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : *len;
+	copy_len = min(rdata->rx.hdr.dma_len, copy_len);
 	skb_copy_to_linear_data(skb, packet, copy_len);
 	skb_put(skb, copy_len);
 
@@ -1772,8 +1823,10 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
 	struct xgbe_ring_data *rdata;
 	struct xgbe_ring_desc *rdesc;
 	struct net_device *netdev = pdata->netdev;
+	struct netdev_queue *txq;
 	unsigned long flags;
 	int processed = 0;
+	unsigned int tx_packets = 0, tx_bytes = 0;
 
 	DBGPR("-->xgbe_tx_poll\n");
 
@@ -1781,6 +1834,8 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
 	if (!ring)
 		return 0;
 
+	txq = netdev_get_tx_queue(netdev, channel->queue_index);
+
 	spin_lock_irqsave(&ring->lock, flags);
 
 	while ((processed < XGBE_TX_DESC_MAX_PROC) &&
@@ -1791,10 +1846,19 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
 		if (!hw_if->tx_complete(rdesc))
 			break;
 
+		/* Make sure descriptor fields are read after reading the OWN
+		 * bit */
+		rmb();
+
 #ifdef XGMAC_ENABLE_TX_DESC_DUMP
 		xgbe_dump_tx_desc(ring, ring->dirty, 1, 0);
 #endif
 
+		if (hw_if->is_last_desc(rdesc)) {
+			tx_packets += rdata->tx.packets;
+			tx_bytes += rdata->tx.bytes;
+		}
+
 		/* Free the SKB and reset the descriptor for re-use */
 		desc_if->unmap_rdata(pdata, rdata);
 		hw_if->tx_desc_reset(rdata);
@@ -1803,14 +1867,20 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
 		ring->dirty++;
 	}
 
+	if (!processed)
+		goto unlock;
+
+	netdev_tx_completed_queue(txq, tx_packets, tx_bytes);
+
 	if ((ring->tx.queue_stopped == 1) &&
 	    (xgbe_tx_avail_desc(ring) > XGBE_TX_DESC_MIN_FREE)) {
 		ring->tx.queue_stopped = 0;
-		netif_wake_subqueue(netdev, channel->queue_index);
+		netif_tx_wake_queue(txq);
 	}
 
 	DBGPR("<--xgbe_tx_poll: processed=%d\n", processed);
 
+unlock:
 	spin_unlock_irqrestore(&ring->lock, flags);
 
 	return processed;
@@ -1896,13 +1966,13 @@ read_again:
 		}
 
 		if (!context) {
-			put_len = rdata->len - len;
+			put_len = rdata->rx.len - len;
 			len += put_len;
 
 			if (!skb) {
 				dma_sync_single_for_cpu(pdata->dev,
-							rdata->rx_hdr.dma,
-							rdata->rx_hdr.dma_len,
+							rdata->rx.hdr.dma,
+							rdata->rx.hdr.dma_len,
 							DMA_FROM_DEVICE);
 
 				skb = xgbe_create_skb(pdata, rdata, &put_len);
@@ -1914,15 +1984,15 @@ read_again:
 
 			if (put_len) {
 				dma_sync_single_for_cpu(pdata->dev,
-							rdata->rx_buf.dma,
-							rdata->rx_buf.dma_len,
+							rdata->rx.buf.dma,
+							rdata->rx.buf.dma_len,
 							DMA_FROM_DEVICE);
 
 				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-						rdata->rx_buf.pa.pages,
-						rdata->rx_buf.pa.pages_offset,
-						put_len, rdata->rx_buf.dma_len);
-				rdata->rx_buf.pa.pages = NULL;
+						rdata->rx.buf.pa.pages,
+						rdata->rx.buf.pa.pages_offset,
+						put_len, rdata->rx.buf.dma_len);
+				rdata->rx.buf.pa.pages = NULL;
 			}
 		}
 
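
The BQL wiring in this file follows the standard three-call contract: netdev_tx_sent_queue() when bytes are queued (xgbe_xmit above), netdev_tx_completed_queue() from the cleanup path (xgbe_tx_poll above), and netdev_tx_reset_queue() when the device stops (xgbe_stop above). Condensed into a sketch, where the three netdev_tx_*_queue() calls are real kernel APIs and everything else is a stand-in:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t my_bql_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(ndev,
						       skb->queue_mapping);

	/* ...map the skb and hand it to the hardware (omitted)... */

	netdev_tx_sent_queue(txq, skb->len);	/* bytes now in flight */
	return NETDEV_TX_OK;
}

static void my_bql_tx_clean(struct net_device *ndev, unsigned int queue,
			    unsigned int packets, unsigned int bytes)
{
	/* packets/bytes accumulated while reaping completed descriptors,
	 * as xgbe_tx_poll() does with rdata->tx.packets/bytes
	 */
	netdev_tx_completed_queue(netdev_get_tx_queue(ndev, queue),
				  packets, bytes);
}

static void my_bql_stop(struct net_device *ndev, unsigned int queue)
{
	/* On device stop the rings are flushed, so the in-flight byte
	 * accounting must be reset as well
	 */
	netdev_tx_reset_queue(netdev_get_tx_queue(ndev, queue));
}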
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 901fb1fe5d0d..eb3387398c6f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -140,6 +140,17 @@
 
 #define XGBE_TX_MAX_BUF_SIZE	(0x3fff & ~(64 - 1))
 
+/* Descriptors required for maximum contiguous TSO/GSO packet */
+#define XGBE_TX_MAX_SPLIT	((GSO_MAX_SIZE / XGBE_TX_MAX_BUF_SIZE) + 1)
+
+/* Maximum possible descriptors needed for an SKB:
+ * - Maximum number of SKB frags
+ * - Maximum descriptors for contiguous TSO/GSO packet
+ * - Possible context descriptor
+ * - Possible TSO header descriptor
+ */
+#define XGBE_TX_MAX_DESCS	(MAX_SKB_FRAGS + XGBE_TX_MAX_SPLIT + 2)
+
 #define XGBE_RX_MIN_BUF_SIZE	(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
 #define XGBE_RX_BUF_ALIGN	64
 #define XGBE_SKB_ALLOC_SIZE	256
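
As a worked check of the new limits (assuming the usual GSO_MAX_SIZE of 65536 and MAX_SKB_FRAGS of 17 with 4 KiB pages; neither value comes from this patch): XGBE_TX_MAX_BUF_SIZE is 0x3fff & ~63 = 16320 bytes, a maximal GSO packet therefore splits across 65536/16320 + 1 = 5 descriptors, and the worst-case skb needs 17 + 5 + 2 = 24 descriptors:

#include <stdio.h>

#define GSO_MAX_SIZE		65536	/* assumed kernel value */
#define MAX_SKB_FRAGS		17	/* assumed, 4 KiB pages */
#define XGBE_TX_MAX_BUF_SIZE	(0x3fff & ~(64 - 1))	/* = 16320 */
#define XGBE_TX_MAX_SPLIT	((GSO_MAX_SIZE / XGBE_TX_MAX_BUF_SIZE) + 1)
#define XGBE_TX_MAX_DESCS	(MAX_SKB_FRAGS + XGBE_TX_MAX_SPLIT + 2)

int main(void)
{
	printf("max buf size: %d\n", XGBE_TX_MAX_BUF_SIZE);	/* 16320 */
	printf("max split:    %d\n", XGBE_TX_MAX_SPLIT);	/* 5 */
	printf("max descs:    %d\n", XGBE_TX_MAX_DESCS);	/* 24 */
	return 0;
}

This worst-case count is what xgbe_xmit() passes to xgbe_maybe_stop_tx_queue() after each transmit to stop the queue before a later skb could run out of descriptors.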
@@ -147,6 +158,7 @@
 
 #define XGBE_MAX_DMA_CHANNELS	16
 #define XGBE_MAX_QUEUES		16
+#define XGBE_DMA_STOP_TIMEOUT	5
 
 /* DMA cache settings - Outer sharable, write-back, write-allocate */
 #define XGBE_DMA_OS_AXDOMAIN	0x2
@@ -224,6 +236,8 @@
 struct xgbe_prv_data;
 
 struct xgbe_packet_data {
+	struct sk_buff *skb;
+
 	unsigned int attributes;
 
 	unsigned int errors;
@@ -242,6 +256,9 @@ struct xgbe_packet_data {
 
 	u32 rss_hash;
 	enum pkt_hash_types rss_hash_type;
+
+	unsigned int tx_packets;
+	unsigned int tx_bytes;
 };
 
 /* Common Rx and Tx descriptor mapping */
@@ -270,6 +287,21 @@ struct xgbe_buffer_data {
 	unsigned int dma_len;
 };
 
+/* Tx-related ring data */
+struct xgbe_tx_ring_data {
+	unsigned int packets;		/* BQL packet count */
+	unsigned int bytes;		/* BQL byte count */
+};
+
+/* Rx-related ring data */
+struct xgbe_rx_ring_data {
+	struct xgbe_buffer_data hdr;	/* Header locations */
+	struct xgbe_buffer_data buf;	/* Payload locations */
+
+	unsigned short hdr_len;		/* Length of received header */
+	unsigned short len;		/* Length of received packet */
+};
+
 /* Structure used to hold information related to the descriptor
  * and the packet associated with the descriptor (always use
  * the XGBE_GET_DESC_DATA macro to access this data from the ring)
@@ -281,13 +313,9 @@ struct xgbe_ring_data {
 	struct sk_buff *skb;		/* Virtual address of SKB */
 	dma_addr_t skb_dma;		/* DMA address of SKB data */
 	unsigned int skb_dma_len;	/* Length of SKB DMA area */
-	unsigned int tso_header;        /* TSO header indicator */
-
-	struct xgbe_buffer_data rx_hdr;	/* Header locations */
-	struct xgbe_buffer_data rx_buf; /* Payload locations */
 
-	unsigned short hdr_len;		/* Length of received header */
-	unsigned short len;		/* Length of received Rx packet */
+	struct xgbe_tx_ring_data tx;	/* Tx-related data */
+	struct xgbe_rx_ring_data rx;	/* Rx-related data */
 
 	unsigned int interrupt;		/* Interrupt indicator */
 
@@ -345,6 +373,7 @@ struct xgbe_ring {
 	union {
 		struct {
 			unsigned int queue_stopped;
+			unsigned int xmit_more;
 			unsigned short cur_mss;
 			unsigned short cur_vlan_ctag;
 		} tx;
@@ -508,6 +537,7 @@ struct xgbe_hw_if {
 	void (*tx_desc_reset)(struct xgbe_ring_data *);
 	int (*is_last_desc)(struct xgbe_ring_desc *);
 	int (*is_context_desc)(struct xgbe_ring_desc *);
+	void (*tx_start_xmit)(struct xgbe_channel *, struct xgbe_ring *);
 
 	/* For FLOW ctrl */
 	int (*config_tx_flow_control)(struct xgbe_prv_data *);