Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--  drivers/net/ethernet/Kconfig | 9
-rw-r--r--  drivers/net/ethernet/Makefile | 1
-rw-r--r--  drivers/net/ethernet/broadcom/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 4
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 24
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 16
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c | 12
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 58
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h | 3
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h | 109
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 1701
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h | 7
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 34
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 14
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 5
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c | 35
-rw-r--r--  drivers/net/ethernet/broadcom/cnic.c | 116
-rw-r--r--  drivers/net/ethernet/broadcom/cnic.h | 5
-rw-r--r--  drivers/net/ethernet/broadcom/cnic_defs.h | 2
-rw-r--r--  drivers/net/ethernet/broadcom/cnic_if.h | 4
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c | 534
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.h | 9
-rw-r--r--  drivers/net/ethernet/brocade/bna/bnad.c | 1
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 51
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 954
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/sge.c | 341
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 734
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 80
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 185
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 97
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 5
-rw-r--r--  drivers/net/ethernet/emulex/benet/be.h | 2
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_cmds.c | 55
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_cmds.h | 6
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_ethtool.c | 57
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c | 96
-rw-r--r--  drivers/net/ethernet/freescale/Kconfig | 7
-rw-r--r--  drivers/net/ethernet/freescale/Makefile | 1
-rw-r--r--  drivers/net/ethernet/freescale/fsl_pq_mdio.c | 549
-rw-r--r--  drivers/net/ethernet/freescale/fsl_pq_mdio.h | 52
-rw-r--r--  drivers/net/ethernet/freescale/gianfar.c | 12
-rw-r--r--  drivers/net/ethernet/freescale/gianfar.h | 11
-rw-r--r--  drivers/net/ethernet/freescale/gianfar_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/ucc_geth.c | 1
-rw-r--r--  drivers/net/ethernet/freescale/xgmac_mdio.c | 274
-rw-r--r--  drivers/net/ethernet/i825xx/Kconfig | 2
-rw-r--r--  drivers/net/ethernet/i825xx/znet.c | 4
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 39
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c | 14
-rw-r--r--  drivers/net/ethernet/intel/e1000e/82571.c | 4
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ethtool.c | 44
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 19
-rw-r--r--  drivers/net/ethernet/intel/e1000e/phy.c | 31
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_82575.c | 17
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_defines.h | 11
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_phy.c | 29
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_phy.h | 5
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_regs.h | 3
-rw-r--r--  drivers/net/ethernet/intel/igb/igb.h | 41
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ethtool.c | 198
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 711
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ptp.c | 677
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/Makefile | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe.h | 35
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c | 300
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 573
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 105
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/defines.h | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 9
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 272
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/mbx.c | 15
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/mbx.h | 21
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/vf.c | 122
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/vf.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4
-rw-r--r--  drivers/net/ethernet/mipsnet.c | 345
-rw-r--r--  drivers/net/ethernet/nvidia/forcedeth.c | 17
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c | 4
-rw-r--r--  drivers/net/ethernet/sfc/Kconfig | 7
-rw-r--r--  drivers/net/ethernet/sfc/Makefile | 1
-rw-r--r--  drivers/net/ethernet/sfc/bitfield.h | 22
-rw-r--r--  drivers/net/ethernet/sfc/efx.c | 250
-rw-r--r--  drivers/net/ethernet/sfc/efx.h | 1
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c | 16
-rw-r--r--  drivers/net/ethernet/sfc/falcon_boards.c | 2
-rw-r--r--  drivers/net/ethernet/sfc/filter.c | 108
-rw-r--r--  drivers/net/ethernet/sfc/filter.h | 7
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.c | 49
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.h | 12
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_pcol.h | 29
-rw-r--r--  drivers/net/ethernet/sfc/mtd.c | 7
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h | 78
-rw-r--r--  drivers/net/ethernet/sfc/nic.c | 6
-rw-r--r--  drivers/net/ethernet/sfc/nic.h | 36
-rw-r--r--  drivers/net/ethernet/sfc/ptp.c | 1484
-rw-r--r--  drivers/net/ethernet/sfc/rx.c | 20
-rw-r--r--  drivers/net/ethernet/sfc/selftest.c | 3
-rw-r--r--  drivers/net/ethernet/sfc/siena.c | 1
-rw-r--r--  drivers/net/ethernet/sfc/siena_sriov.c | 8
-rw-r--r--  drivers/net/ethernet/sfc/tx.c | 627
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 11
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 1
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 39
-rw-r--r--  drivers/net/ethernet/sun/sunbmac.c | 1
-rw-r--r--  drivers/net/ethernet/ti/Kconfig | 4
-rw-r--r--  drivers/net/ethernet/ti/cpsw.c | 179
-rw-r--r--  drivers/net/ethernet/ti/davinci_mdio.c | 41
-rw-r--r--  drivers/net/ethernet/tundra/tsi108_eth.c | 1
-rw-r--r--  drivers/net/ethernet/wiznet/w5100.c | 3
-rw-r--r--  drivers/net/ethernet/wiznet/w5300.c | 3
113 files changed, 8935 insertions, 4083 deletions
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index a11af5cc4844..e4ff38949112 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -89,15 +89,6 @@ source "drivers/net/ethernet/marvell/Kconfig"
 source "drivers/net/ethernet/mellanox/Kconfig"
 source "drivers/net/ethernet/micrel/Kconfig"
 source "drivers/net/ethernet/microchip/Kconfig"
-
-config MIPS_SIM_NET
-	tristate "MIPS simulator Network device"
-	depends on MIPS_SIM
-	---help---
-	  The MIPSNET device is a simple Ethernet network device which is
-	  emulated by the MIPS Simulator.
-	  If you are not using a MIPSsim or are unsure, say N.
-
 source "drivers/net/ethernet/myricom/Kconfig"
 
 config FEALNX
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 878ad32b93f2..d4473072654a 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -40,7 +40,6 @@ obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/
 obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
 obj-$(CONFIG_NET_VENDOR_MICREL) += micrel/
 obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/
-obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index f15e72e81ac4..4bd416b72e65 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -101,6 +101,7 @@ config TIGON3
 	tristate "Broadcom Tigon3 support"
 	depends on PCI
 	select PHYLIB
+	select HWMON
 	---help---
 	  This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index eac25236856c..72897c47b8c8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -23,8 +23,8 @@
  * (you will need to reboot afterwards) */
 /* #define BNX2X_STOP_ON_ERROR */
 
-#define DRV_MODULE_VERSION      "1.72.51-0"
-#define DRV_MODULE_RELDATE      "2012/06/18"
+#define DRV_MODULE_VERSION      "1.78.00-0"
+#define DRV_MODULE_RELDATE      "2012/09/27"
 #define BNX2X_BC_VER            0x040200
 
 #if defined(CONFIG_DCB)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e8e97a7d1d06..30f04a389227 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2285,7 +2285,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	/* Wait for all pending SP commands to complete */
 	if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
 		BNX2X_ERR("Timeout waiting for SP elements to complete\n");
-		bnx2x_nic_unload(bp, UNLOAD_CLOSE);
+		bnx2x_nic_unload(bp, UNLOAD_CLOSE, false);
 		return -EBUSY;
 	}
 
@@ -2333,7 +2333,7 @@ load_error0:
 }
 
 /* must be called with rtnl_lock */
-int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
+int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 {
 	int i;
 	bool global = false;
@@ -2395,7 +2395,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
 
 	/* Cleanup the chip if needed */
 	if (unload_mode != UNLOAD_RECOVERY)
-		bnx2x_chip_cleanup(bp, unload_mode);
+		bnx2x_chip_cleanup(bp, unload_mode, keep_link);
 	else {
 		/* Send the UNLOAD_REQUEST to the MCP */
 		bnx2x_send_unload_req(bp, unload_mode);
@@ -2419,7 +2419,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
 		bnx2x_free_irq(bp);
 
 		/* Report UNLOAD_DONE to MCP */
-		bnx2x_send_unload_done(bp);
+		bnx2x_send_unload_done(bp, false);
 	}
 
 	/*
@@ -3026,8 +3026,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	first_bd = tx_start_bd;
 
 	tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
-	SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
-		 mac_type);
+	SET_FLAG(tx_start_bd->general_data,
+		 ETH_TX_START_BD_PARSE_NBDS,
+		 0);
 
 	/* header nbd */
 	SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
@@ -3077,13 +3078,20 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					      &pbd_e2->dst_mac_addr_lo,
 					      eth->h_dest);
 		}
+
+		SET_FLAG(pbd_e2_parsing_data,
+			 ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type);
 	} else {
+		u16 global_data = 0;
 		pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
 		memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
 		/* Set PBD in checksum offload case */
 		if (xmit_type & XMIT_CSUM)
 			hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
 
+		SET_FLAG(global_data,
+			 ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type);
+		pbd_e1x->global_data |= cpu_to_le16(global_data);
 	}
 
 	/* Setup the data pointer of the first BD of the packet */
@@ -3770,7 +3778,7 @@ int bnx2x_reload_if_running(struct net_device *dev)
 	if (unlikely(!netif_running(dev)))
 		return 0;
 
-	bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+	bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 	return bnx2x_nic_load(bp, LOAD_NORMAL);
 }
 
@@ -3967,7 +3975,7 @@ int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	netif_device_detach(dev);
 
-	bnx2x_nic_unload(bp, UNLOAD_CLOSE);
+	bnx2x_nic_unload(bp, UNLOAD_CLOSE, false);
 
 	bnx2x_set_power_state(bp, pci_choose_state(pdev, state));
 
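
The hunks above move the Ethernet address type out of the start BD and into
the parse BD via the driver's SET_FLAG() helper, which pairs each bitfield
mask with a *_SHIFT constant. A minimal sketch of that convention (the real
macro lives in bnx2x.h; this is an assumed illustration, not the patch):

	/* Sketch: every FOO mask macro has a sibling FOO_SHIFT, so token
	 * pasting lets one helper clear a field and OR in the new value.
	 */
	#define SET_FLAG(value, mask, flag) \
		do { \
			(value) &= ~(mask); \
			(value) |= ((flag) << (mask##_SHIFT)); \
		} while (0)

	/* e.g. SET_FLAG(pbd_e2_parsing_data,
	 *		ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type);
	 */
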
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index dfd86a55f1dc..9c5ea6c5b4c7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -83,8 +83,9 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode);
  * bnx2x_send_unload_done - send UNLOAD_DONE command to the MCP.
  *
  * @bp:		driver handle
+ * @keep_link:		true iff link should be kept up
  */
-void bnx2x_send_unload_done(struct bnx2x *bp);
+void bnx2x_send_unload_done(struct bnx2x *bp, bool keep_link);
 
 /**
  * bnx2x_config_rss_pf - configure RSS parameters in a PF.
@@ -153,6 +154,14 @@ u8 bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode);
 void bnx2x_link_set(struct bnx2x *bp);
 
 /**
+ * bnx2x_force_link_reset - Forces a link reset, and puts the PHY
+ * in reset as well.
+ *
+ * @bp:		driver handle
+ */
+void bnx2x_force_link_reset(struct bnx2x *bp);
+
+/**
  * bnx2x_link_test - query link status.
  *
  * @bp:		driver handle
@@ -312,12 +321,13 @@ void bnx2x_set_num_queues(struct bnx2x *bp);
  *
  * @bp:			driver handle
  * @unload_mode:	COMMON, PORT, FUNCTION
+ * @keep_link:		true iff link should be kept up.
  *
  * - Cleanup MAC configuration.
  * - Closes clients.
  * - etc.
  */
-void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode);
+void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link);
 
 /**
  * bnx2x_acquire_hw_lock - acquire HW lock.
@@ -446,7 +456,7 @@ void bnx2x_fw_dump_lvl(struct bnx2x *bp, const char *lvl);
 bool bnx2x_test_firmware_version(struct bnx2x *bp, bool is_err);
 
 /* dev_close main block */
-int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
+int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link);
 
 /* dev_open main block */
 int bnx2x_nic_load(struct bnx2x *bp, int load_mode);
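
The keep_link flag added above threads through the unload path so that
internal reloads can leave the link up (letting Link Flap Avoidance apply on
the next load), while close/suspend paths still reset it. A hypothetical
caller-side summary, not part of the patch, based on the call sites changed
in bnx2x_cmn.c:

	/* Hypothetical illustration of the keep_link convention. */
	static int example_reload(struct bnx2x *bp)
	{
		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);	/* keep link up */
		return bnx2x_nic_load(bp, LOAD_NORMAL);
	}

	static void example_close(struct bnx2x *bp)
	{
		bnx2x_nic_unload(bp, UNLOAD_CLOSE, false);	/* full link reset */
	}
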
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 8a73374e52a7..2245c3895409 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -91,25 +91,21 @@ static void bnx2x_pfc_set(struct bnx2x *bp)
 	/*
 	 * Rx COS configuration
 	 * Changing PFC RX configuration .
-	 * In RX COS0 will always be configured to lossy and COS1 to lossless
+	 * In RX, COS0 will always be configured as lossless and COS1 as lossy
 	 */
 	for (i = 0 ; i < MAX_PFC_PRIORITIES ; i++) {
 		pri_bit = 1 << i;
 
-		if (pri_bit & DCBX_PFC_PRI_PAUSE_MASK(bp))
+		if (!(pri_bit & DCBX_PFC_PRI_PAUSE_MASK(bp)))
 			val |= 1 << (i * 4);
 	}
 
 	pfc_params.pkt_priority_to_cos = val;
 
 	/* RX COS0 */
-	pfc_params.llfc_low_priority_classes = 0;
+	pfc_params.llfc_low_priority_classes = DCBX_PFC_PRI_PAUSE_MASK(bp);
 	/* RX COS1 */
-	pfc_params.llfc_high_priority_classes = DCBX_PFC_PRI_PAUSE_MASK(bp);
-
-	/* BRB configuration */
-	pfc_params.cos0_pauseable = false;
-	pfc_params.cos1_pauseable = true;
+	pfc_params.llfc_high_priority_classes = 0;
 
 	bnx2x_acquire_phy_lock(bp);
 	bp->link_params.feature_config_flags |= FEATURE_CONFIG_PFC_ENABLED;
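
After this change the priority-to-COS nibble map is inverted: priorities
outside the PFC pause mask are routed to COS1 (lossy), while paused
priorities stay on COS0 (lossless). A standalone sketch of the loop's
effect, using a made-up pause mask:

	#include <stdio.h>
	#include <stdint.h>

	#define MAX_PFC_PRIORITIES 8

	int main(void)
	{
		/* Assume priorities 3 and 4 are PFC-enabled (lossless). */
		uint32_t pause_mask = (1 << 3) | (1 << 4);
		uint32_t val = 0;
		int i;

		for (i = 0; i < MAX_PFC_PRIORITIES; i++)
			if (!((1 << i) & pause_mask))
				val |= 1 << (i * 4);	/* lossy -> COS1 */

		/* Nibble i selects the COS for priority i: nibbles 3 and 4
		 * stay 0 (COS0, lossless), all others read 1 (COS1, lossy).
		 */
		printf("pkt_priority_to_cos = 0x%08x\n", (unsigned int)val);
		/* prints 0x11100111 */
		return 0;
	}
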
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index ebf40cd7aa10..c65295dded39 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -905,6 +905,7 @@ static int bnx2x_nway_reset(struct net_device *dev)
 
 	if (netif_running(dev)) {
 		bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+		bnx2x_force_link_reset(bp);
 		bnx2x_link_set(bp);
 	}
 
@@ -1606,7 +1607,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
 	return 0;
 }
 
-static char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = {
+static const char bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF][ETH_GSTRING_LEN] = {
 	"register_test (offline)    ",
 	"memory_test (offline)      ",
 	"int_loopback_test (offline)",
@@ -1653,7 +1654,7 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata)
 		return -EOPNOTSUPP;
 	}
 
-	eee_cfg = SHMEM2_RD(bp, eee_status[BP_PORT(bp)]);
+	eee_cfg = bp->link_vars.eee_status;
 
 	edata->supported =
 		bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_SUPPORTED_MASK) >>
@@ -1690,7 +1691,7 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata)
 		return -EOPNOTSUPP;
 	}
 
-	eee_cfg = SHMEM2_RD(bp, eee_status[BP_PORT(bp)]);
+	eee_cfg = bp->link_vars.eee_status;
 
 	if (!(eee_cfg & SHMEM_EEE_SUPPORTED_MASK)) {
 		DP(BNX2X_MSG_ETHTOOL, "Board does not support EEE!\n");
@@ -1739,6 +1740,7 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata)
 	/* Restart link to propogate changes */
 	if (netif_running(dev)) {
 		bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+		bnx2x_force_link_reset(bp);
 		bnx2x_link_set(bp);
 	}
 
@@ -2038,8 +2040,6 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
 	u16 pkt_prod, bd_prod;
 	struct sw_tx_bd *tx_buf;
 	struct eth_tx_start_bd *tx_start_bd;
-	struct eth_tx_parse_bd_e1x  *pbd_e1x = NULL;
-	struct eth_tx_parse_bd_e2  *pbd_e2 = NULL;
 	dma_addr_t mapping;
 	union eth_rx_cqe *cqe;
 	u8 cqe_fp_flags, cqe_fp_type;
@@ -2131,21 +2131,32 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
 	tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
 	tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
 	SET_FLAG(tx_start_bd->general_data,
-		 ETH_TX_START_BD_ETH_ADDR_TYPE,
-		 UNICAST_ADDRESS);
-	SET_FLAG(tx_start_bd->general_data,
 		 ETH_TX_START_BD_HDR_NBDS,
 		 1);
+	SET_FLAG(tx_start_bd->general_data,
+		 ETH_TX_START_BD_PARSE_NBDS,
+		 0);
 
 	/* turn on parsing and get a BD */
 	bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
 
-	pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
-	pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
-
-	memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
-	memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
-
+	if (CHIP_IS_E1x(bp)) {
+		u16 global_data = 0;
+		struct eth_tx_parse_bd_e1x  *pbd_e1x =
+			&txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
+		memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
+		SET_FLAG(global_data,
+			 ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, UNICAST_ADDRESS);
+		pbd_e1x->global_data = cpu_to_le16(global_data);
+	} else {
+		u32 parsing_data = 0;
+		struct eth_tx_parse_bd_e2  *pbd_e2 =
+			&txdata->tx_desc_ring[bd_prod].parse_bd_e2;
+		memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
+		SET_FLAG(parsing_data,
+			 ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, UNICAST_ADDRESS);
+		pbd_e2->parsing_data = cpu_to_le32(parsing_data);
+	}
 	wmb();
 
 	txdata->tx_db.data.prod += 2;
@@ -2263,7 +2274,7 @@ static int bnx2x_test_ext_loopback(struct bnx2x *bp)
 	if (!netif_running(bp->dev))
 		return BNX2X_EXT_LOOPBACK_FAILED;
 
-	bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+	bnx2x_nic_unload(bp, UNLOAD_NORMAL, false);
 	rc = bnx2x_nic_load(bp, LOAD_LOOPBACK_EXT);
 	if (rc) {
 		DP(BNX2X_MSG_ETHTOOL,
@@ -2414,7 +2425,7 @@ static void bnx2x_self_test(struct net_device *dev,
 
 		link_up = bp->link_vars.link_up;
 
-		bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+		bnx2x_nic_unload(bp, UNLOAD_NORMAL, false);
 		rc = bnx2x_nic_load(bp, LOAD_DIAG);
 		if (rc) {
 			etest->flags |= ETH_TEST_FL_FAILED;
@@ -2446,7 +2457,7 @@ static void bnx2x_self_test(struct net_device *dev,
 			etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
 		}
 
-		bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+		bnx2x_nic_unload(bp, UNLOAD_NORMAL, false);
 
 		/* restore input for TX port IF */
 		REG_WR(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4, val);
@@ -2534,7 +2545,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset)
 static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	int i, j, k, offset, start;
+	int i, j, k, start;
 	char queue_name[MAX_QUEUE_NAME_LEN+1];
 
 	switch (stringset) {
@@ -2570,13 +2581,8 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 			start = 0;
 		else
 			start = 4;
-		for (i = 0, j = start; j < (start + BNX2X_NUM_TESTS(bp));
-		     i++, j++) {
-			offset = sprintf(buf+32*i, "%s",
-					 bnx2x_tests_str_arr[j]);
-			*(buf+offset) = '\0';
-		}
-		break;
+		memcpy(buf, bnx2x_tests_str_arr + start,
+		       ETH_GSTRING_LEN * BNX2X_NUM_TESTS(bp));
 	}
 }
 
@@ -2940,7 +2946,7 @@ static int bnx2x_set_channels(struct net_device *dev,
 		bnx2x_change_num_queues(bp, channels->combined_count);
 		return 0;
 	}
-	bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+	bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 	bnx2x_change_num_queues(bp, channels->combined_count);
 	return bnx2x_nic_load(bp, LOAD_NORMAL);
 }
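
Turning bnx2x_tests_str_arr from an array of char pointers into a 2-D array
with fixed ETH_GSTRING_LEN rows is what lets the sprintf loop in
bnx2x_get_strings() collapse into a single memcpy: ethtool consumes strings
in fixed-width slots, and the 2-D layout already matches that. A standalone
sketch of the layout (names abbreviated; ETH_GSTRING_LEN is the ethtool ABI
constant):

	#include <stdio.h>
	#include <string.h>

	#define ETH_GSTRING_LEN 32	/* fixed slot width used by ethtool */

	static const char tests[][ETH_GSTRING_LEN] = {
		"register_test (offline)    ",
		"memory_test (offline)      ",
		"link_test (online)         ",
	};

	int main(void)
	{
		char buf[sizeof(tests)] = { 0 };

		/* One copy moves all fixed-width rows, padding included. */
		memcpy(buf, tests, sizeof(tests));
		printf("%.32s\n", buf + ETH_GSTRING_LEN);	/* 2nd name */
		return 0;
	}
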
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
index bbc66ced9c25..620fe939ecfd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
@@ -88,9 +88,6 @@
 #define TSTORM_ASSERT_LIST_INDEX_OFFSET	(IRO[102].base)
 #define TSTORM_ASSERT_LIST_OFFSET(assertListEntry) \
 	(IRO[101].base + ((assertListEntry) * IRO[101].m1))
-#define TSTORM_COMMON_SAFC_WORKAROUND_ENABLE_OFFSET (IRO[107].base)
-#define TSTORM_COMMON_SAFC_WORKAROUND_TIMEOUT_10USEC_OFFSET \
-	(IRO[108].base)
 #define TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(pfId) \
 	(IRO[201].base + ((pfId) * IRO[201].m1))
 #define TSTORM_FUNC_EN_OFFSET(funcId) \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 76b6e65790f8..18704929e642 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1286,6 +1286,9 @@ struct drv_func_mb {
 	#define DRV_MSG_CODE_SET_MF_BW_MIN_MASK         0x00ff0000
 	#define DRV_MSG_CODE_SET_MF_BW_MAX_MASK         0xff000000
 
+	#define DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET     0x00000002
+
+	#define DRV_MSG_CODE_LOAD_REQ_WITH_LFA          0x0000100a
 	u32 fw_mb_header;
 	#define FW_MSG_CODE_MASK                        0xffff0000
 	#define FW_MSG_CODE_DRV_LOAD_COMMON             0x10100000
@@ -1909,6 +1912,54 @@ struct lldp_local_mib {
 };
 /***END OF DCBX STRUCTURES DECLARATIONS***/
 
+/***********************************************************/
+/*                         Elink section                   */
+/***********************************************************/
+#define SHMEM_LINK_CONFIG_SIZE 2
+struct shmem_lfa {
+	u32 req_duplex;
+	#define REQ_DUPLEX_PHY0_MASK        0x0000ffff
+	#define REQ_DUPLEX_PHY0_SHIFT       0
+	#define REQ_DUPLEX_PHY1_MASK        0xffff0000
+	#define REQ_DUPLEX_PHY1_SHIFT       16
+	u32 req_flow_ctrl;
+	#define REQ_FLOW_CTRL_PHY0_MASK     0x0000ffff
+	#define REQ_FLOW_CTRL_PHY0_SHIFT    0
+	#define REQ_FLOW_CTRL_PHY1_MASK     0xffff0000
+	#define REQ_FLOW_CTRL_PHY1_SHIFT    16
+	u32 req_line_speed; /* Also determine AutoNeg */
+	#define REQ_LINE_SPD_PHY0_MASK      0x0000ffff
+	#define REQ_LINE_SPD_PHY0_SHIFT     0
+	#define REQ_LINE_SPD_PHY1_MASK      0xffff0000
+	#define REQ_LINE_SPD_PHY1_SHIFT     16
+	u32 speed_cap_mask[SHMEM_LINK_CONFIG_SIZE];
+	u32 additional_config;
+	#define REQ_FC_AUTO_ADV_MASK        0x0000ffff
+	#define REQ_FC_AUTO_ADV0_SHIFT      0
+	#define NO_LFA_DUE_TO_DCC_MASK      0x00010000
+	u32 lfa_sts;
+	#define LFA_LINK_FLAP_REASON_OFFSET		0
+	#define LFA_LINK_FLAP_REASON_MASK		0x000000ff
+		#define LFA_LINK_DOWN			    0x1
+		#define LFA_LOOPBACK_ENABLED		0x2
+		#define LFA_DUPLEX_MISMATCH		    0x3
+		#define LFA_MFW_IS_TOO_OLD		    0x4
+		#define LFA_LINK_SPEED_MISMATCH		0x5
+		#define LFA_FLOW_CTRL_MISMATCH		0x6
+		#define LFA_SPEED_CAP_MISMATCH		0x7
+		#define LFA_DCC_LFA_DISABLED		0x8
+		#define LFA_EEE_MISMATCH		0x9
+
+	#define LINK_FLAP_AVOIDANCE_COUNT_OFFSET	8
+	#define LINK_FLAP_AVOIDANCE_COUNT_MASK		0x0000ff00
+
+	#define LINK_FLAP_COUNT_OFFSET			16
+	#define LINK_FLAP_COUNT_MASK			0x00ff0000
+
+	#define LFA_FLAGS_MASK				0xff000000
+	#define SHMEM_LFA_DONT_CLEAR_STAT		(1<<24)
+};
+
 struct ncsi_oem_fcoe_features {
 	u32 fcoe_features1;
 	#define FCOE_FEATURES1_IOS_PER_CONNECTION_MASK          0x0000FFFF
@@ -2738,8 +2789,8 @@ struct afex_stats {
 };
 
 #define BCM_5710_FW_MAJOR_VERSION			7
-#define BCM_5710_FW_MINOR_VERSION			2
-#define BCM_5710_FW_REVISION_VERSION			51
+#define BCM_5710_FW_MINOR_VERSION			8
+#define BCM_5710_FW_REVISION_VERSION		2
 #define BCM_5710_FW_ENGINEERING_VERSION			0
 #define BCM_5710_FW_COMPILE_FLAGS			1
 
@@ -3861,10 +3912,8 @@ struct eth_rss_update_ramrod_data {
 #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_TCP_CAPABILITY_SHIFT 4
 #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY (0x1<<5)
 #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY_SHIFT 5
-#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY (0x1<<6)
-#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY_SHIFT 6
-#define __ETH_RSS_UPDATE_RAMROD_DATA_RESERVED0 (0x1<<7)
-#define __ETH_RSS_UPDATE_RAMROD_DATA_RESERVED0_SHIFT 7
+#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY (0x1<<7)
+#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY_SHIFT 7
 	u8 rss_result_mask;
 	u8 rss_mode;
 	__le32 __reserved2;
@@ -4080,27 +4129,29 @@ struct eth_tx_start_bd {
 #define ETH_TX_START_BD_HDR_NBDS_SHIFT 0
 #define ETH_TX_START_BD_FORCE_VLAN_MODE (0x1<<4)
 #define ETH_TX_START_BD_FORCE_VLAN_MODE_SHIFT 4
-#define ETH_TX_START_BD_RESREVED (0x1<<5)
-#define ETH_TX_START_BD_RESREVED_SHIFT 5
-#define ETH_TX_START_BD_ETH_ADDR_TYPE (0x3<<6)
-#define ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT 6
+#define ETH_TX_START_BD_PARSE_NBDS (0x3<<5)
+#define ETH_TX_START_BD_PARSE_NBDS_SHIFT 5
+#define ETH_TX_START_BD_RESREVED (0x1<<7)
+#define ETH_TX_START_BD_RESREVED_SHIFT 7
 };
 
 /*
  * Tx parsing BD structure for ETH E1/E1h
  */
 struct eth_tx_parse_bd_e1x {
-	u8 global_data;
+	__le16 global_data;
 #define ETH_TX_PARSE_BD_E1X_IP_HDR_START_OFFSET_W (0xF<<0)
 #define ETH_TX_PARSE_BD_E1X_IP_HDR_START_OFFSET_W_SHIFT 0
-#define ETH_TX_PARSE_BD_E1X_RESERVED0 (0x1<<4)
-#define ETH_TX_PARSE_BD_E1X_RESERVED0_SHIFT 4
-#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN (0x1<<5)
-#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN_SHIFT 5
-#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN (0x1<<6)
-#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT 6
-#define ETH_TX_PARSE_BD_E1X_NS_FLG (0x1<<7)
-#define ETH_TX_PARSE_BD_E1X_NS_FLG_SHIFT 7
+#define ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE (0x3<<4)
+#define ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE_SHIFT 4
+#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN (0x1<<6)
+#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN_SHIFT 6
+#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN (0x1<<7)
+#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT 7
+#define ETH_TX_PARSE_BD_E1X_NS_FLG (0x1<<8)
+#define ETH_TX_PARSE_BD_E1X_NS_FLG_SHIFT 8
+#define ETH_TX_PARSE_BD_E1X_RESERVED0 (0x7F<<9)
+#define ETH_TX_PARSE_BD_E1X_RESERVED0_SHIFT 9
 	u8 tcp_flags;
 #define ETH_TX_PARSE_BD_E1X_FIN_FLG (0x1<<0)
 #define ETH_TX_PARSE_BD_E1X_FIN_FLG_SHIFT 0
@@ -4119,7 +4170,6 @@ struct eth_tx_parse_bd_e1x {
 #define ETH_TX_PARSE_BD_E1X_CWR_FLG (0x1<<7)
 #define ETH_TX_PARSE_BD_E1X_CWR_FLG_SHIFT 7
 	u8 ip_hlen_w;
-	s8 reserved;
 	__le16 total_hlen_w;
 	__le16 tcp_pseudo_csum;
 	__le16 lso_mss;
@@ -4138,14 +4188,16 @@ struct eth_tx_parse_bd_e2 {
 	__le16 src_mac_addr_mid;
 	__le16 src_mac_addr_hi;
 	__le32 parsing_data;
-#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W (0x1FFF<<0)
+#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W (0x7FF<<0)
 #define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT 0
-#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW (0xF<<13)
-#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT 13
-#define ETH_TX_PARSE_BD_E2_LSO_MSS (0x3FFF<<17)
-#define ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT 17
-#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR (0x1<<31)
-#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR_SHIFT 31
+#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW (0xF<<11)
+#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT 11
+#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR (0x1<<15)
+#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR_SHIFT 15
+#define ETH_TX_PARSE_BD_E2_LSO_MSS (0x3FFF<<16)
+#define ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT 16
+#define ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE (0x3<<30)
+#define ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT 30
 };
 
 /*
@@ -4913,7 +4965,8 @@ struct flow_control_configuration {
  *
  */
 struct function_start_data {
-	__le16 function_mode;
+	u8 function_mode;
+	u8 reserved;
 	__le16 sd_vlan_tag;
 	__le16 vif_id;
 	u8 path_id;
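
The new shmem_lfa block packs three subfields into lfa_sts: the last
flap-reason code in bits 0-7, a count of avoided flaps in bits 8-15, and a
count of actual link flaps in bits 16-23. A decoding sketch reusing the
masks defined above (the raw status word is invented for illustration):

	#include <stdio.h>
	#include <stdint.h>

	/* Masks/offsets copied from the shmem_lfa definition above. */
	#define LFA_LINK_FLAP_REASON_OFFSET		0
	#define LFA_LINK_FLAP_REASON_MASK		0x000000ff
	#define LINK_FLAP_AVOIDANCE_COUNT_OFFSET	8
	#define LINK_FLAP_AVOIDANCE_COUNT_MASK		0x0000ff00
	#define LINK_FLAP_COUNT_OFFSET			16
	#define LINK_FLAP_COUNT_MASK			0x00ff0000

	int main(void)
	{
		uint32_t lfa_sts = 0x00030205;	/* example value only */

		printf("reason=%u avoided=%u flaps=%u\n",
		       (unsigned int)((lfa_sts & LFA_LINK_FLAP_REASON_MASK) >>
				      LFA_LINK_FLAP_REASON_OFFSET),
		       (unsigned int)((lfa_sts & LINK_FLAP_AVOIDANCE_COUNT_MASK) >>
				      LINK_FLAP_AVOIDANCE_COUNT_OFFSET),
		       (unsigned int)((lfa_sts & LINK_FLAP_COUNT_MASK) >>
				      LINK_FLAP_COUNT_OFFSET));
		/* -> reason=5 (LFA_LINK_SPEED_MISMATCH), avoided=2, flaps=3 */
		return 0;
	}
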
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
index 559c396d45cc..c8f10f0e8a0d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
@@ -566,7 +566,7 @@ static const struct {
 		u32 e2;		/* 57712 */
 		u32 e3;		/* 578xx */
 	} reg_mask;		/* Register mask (all valid bits) */
-	char name[7];		/* Block's longest name is 6 characters long
+	char name[8];		/* Block's longest name is 7 characters long
 				 * (name + suffix)
 				 */
 } bnx2x_blocks_parity_data[] = {
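
The name[] bump from 7 to 8 bytes leaves room for the NUL terminator now
that the longest block name plus suffix reaches 7 characters: with
char name[7], a 7-character string initializer is legal C but silently drops
the terminator. A tiny standalone illustration (the block name used here is
hypothetical):

	#include <stdio.h>

	int main(void)
	{
		/* 7 characters plus '\0' need an 8-byte array; with name[7]
		 * the same initializer would leave the field unterminated
		 * and "%s" would read past it.
		 */
		char name[8] = "PXPPF2A";	/* hypothetical 7-char name */
		printf("%s\n", name);
		return 0;
	}
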
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index b046beb435b2..e2e45ee5df33 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -161,120 +161,6 @@
 #define EDC_MODE_LIMITING				0x0044
 #define EDC_MODE_PASSIVE_DAC			0x0055
 
-/* BRB default for class 0 E2 */
-#define DEFAULT0_E2_BRB_MAC_PAUSE_XOFF_THR	170
-#define DEFAULT0_E2_BRB_MAC_PAUSE_XON_THR		250
-#define DEFAULT0_E2_BRB_MAC_FULL_XOFF_THR		10
-#define DEFAULT0_E2_BRB_MAC_FULL_XON_THR		50
-
-/* BRB thresholds for E2*/
-#define PFC_E2_BRB_MAC_PAUSE_XOFF_THR_PAUSE		170
-#define PFC_E2_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE		0
-
-#define PFC_E2_BRB_MAC_PAUSE_XON_THR_PAUSE		250
-#define PFC_E2_BRB_MAC_PAUSE_XON_THR_NON_PAUSE		0
-
-#define PFC_E2_BRB_MAC_FULL_XOFF_THR_PAUSE		10
-#define PFC_E2_BRB_MAC_FULL_XOFF_THR_NON_PAUSE		90
-
-#define PFC_E2_BRB_MAC_FULL_XON_THR_PAUSE			50
-#define PFC_E2_BRB_MAC_FULL_XON_THR_NON_PAUSE		250
-
-/* BRB default for class 0 E3A0 */
-#define DEFAULT0_E3A0_BRB_MAC_PAUSE_XOFF_THR	290
-#define DEFAULT0_E3A0_BRB_MAC_PAUSE_XON_THR	410
-#define DEFAULT0_E3A0_BRB_MAC_FULL_XOFF_THR	10
-#define DEFAULT0_E3A0_BRB_MAC_FULL_XON_THR	50
-
-/* BRB thresholds for E3A0 */
-#define PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_PAUSE		290
-#define PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE		0
-
-#define PFC_E3A0_BRB_MAC_PAUSE_XON_THR_PAUSE		410
-#define PFC_E3A0_BRB_MAC_PAUSE_XON_THR_NON_PAUSE		0
-
-#define PFC_E3A0_BRB_MAC_FULL_XOFF_THR_PAUSE		10
-#define PFC_E3A0_BRB_MAC_FULL_XOFF_THR_NON_PAUSE		170
-
-#define PFC_E3A0_BRB_MAC_FULL_XON_THR_PAUSE		50
-#define PFC_E3A0_BRB_MAC_FULL_XON_THR_NON_PAUSE		410
-
-/* BRB default for E3B0 */
-#define DEFAULT0_E3B0_BRB_MAC_PAUSE_XOFF_THR	330
-#define DEFAULT0_E3B0_BRB_MAC_PAUSE_XON_THR	490
-#define DEFAULT0_E3B0_BRB_MAC_FULL_XOFF_THR	15
-#define DEFAULT0_E3B0_BRB_MAC_FULL_XON_THR	55
-
-/* BRB thresholds for E3B0 2 port mode*/
-#define PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_PAUSE		1025
-#define PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE	0
-
-#define PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_PAUSE		1025
-#define PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE	0
-
-#define PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_PAUSE		10
-#define PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE	1025
-
-#define PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_PAUSE		50
-#define PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_NON_PAUSE	1025
-
-/* only for E3B0*/
-#define PFC_E3B0_2P_BRB_FULL_LB_XOFF_THR			1025
-#define PFC_E3B0_2P_BRB_FULL_LB_XON_THR			1025
-
-/* Lossy +Lossless GUARANTIED == GUART */
-#define PFC_E3B0_2P_MIX_PAUSE_LB_GUART			284
-/* Lossless +Lossless*/
-#define PFC_E3B0_2P_PAUSE_LB_GUART			236
-/* Lossy +Lossy*/
-#define PFC_E3B0_2P_NON_PAUSE_LB_GUART			342
-
-/* Lossy +Lossless*/
-#define PFC_E3B0_2P_MIX_PAUSE_MAC_0_CLASS_T_GUART		284
-/* Lossless +Lossless*/
-#define PFC_E3B0_2P_PAUSE_MAC_0_CLASS_T_GUART		236
-/* Lossy +Lossy*/
-#define PFC_E3B0_2P_NON_PAUSE_MAC_0_CLASS_T_GUART		336
-#define PFC_E3B0_2P_BRB_MAC_0_CLASS_T_GUART_HYST		80
-
-#define PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART		0
-#define PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART_HYST		0
-
-/* BRB thresholds for E3B0 4 port mode */
-#define PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_PAUSE		304
-#define PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE	0
-
-#define PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_PAUSE		384
-#define PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE	0
-
-#define PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_PAUSE		10
-#define PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE	304
-
-#define PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_PAUSE		50
-#define PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_NON_PAUSE	384
-
-/* only for E3B0*/
-#define PFC_E3B0_4P_BRB_FULL_LB_XOFF_THR			304
-#define PFC_E3B0_4P_BRB_FULL_LB_XON_THR			384
-#define PFC_E3B0_4P_LB_GUART		120
-
-#define PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART		120
-#define PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART_HYST	80
-
-#define PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART		80
-#define PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART_HYST	120
-
-/* Pause defines*/
-#define DEFAULT_E3B0_BRB_FULL_LB_XOFF_THR			330
-#define DEFAULT_E3B0_BRB_FULL_LB_XON_THR			490
-#define DEFAULT_E3B0_LB_GUART		40
-
-#define DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART		40
-#define DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART_HYST	0
-
-#define DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART		40
-#define DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART_HYST	0
-
 /* ETS defines*/
 #define DCBX_INVALID_COS					(0xFF)
 
@@ -321,6 +207,127 @@ static u32 bnx2x_bits_dis(struct bnx2x *bp, u32 reg, u32 bits)
 	return val;
 }
 
+/*
+ * bnx2x_check_lfa - This function checks whether link reinitialization is
+ *                   required, or whether the link flap can be avoided.
+ *
+ * @params:	link parameters
+ * Returns 0 if the Link Flap Avoidance conditions are met; otherwise, the
+ *         failed condition code.
+ */
+static int bnx2x_check_lfa(struct link_params *params)
+{
+	u32 link_status, cfg_idx, lfa_mask, cfg_size;
+	u32 cur_speed_cap_mask, cur_req_fc_auto_adv, additional_config;
+	u32 saved_val, req_val, eee_status;
+	struct bnx2x *bp = params->bp;
+
+	additional_config =
+		REG_RD(bp, params->lfa_base +
+			   offsetof(struct shmem_lfa, additional_config));
+
+	/* NOTE: must be first condition checked -
+	 * to verify DCC bit is cleared in any case!
+	 */
+	if (additional_config & NO_LFA_DUE_TO_DCC_MASK) {
+		DP(NETIF_MSG_LINK, "No LFA due to DCC flap after clp exit\n");
+		REG_WR(bp, params->lfa_base +
+			   offsetof(struct shmem_lfa, additional_config),
+		       additional_config & ~NO_LFA_DUE_TO_DCC_MASK);
+		return LFA_DCC_LFA_DISABLED;
+	}
+
+	/* Verify that link is up */
+	link_status = REG_RD(bp, params->shmem_base +
+			     offsetof(struct shmem_region,
+				      port_mb[params->port].link_status));
+	if (!(link_status & LINK_STATUS_LINK_UP))
+		return LFA_LINK_DOWN;
+
+	/* Verify that loopback mode is not set */
+	if (params->loopback_mode)
+		return LFA_LOOPBACK_ENABLED;
+
+	/* Verify that MFW supports LFA */
+	if (!params->lfa_base)
+		return LFA_MFW_IS_TOO_OLD;
+
+	if (params->num_phys == 3) {
+		cfg_size = 2;
+		lfa_mask = 0xffffffff;
+	} else {
+		cfg_size = 1;
+		lfa_mask = 0xffff;
+	}
+
+	/* Compare Duplex */
+	saved_val = REG_RD(bp, params->lfa_base +
+			   offsetof(struct shmem_lfa, req_duplex));
+	req_val = params->req_duplex[0] | (params->req_duplex[1] << 16);
+	if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
+		DP(NETIF_MSG_LINK, "Duplex mismatch %x vs. %x\n",
+			       (saved_val & lfa_mask), (req_val & lfa_mask));
+		return LFA_DUPLEX_MISMATCH;
+	}
+	/* Compare Flow Control */
+	saved_val = REG_RD(bp, params->lfa_base +
+			   offsetof(struct shmem_lfa, req_flow_ctrl));
+	req_val = params->req_flow_ctrl[0] | (params->req_flow_ctrl[1] << 16);
+	if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
+		DP(NETIF_MSG_LINK, "Flow control mismatch %x vs. %x\n",
+			       (saved_val & lfa_mask), (req_val & lfa_mask));
+		return LFA_FLOW_CTRL_MISMATCH;
+	}
+	/* Compare Link Speed */
+	saved_val = REG_RD(bp, params->lfa_base +
+			   offsetof(struct shmem_lfa, req_line_speed));
+	req_val = params->req_line_speed[0] | (params->req_line_speed[1] << 16);
+	if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
+		DP(NETIF_MSG_LINK, "Link speed mismatch %x vs. %x\n",
+			       (saved_val & lfa_mask), (req_val & lfa_mask));
+		return LFA_LINK_SPEED_MISMATCH;
+	}
+
+	for (cfg_idx = 0; cfg_idx < cfg_size; cfg_idx++) {
+		cur_speed_cap_mask = REG_RD(bp, params->lfa_base +
+					    offsetof(struct shmem_lfa,
+						     speed_cap_mask[cfg_idx]));
+
+		if (cur_speed_cap_mask != params->speed_cap_mask[cfg_idx]) {
+			DP(NETIF_MSG_LINK, "Speed Cap mismatch %x vs. %x\n",
+				       cur_speed_cap_mask,
+				       params->speed_cap_mask[cfg_idx]);
+			return LFA_SPEED_CAP_MISMATCH;
+		}
+	}
+
+	cur_req_fc_auto_adv =
+		REG_RD(bp, params->lfa_base +
+		       offsetof(struct shmem_lfa, additional_config)) &
+		REQ_FC_AUTO_ADV_MASK;
+
+	if ((u16)cur_req_fc_auto_adv != params->req_fc_auto_adv) {
+		DP(NETIF_MSG_LINK, "Flow Ctrl AN mismatch %x vs. %x\n",
+			       cur_req_fc_auto_adv, params->req_fc_auto_adv);
+		return LFA_FLOW_CTRL_MISMATCH;
+	}
+
+	eee_status = REG_RD(bp, params->shmem2_base +
+			    offsetof(struct shmem2_region,
+				     eee_status[params->port]));
+
+	if (((eee_status & SHMEM_EEE_LPI_REQUESTED_BIT) ^
+	     (params->eee_mode & EEE_MODE_ENABLE_LPI)) ||
+	    ((eee_status & SHMEM_EEE_REQUESTED_BIT) ^
+	     (params->eee_mode & EEE_MODE_ADV_LPI))) {
+		DP(NETIF_MSG_LINK, "EEE mismatch %x vs. %x\n", params->eee_mode,
+			       eee_status);
+		return LFA_EEE_MISMATCH;
+	}
+
+	/* LFA conditions are met */
+	return 0;
+}
 /******************************************************************/
 /*			EPIO/GPIO section			  */
 /******************************************************************/
@@ -1307,93 +1314,6 @@ int bnx2x_ets_strict(const struct link_params *params, const u8 strict_cos)
 }
 
 /******************************************************************/
-/*			EEE section				   */
-/******************************************************************/
-static u8 bnx2x_eee_has_cap(struct link_params *params)
-{
-	struct bnx2x *bp = params->bp;
-
-	if (REG_RD(bp, params->shmem2_base) <=
-		   offsetof(struct shmem2_region, eee_status[params->port]))
-		return 0;
-
-	return 1;
-}
-
-static int bnx2x_eee_nvram_to_time(u32 nvram_mode, u32 *idle_timer)
-{
-	switch (nvram_mode) {
-	case PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED:
-		*idle_timer = EEE_MODE_NVRAM_BALANCED_TIME;
-		break;
-	case PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE:
-		*idle_timer = EEE_MODE_NVRAM_AGGRESSIVE_TIME;
-		break;
-	case PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY:
-		*idle_timer = EEE_MODE_NVRAM_LATENCY_TIME;
-		break;
-	default:
-		*idle_timer = 0;
-		break;
-	}
-
-	return 0;
-}
-
-static int bnx2x_eee_time_to_nvram(u32 idle_timer, u32 *nvram_mode)
-{
-	switch (idle_timer) {
-	case EEE_MODE_NVRAM_BALANCED_TIME:
-		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED;
-		break;
-	case EEE_MODE_NVRAM_AGGRESSIVE_TIME:
-		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE;
-		break;
-	case EEE_MODE_NVRAM_LATENCY_TIME:
-		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY;
-		break;
-	default:
-		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED;
-		break;
-	}
-
-	return 0;
-}
-
-static u32 bnx2x_eee_calc_timer(struct link_params *params)
-{
-	u32 eee_mode, eee_idle;
-	struct bnx2x *bp = params->bp;
-
-	if (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) {
-		if (params->eee_mode & EEE_MODE_OUTPUT_TIME) {
-			/* time value in eee_mode --> used directly*/
-			eee_idle = params->eee_mode & EEE_MODE_TIMER_MASK;
-		} else {
-			/* hsi value in eee_mode --> time */
-			if (bnx2x_eee_nvram_to_time(params->eee_mode &
-						    EEE_MODE_NVRAM_MASK,
-						    &eee_idle))
-				return 0;
-		}
-	} else {
-		/* hsi values in nvram --> time*/
-		eee_mode = ((REG_RD(bp, params->shmem_base +
-				    offsetof(struct shmem_region, dev_info.
-				    port_feature_config[params->port].
-				    eee_power_mode)) &
-			     PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >>
-			    PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT);
-
-		if (bnx2x_eee_nvram_to_time(eee_mode, &eee_idle))
-			return 0;
-	}
-
-	return eee_idle;
-}
-
-
-/******************************************************************/
 /*			PFC section				  */
 /******************************************************************/
 static void bnx2x_update_pfc_xmac(struct link_params *params,
@@ -1606,16 +1526,23 @@ static void bnx2x_set_xumac_nig(struct link_params *params,
 	       NIG_REG_P0_MAC_PAUSE_OUT_EN, tx_pause_en);
 }
 
-static void bnx2x_umac_disable(struct link_params *params)
+static void bnx2x_set_umac_rxtx(struct link_params *params, u8 en)
 {
 	u32 umac_base = params->port ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
+	u32 val;
 	struct bnx2x *bp = params->bp;
 	if (!(REG_RD(bp, MISC_REG_RESET_REG_2) &
 		   (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port)))
 		return;
-
+	val = REG_RD(bp, umac_base + UMAC_REG_COMMAND_CONFIG);
+	if (en)
+		val |= (UMAC_COMMAND_CONFIG_REG_TX_ENA |
+			UMAC_COMMAND_CONFIG_REG_RX_ENA);
+	else
+		val &= ~(UMAC_COMMAND_CONFIG_REG_TX_ENA |
+			 UMAC_COMMAND_CONFIG_REG_RX_ENA);
 	/* Enable/disable RX and TX */
-	REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, 0);
+	REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val);
 }
 
 static void bnx2x_umac_enable(struct link_params *params,
@@ -1671,6 +1598,16 @@ static void bnx2x_umac_enable(struct link_params *params,
 	REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val);
 	udelay(50);
 
+	/* Configure UMAC for EEE */
+	if (vars->eee_status & SHMEM_EEE_ADV_STATUS_MASK) {
+		DP(NETIF_MSG_LINK, "configured UMAC for EEE\n");
+		REG_WR(bp, umac_base + UMAC_REG_UMAC_EEE_CTRL,
+		       UMAC_UMAC_EEE_CTRL_REG_EEE_EN);
+		REG_WR(bp, umac_base + UMAC_REG_EEE_WAKE_TIMER, 0x11);
+	} else {
+		REG_WR(bp, umac_base + UMAC_REG_UMAC_EEE_CTRL, 0x0);
+	}
+
 	/* Set MAC address for source TX Pause/PFC frames (under SW reset) */
 	REG_WR(bp, umac_base + UMAC_REG_MAC_ADDR0,
 	       ((params->mac_addr[2] << 24) |
@@ -1766,11 +1703,12 @@ static void bnx2x_xmac_init(struct link_params *params, u32 max_speed)
 
 }
 
-static void bnx2x_xmac_disable(struct link_params *params)
+static void bnx2x_set_xmac_rxtx(struct link_params *params, u8 en)
 {
 	u8 port = params->port;
 	struct bnx2x *bp = params->bp;
 	u32 pfc_ctrl, xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
+	u32 val;
 
 	if (REG_RD(bp, MISC_REG_RESET_REG_2) &
 	    MISC_REGISTERS_RESET_REG_2_XMAC) {
@@ -1784,7 +1722,12 @@ static void bnx2x_xmac_disable(struct link_params *params)
 		REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI,
 		       (pfc_ctrl | (1<<1)));
 		DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port);
-		REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0);
+		val = REG_RD(bp, xmac_base + XMAC_REG_CTRL);
+		if (en)
+			val |= (XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN);
+		else
+			val &= ~(XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN);
+		REG_WR(bp, xmac_base + XMAC_REG_CTRL, val);
 	}
 }
 
@@ -2087,391 +2030,6 @@ static void bnx2x_update_pfc_bmac2(struct link_params *params,
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_BMAC_CONTROL, wb_data, 2);
 }
 
-/* PFC BRB internal port configuration params */
-struct bnx2x_pfc_brb_threshold_val {
-	u32 pause_xoff;
-	u32 pause_xon;
-	u32 full_xoff;
-	u32 full_xon;
-};
-
-struct bnx2x_pfc_brb_e3b0_val {
-	u32 per_class_guaranty_mode;
-	u32 lb_guarantied_hyst;
-	u32 full_lb_xoff_th;
-	u32 full_lb_xon_threshold;
-	u32 lb_guarantied;
-	u32 mac_0_class_t_guarantied;
-	u32 mac_0_class_t_guarantied_hyst;
-	u32 mac_1_class_t_guarantied;
-	u32 mac_1_class_t_guarantied_hyst;
-};
-
-struct bnx2x_pfc_brb_th_val {
-	struct bnx2x_pfc_brb_threshold_val pauseable_th;
-	struct bnx2x_pfc_brb_threshold_val non_pauseable_th;
-	struct bnx2x_pfc_brb_threshold_val default_class0;
-	struct bnx2x_pfc_brb_threshold_val default_class1;
-
-};
-static int bnx2x_pfc_brb_get_config_params(
-				struct link_params *params,
-				struct bnx2x_pfc_brb_th_val *config_val)
-{
-	struct bnx2x *bp = params->bp;
-	DP(NETIF_MSG_LINK, "Setting PFC BRB configuration\n");
-
-	config_val->default_class1.pause_xoff = 0;
-	config_val->default_class1.pause_xon = 0;
-	config_val->default_class1.full_xoff = 0;
-	config_val->default_class1.full_xon = 0;
-
-	if (CHIP_IS_E2(bp)) {
-		/* Class0 defaults */
-		config_val->default_class0.pause_xoff =
-			DEFAULT0_E2_BRB_MAC_PAUSE_XOFF_THR;
-		config_val->default_class0.pause_xon =
-			DEFAULT0_E2_BRB_MAC_PAUSE_XON_THR;
-		config_val->default_class0.full_xoff =
-			DEFAULT0_E2_BRB_MAC_FULL_XOFF_THR;
-		config_val->default_class0.full_xon =
-			DEFAULT0_E2_BRB_MAC_FULL_XON_THR;
-		/* Pause able*/
-		config_val->pauseable_th.pause_xoff =
-			PFC_E2_BRB_MAC_PAUSE_XOFF_THR_PAUSE;
-		config_val->pauseable_th.pause_xon =
-			PFC_E2_BRB_MAC_PAUSE_XON_THR_PAUSE;
-		config_val->pauseable_th.full_xoff =
-			PFC_E2_BRB_MAC_FULL_XOFF_THR_PAUSE;
-		config_val->pauseable_th.full_xon =
-			PFC_E2_BRB_MAC_FULL_XON_THR_PAUSE;
-		/* Non pause able*/
-		config_val->non_pauseable_th.pause_xoff =
-			PFC_E2_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
-		config_val->non_pauseable_th.pause_xon =
-			PFC_E2_BRB_MAC_PAUSE_XON_THR_NON_PAUSE;
-		config_val->non_pauseable_th.full_xoff =
-			PFC_E2_BRB_MAC_FULL_XOFF_THR_NON_PAUSE;
-		config_val->non_pauseable_th.full_xon =
-			PFC_E2_BRB_MAC_FULL_XON_THR_NON_PAUSE;
-	} else if (CHIP_IS_E3A0(bp)) {
-		/* Class0 defaults */
-		config_val->default_class0.pause_xoff =
-			DEFAULT0_E3A0_BRB_MAC_PAUSE_XOFF_THR;
-		config_val->default_class0.pause_xon =
-			DEFAULT0_E3A0_BRB_MAC_PAUSE_XON_THR;
-		config_val->default_class0.full_xoff =
-			DEFAULT0_E3A0_BRB_MAC_FULL_XOFF_THR;
-		config_val->default_class0.full_xon =
-			DEFAULT0_E3A0_BRB_MAC_FULL_XON_THR;
-		/* Pause able */
-		config_val->pauseable_th.pause_xoff =
-			PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_PAUSE;
-		config_val->pauseable_th.pause_xon =
-			PFC_E3A0_BRB_MAC_PAUSE_XON_THR_PAUSE;
-		config_val->pauseable_th.full_xoff =
-			PFC_E3A0_BRB_MAC_FULL_XOFF_THR_PAUSE;
-		config_val->pauseable_th.full_xon =
-			PFC_E3A0_BRB_MAC_FULL_XON_THR_PAUSE;
-		/* Non pause able*/
-		config_val->non_pauseable_th.pause_xoff =
-			PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
-		config_val->non_pauseable_th.pause_xon =
-			PFC_E3A0_BRB_MAC_PAUSE_XON_THR_NON_PAUSE;
-		config_val->non_pauseable_th.full_xoff =
-			PFC_E3A0_BRB_MAC_FULL_XOFF_THR_NON_PAUSE;
-		config_val->non_pauseable_th.full_xon =
-			PFC_E3A0_BRB_MAC_FULL_XON_THR_NON_PAUSE;
-	} else if (CHIP_IS_E3B0(bp)) {
-		/* Class0 defaults */
-		config_val->default_class0.pause_xoff =
-			DEFAULT0_E3B0_BRB_MAC_PAUSE_XOFF_THR;
-		config_val->default_class0.pause_xon =
-		    DEFAULT0_E3B0_BRB_MAC_PAUSE_XON_THR;
-		config_val->default_class0.full_xoff =
-		    DEFAULT0_E3B0_BRB_MAC_FULL_XOFF_THR;
-		config_val->default_class0.full_xon =
-		    DEFAULT0_E3B0_BRB_MAC_FULL_XON_THR;
-
-		if (params->phy[INT_PHY].flags &
-		    FLAGS_4_PORT_MODE) {
-			config_val->pauseable_th.pause_xoff =
-				PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_PAUSE;
-			config_val->pauseable_th.pause_xon =
-				PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_PAUSE;
-			config_val->pauseable_th.full_xoff =
-				PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_PAUSE;
-			config_val->pauseable_th.full_xon =
-				PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_PAUSE;
-			/* Non pause able*/
-			config_val->non_pauseable_th.pause_xoff =
-			PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
-			config_val->non_pauseable_th.pause_xon =
-			PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE;
-			config_val->non_pauseable_th.full_xoff =
-			PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE;
-			config_val->non_pauseable_th.full_xon =
-			PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_NON_PAUSE;
-		} else {
-			config_val->pauseable_th.pause_xoff =
-				PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_PAUSE;
-			config_val->pauseable_th.pause_xon =
-				PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_PAUSE;
-			config_val->pauseable_th.full_xoff =
-				PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_PAUSE;
-			config_val->pauseable_th.full_xon =
-				PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_PAUSE;
-			/* Non pause able*/
-			config_val->non_pauseable_th.pause_xoff =
-				PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
-			config_val->non_pauseable_th.pause_xon =
-				PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE;
-			config_val->non_pauseable_th.full_xoff =
-				PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE;
-			config_val->non_pauseable_th.full_xon =
-				PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_NON_PAUSE;
-		}
-	} else
-	    return -EINVAL;
-
-	return 0;
-}
-
-static void bnx2x_pfc_brb_get_e3b0_config_params(
-		struct link_params *params,
-		struct bnx2x_pfc_brb_e3b0_val
-		*e3b0_val,
-		struct bnx2x_nig_brb_pfc_port_params *pfc_params,
-		const u8 pfc_enabled)
-{
-	if (pfc_enabled && pfc_params) {
-		e3b0_val->per_class_guaranty_mode = 1;
-		e3b0_val->lb_guarantied_hyst = 80;
-
-		if (params->phy[INT_PHY].flags &
-		    FLAGS_4_PORT_MODE) {
-			e3b0_val->full_lb_xoff_th =
-				PFC_E3B0_4P_BRB_FULL_LB_XOFF_THR;
-			e3b0_val->full_lb_xon_threshold =
-				PFC_E3B0_4P_BRB_FULL_LB_XON_THR;
-			e3b0_val->lb_guarantied =
-				PFC_E3B0_4P_LB_GUART;
-			e3b0_val->mac_0_class_t_guarantied =
-				PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART;
-			e3b0_val->mac_0_class_t_guarantied_hyst =
-				PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART_HYST;
-			e3b0_val->mac_1_class_t_guarantied =
-				PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART;
-			e3b0_val->mac_1_class_t_guarantied_hyst =
-				PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART_HYST;
-		} else {
-			e3b0_val->full_lb_xoff_th =
-				PFC_E3B0_2P_BRB_FULL_LB_XOFF_THR;
-			e3b0_val->full_lb_xon_threshold =
-				PFC_E3B0_2P_BRB_FULL_LB_XON_THR;
-			e3b0_val->mac_0_class_t_guarantied_hyst =
-				PFC_E3B0_2P_BRB_MAC_0_CLASS_T_GUART_HYST;
-			e3b0_val->mac_1_class_t_guarantied =
-				PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART;
-			e3b0_val->mac_1_class_t_guarantied_hyst =
-				PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART_HYST;
-
-			if (pfc_params->cos0_pauseable !=
-				pfc_params->cos1_pauseable) {
-				/* Nonpauseable= Lossy + pauseable = Lossless*/
-				e3b0_val->lb_guarantied =
-					PFC_E3B0_2P_MIX_PAUSE_LB_GUART;
-				e3b0_val->mac_0_class_t_guarantied =
-			       PFC_E3B0_2P_MIX_PAUSE_MAC_0_CLASS_T_GUART;
-			} else if (pfc_params->cos0_pauseable) {
-				/* Lossless +Lossless*/
-				e3b0_val->lb_guarantied =
-					PFC_E3B0_2P_PAUSE_LB_GUART;
-				e3b0_val->mac_0_class_t_guarantied =
-				   PFC_E3B0_2P_PAUSE_MAC_0_CLASS_T_GUART;
-			} else {
-				/* Lossy +Lossy*/
-				e3b0_val->lb_guarantied =
-					PFC_E3B0_2P_NON_PAUSE_LB_GUART;
-				e3b0_val->mac_0_class_t_guarantied =
-			       PFC_E3B0_2P_NON_PAUSE_MAC_0_CLASS_T_GUART;
-			}
-		}
-	} else {
-		e3b0_val->per_class_guaranty_mode = 0;
-		e3b0_val->lb_guarantied_hyst = 0;
-		e3b0_val->full_lb_xoff_th =
-			DEFAULT_E3B0_BRB_FULL_LB_XOFF_THR;
-		e3b0_val->full_lb_xon_threshold =
-			DEFAULT_E3B0_BRB_FULL_LB_XON_THR;
-		e3b0_val->lb_guarantied =
-			DEFAULT_E3B0_LB_GUART;
-		e3b0_val->mac_0_class_t_guarantied =
-			DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART;
-		e3b0_val->mac_0_class_t_guarantied_hyst =
-			DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART_HYST;
-		e3b0_val->mac_1_class_t_guarantied =
-			DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART;
-		e3b0_val->mac_1_class_t_guarantied_hyst =
-			DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART_HYST;
-	}
-}
-static int bnx2x_update_pfc_brb(struct link_params *params,
-				struct link_vars *vars,
-				struct bnx2x_nig_brb_pfc_port_params
-				*pfc_params)
-{
-	struct bnx2x *bp = params->bp;
-	struct bnx2x_pfc_brb_th_val config_val = { {0} };
-	struct bnx2x_pfc_brb_threshold_val *reg_th_config =
-		&config_val.pauseable_th;
-	struct bnx2x_pfc_brb_e3b0_val e3b0_val = {0};
-	const int set_pfc = params->feature_config_flags &
-		FEATURE_CONFIG_PFC_ENABLED;
-	const u8 pfc_enabled = (set_pfc && pfc_params);
-	int bnx2x_status = 0;
-	u8 port = params->port;
-
-	/* default - pause configuration */
-	reg_th_config = &config_val.pauseable_th;
-	bnx2x_status = bnx2x_pfc_brb_get_config_params(params, &config_val);
-	if (bnx2x_status)
-		return bnx2x_status;
-
-	if (pfc_enabled) {
-		/* First COS */
-		if (pfc_params->cos0_pauseable)
-			reg_th_config = &config_val.pauseable_th;
-		else
-			reg_th_config = &config_val.non_pauseable_th;
-	} else
-		reg_th_config = &config_val.default_class0;
-	/* The number of free blocks below which the pause signal to class 0
-	 * of MAC #n is asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_PAUSE_0_XOFF_THRESHOLD_1 :
-	       BRB1_REG_PAUSE_0_XOFF_THRESHOLD_0 ,
-	       reg_th_config->pause_xoff);
-	/* The number of free blocks above which the pause signal to class 0
-	 * of MAC #n is de-asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_PAUSE_0_XON_THRESHOLD_1 :
-	       BRB1_REG_PAUSE_0_XON_THRESHOLD_0 , reg_th_config->pause_xon);
-	/* The number of free blocks below which the full signal to class 0
-	 * of MAC #n is asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_FULL_0_XOFF_THRESHOLD_1 :
-	       BRB1_REG_FULL_0_XOFF_THRESHOLD_0 , reg_th_config->full_xoff);
-	/* The number of free blocks above which the full signal to class 0
-	 * of MAC #n is de-asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_FULL_0_XON_THRESHOLD_1 :
-	       BRB1_REG_FULL_0_XON_THRESHOLD_0 , reg_th_config->full_xon);
-
-	if (pfc_enabled) {
-		/* Second COS */
-		if (pfc_params->cos1_pauseable)
-			reg_th_config = &config_val.pauseable_th;
-		else
-			reg_th_config = &config_val.non_pauseable_th;
-	} else
-		reg_th_config = &config_val.default_class1;
-	/* The number of free blocks below which the pause signal to
-	 * class 1 of MAC #n is asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_PAUSE_1_XOFF_THRESHOLD_1 :
-	       BRB1_REG_PAUSE_1_XOFF_THRESHOLD_0,
-	       reg_th_config->pause_xoff);
-
-	/* The number of free blocks above which the pause signal to
-	 * class 1 of MAC #n is de-asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_PAUSE_1_XON_THRESHOLD_1 :
-	       BRB1_REG_PAUSE_1_XON_THRESHOLD_0,
-	       reg_th_config->pause_xon);
-	/* The number of free blocks below which the full signal to
-	 * class 1 of MAC #n is asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_FULL_1_XOFF_THRESHOLD_1 :
-	       BRB1_REG_FULL_1_XOFF_THRESHOLD_0,
-	       reg_th_config->full_xoff);
-	/* The number of free blocks above which the full signal to
-	 * class 1 of MAC #n is de-asserted. n=0,1
-	 */
-	REG_WR(bp, (port) ? BRB1_REG_FULL_1_XON_THRESHOLD_1 :
-	       BRB1_REG_FULL_1_XON_THRESHOLD_0,
-	       reg_th_config->full_xon);
-
-	if (CHIP_IS_E3B0(bp)) {
-		bnx2x_pfc_brb_get_e3b0_config_params(
-			params,
-			&e3b0_val,
-			pfc_params,
-			pfc_enabled);
-
-		REG_WR(bp, BRB1_REG_PER_CLASS_GUARANTY_MODE,
-			   e3b0_val.per_class_guaranty_mode);
-
-		/* The hysteresis on the guarantied buffer space for the Lb
-		 * port before signaling XON.
-		 */
-		REG_WR(bp, BRB1_REG_LB_GUARANTIED_HYST,
-			   e3b0_val.lb_guarantied_hyst);
-
-		/* The number of free blocks below which the full signal to the
-		 * LB port is asserted.
-		 */
-		REG_WR(bp, BRB1_REG_FULL_LB_XOFF_THRESHOLD,
-		       e3b0_val.full_lb_xoff_th);
-		/* The number of free blocks above which the full signal to the
-		 * LB port is de-asserted.
-		 */
-		REG_WR(bp, BRB1_REG_FULL_LB_XON_THRESHOLD,
-		       e3b0_val.full_lb_xon_threshold);
-		/* The number of blocks guarantied for the MAC #n port. n=0,1
-		 */
-
-		/* The number of blocks guarantied for the LB port. */
-		REG_WR(bp, BRB1_REG_LB_GUARANTIED,
-		       e3b0_val.lb_guarantied);
-
-		/* The number of blocks guarantied for the MAC #n port. */
-		REG_WR(bp, BRB1_REG_MAC_GUARANTIED_0,
-		       2 * e3b0_val.mac_0_class_t_guarantied);
-		REG_WR(bp, BRB1_REG_MAC_GUARANTIED_1,
-		       2 * e3b0_val.mac_1_class_t_guarantied);
-		/* The number of blocks guarantied for class #t in MAC0. t=0,1
-		 */
-		REG_WR(bp, BRB1_REG_MAC_0_CLASS_0_GUARANTIED,
-		       e3b0_val.mac_0_class_t_guarantied);
-		REG_WR(bp, BRB1_REG_MAC_0_CLASS_1_GUARANTIED,
-		       e3b0_val.mac_0_class_t_guarantied);
-		/* The hysteresis on the guarantied buffer space for class in
-		 * MAC0.  t=0,1
-		 */
-		REG_WR(bp, BRB1_REG_MAC_0_CLASS_0_GUARANTIED_HYST,
-		       e3b0_val.mac_0_class_t_guarantied_hyst);
-		REG_WR(bp, BRB1_REG_MAC_0_CLASS_1_GUARANTIED_HYST,
-		       e3b0_val.mac_0_class_t_guarantied_hyst);
-
-		/* The number of blocks guarantied for class #t in MAC1.t=0,1
-		 */
-		REG_WR(bp, BRB1_REG_MAC_1_CLASS_0_GUARANTIED,
-		       e3b0_val.mac_1_class_t_guarantied);
-		REG_WR(bp, BRB1_REG_MAC_1_CLASS_1_GUARANTIED,
-		       e3b0_val.mac_1_class_t_guarantied);
-		/* The hysteresis on the guarantied buffer space for class #t
-		 * in MAC1.  t=0,1
-		 */
-		REG_WR(bp, BRB1_REG_MAC_1_CLASS_0_GUARANTIED_HYST,
-		       e3b0_val.mac_1_class_t_guarantied_hyst);
-		REG_WR(bp, BRB1_REG_MAC_1_CLASS_1_GUARANTIED_HYST,
-		       e3b0_val.mac_1_class_t_guarantied_hyst);
-	}
-
-	return bnx2x_status;
-}
-
 /******************************************************************************
 * Description:
 *  This function is needed because NIG ARB_CREDIT_WEIGHT_X are
@@ -2529,16 +2087,6 @@ static void bnx2x_update_mng(struct link_params *params, u32 link_status)
 			port_mb[params->port].link_status), link_status);
 }
 
-static void bnx2x_update_mng_eee(struct link_params *params, u32 eee_status)
-{
-	struct bnx2x *bp = params->bp;
-
-	if (bnx2x_eee_has_cap(params))
-		REG_WR(bp, params->shmem2_base +
-		       offsetof(struct shmem2_region,
-				eee_status[params->port]), eee_status);
-}
-
 static void bnx2x_update_pfc_nig(struct link_params *params,
 		struct link_vars *vars,
 		struct bnx2x_nig_brb_pfc_port_params *nig_params)
@@ -2658,11 +2206,6 @@ int bnx2x_update_pfc(struct link_params *params,
 	/* Update NIG params */
 	bnx2x_update_pfc_nig(params, vars, pfc_params);
 
-	/* Update BRB params */
-	bnx2x_status = bnx2x_update_pfc_brb(params, vars, pfc_params);
-	if (bnx2x_status)
-		return bnx2x_status;
-
 	if (!vars->link_up)
 		return bnx2x_status;
 
@@ -2827,16 +2370,18 @@ static int bnx2x_bmac2_enable(struct link_params *params,
 
 static int bnx2x_bmac_enable(struct link_params *params,
 			     struct link_vars *vars,
-			     u8 is_lb)
+			     u8 is_lb, u8 reset_bmac)
 {
 	int rc = 0;
 	u8 port = params->port;
 	struct bnx2x *bp = params->bp;
 	u32 val;
 	/* Reset and unreset the BigMac */
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
-	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
-	usleep_range(1000, 2000);
+	if (reset_bmac) {
+		REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
+		       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
+		usleep_range(1000, 2000);
+	}
 
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
 	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
@@ -2868,37 +2413,28 @@ static int bnx2x_bmac_enable(struct link_params *params,
 	return rc;
 }
 
-static void bnx2x_bmac_rx_disable(struct bnx2x *bp, u8 port)
+static void bnx2x_set_bmac_rx(struct bnx2x *bp, u32 chip_id, u8 port, u8 en)
 {
 	u32 bmac_addr = port ? NIG_REG_INGRESS_BMAC1_MEM :
 			NIG_REG_INGRESS_BMAC0_MEM;
 	u32 wb_data[2];
 	u32 nig_bmac_enable = REG_RD(bp, NIG_REG_BMAC0_REGS_OUT_EN + port*4);
 
+	if (CHIP_IS_E2(bp))
+		bmac_addr += BIGMAC2_REGISTER_BMAC_CONTROL;
+	else
+		bmac_addr += BIGMAC_REGISTER_BMAC_CONTROL;
 	/* Only if the bmac is out of reset */
 	if (REG_RD(bp, MISC_REG_RESET_REG_2) &
 			(MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port) &&
 	    nig_bmac_enable) {
-
-		if (CHIP_IS_E2(bp)) {
-			/* Clear Rx Enable bit in BMAC_CONTROL register */
-			REG_RD_DMAE(bp, bmac_addr +
-				    BIGMAC2_REGISTER_BMAC_CONTROL,
-				    wb_data, 2);
-			wb_data[0] &= ~BMAC_CONTROL_RX_ENABLE;
-			REG_WR_DMAE(bp, bmac_addr +
-				    BIGMAC2_REGISTER_BMAC_CONTROL,
-				    wb_data, 2);
-		} else {
-			/* Clear Rx Enable bit in BMAC_CONTROL register */
-			REG_RD_DMAE(bp, bmac_addr +
-					BIGMAC_REGISTER_BMAC_CONTROL,
-					wb_data, 2);
+		/* Clear Rx Enable bit in BMAC_CONTROL register */
+		REG_RD_DMAE(bp, bmac_addr, wb_data, 2);
+		if (en)
+			wb_data[0] |= BMAC_CONTROL_RX_ENABLE;
+		else
 			wb_data[0] &= ~BMAC_CONTROL_RX_ENABLE;
-			REG_WR_DMAE(bp, bmac_addr +
-					BIGMAC_REGISTER_BMAC_CONTROL,
-					wb_data, 2);
-		}
+		REG_WR_DMAE(bp, bmac_addr, wb_data, 2);
 		usleep_range(1000, 2000);
 	}
 }
@@ -3233,6 +2769,245 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy,
 			       EMAC_MDIO_STATUS_10MB);
 	return rc;
 }
+
+/******************************************************************/
+/*			EEE section				   */
+/******************************************************************/
+static u8 bnx2x_eee_has_cap(struct link_params *params)
+{
+	struct bnx2x *bp = params->bp;
+
+	if (REG_RD(bp, params->shmem2_base) <=
+		   offsetof(struct shmem2_region, eee_status[params->port]))
+		return 0;
+
+	return 1;
+}
+
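The check above exploits the fact that shmem2 publishes its own size in its first dword: a field exists only if the region is large enough to reach past that field's offset. A minimal standalone sketch of the same probe-by-size idiom (the struct layout here is illustrative, not the real shmem2_region):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct fw_region {
	uint32_t size;		/* first dword: size of the region itself */
	uint32_t legacy_field;
	uint32_t eee_status[2];	/* optional: present only in newer firmware */
};

/* a field is usable only if the published size reaches past its offset */
static int has_field(uint32_t region_size, size_t field_off)
{
	return region_size > field_off;
}

int main(void)
{
	size_t off = offsetof(struct fw_region, eee_status[1]);

	printf("old fw: %d\n", has_field(8, off));			/* 0 */
	printf("new fw: %d\n", has_field(sizeof(struct fw_region), off)); /* 1 */
	return 0;
}
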
+static int bnx2x_eee_nvram_to_time(u32 nvram_mode, u32 *idle_timer)
+{
+	switch (nvram_mode) {
+	case PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED:
+		*idle_timer = EEE_MODE_NVRAM_BALANCED_TIME;
+		break;
+	case PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE:
+		*idle_timer = EEE_MODE_NVRAM_AGGRESSIVE_TIME;
+		break;
+	case PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY:
+		*idle_timer = EEE_MODE_NVRAM_LATENCY_TIME;
+		break;
+	default:
+		*idle_timer = 0;
+		break;
+	}
+
+	return 0;
+}
+
+static int bnx2x_eee_time_to_nvram(u32 idle_timer, u32 *nvram_mode)
+{
+	switch (idle_timer) {
+	case EEE_MODE_NVRAM_BALANCED_TIME:
+		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED;
+		break;
+	case EEE_MODE_NVRAM_AGGRESSIVE_TIME:
+		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE;
+		break;
+	case EEE_MODE_NVRAM_LATENCY_TIME:
+		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY;
+		break;
+	default:
+		*nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED;
+		break;
+	}
+
+	return 0;
+}
+
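bnx2x_eee_nvram_to_time() and bnx2x_eee_time_to_nvram() are intended to be inverses over the three canonical power modes, with everything else collapsing to a zero timer or the DISABLED mode. A standalone sketch of that round trip, using stand-in constants where the real EEE_MODE_NVRAM_*_TIME values would be:

#include <stdint.h>
#include <stdio.h>

enum { MODE_DISABLED, MODE_BALANCED, MODE_AGGRESSIVE, MODE_LATENCY };
enum { T_BALANCED = 0xa00, T_AGGRESSIVE = 0x100, T_LATENCY = 0x6000 };

static uint32_t mode_to_time(uint32_t m)
{
	switch (m) {
	case MODE_BALANCED:	return T_BALANCED;
	case MODE_AGGRESSIVE:	return T_AGGRESSIVE;
	case MODE_LATENCY:	return T_LATENCY;
	default:		return 0;	/* unknown mode -> no timer */
	}
}

static uint32_t time_to_mode(uint32_t t)
{
	switch (t) {
	case T_BALANCED:	return MODE_BALANCED;
	case T_AGGRESSIVE:	return MODE_AGGRESSIVE;
	case T_LATENCY:		return MODE_LATENCY;
	default:		return MODE_DISABLED;	/* non-canonical timer */
	}
}

int main(void)
{
	/* canonical values survive the round trip ... */
	printf("%d\n", time_to_mode(mode_to_time(MODE_BALANCED)) == MODE_BALANCED);
	/* ... while an arbitrary user-set timer maps back to DISABLED */
	printf("%d\n", time_to_mode(0x1234) == MODE_DISABLED);
	return 0;
}
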
+static u32 bnx2x_eee_calc_timer(struct link_params *params)
+{
+	u32 eee_mode, eee_idle;
+	struct bnx2x *bp = params->bp;
+
+	if (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) {
+		if (params->eee_mode & EEE_MODE_OUTPUT_TIME) {
+			/* time value in eee_mode --> used directly*/
+			eee_idle = params->eee_mode & EEE_MODE_TIMER_MASK;
+		} else {
+			/* hsi value in eee_mode --> time */
+			if (bnx2x_eee_nvram_to_time(params->eee_mode &
+						    EEE_MODE_NVRAM_MASK,
+						    &eee_idle))
+				return 0;
+		}
+	} else {
+		/* hsi values in nvram --> time*/
+		eee_mode = ((REG_RD(bp, params->shmem_base +
+				    offsetof(struct shmem_region, dev_info.
+				    port_feature_config[params->port].
+				    eee_power_mode)) &
+			     PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >>
+			    PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT);
+
+		if (bnx2x_eee_nvram_to_time(eee_mode, &eee_idle))
+			return 0;
+	}
+
+	return eee_idle;
+}
+
+static int bnx2x_eee_set_timers(struct link_params *params,
+				   struct link_vars *vars)
+{
+	u32 eee_idle = 0, eee_mode;
+	struct bnx2x *bp = params->bp;
+
+	eee_idle = bnx2x_eee_calc_timer(params);
+
+	if (eee_idle) {
+		REG_WR(bp, MISC_REG_CPMU_LP_IDLE_THR_P0 + (params->port << 2),
+		       eee_idle);
+	} else if ((params->eee_mode & EEE_MODE_ENABLE_LPI) &&
+		   (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) &&
+		   (params->eee_mode & EEE_MODE_OUTPUT_TIME)) {
+		DP(NETIF_MSG_LINK, "Error: Tx LPI is enabled with timer 0\n");
+		return -EINVAL;
+	}
+
+	vars->eee_status &= ~(SHMEM_EEE_TIMER_MASK | SHMEM_EEE_TIME_OUTPUT_BIT);
+	if (params->eee_mode & EEE_MODE_OUTPUT_TIME) {
+		/* eee_idle in 1 usec units --> eee_status in 16 usec units */
+		eee_idle >>= 4;
+		vars->eee_status |= (eee_idle & SHMEM_EEE_TIMER_MASK) |
+				    SHMEM_EEE_TIME_OUTPUT_BIT;
+	} else {
+		if (bnx2x_eee_time_to_nvram(eee_idle, &eee_mode))
+			return -EINVAL;
+		vars->eee_status |= eee_mode;
+	}
+
+	return 0;
+}
+
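The eee_idle >>= 4 above is a unit conversion: the CPMU idle threshold register is programmed in 1 usec units, while the shmem status field stores the timer in 16 usec units. A worked example of the packing, with an illustrative stand-in for SHMEM_EEE_TIMER_MASK:

#include <stdint.h>
#include <stdio.h>

#define TIMER_MASK	0x00ffffff	/* stand-in for SHMEM_EEE_TIMER_MASK */

int main(void)
{
	uint32_t idle_us = 0xa00;	/* 2560 usec programmed to the HW */
	uint32_t packed = (idle_us >> 4) & TIMER_MASK;

	printf("0x%x\n", packed);	/* 0xa0: 160 units of 16 usec = 2560 usec */
	return 0;
}
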
+static int bnx2x_eee_initial_config(struct link_params *params,
+				     struct link_vars *vars, u8 mode)
+{
+	vars->eee_status |= ((u32) mode) << SHMEM_EEE_SUPPORTED_SHIFT;
+
+	/* Propagate params' bits --> vars (for migration exposure) */
+	if (params->eee_mode & EEE_MODE_ENABLE_LPI)
+		vars->eee_status |= SHMEM_EEE_LPI_REQUESTED_BIT;
+	else
+		vars->eee_status &= ~SHMEM_EEE_LPI_REQUESTED_BIT;
+
+	if (params->eee_mode & EEE_MODE_ADV_LPI)
+		vars->eee_status |= SHMEM_EEE_REQUESTED_BIT;
+	else
+		vars->eee_status &= ~SHMEM_EEE_REQUESTED_BIT;
+
+	return bnx2x_eee_set_timers(params, vars);
+}
+
+static int bnx2x_eee_disable(struct bnx2x_phy *phy,
+				struct link_params *params,
+				struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+
+	/* Make certain LPI is disabled */
+	REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2), 0);
+
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0x0);
+
+	vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK;
+
+	return 0;
+}
+
+static int bnx2x_eee_advertise(struct bnx2x_phy *phy,
+				  struct link_params *params,
+				  struct link_vars *vars, u8 modes)
+{
+	struct bnx2x *bp = params->bp;
+	u16 val = 0;
+
+	/* Mask events preventing LPI generation */
+	REG_WR(bp, MISC_REG_CPMU_LP_MASK_EXT_P0 + (params->port << 2), 0xfc20);
+
+	if (modes & SHMEM_EEE_10G_ADV) {
+		DP(NETIF_MSG_LINK, "Advertise 10GBase-T EEE\n");
+		val |= 0x8;
+	}
+	if (modes & SHMEM_EEE_1G_ADV) {
+		DP(NETIF_MSG_LINK, "Advertise 1GBase-T EEE\n");
+		val |= 0x4;
+	}
+
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, val);
+
+	vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK;
+	vars->eee_status |= (modes << SHMEM_EEE_ADV_STATUS_SHIFT);
+
+	return 0;
+}
+
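The 0x4 and 0x8 bits written to MDIO_AN_REG_EEE_ADV match the IEEE 802.3 clause-45 EEE advertisement layout, in which bit 2 advertises 1000BASE-T EEE and bit 3 advertises 10GBASE-T EEE. A small sketch of how the word is assembled from the requested modes:

#include <stdint.h>
#include <stdio.h>

#define ADV_1G	(1u << 2)	/* 1000BASE-T EEE */
#define ADV_10G	(1u << 3)	/* 10GBASE-T EEE */

static uint16_t build_eee_adv(int adv_1g, int adv_10g)
{
	uint16_t val = 0;

	if (adv_1g)
		val |= ADV_1G;
	if (adv_10g)
		val |= ADV_10G;
	return val;
}

int main(void)
{
	printf("0x%x\n", (unsigned)build_eee_adv(1, 1));	/* 0xc = 0x4 | 0x8 */
	return 0;
}
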
+static void bnx2x_update_mng_eee(struct link_params *params, u32 eee_status)
+{
+	struct bnx2x *bp = params->bp;
+
+	if (bnx2x_eee_has_cap(params))
+		REG_WR(bp, params->shmem2_base +
+		       offsetof(struct shmem2_region,
+				eee_status[params->port]), eee_status);
+}
+
+static void bnx2x_eee_an_resolve(struct bnx2x_phy *phy,
+				  struct link_params *params,
+				  struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+	u16 adv = 0, lp = 0;
+	u32 lp_adv = 0;
+	u8 neg = 0;
+
+	bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, &adv);
+	bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_LP_EEE_ADV, &lp);
+
+	if (lp & 0x2) {
+		lp_adv |= SHMEM_EEE_100M_ADV;
+		if (adv & 0x2) {
+			if (vars->line_speed == SPEED_100)
+				neg = 1;
+			DP(NETIF_MSG_LINK, "EEE negotiated - 100M\n");
+		}
+	}
+	if (lp & 0x14) {
+		lp_adv |= SHMEM_EEE_1G_ADV;
+		if (adv & 0x14) {
+			if (vars->line_speed == SPEED_1000)
+				neg = 1;
+			DP(NETIF_MSG_LINK, "EEE negotiated - 1G\n");
+		}
+	}
+	if (lp & 0x68) {
+		lp_adv |= SHMEM_EEE_10G_ADV;
+		if (adv & 0x68) {
+			if (vars->line_speed == SPEED_10000)
+				neg = 1;
+			DP(NETIF_MSG_LINK, "EEE negotiated - 10G\n");
+		}
+	}
+
+	vars->eee_status &= ~SHMEM_EEE_LP_ADV_STATUS_MASK;
+	vars->eee_status |= (lp_adv << SHMEM_EEE_LP_ADV_STATUS_SHIFT);
+
+	if (neg) {
+		DP(NETIF_MSG_LINK, "EEE is active\n");
+		vars->eee_status |= SHMEM_EEE_ACTIVE_BIT;
+	}
+}
+
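The masks in the resolve logic group each BASE-T bit with its backplane siblings under the same clause-45 layout: 0x2 is 100BASE-TX, 0x14 covers 1000BASE-T plus 1000BASE-KX, and 0x68 covers 10GBASE-T plus 10GBASE-KX4/KR. A simplified standalone decode (the real function also records the partner's modes even when the local side did not advertise them):

#include <stdint.h>
#include <stdio.h>

struct eee_mode { uint16_t mask; int speed; const char *name; };

static const struct eee_mode modes[] = {
	{ 0x02,   100, "100M" },
	{ 0x14,  1000, "1G"   },
	{ 0x68, 10000, "10G"  },
};

int main(void)
{
	uint16_t adv = 0x0c, lp = 0x04;	/* we offer 1G+10G, partner offers 1G */
	int line_speed = 1000, active = 0;
	unsigned int i;

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
		if ((lp & modes[i].mask) && (adv & modes[i].mask) &&
		    line_speed == modes[i].speed) {
			printf("EEE negotiated - %s\n", modes[i].name);
			active = 1;
		}
	}
	printf("active=%d\n", active);
	return 0;
}
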
 /******************************************************************/
 /*			BSC access functions from E3	          */
 /******************************************************************/
@@ -3754,6 +3529,19 @@ static u8 bnx2x_ext_phy_resolve_fc(struct bnx2x_phy *phy,
  * init configuration, and set/clear SGMII flag. Internal
  * phy init is done purely in phy_init stage.
  */
+
+static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy,
+					       struct link_params *params)
+{
+	struct bnx2x *bp = params->bp;
+
+	DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n");
+	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+			 MDIO_WC_REG_EEE_COMBO_CONTROL0, 0x7c);
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_DIGITAL4_MISC5, 0xc000);
+}
+
 static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 					struct link_params *params,
 					struct link_vars *vars) {
@@ -4013,13 +3801,7 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy,
 	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
 				 MDIO_WC_REG_DIGITAL4_MISC3, 0x8080);
 
-	/* Enable LPI pass through */
-	DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n");
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_EEE_COMBO_CONTROL0,
-			 0x7c);
-	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
-				 MDIO_WC_REG_DIGITAL4_MISC5, 0xc000);
+	bnx2x_warpcore_set_lpi_passthrough(phy, params);
 
 	/* 10G XFI Full Duplex */
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
@@ -4116,6 +3898,8 @@ static void bnx2x_warpcore_set_sgmii_speed(struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_RX66_CONTROL, val16 & ~(3<<13));
 
+	bnx2x_warpcore_set_lpi_passthrough(phy, params);
+
 	if (always_autoneg || phy->req_line_speed == SPEED_AUTO_NEG) {
 		/* SGMII Autoneg */
 		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
@@ -4409,7 +4193,7 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 			   "serdes_net_if = 0x%x\n",
 		       vars->line_speed, serdes_net_if);
 	bnx2x_set_aer_mmd(params, phy);
-
+	bnx2x_warpcore_reset_lane(bp, phy, 1);
 	vars->phy_flags |= PHY_XGXS_FLAG;
 	if ((serdes_net_if == PORT_HW_CFG_NET_SERDES_IF_SGMII) ||
 	    (phy->req_line_speed &&
@@ -4718,6 +4502,10 @@ void bnx2x_link_status_update(struct link_params *params,
 	vars->link_status = REG_RD(bp, params->shmem_base +
 				   offsetof(struct shmem_region,
 					    port_mb[port].link_status));
+	if (bnx2x_eee_has_cap(params))
+		vars->eee_status = REG_RD(bp, params->shmem2_base +
+					  offsetof(struct shmem2_region,
+						   eee_status[params->port]));
 
 	vars->phy_flags = PHY_XGXS_FLAG;
 	bnx2x_sync_link(params, vars);
@@ -6530,25 +6318,21 @@ static int bnx2x_update_link_down(struct link_params *params,
 	usleep_range(10000, 20000);
 	/* Reset BigMac/Xmac */
 	if (CHIP_IS_E1x(bp) ||
-	    CHIP_IS_E2(bp)) {
-		bnx2x_bmac_rx_disable(bp, params->port);
-		REG_WR(bp, GRCBASE_MISC +
-		       MISC_REGISTERS_RESET_REG_2_CLEAR,
-	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
-	}
+	    CHIP_IS_E2(bp))
+		bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 0);
+
 	if (CHIP_IS_E3(bp)) {
 		/* Prevent LPI Generation by chip */
 		REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2),
 		       0);
-		REG_WR(bp, MISC_REG_CPMU_LP_DR_ENABLE, 0);
 		REG_WR(bp, MISC_REG_CPMU_LP_MASK_ENT_P0 + (params->port << 2),
 		       0);
 		vars->eee_status &= ~(SHMEM_EEE_LP_ADV_STATUS_MASK |
 				      SHMEM_EEE_ACTIVE_BIT);
 
 		bnx2x_update_mng_eee(params, vars->eee_status);
-		bnx2x_xmac_disable(params);
-		bnx2x_umac_disable(params);
+		bnx2x_set_xmac_rxtx(params, 0);
+		bnx2x_set_umac_rxtx(params, 0);
 	}
 
 	return 0;
@@ -6600,7 +6384,7 @@ static int bnx2x_update_link_up(struct link_params *params,
 	if ((CHIP_IS_E1x(bp) ||
 	     CHIP_IS_E2(bp))) {
 		if (link_10g) {
-			if (bnx2x_bmac_enable(params, vars, 0) ==
+			if (bnx2x_bmac_enable(params, vars, 0, 1) ==
 			    -ESRCH) {
 				DP(NETIF_MSG_LINK, "Found errors on BMAC\n");
 				vars->link_up = 0;
@@ -7207,6 +6991,22 @@ static void bnx2x_8073_set_pause_cl37(struct link_params *params,
 	msleep(500);
 }
 
+static void bnx2x_8073_specific_func(struct bnx2x_phy *phy,
+				     struct link_params *params,
+				     u32 action)
+{
+	struct bnx2x *bp = params->bp;
+	switch (action) {
+	case PHY_INIT:
+		/* Enable LASI */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, (1<<2));
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,  0x0004);
+		break;
+	}
+}
+
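This is the commit's recurring refactor: one-off init sequences move into per-PHY phy_specific_func callbacks keyed by an action code, so the link-flap-avoidance path below can replay PHY_INIT without duplicating the MDIO writes. A minimal standalone sketch of the callback-plus-action pattern (all names illustrative):

#include <stdio.h>

enum phy_action { ACT_DISABLE_TX = 1, ACT_ENABLE_TX = 2, ACT_PHY_INIT = 3 };

struct phy;
typedef void (*specific_func_t)(struct phy *p, enum phy_action a);

struct phy {
	const char *name;
	specific_func_t specific_func;
};

static void phy_a_func(struct phy *p, enum phy_action a)
{
	switch (a) {
	case ACT_PHY_INIT:
		printf("%s: enable LASI\n", p->name);
		break;
	default:
		printf("%s: action %d not supported\n", p->name, a);
	}
}

int main(void)
{
	struct phy phys[] = { { "phy_a", phy_a_func }, { "phy_b", NULL } };
	unsigned int i;

	/* as in the LFA path: invoke the hook only where one is registered */
	for (i = 0; i < 2; i++)
		if (phys[i].specific_func)
			phys[i].specific_func(&phys[i], ACT_PHY_INIT);
	return 0;
}
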
 static int bnx2x_8073_config_init(struct bnx2x_phy *phy,
 				  struct link_params *params,
 				  struct link_vars *vars)
@@ -7227,12 +7027,7 @@ static int bnx2x_8073_config_init(struct bnx2x_phy *phy,
 	bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
 		       MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
 
-	/* Enable LASI */
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, (1<<2));
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,  0x0004);
-
+	bnx2x_8073_specific_func(phy, params, PHY_INIT);
 	bnx2x_8073_set_pause_cl37(params, phy, vars);
 
 	bnx2x_cl45_read(bp, phy,
@@ -8267,7 +8062,7 @@ static void bnx2x_8727_specific_func(struct bnx2x_phy *phy,
 				     u32 action)
 {
 	struct bnx2x *bp = params->bp;
-
+	u16 val;
 	switch (action) {
 	case DISABLE_TX:
 		bnx2x_sfp_set_transmitter(params, phy, 0);
@@ -8276,6 +8071,40 @@ static void bnx2x_8727_specific_func(struct bnx2x_phy *phy,
 		if (!(phy->flags & FLAGS_SFP_NOT_APPROVED))
 			bnx2x_sfp_set_transmitter(params, phy, 1);
 		break;
+	case PHY_INIT:
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
+				 (1<<2) | (1<<5));
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL,
+				 0);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0006);
+		/* Make MOD_ABS give interrupt on change */
+		bnx2x_cl45_read(bp, phy, MDIO_PMA_DEVAD,
+				MDIO_PMA_REG_8727_PCS_OPT_CTRL,
+				&val);
+		val |= (1<<12);
+		if (phy->flags & FLAGS_NOC)
+			val |= (3<<5);
+		/* Set 8727 GPIOs to input to allow reading from the 8727 GPIO0
+		 * status, which reflects SFP+ module over-current
+		 */
+		if (!(phy->flags & FLAGS_NOC))
+			val &= 0xff8f; /* Reset bits 4-6 */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL,
+				 val);
+
+		/* Set 2-wire transfer rate of SFP+ module EEPROM
+		 * to 100kHz, since some DACs (direct attached cables) do
+		 * not work at 400kHz.
+		 */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD,
+				 MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR,
+				 0xa001);
+		break;
 	default:
 		DP(NETIF_MSG_LINK, "Function 0x%x not supported by 8727\n",
 		   action);
@@ -9058,28 +8887,15 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 				  struct link_vars *vars)
 {
 	u32 tx_en_mode;
-	u16 tmp1, val, mod_abs, tmp2;
-	u16 rx_alarm_ctrl_val;
-	u16 lasi_ctrl_val;
+	u16 tmp1, mod_abs, tmp2;
 	struct bnx2x *bp = params->bp;
 	/* Enable PMD link, MOD_ABS_FLT, and 1G link alarm */
 
 	bnx2x_wait_reset_complete(bp, phy, params);
-	rx_alarm_ctrl_val = (1<<2) | (1<<5) ;
-	/* Should be 0x6 to enable XS on Tx side. */
-	lasi_ctrl_val = 0x0006;
 
 	DP(NETIF_MSG_LINK, "Initializing BCM8727\n");
-	/* Enable LASI */
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
-			 rx_alarm_ctrl_val);
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL,
-			 0);
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, lasi_ctrl_val);
 
+	bnx2x_8727_specific_func(phy, params, PHY_INIT);
 	/* Initially configure MOD_ABS to interrupt when module is
 	 * present (bit 8)
 	 */
@@ -9095,25 +8911,9 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, mod_abs);
 
-
 	/* Enable/Disable PHY transmitter output */
 	bnx2x_set_disable_pmd_transmit(params, phy, 0);
 
-	/* Make MOD_ABS give interrupt on change */
-	bnx2x_cl45_read(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL,
-			&val);
-	val |= (1<<12);
-	if (phy->flags & FLAGS_NOC)
-		val |= (3<<5);
-
-	/* Set 8727 GPIOs to input to allow reading from the 8727 GPIO0
-	 * status which reflect SFP+ module over-current
-	 */
-	if (!(phy->flags & FLAGS_NOC))
-		val &= 0xff8f; /* Reset bits 4-6 */
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, val);
-
 	bnx2x_8727_power_module(bp, phy, 1);
 
 	bnx2x_cl45_read(bp, phy,
@@ -9123,13 +8923,7 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 			MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
 
 	bnx2x_8727_config_speed(phy, params);
-	/* Set 2-wire transfer rate of SFP+ module EEPROM
-	 * to 100Khz since some DACs(direct attached cables) do
-	 * not work at 400Khz.
-	 */
-	bnx2x_cl45_write(bp, phy,
-			 MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR,
-			 0xa001);
 
 	/* Set TX PreEmphasis if needed */
 	if ((params->feature_config_flags &
@@ -9558,6 +9352,29 @@ static void bnx2x_848xx_set_led(struct bnx2x *bp,
 			 0xFFFB, 0xFFFD);
 }
 
+static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
+				      struct link_params *params,
+				      u32 action)
+{
+	struct bnx2x *bp = params->bp;
+	switch (action) {
+	case PHY_INIT:
+		if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+			/* Save spirom version */
+			bnx2x_save_848xx_spirom_version(phy, bp, params->port);
+		}
+		/* This phy uses the NIG latch mechanism since link indication
+		 * arrives through its LED4 and not via its LASI signal, so we
+		 * get steady signal instead of clear on read
+		 */
+		bnx2x_bits_en(bp, NIG_REG_LATCH_BC_0 + params->port*4,
+			      1 << NIG_LATCH_BC_ENABLE_MI_INT);
+
+		bnx2x_848xx_set_led(bp, phy);
+		break;
+	}
+}
+
 static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
 				       struct link_params *params,
 				       struct link_vars *vars)
@@ -9565,22 +9382,10 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 autoneg_val, an_1000_val, an_10_100_val, an_10g_val;
 
-	if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
-		/* Save spirom version */
-		bnx2x_save_848xx_spirom_version(phy, bp, params->port);
-	}
-	/* This phy uses the NIG latch mechanism since link indication
-	 * arrives through its LED4 and not via its LASI signal, so we
-	 * get steady signal instead of clear on read
-	 */
-	bnx2x_bits_en(bp, NIG_REG_LATCH_BC_0 + params->port*4,
-		      1 << NIG_LATCH_BC_ENABLE_MI_INT);
-
+	bnx2x_848xx_specific_func(phy, params, PHY_INIT);
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x0000);
 
-	bnx2x_848xx_set_led(bp, phy);
-
 	/* set 1000 speed advertisement */
 	bnx2x_cl45_read(bp, phy,
 			MDIO_AN_DEVAD, MDIO_AN_REG_8481_1000T_CTRL,
@@ -9887,39 +9692,6 @@ static int bnx2x_84833_hw_reset_phy(struct bnx2x_phy *phy,
 	return 0;
 }
 
-static int bnx2x_8483x_eee_timers(struct link_params *params,
-				   struct link_vars *vars)
-{
-	u32 eee_idle = 0, eee_mode;
-	struct bnx2x *bp = params->bp;
-
-	eee_idle = bnx2x_eee_calc_timer(params);
-
-	if (eee_idle) {
-		REG_WR(bp, MISC_REG_CPMU_LP_IDLE_THR_P0 + (params->port << 2),
-		       eee_idle);
-	} else if ((params->eee_mode & EEE_MODE_ENABLE_LPI) &&
-		   (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) &&
-		   (params->eee_mode & EEE_MODE_OUTPUT_TIME)) {
-		DP(NETIF_MSG_LINK, "Error: Tx LPI is enabled with timer 0\n");
-		return -EINVAL;
-	}
-
-	vars->eee_status &= ~(SHMEM_EEE_TIMER_MASK | SHMEM_EEE_TIME_OUTPUT_BIT);
-	if (params->eee_mode & EEE_MODE_OUTPUT_TIME) {
-		/* eee_idle in 1u --> eee_status in 16u */
-		eee_idle >>= 4;
-		vars->eee_status |= (eee_idle & SHMEM_EEE_TIMER_MASK) |
-				    SHMEM_EEE_TIME_OUTPUT_BIT;
-	} else {
-		if (bnx2x_eee_time_to_nvram(eee_idle, &eee_mode))
-			return -EINVAL;
-		vars->eee_status |= eee_mode;
-	}
-
-	return 0;
-}
-
 static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy,
 				   struct link_params *params,
 				   struct link_vars *vars)
@@ -9930,10 +9702,6 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy,
 
 	DP(NETIF_MSG_LINK, "Don't Advertise 10GBase-T EEE\n");
 
-	/* Make Certain LPI is disabled */
-	REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2), 0);
-	REG_WR(bp, MISC_REG_CPMU_LP_DR_ENABLE, 0);
-
 	/* Prevent Phy from working in EEE and advertising it */
 	rc = bnx2x_84833_cmd_hdlr(phy, params,
 		PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
@@ -9942,10 +9710,7 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy,
 		return rc;
 	}
 
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0);
-	vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK;
-
-	return 0;
+	return bnx2x_eee_disable(phy, params, vars);
 }
 
 static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy,
@@ -9956,8 +9721,6 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 cmd_args = 1;
 
-	DP(NETIF_MSG_LINK, "Advertise 10GBase-T EEE\n");
-
 	rc = bnx2x_84833_cmd_hdlr(phy, params,
 		PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
 	if (rc) {
@@ -9965,15 +9728,7 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy,
 		return rc;
 	}
 
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0x8);
-
-	/* Mask events preventing LPI generation */
-	REG_WR(bp, MISC_REG_CPMU_LP_MASK_EXT_P0 + (params->port << 2), 0xfc20);
-
-	vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK;
-	vars->eee_status |= (SHMEM_EEE_10G_ADV << SHMEM_EEE_ADV_STATUS_SHIFT);
-
-	return 0;
+	return bnx2x_eee_advertise(phy, params, vars, SHMEM_EEE_10G_ADV);
 }
 
 #define PHY84833_CONSTANT_LATENCY 1193
@@ -10105,22 +9860,10 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 			MDIO_84833_TOP_CFG_FW_REV, &val);
 
 	/* Configure EEE support */
-	if ((val >= MDIO_84833_TOP_CFG_FW_EEE) && bnx2x_eee_has_cap(params)) {
-		phy->flags |= FLAGS_EEE_10GBT;
-		vars->eee_status |= SHMEM_EEE_10G_ADV <<
-				    SHMEM_EEE_SUPPORTED_SHIFT;
-		/* Propogate params' bits --> vars (for migration exposure) */
-		if (params->eee_mode & EEE_MODE_ENABLE_LPI)
-			vars->eee_status |= SHMEM_EEE_LPI_REQUESTED_BIT;
-		else
-			vars->eee_status &= ~SHMEM_EEE_LPI_REQUESTED_BIT;
-
-		if (params->eee_mode & EEE_MODE_ADV_LPI)
-			vars->eee_status |= SHMEM_EEE_REQUESTED_BIT;
-		else
-			vars->eee_status &= ~SHMEM_EEE_REQUESTED_BIT;
-
-		rc = bnx2x_8483x_eee_timers(params, vars);
+	if ((val >= MDIO_84833_TOP_CFG_FW_EEE) &&
+	    (val != MDIO_84833_TOP_CFG_FW_NO_EEE) &&
+	    bnx2x_eee_has_cap(params)) {
+		rc = bnx2x_eee_initial_config(params, vars, SHMEM_EEE_10G_ADV);
 		if (rc) {
 			DP(NETIF_MSG_LINK, "Failed to configure EEE timers\n");
 			bnx2x_8483x_disable_eee(phy, params, vars);
@@ -10139,7 +9882,6 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 			return rc;
 		}
 	} else {
-		phy->flags &= ~FLAGS_EEE_10GBT;
 		vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
 	}
 
@@ -10278,29 +10020,8 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy,
 				LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
 
 		/* Determine if EEE was negotiated */
-		if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
-			u32 eee_shmem = 0;
-
-			bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
-					MDIO_AN_REG_EEE_ADV, &val1);
-			bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
-					MDIO_AN_REG_LP_EEE_ADV, &val2);
-			if ((val1 & val2) & 0x8) {
-				DP(NETIF_MSG_LINK, "EEE negotiated\n");
-				vars->eee_status |= SHMEM_EEE_ACTIVE_BIT;
-			}
-
-			if (val2 & 0x12)
-				eee_shmem |= SHMEM_EEE_100M_ADV;
-			if (val2 & 0x4)
-				eee_shmem |= SHMEM_EEE_1G_ADV;
-			if (val2 & 0x68)
-				eee_shmem |= SHMEM_EEE_10G_ADV;
-
-			vars->eee_status &= ~SHMEM_EEE_LP_ADV_STATUS_MASK;
-			vars->eee_status |= (eee_shmem <<
-					     SHMEM_EEE_LP_ADV_STATUS_SHIFT);
-		}
+		if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833)
+			bnx2x_eee_an_resolve(phy, params, vars);
 	}
 
 	return link_up;
@@ -10569,6 +10290,35 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy,
 /******************************************************************/
 /*			54618SE PHY SECTION			  */
 /******************************************************************/
+static void bnx2x_54618se_specific_func(struct bnx2x_phy *phy,
+					struct link_params *params,
+					u32 action)
+{
+	struct bnx2x *bp = params->bp;
+	u16 temp;
+	switch (action) {
+	case PHY_INIT:
+		/* Configure LED4: set to INTR (0x6). */
+		/* Accessing shadow register 0xe. */
+		bnx2x_cl22_write(bp, phy,
+				 MDIO_REG_GPHY_SHADOW,
+				 MDIO_REG_GPHY_SHADOW_LED_SEL2);
+		bnx2x_cl22_read(bp, phy,
+				MDIO_REG_GPHY_SHADOW,
+				&temp);
+		temp &= ~(0xf << 4);
+		temp |= (0x6 << 4);
+		bnx2x_cl22_write(bp, phy,
+				 MDIO_REG_GPHY_SHADOW,
+				 MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
+		/* Configure INTR based on link status change. */
+		bnx2x_cl22_write(bp, phy,
+				 MDIO_REG_INTR_MASK,
+				 ~MDIO_REG_INTR_MASK_LINK_STATUS);
+		break;
+	}
+}
+
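The LED4 setup above uses the GPHY's indirect shadow-register access: write the shadow selector to register 0x1c, read the contents back, modify the field, then write it back with the write-enable bit set. A sketch of that read-modify-write sequence with stubbed MDIO accessors (register addresses and bit positions are illustrative):

#include <stdint.h>
#include <stdio.h>

#define SHADOW_REG	0x1c
#define SHADOW_WR_ENA	(1u << 15)	/* stand-in for the WR_ENA bit */
#define LED_SEL2	(0xeu << 10)	/* stand-in for the LED_SEL2 selector */

static uint16_t regs[32];		/* fake MDIO register file */

static void mdio_write(uint16_t reg, uint16_t val) { regs[reg & 31] = val; }
static uint16_t mdio_read(uint16_t reg)            { return regs[reg & 31]; }

int main(void)
{
	uint16_t temp;

	mdio_write(SHADOW_REG, LED_SEL2);	/* 1. select the shadow page */
	temp = mdio_read(SHADOW_REG);		/* 2. read current contents */
	temp &= ~(0xf << 4);			/* 3. clear the LED4 field... */
	temp |= (0x6 << 4);			/*    ...and set it to INTR */
	mdio_write(SHADOW_REG, SHADOW_WR_ENA | temp);	/* 4. commit */
	printf("0x%04x\n", (unsigned)mdio_read(SHADOW_REG));
	return 0;
}
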
 static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 					       struct link_params *params,
 					       struct link_vars *vars)
@@ -10606,24 +10356,8 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 	/* Wait for GPHY to reset */
 	msleep(50);
 
-	/* Configure LED4: set to INTR (0x6). */
-	/* Accessing shadow register 0xe. */
-	bnx2x_cl22_write(bp, phy,
-			MDIO_REG_GPHY_SHADOW,
-			MDIO_REG_GPHY_SHADOW_LED_SEL2);
-	bnx2x_cl22_read(bp, phy,
-			MDIO_REG_GPHY_SHADOW,
-			&temp);
-	temp &= ~(0xf << 4);
-	temp |= (0x6 << 4);
-	bnx2x_cl22_write(bp, phy,
-			MDIO_REG_GPHY_SHADOW,
-			MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
-	/* Configure INTR based on link status change. */
-	bnx2x_cl22_write(bp, phy,
-			MDIO_REG_INTR_MASK,
-			~MDIO_REG_INTR_MASK_LINK_STATUS);
 
+	bnx2x_54618se_specific_func(phy, params, PHY_INIT);
 	/* Flip the signal detect polarity (set 0x1c.0x1e[8]). */
 	bnx2x_cl22_write(bp, phy,
 			MDIO_REG_GPHY_SHADOW,
@@ -10728,28 +10462,52 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "Setting 10M force\n");
 	}
 
-	/* Check if we should turn on Auto-GrEEEn */
-	bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_PHYID_LSB, &temp);
-	if (temp == MDIO_REG_GPHY_ID_54618SE) {
-		if (params->feature_config_flags &
-		    FEATURE_CONFIG_AUTOGREEEN_ENABLED) {
-			temp = 6;
-			DP(NETIF_MSG_LINK, "Enabling Auto-GrEEEn\n");
+	if ((phy->flags & FLAGS_EEE) && bnx2x_eee_has_cap(params)) {
+		int rc;
+
+		bnx2x_cl22_write(bp, phy, MDIO_REG_GPHY_EXP_ACCESS,
+				 MDIO_REG_GPHY_EXP_ACCESS_TOP |
+				 MDIO_REG_GPHY_EXP_TOP_2K_BUF);
+		bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_EXP_ACCESS_GATE, &temp);
+		temp &= 0xfffe;
+		bnx2x_cl22_write(bp, phy, MDIO_REG_GPHY_EXP_ACCESS_GATE, temp);
+
+		rc = bnx2x_eee_initial_config(params, vars, SHMEM_EEE_1G_ADV);
+		if (rc) {
+			DP(NETIF_MSG_LINK, "Failed to configure EEE timers\n");
+			bnx2x_eee_disable(phy, params, vars);
+		} else if ((params->eee_mode & EEE_MODE_ADV_LPI) &&
+			   (phy->req_duplex == DUPLEX_FULL) &&
+			   (bnx2x_eee_calc_timer(params) ||
+			    !(params->eee_mode & EEE_MODE_ENABLE_LPI))) {
+			/* Need to advertise EEE only when requested,
+			 * and either no LPI assertion was requested,
+			 * or it was requested and a valid timer was set.
+			 * Also notice full duplex is required for EEE.
+			 */
+			bnx2x_eee_advertise(phy, params, vars,
+					    SHMEM_EEE_1G_ADV);
 		} else {
-			temp = 0;
-			DP(NETIF_MSG_LINK, "Disabling Auto-GrEEEn\n");
+			DP(NETIF_MSG_LINK, "Don't Advertise 1GBase-T EEE\n");
+			bnx2x_eee_disable(phy, params, vars);
+		}
+	} else {
+		vars->eee_status &= ~(SHMEM_EEE_1G_ADV <<
+				      SHMEM_EEE_SUPPORTED_SHIFT);
+
+		if (phy->flags & FLAGS_EEE) {
+			/* Handle legacy auto-grEEEn */
+			if (params->feature_config_flags &
+			    FEATURE_CONFIG_AUTOGREEEN_ENABLED) {
+				temp = 6;
+				DP(NETIF_MSG_LINK, "Enabling Auto-GrEEEn\n");
+			} else {
+				temp = 0;
+				DP(NETIF_MSG_LINK, "Don't Adv. EEE\n");
+			}
+			bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+					 MDIO_AN_REG_EEE_ADV, temp);
 		}
-		bnx2x_cl22_write(bp, phy,
-				 MDIO_REG_GPHY_CL45_ADDR_REG, MDIO_AN_DEVAD);
-		bnx2x_cl22_write(bp, phy,
-				 MDIO_REG_GPHY_CL45_DATA_REG,
-				 MDIO_REG_GPHY_EEE_ADV);
-		bnx2x_cl22_write(bp, phy,
-				 MDIO_REG_GPHY_CL45_ADDR_REG,
-				 (0x1 << 14) | MDIO_AN_DEVAD);
-		bnx2x_cl22_write(bp, phy,
-				 MDIO_REG_GPHY_CL45_DATA_REG,
-				 temp);
 	}
 
 	bnx2x_cl22_write(bp, phy,
@@ -10896,29 +10654,6 @@ static u8 bnx2x_54618se_read_status(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "BCM54618SE: link speed is %d\n",
 			   vars->line_speed);
 
-		/* Report whether EEE is resolved. */
-		bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_PHYID_LSB, &val);
-		if (val == MDIO_REG_GPHY_ID_54618SE) {
-			if (vars->link_status &
-			    LINK_STATUS_AUTO_NEGOTIATE_COMPLETE)
-				val = 0;
-			else {
-				bnx2x_cl22_write(bp, phy,
-					MDIO_REG_GPHY_CL45_ADDR_REG,
-					MDIO_AN_DEVAD);
-				bnx2x_cl22_write(bp, phy,
-					MDIO_REG_GPHY_CL45_DATA_REG,
-					MDIO_REG_GPHY_EEE_RESOLVED);
-				bnx2x_cl22_write(bp, phy,
-					MDIO_REG_GPHY_CL45_ADDR_REG,
-					(0x1 << 14) | MDIO_AN_DEVAD);
-				bnx2x_cl22_read(bp, phy,
-					MDIO_REG_GPHY_CL45_DATA_REG,
-					&val);
-			}
-			DP(NETIF_MSG_LINK, "EEE resolution: 0x%x\n", val);
-		}
-
 		bnx2x_ext_phy_resolve_fc(phy, params, vars);
 
 		if (vars->link_status & LINK_STATUS_AUTO_NEGOTIATE_COMPLETE) {
@@ -10948,6 +10683,10 @@ static u8 bnx2x_54618se_read_status(struct bnx2x_phy *phy,
 			if (val & (1<<11))
 				vars->link_status |=
 				  LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+
+			if ((phy->flags & FLAGS_EEE) &&
+			    bnx2x_eee_has_cap(params))
+				bnx2x_eee_an_resolve(phy, params, vars);
 		}
 	}
 	return link_up;
@@ -11353,7 +11092,7 @@ static struct bnx2x_phy phy_8073 = {
 	.format_fw_ver	= (format_fw_ver_t)bnx2x_format_ver,
 	.hw_reset	= (hw_reset_t)NULL,
 	.set_link_led	= (set_link_led_t)NULL,
-	.phy_specific_func = (phy_specific_func_t)NULL
+	.phy_specific_func = (phy_specific_func_t)bnx2x_8073_specific_func
 };
 static struct bnx2x_phy phy_8705 = {
 	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8705,
@@ -11546,7 +11285,7 @@ static struct bnx2x_phy phy_84823 = {
 	.format_fw_ver	= (format_fw_ver_t)bnx2x_848xx_format_ver,
 	.hw_reset	= (hw_reset_t)NULL,
 	.set_link_led	= (set_link_led_t)bnx2x_848xx_set_link_led,
-	.phy_specific_func = (phy_specific_func_t)NULL
+	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
 };
 
 static struct bnx2x_phy phy_84833 = {
@@ -11555,8 +11294,7 @@ static struct bnx2x_phy phy_84833 = {
 	.def_md_devad	= 0,
 	.flags		= (FLAGS_FAN_FAILURE_DET_REQ |
 			   FLAGS_REARM_LATCH_SIGNAL |
-			   FLAGS_TX_ERROR_CHECK |
-			   FLAGS_EEE_10GBT),
+			   FLAGS_TX_ERROR_CHECK),
 	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
 	.mdio_ctrl	= 0,
@@ -11582,7 +11320,7 @@ static struct bnx2x_phy phy_84833 = {
 	.format_fw_ver	= (format_fw_ver_t)bnx2x_848xx_format_ver,
 	.hw_reset	= (hw_reset_t)bnx2x_84833_hw_reset_phy,
 	.set_link_led	= (set_link_led_t)bnx2x_848xx_set_link_led,
-	.phy_specific_func = (phy_specific_func_t)NULL
+	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
 };
 
 static struct bnx2x_phy phy_54618se = {
@@ -11616,7 +11354,7 @@ static struct bnx2x_phy phy_54618se = {
 	.format_fw_ver	= (format_fw_ver_t)NULL,
 	.hw_reset	= (hw_reset_t)NULL,
 	.set_link_led	= (set_link_led_t)bnx2x_5461x_set_link_led,
-	.phy_specific_func = (phy_specific_func_t)NULL
+	.phy_specific_func = (phy_specific_func_t)bnx2x_54618se_specific_func
 };
 /*****************************************************************/
 /*                                                               */
@@ -11862,6 +11600,8 @@ static int bnx2x_populate_ext_phy(struct bnx2x *bp,
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54616:
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE:
 		*phy = phy_54618se;
+		if (phy_type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE)
+			phy->flags |= FLAGS_EEE;
 		break;
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
 		*phy = phy_7101;
@@ -12141,7 +11881,7 @@ void bnx2x_init_bmac_loopback(struct link_params *params,
 		bnx2x_xgxs_deassert(params);
 
 		/* set bmac loopback */
-		bnx2x_bmac_enable(params, vars, 1);
+		bnx2x_bmac_enable(params, vars, 1, 1);
 
 		REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
 }
@@ -12233,7 +11973,7 @@ void bnx2x_init_xgxs_loopback(struct link_params *params,
 		if (USES_WARPCORE(bp))
 			bnx2x_xmac_enable(params, vars, 0);
 		else
-			bnx2x_bmac_enable(params, vars, 0);
+			bnx2x_bmac_enable(params, vars, 0, 1);
 	}
 
 		if (params->loopback_mode == LOOPBACK_XGXS) {
@@ -12258,8 +11998,161 @@ void bnx2x_init_xgxs_loopback(struct link_params *params,
 	bnx2x_set_led(params, vars, LED_MODE_OPER, vars->line_speed);
 }
 
+static void bnx2x_set_rx_filter(struct link_params *params, u8 en)
+{
+	struct bnx2x *bp = params->bp;
+	u8 val = en * 0x1F;
+
+	/* Open the gate between the NIG and the BRB */
+	if (!CHIP_IS_E1x(bp))
+		val |= en * 0x20;
+	REG_WR(bp, NIG_REG_LLH0_BRB1_DRV_MASK + params->port*4, val);
+
+	if (!CHIP_IS_E1(bp)) {
+		REG_WR(bp, NIG_REG_LLH0_BRB1_DRV_MASK_MF + params->port*4,
+		       en*0x3);
+	}
+
+	REG_WR(bp, (params->port ? NIG_REG_LLH1_BRB1_NOT_MCP :
+		    NIG_REG_LLH0_BRB1_NOT_MCP), en);
+}
+
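Multiplying the mask by en (0 or 1) lets one helper both open and close every NIG-to-BRB gate, with one extra gate bit on non-E1x chips. A worked example of the resulting masks:

#include <stdint.h>
#include <stdio.h>

static uint8_t drv_mask(int en, int is_e1x)
{
	uint8_t val = en * 0x1F;	/* the five basic NIG-BRB gates */

	if (!is_e1x)
		val |= en * 0x20;	/* extra gate on E2 and newer */
	return val;
}

int main(void)
{
	printf("enable,  E2+: 0x%02x\n", (unsigned)drv_mask(1, 0));	/* 0x3f */
	printf("enable,  E1x: 0x%02x\n", (unsigned)drv_mask(1, 1));	/* 0x1f */
	printf("disable:      0x%02x\n", (unsigned)drv_mask(0, 0));	/* 0x00 */
	return 0;
}
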
+static int bnx2x_avoid_link_flap(struct link_params *params,
+					    struct link_vars *vars)
+{
+	u32 phy_idx;
+	u32 dont_clear_stat, lfa_sts;
+	struct bnx2x *bp = params->bp;
+
+	/* Sync the link parameters */
+	bnx2x_link_status_update(params, vars);
+
+	/* The module verification was already done by the previous link
+	 * owner, so this call is meant only to get a warning message.
+	 */
+
+	for (phy_idx = INT_PHY; phy_idx < params->num_phys; phy_idx++) {
+		struct bnx2x_phy *phy = &params->phy[phy_idx];
+		if (phy->phy_specific_func) {
+			DP(NETIF_MSG_LINK, "Calling PHY specific func\n");
+			phy->phy_specific_func(phy, params, PHY_INIT);
+		}
+		if ((phy->media_type == ETH_PHY_SFPP_10G_FIBER) ||
+		    (phy->media_type == ETH_PHY_SFP_1G_FIBER) ||
+		    (phy->media_type == ETH_PHY_DA_TWINAX))
+			bnx2x_verify_sfp_module(phy, params);
+	}
+	lfa_sts = REG_RD(bp, params->lfa_base +
+			 offsetof(struct shmem_lfa,
+				  lfa_sts));
+
+	dont_clear_stat = lfa_sts & SHMEM_LFA_DONT_CLEAR_STAT;
+
+	/* Re-enable the NIG/MAC */
+	if (CHIP_IS_E3(bp)) {
+		if (!dont_clear_stat) {
+			REG_WR(bp, GRCBASE_MISC +
+			       MISC_REGISTERS_RESET_REG_2_CLEAR,
+			       (MISC_REGISTERS_RESET_REG_2_MSTAT0 <<
+				params->port));
+			REG_WR(bp, GRCBASE_MISC +
+			       MISC_REGISTERS_RESET_REG_2_SET,
+			       (MISC_REGISTERS_RESET_REG_2_MSTAT0 <<
+				params->port));
+		}
+		if (vars->line_speed < SPEED_10000)
+			bnx2x_umac_enable(params, vars, 0);
+		else
+			bnx2x_xmac_enable(params, vars, 0);
+	} else {
+		if (vars->line_speed < SPEED_10000)
+			bnx2x_emac_enable(params, vars, 0);
+		else
+			bnx2x_bmac_enable(params, vars, 0, !dont_clear_stat);
+	}
+
+	/* Increment LFA count */
+	lfa_sts = ((lfa_sts & ~LINK_FLAP_AVOIDANCE_COUNT_MASK) |
+		   (((((lfa_sts & LINK_FLAP_AVOIDANCE_COUNT_MASK) >>
+		       LINK_FLAP_AVOIDANCE_COUNT_OFFSET) + 1) & 0xff)
+		    << LINK_FLAP_AVOIDANCE_COUNT_OFFSET));
+	/* Clear link flap reason */
+	lfa_sts &= ~LFA_LINK_FLAP_REASON_MASK;
+
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, lfa_sts), lfa_sts);
+
+	/* Disable NIG DRAIN */
+	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
+
+	/* Enable interrupts */
+	bnx2x_link_int_enable(params);
+	return 0;
+}
+
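The LFA count lives in a bit-field of lfa_sts, so the increment above is an extract / add / wrap / reinsert sequence on a shared status word. A worked example with illustrative stand-ins for LINK_FLAP_AVOIDANCE_COUNT_MASK and _OFFSET:

#include <stdint.h>
#include <stdio.h>

#define CNT_MASK	0x0000ff00	/* stand-in mask */
#define CNT_OFFSET	8		/* stand-in offset */

static uint32_t inc_count(uint32_t sts)
{
	uint32_t cnt = (sts & CNT_MASK) >> CNT_OFFSET;

	cnt = (cnt + 1) & 0xff;		/* 8-bit counter, wraps at 256 */
	return (sts & ~CNT_MASK) | (cnt << CNT_OFFSET);
}

int main(void)
{
	printf("0x%08x\n", inc_count(0x00000201));	/* count 2 -> 3 */
	printf("0x%08x\n", inc_count(0x0000ff01));	/* count wraps to 0 */
	return 0;
}
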
+static void bnx2x_cannot_avoid_link_flap(struct link_params *params,
+					 struct link_vars *vars,
+					 int lfa_status)
+{
+	u32 lfa_sts, cfg_idx, tmp_val;
+	struct bnx2x *bp = params->bp;
+
+	bnx2x_link_reset(params, vars, 1);
+
+	if (!params->lfa_base)
+		return;
+	/* Store the new link parameters */
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, req_duplex),
+	       params->req_duplex[0] | (params->req_duplex[1] << 16));
+
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, req_flow_ctrl),
+	       params->req_flow_ctrl[0] | (params->req_flow_ctrl[1] << 16));
+
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, req_line_speed),
+	       params->req_line_speed[0] | (params->req_line_speed[1] << 16));
+
+	for (cfg_idx = 0; cfg_idx < SHMEM_LINK_CONFIG_SIZE; cfg_idx++) {
+		REG_WR(bp, params->lfa_base +
+		       offsetof(struct shmem_lfa,
+				speed_cap_mask[cfg_idx]),
+		       params->speed_cap_mask[cfg_idx]);
+	}
+
+	tmp_val = REG_RD(bp, params->lfa_base +
+			 offsetof(struct shmem_lfa, additional_config));
+	tmp_val &= ~REQ_FC_AUTO_ADV_MASK;
+	tmp_val |= params->req_fc_auto_adv;
+
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, additional_config), tmp_val);
+
+	lfa_sts = REG_RD(bp, params->lfa_base +
+			 offsetof(struct shmem_lfa, lfa_sts));
+
+	/* Clear the "Don't Clear Statistics" bit, and set reason */
+	lfa_sts &= ~SHMEM_LFA_DONT_CLEAR_STAT;
+
+	/* Set link flap reason */
+	lfa_sts &= ~LFA_LINK_FLAP_REASON_MASK;
+	lfa_sts |= ((lfa_status & LFA_LINK_FLAP_REASON_MASK) <<
+		    LFA_LINK_FLAP_REASON_OFFSET);
+
+	/* Increment link flap counter */
+	lfa_sts = ((lfa_sts & ~LINK_FLAP_COUNT_MASK) |
+		   (((((lfa_sts & LINK_FLAP_COUNT_MASK) >>
+		       LINK_FLAP_COUNT_OFFSET) + 1) & 0xff)
+		    << LINK_FLAP_COUNT_OFFSET));
+	REG_WR(bp, params->lfa_base +
+	       offsetof(struct shmem_lfa, lfa_sts), lfa_sts);
+	/* Proceed with regular link initialization */
+}
+
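Each REG_WR above packs both ports' 16-bit request values into one 32-bit shmem word: port 0 in the low half, port 1 in the high half. A worked example of the packing and unpacking:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t req[2] = { 1000, 10000 };	/* port 0 and port 1 speeds */
	uint32_t word = req[0] | ((uint32_t)req[1] << 16);

	printf("packed: 0x%08x\n", word);
	printf("port 0: %u\n", word & 0xffff);
	printf("port 1: %u\n", word >> 16);
	return 0;
}
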
 int bnx2x_phy_init(struct link_params *params, struct link_vars *vars)
 {
+	int lfa_status;
 	struct bnx2x *bp = params->bp;
 	DP(NETIF_MSG_LINK, "Phy Initialization started\n");
 	DP(NETIF_MSG_LINK, "(1) req_speed %d, req_flowctrl %d\n",
@@ -12274,6 +12167,19 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars)
 	vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
 	vars->mac_type = MAC_TYPE_NONE;
 	vars->phy_flags = 0;
+	/* Driver opens NIG-BRB filters */
+	bnx2x_set_rx_filter(params, 1);
+	/* Check if link flap can be avoided */
+	lfa_status = bnx2x_check_lfa(params);
+
+	if (lfa_status == 0) {
+		DP(NETIF_MSG_LINK, "Link Flap Avoidance in progress\n");
+		return bnx2x_avoid_link_flap(params, vars);
+	}
+
+	DP(NETIF_MSG_LINK, "Cannot avoid link flap lfa_sta=0x%x\n",
+		       lfa_status);
+	bnx2x_cannot_avoid_link_flap(params, vars, lfa_status);
 
 	/* Disable attentions */
 	bnx2x_bits_dis(bp, NIG_REG_MASK_INTERRUPT_PORT0 + params->port*4,
@@ -12356,13 +12262,12 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 		REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0);
 	}
 
-	/* Stop BigMac rx */
-	if (!CHIP_IS_E3(bp))
-		bnx2x_bmac_rx_disable(bp, port);
-	else {
-		bnx2x_xmac_disable(params);
-		bnx2x_umac_disable(params);
-	}
+	if (!CHIP_IS_E3(bp)) {
+		bnx2x_set_bmac_rx(bp, params->chip_id, port, 0);
+	} else {
+		bnx2x_set_xmac_rxtx(params, 0);
+		bnx2x_set_umac_rxtx(params, 0);
+	}
 	/* Disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
@@ -12420,6 +12325,56 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 	vars->phy_flags = 0;
 	return 0;
 }
+int bnx2x_lfa_reset(struct link_params *params,
+			       struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+	vars->link_up = 0;
+	vars->phy_flags = 0;
+	if (!params->lfa_base)
+		return bnx2x_link_reset(params, vars, 1);
+	/* Activate NIG drain so that during this time the device won't send
+	 * anything while it is unable to respond.
+	 */
+	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 1);
+
+	/* Gracefully close the gate from the BMAC to the NIG so that no
+	 * half packets are passed.
+	 */
+	if (!CHIP_IS_E3(bp))
+		bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 0);
+
+	if (CHIP_IS_E3(bp)) {
+		bnx2x_set_xmac_rxtx(params, 0);
+		bnx2x_set_umac_rxtx(params, 0);
+	}
+	/* Wait 10ms for the pipe to clean up */
+	usleep_range(10000, 20000);
+
+	/* Clean the NIG-BRB using the network filters in a way that will
+	 * not cut a packet in the middle.
+	 */
+	bnx2x_set_rx_filter(params, 0);
+
+	/* Re-open the gate between the BMAC and the NIG, after verifying that
+	 * the gate to the BRB is closed; otherwise packets may arrive at the
+	 * firmware before the driver has initialized it. The target is to
+	 * achieve minimum management protocol down time.
+	 */
+	if (!CHIP_IS_E3(bp))
+		bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 1);
+
+	if (CHIP_IS_E3(bp)) {
+		bnx2x_set_xmac_rxtx(params, 1);
+		bnx2x_set_umac_rxtx(params, 1);
+	}
+	/* Disable NIG drain */
+	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
+	return 0;
+}
 
 /****************************************************************************/
 /*				Common function				    */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index 51cac8130051..9165b89a4b19 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -155,7 +155,7 @@ struct bnx2x_phy {
 #define FLAGS_DUMMY_READ		(1<<9)
 #define FLAGS_MDC_MDIO_WA_B0		(1<<10)
 #define FLAGS_TX_ERROR_CHECK		(1<<12)
-#define FLAGS_EEE_10GBT			(1<<13)
+#define FLAGS_EEE			(1<<13)
 
 	/* preemphasis values for the rx side */
 	u16 rx_preemphasis[4];
@@ -216,6 +216,7 @@ struct bnx2x_phy {
 	phy_specific_func_t phy_specific_func;
 #define DISABLE_TX	1
 #define ENABLE_TX	2
+#define PHY_INIT	3
 };
 
 /* Inputs parameters to the CLC */
@@ -304,6 +305,8 @@ struct link_params {
 	struct bnx2x *bp;
 	u16 req_fc_auto_adv; /* Should be set to TX / BOTH when
 				req_flow_ctrl is set to AUTO */
+	u16 rsrv1;
+	u32 lfa_base;
 };
 
 /* Output parameters */
@@ -356,7 +359,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars);
    to 0 */
 int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 		     u8 reset_ext_phy);
-
+int bnx2x_lfa_reset(struct link_params *params, struct link_vars *vars);
 /* bnx2x_link_update should be called upon link interrupt */
 int bnx2x_link_update(struct link_params *params, struct link_vars *vars);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index e11485ca037d..f7ed122f4071 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2166,7 +2166,6 @@ void bnx2x_link_set(struct bnx2x *bp)
 {
 	if (!BP_NOMCP(bp)) {
 		bnx2x_acquire_phy_lock(bp);
-		bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1);
 		bnx2x_phy_init(&bp->link_params, &bp->link_vars);
 		bnx2x_release_phy_lock(bp);
 
@@ -2179,12 +2178,19 @@ static void bnx2x__link_reset(struct bnx2x *bp)
 {
 	if (!BP_NOMCP(bp)) {
 		bnx2x_acquire_phy_lock(bp);
-		bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1);
+		bnx2x_lfa_reset(&bp->link_params, &bp->link_vars);
 		bnx2x_release_phy_lock(bp);
 	} else
 		BNX2X_ERR("Bootcode is missing - can not reset link\n");
 }
 
+void bnx2x_force_link_reset(struct bnx2x *bp)
+{
+	bnx2x_acquire_phy_lock(bp);
+	bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1);
+	bnx2x_release_phy_lock(bp);
+}
+
 u8 bnx2x_link_test(struct bnx2x *bp, u8 is_serdes)
 {
 	u8 rc = 0;
@@ -6751,7 +6757,6 @@ static int bnx2x_init_hw_port(struct bnx2x *bp)
 	u32 low, high;
 	u32 val;
 
-	bnx2x__link_reset(bp);
 
 	DP(NETIF_MSG_HW, "starting port init  port %d\n", port);
 
@@ -8244,12 +8249,15 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
  * bnx2x_send_unload_done - send UNLOAD_DONE command to the MCP.
  *
  * @bp:		driver handle
+ * @keep_link:		true iff link should be kept up
  */
-void bnx2x_send_unload_done(struct bnx2x *bp)
+void bnx2x_send_unload_done(struct bnx2x *bp, bool keep_link)
 {
+	u32 reset_param = keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0;
+
 	/* Report UNLOAD_DONE to MCP */
 	if (!BP_NOMCP(bp))
-		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
+		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, reset_param);
 }
 
 static int bnx2x_func_wait_started(struct bnx2x *bp)
@@ -8318,7 +8326,7 @@ static int bnx2x_func_wait_started(struct bnx2x *bp)
 	return 0;
 }
 
-void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode)
+void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
 {
 	int port = BP_PORT(bp);
 	int i, rc = 0;
@@ -8440,7 +8448,7 @@ unload_error:
 
 
 	/* Report UNLOAD_DONE to MCP */
-	bnx2x_send_unload_done(bp);
+	bnx2x_send_unload_done(bp, keep_link);
 }
 
 void bnx2x_disable_close_the_gate(struct bnx2x *bp)
@@ -8852,7 +8860,8 @@ int bnx2x_leader_reset(struct bnx2x *bp)
 	 * driver is owner of the HW
 	 */
 	if (!global && !BP_NOMCP(bp)) {
-		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
+		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ,
+					     DRV_MSG_CODE_LOAD_REQ_WITH_LFA);
 		if (!load_code) {
 			BNX2X_ERR("MCP response failure, aborting\n");
 			rc = -EAGAIN;
@@ -8958,7 +8967,7 @@ static void bnx2x_parity_recover(struct bnx2x *bp)
 
 			/* Stop the driver */
 			/* If interface has been removed - break */
-			if (bnx2x_nic_unload(bp, UNLOAD_RECOVERY))
+			if (bnx2x_nic_unload(bp, UNLOAD_RECOVERY, false))
 				return;
 
 			bp->recovery_state = BNX2X_RECOVERY_WAIT;
@@ -9124,7 +9133,7 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 		bp->sp_rtnl_state = 0;
 		smp_mb();
 
-		bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 		bnx2x_nic_load(bp, LOAD_NORMAL);
 
 		goto sp_rtnl_exit;
@@ -9310,7 +9319,8 @@ static void __devinit bnx2x_prev_unload_undi_inc(struct bnx2x *bp, u8 port,
 
 static int __devinit bnx2x_prev_mcp_done(struct bnx2x *bp)
 {
-	u32 rc = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
+	u32 rc = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE,
+				  DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET);
 	if (!rc) {
 		BNX2X_ERR("MCP response failure, aborting\n");
 		return -EBUSY;
@@ -11000,7 +11010,7 @@ static int bnx2x_close(struct net_device *dev)
 	struct bnx2x *bp = netdev_priv(dev);
 
 	/* Unload the driver, release IRQs */
-	bnx2x_nic_unload(bp, UNLOAD_CLOSE);
+	bnx2x_nic_unload(bp, UNLOAD_CLOSE, false);
 
 	/* Power off */
 	bnx2x_set_power_state(bp, PCI_D3hot);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 28a0bcfe61ff..1b1999d34c71 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -4949,6 +4949,10 @@
 #define UMAC_COMMAND_CONFIG_REG_SW_RESET			 (0x1<<13)
 #define UMAC_COMMAND_CONFIG_REG_TX_ENA				 (0x1<<0)
 #define UMAC_REG_COMMAND_CONFIG					 0x8
+/* [RW 16] This is the duration for which MAC must wait to go back to ACTIVE
+ * state from LPI state when it receives packet for transmission. The
+ * decrement unit is 1 micro-second. */
+#define UMAC_REG_EEE_WAKE_TIMER					 0x6c
 /* [RW 32] Register Bit 0 refers to Bit 16 of the MAC address; Bit 1 refers
  * to bit 17 of the MAC address etc. */
 #define UMAC_REG_MAC_ADDR0					 0xc
@@ -4958,6 +4962,8 @@
 /* [RW 14] Defines a 14-Bit maximum frame length used by the MAC receive
  * logic to check frames. */
 #define UMAC_REG_MAXFR						 0x14
+#define UMAC_REG_UMAC_EEE_CTRL					 0x64
+#define UMAC_UMAC_EEE_CTRL_REG_EEE_EN				 (0x1<<3)
 /* [RW 8] The event id for aggregated interrupt 0 */
 #define USDM_REG_AGG_INT_EVENT_0				 0xc4038
 #define USDM_REG_AGG_INT_EVENT_1				 0xc403c
@@ -6992,6 +6998,7 @@ Theotherbitsarereservedandshouldbezero*/
 /* BCM84833 only */
 #define MDIO_84833_TOP_CFG_FW_REV			0x400f
 #define MDIO_84833_TOP_CFG_FW_EEE		0x10b1
+#define MDIO_84833_TOP_CFG_FW_NO_EEE		0x1f81
 #define MDIO_84833_TOP_CFG_XGPHY_STRAP1			0x401a
 #define MDIO_84833_SUPER_ISOLATE		0x8000
 /* These are mailbox register set used by 84833. */
@@ -7160,10 +7167,11 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_REG_GPHY_ID_54618SE		0x5cd5
 #define MDIO_REG_GPHY_CL45_ADDR_REG			0xd
 #define MDIO_REG_GPHY_CL45_DATA_REG			0xe
-#define MDIO_REG_GPHY_EEE_ADV			0x3c
-#define MDIO_REG_GPHY_EEE_1G		(0x1 << 2)
-#define MDIO_REG_GPHY_EEE_100		(0x1 << 1)
 #define MDIO_REG_GPHY_EEE_RESOLVED		0x803e
+#define MDIO_REG_GPHY_EXP_ACCESS_GATE			0x15
+#define MDIO_REG_GPHY_EXP_ACCESS			0x17
+#define MDIO_REG_GPHY_EXP_ACCESS_TOP		0xd00
+#define MDIO_REG_GPHY_EXP_TOP_2K_BUF		0x40
 #define MDIO_REG_GPHY_AUX_STATUS			0x19
 #define MDIO_REG_INTR_STATUS				0x1a
 #define MDIO_REG_INTR_MASK				0x1b
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 62f754bd0dfe..71971a161bd1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -229,8 +229,7 @@ static inline int bnx2x_exe_queue_step(struct bnx2x *bp,
 			 */
 			list_add_tail(&spacer.link, &o->pending_comp);
 			mb();
-			list_del(&elem->link);
-			list_add_tail(&elem->link, &o->pending_comp);
+			list_move_tail(&elem->link, &o->pending_comp);
 			list_del(&spacer.link);
 		} else
 			break;
@@ -5620,7 +5619,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp,
 	memset(rdata, 0, sizeof(*rdata));
 
 	/* Fill the ramrod data with provided parameters */
-	rdata->function_mode = cpu_to_le16(start_params->mf_mode);
+	rdata->function_mode = (u8)start_params->mf_mode;
 	rdata->sd_vlan_tag   = cpu_to_le16(start_params->sd_vlan_tag);
 	rdata->path_id       = BP_PATH(bp);
 	rdata->network_cos_mode = start_params->network_cos_mode;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index a1d0446b39b3..348ed02d3c69 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
@@ -39,14 +39,39 @@ static inline long bnx2x_hilo(u32 *hiref)
 #endif
 }
 
-static u16 bnx2x_get_port_stats_dma_len(struct bnx2x *bp)
+static inline u16 bnx2x_get_port_stats_dma_len(struct bnx2x *bp)
 {
-	u16 res = sizeof(struct host_port_stats) >> 2;
+	u16 res = 0;
 
-	/* if PFC stats are not supported by the MFW, don't DMA them */
-	if (!(bp->flags &  BC_SUPPORTS_PFC_STATS))
-		res -= (sizeof(u32)*4) >> 2;
+	/* 'newest' convention - shmem2 contains the size of the port stats */
+	if (SHMEM2_HAS(bp, sizeof_port_stats)) {
+		u32 size = SHMEM2_RD(bp, sizeof_port_stats);
+		if (size)
+			res = size;
 
+		/* prevent newer BC from causing buffer overflow */
+		if (res > sizeof(struct host_port_stats))
+			res = sizeof(struct host_port_stats);
+	}
+
+	/* Older convention - all BCs support the port stats' fields up until
+	 * the 'not_used' field
+	 */
+	if (!res) {
+		res = offsetof(struct host_port_stats, not_used) + 4;
+
+		/* if PFC stats are supported by the MFW, DMA them as well */
+		if (bp->flags & BC_SUPPORTS_PFC_STATS) {
+			res += offsetof(struct host_port_stats,
+					pfc_frames_rx_lo) -
+			       offsetof(struct host_port_stats,
+					pfc_frames_tx_hi) + 4;
+		}
+	}
+
+	res >>= 2;
+
+	WARN_ON(res > 2 * DMAE_LEN32_RD_MAX);
 	return res;
 }
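The new sizing logic prefers the length the firmware publishes in shmem2 (clamped so a newer bootcode cannot overflow the host buffer), falls back to the legacy offset-based computation, and finally converts bytes to 32-bit words. A standalone sketch with an illustrative struct standing in for host_port_stats:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct port_stats {			/* illustrative layout only */
	uint32_t legacy[10];
	uint32_t not_used;
	uint32_t pfc_frames_tx_hi, pfc_frames_tx_lo;
	uint32_t pfc_frames_rx_hi, pfc_frames_rx_lo;
};

static uint16_t dma_len(uint32_t shmem2_size, int has_pfc)
{
	uint32_t res = 0;

	if (shmem2_size)			/* 'newest' convention */
		res = shmem2_size;
	if (res > sizeof(struct port_stats))	/* clamp newer bootcode */
		res = sizeof(struct port_stats);

	if (!res) {				/* legacy convention */
		res = offsetof(struct port_stats, not_used) + 4;
		if (has_pfc)
			res += offsetof(struct port_stats, pfc_frames_rx_lo) -
			       offsetof(struct port_stats, pfc_frames_tx_hi) + 4;
	}
	return res >> 2;			/* bytes -> 32-bit words */
}

int main(void)
{
	printf("%u words\n", (unsigned)dma_len(0, 1));	  /* legacy, with PFC */
	printf("%u words\n", (unsigned)dma_len(9999, 0)); /* clamped to struct */
	return 0;
}
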
 
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 3b4fc61f24cf..cc8434fd606e 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -823,10 +823,8 @@ static void cnic_free_context(struct cnic_dev *dev)
 	}
 }
 
-static void __cnic_free_uio(struct cnic_uio_dev *udev)
+static void __cnic_free_uio_rings(struct cnic_uio_dev *udev)
 {
-	uio_unregister_device(&udev->cnic_uinfo);
-
 	if (udev->l2_buf) {
 		dma_free_coherent(&udev->pdev->dev, udev->l2_buf_size,
 				  udev->l2_buf, udev->l2_buf_map);
@@ -839,6 +837,14 @@ static void __cnic_free_uio(struct cnic_uio_dev *udev)
 		udev->l2_ring = NULL;
 	}
 
+}
+
+static void __cnic_free_uio(struct cnic_uio_dev *udev)
+{
+	uio_unregister_device(&udev->cnic_uinfo);
+
+	__cnic_free_uio_rings(udev);
+
 	pci_dev_put(udev->pdev);
 	kfree(udev);
 }
@@ -862,6 +868,8 @@ static void cnic_free_resc(struct cnic_dev *dev)
 	if (udev) {
 		udev->dev = NULL;
 		cp->udev = NULL;
+		if (udev->uio_dev == -1)
+			__cnic_free_uio_rings(udev);
 	}
 
 	cnic_free_context(dev);
@@ -996,6 +1004,34 @@ static int cnic_alloc_kcq(struct cnic_dev *dev, struct kcq_info *info,
 	return 0;
 }
 
+static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages)
+{
+	struct cnic_local *cp = udev->dev->cnic_priv;
+
+	if (udev->l2_ring)
+		return 0;
+
+	udev->l2_ring_size = pages * BCM_PAGE_SIZE;
+	udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
+					   &udev->l2_ring_map,
+					   GFP_KERNEL | __GFP_COMP);
+	if (!udev->l2_ring)
+		return -ENOMEM;
+
+	udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
+	udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size);
+	udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
+					  &udev->l2_buf_map,
+					  GFP_KERNEL | __GFP_COMP);
+	if (!udev->l2_buf) {
+		__cnic_free_uio_rings(udev);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
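__cnic_alloc_uio_rings() is deliberately idempotent: it returns immediately if the rings already exist (the device re-registration path), and on partial failure it frees what it did allocate so a later attempt can retry cleanly. A standalone sketch of the pattern, with malloc standing in for dma_alloc_coherent:

#include <stdio.h>
#include <stdlib.h>

struct rings { void *l2_ring; void *l2_buf; };

static void free_rings(struct rings *r)
{
	free(r->l2_buf);  r->l2_buf = NULL;
	free(r->l2_ring); r->l2_ring = NULL;
}

static int alloc_rings(struct rings *r, size_t ring_sz, size_t buf_sz)
{
	if (r->l2_ring)			/* already allocated: reuse as-is */
		return 0;

	r->l2_ring = malloc(ring_sz);
	if (!r->l2_ring)
		return -1;

	r->l2_buf = malloc(buf_sz);
	if (!r->l2_buf) {
		free_rings(r);		/* undo the partial allocation */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct rings r = { NULL, NULL };

	printf("%d\n", alloc_rings(&r, 4096, 8192));	/* allocates */
	printf("%d\n", alloc_rings(&r, 4096, 8192));	/* no-op reuse */
	free_rings(&r);
	return 0;
}
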
 static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
 {
 	struct cnic_local *cp = dev->cnic_priv;
@@ -1005,6 +1041,11 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
 	list_for_each_entry(udev, &cnic_udev_list, list) {
 		if (udev->pdev == dev->pcidev) {
 			udev->dev = dev;
+			if (__cnic_alloc_uio_rings(udev, pages)) {
+				udev->dev = NULL;
+				read_unlock(&cnic_dev_lock);
+				return -ENOMEM;
+			}
 			cp->udev = udev;
 			read_unlock(&cnic_dev_lock);
 			return 0;
@@ -1020,20 +1061,9 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
 
 	udev->dev = dev;
 	udev->pdev = dev->pcidev;
-	udev->l2_ring_size = pages * BCM_PAGE_SIZE;
-	udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
-					   &udev->l2_ring_map,
-					   GFP_KERNEL | __GFP_COMP);
-	if (!udev->l2_ring)
-		goto err_udev;
 
-	udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
-	udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size);
-	udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
-					  &udev->l2_buf_map,
-					  GFP_KERNEL | __GFP_COMP);
-	if (!udev->l2_buf)
-		goto err_dma;
+	if (__cnic_alloc_uio_rings(udev, pages))
+		goto err_udev;
 
 	write_lock(&cnic_dev_lock);
 	list_add(&udev->list, &cnic_udev_list);
@@ -1044,9 +1074,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
 	cp->udev = udev;
 
 	return 0;
- err_dma:
-	dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size,
-			  udev->l2_ring, udev->l2_ring_map);
+
  err_udev:
 	kfree(udev);
 	return -ENOMEM;
@@ -1260,7 +1288,7 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev)
 	if (ret)
 		goto error;
 
-	if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) {
+	if (CNIC_SUPPORTS_FCOE(cp)) {
 		ret = cnic_alloc_kcq(dev, &cp->kcq2, true);
 		if (ret)
 			goto error;
@@ -1275,6 +1303,9 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev)
 	if (ret)
 		goto error;
 
+	if (cp->ethdev->drv_state & CNIC_DRV_STATE_NO_ISCSI)
+		return 0;
+
 	cp->bnx2x_def_status_blk = cp->ethdev->irq_arr[1].status_blk;
 
 	cp->l2_rx_ring_size = 15;
@@ -3050,6 +3081,22 @@ static void cnic_ack_bnx2x_e2_msix(struct cnic_dev *dev)
 			IGU_INT_DISABLE, 0);
 }
 
+static void cnic_arm_bnx2x_msix(struct cnic_dev *dev, u32 idx)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, CSTORM_ID, idx,
+			   IGU_INT_ENABLE, 1);
+}
+
+static void cnic_arm_bnx2x_e2_msix(struct cnic_dev *dev, u32 idx)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF, idx,
+			IGU_INT_ENABLE, 1);
+}
+
 static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
 {
 	u32 last_status = *info->status_idx_ptr;
@@ -3086,9 +3133,8 @@ static void cnic_service_bnx2x_bh(unsigned long data)
 		CNIC_WR16(dev, cp->kcq1.io_addr,
 			  cp->kcq1.sw_prod_idx + MAX_KCQ_IDX);
 
-		if (!BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) {
-			cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID,
-					   status_idx, IGU_INT_ENABLE, 1);
+		if (cp->ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE) {
+			cp->arm_int(dev, status_idx);
 			break;
 		}
 
@@ -4845,6 +4891,9 @@ static void cnic_init_bnx2x_tx_ring(struct cnic_dev *dev,
 	buf_map = udev->l2_buf_map;
 	for (i = 0; i < MAX_TX_DESC_CNT; i += 3, txbd += 3) {
 		struct eth_tx_start_bd *start_bd = &txbd->start_bd;
+		struct eth_tx_parse_bd_e1x *pbd_e1x =
+			&((txbd + 1)->parse_bd_e1x);
+		struct eth_tx_parse_bd_e2 *pbd_e2 = &((txbd + 1)->parse_bd_e2);
 		struct eth_tx_bd *reg_bd = &((txbd + 2)->reg_bd);
 
 		start_bd->addr_hi = cpu_to_le32((u64) buf_map >> 32);
@@ -4854,10 +4903,15 @@ static void cnic_init_bnx2x_tx_ring(struct cnic_dev *dev,
 		start_bd->nbytes = cpu_to_le16(0x10);
 		start_bd->nbd = cpu_to_le16(3);
 		start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
-		start_bd->general_data = (UNICAST_ADDRESS <<
-			ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT);
+		start_bd->general_data &= ~ETH_TX_START_BD_PARSE_NBDS;
 		start_bd->general_data |= (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
 
+		if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id))
+			pbd_e2->parsing_data = (UNICAST_ADDRESS <<
+				 ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT);
+		else
+			pbd_e1x->global_data = (UNICAST_ADDRESS <<
+				ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE_SHIFT);
 	}
 
 	val = (u64) ring_map >> 32;
@@ -5308,7 +5362,7 @@ static void cnic_stop_hw(struct cnic_dev *dev)
 		/* Need to wait for the ring shutdown event to complete
 		 * before clearing the CNIC_UP flag.
 		 */
-		while (cp->udev->uio_dev != -1 && i < 15) {
+		while (cp->udev && cp->udev->uio_dev != -1 && i < 15) {
 			msleep(100);
 			i++;
 		}
@@ -5473,8 +5527,7 @@ static struct cnic_dev *init_bnx2x_cnic(struct net_device *dev)
 
 	if (!(ethdev->drv_state & CNIC_DRV_STATE_NO_ISCSI))
 		cdev->max_iscsi_conn = ethdev->max_iscsi_conn;
-	if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id) &&
-	    !(ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE))
+	if (CNIC_SUPPORTS_FCOE(cp))
 		cdev->max_fcoe_conn = ethdev->max_fcoe_conn;
 
 	if (cdev->max_fcoe_conn > BNX2X_FCOE_NUM_CONNECTIONS)
@@ -5492,10 +5545,13 @@ static struct cnic_dev *init_bnx2x_cnic(struct net_device *dev)
 	cp->stop_cm = cnic_cm_stop_bnx2x_hw;
 	cp->enable_int = cnic_enable_bnx2x_int;
 	cp->disable_int_sync = cnic_disable_bnx2x_int_sync;
-	if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id))
+	if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) {
 		cp->ack_int = cnic_ack_bnx2x_e2_msix;
-	else
+		cp->arm_int = cnic_arm_bnx2x_e2_msix;
+	} else {
 		cp->ack_int = cnic_ack_bnx2x_msix;
+		cp->arm_int = cnic_arm_bnx2x_msix;
+	}
 	cp->close_conn = cnic_close_bnx2x_conn;
 	return cdev;
 }
diff --git a/drivers/net/ethernet/broadcom/cnic.h b/drivers/net/ethernet/broadcom/cnic.h
index 30328097f516..148604c3fa0c 100644
--- a/drivers/net/ethernet/broadcom/cnic.h
+++ b/drivers/net/ethernet/broadcom/cnic.h
@@ -334,6 +334,7 @@ struct cnic_local {
 	void			(*enable_int)(struct cnic_dev *);
 	void			(*disable_int_sync)(struct cnic_dev *);
 	void			(*ack_int)(struct cnic_dev *);
+	void			(*arm_int)(struct cnic_dev *, u32 index);
 	void			(*close_conn)(struct cnic_sock *, u32 opcode);
 };
 
@@ -474,6 +475,10 @@ struct bnx2x_bd_chain_next {
 	  MAX_STAT_COUNTER_ID_E1))
 #endif
 
+#define CNIC_SUPPORTS_FCOE(cp)					\
+	(BNX2X_CHIP_IS_E2_PLUS((cp)->chip_id) &&		\
+	 !((cp)->ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE))
+
 #define CNIC_RAMROD_TMO			(HZ / 4)
 
 #endif
diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h
index 382c98b0cc0c..ede3db35d757 100644
--- a/drivers/net/ethernet/broadcom/cnic_defs.h
+++ b/drivers/net/ethernet/broadcom/cnic_defs.h
@@ -896,7 +896,7 @@ struct tstorm_tcp_tcp_ag_context_section {
 	u32 snd_nxt;
 	u32 rtt_seq;
 	u32 rtt_time;
-	u32 __reserved66;
+	u32 wnd_right_edge_local;
 	u32 wnd_right_edge;
 	u32 tcp_agg_vars1;
 #define TSTORM_TCP_TCP_AG_CONTEXT_SECTION_FIN_SENT_FLAG (0x1<<0)
diff --git a/drivers/net/ethernet/broadcom/cnic_if.h b/drivers/net/ethernet/broadcom/cnic_if.h
index 5cb88881bba1..865095aad1f6 100644
--- a/drivers/net/ethernet/broadcom/cnic_if.h
+++ b/drivers/net/ethernet/broadcom/cnic_if.h
@@ -14,8 +14,8 @@
 
 #include "bnx2x/bnx2x_mfw_req.h"
 
-#define CNIC_MODULE_VERSION	"2.5.12"
-#define CNIC_MODULE_RELDATE	"June 29, 2012"
+#define CNIC_MODULE_VERSION	"2.5.14"
+#define CNIC_MODULE_RELDATE	"Sep 30, 2012"
 
 #define CNIC_ULP_RDMA		0
 #define CNIC_ULP_ISCSI		1
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 388d32213937..46280ba4c5d4 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -44,10 +44,8 @@
 #include <linux/prefetch.h>
 #include <linux/dma-mapping.h>
 #include <linux/firmware.h>
-#if IS_ENABLED(CONFIG_HWMON)
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
-#endif
 
 #include <net/checksum.h>
 #include <net/ip.h>
@@ -92,10 +90,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			124
+#define TG3_MIN_NUM			125
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"March 21, 2012"
+#define DRV_MODULE_RELDATE	"September 26, 2012"
 
 #define RESET_KIND_SHUTDOWN	0
 #define RESET_KIND_INIT		1
@@ -6263,7 +6261,7 @@ static int tg3_poll_work(struct tg3_napi *tnapi, int work_done, int budget)
 		u32 jmb_prod_idx = dpr->rx_jmb_prod_idx;
 
 		tp->rx_refill = false;
-		for (i = 1; i < tp->irq_cnt; i++)
+		for (i = 1; i <= tp->rxq_cnt; i++)
 			err |= tg3_rx_prodring_xfer(tp, dpr,
 						    &tp->napi[i].prodring);
 
@@ -7592,15 +7590,11 @@ static int tg3_init_rings(struct tg3 *tp)
 	return 0;
 }
 
-/*
- * Must not be invoked with interrupt sources disabled and
- * the hardware shutdown down.
- */
-static void tg3_free_consistent(struct tg3 *tp)
+static void tg3_mem_tx_release(struct tg3 *tp)
 {
 	int i;
 
-	for (i = 0; i < tp->irq_cnt; i++) {
+	for (i = 0; i < tp->irq_max; i++) {
 		struct tg3_napi *tnapi = &tp->napi[i];
 
 		if (tnapi->tx_ring) {
@@ -7611,17 +7605,114 @@ static void tg3_free_consistent(struct tg3 *tp)
 
 		kfree(tnapi->tx_buffers);
 		tnapi->tx_buffers = NULL;
+	}
+}
 
-		if (tnapi->rx_rcb) {
-			dma_free_coherent(&tp->pdev->dev,
-					  TG3_RX_RCB_RING_BYTES(tp),
-					  tnapi->rx_rcb,
-					  tnapi->rx_rcb_mapping);
-			tnapi->rx_rcb = NULL;
-		}
+static int tg3_mem_tx_acquire(struct tg3 *tp)
+{
+	int i;
+	struct tg3_napi *tnapi = &tp->napi[0];
+
+	/* If multivector TSS is enabled, vector 0 does not handle
+	 * tx interrupts.  Don't allocate any resources for it.
+	 */
+	if (tg3_flag(tp, ENABLE_TSS))
+		tnapi++;
+
+	for (i = 0; i < tp->txq_cnt; i++, tnapi++) {
+		tnapi->tx_buffers = kzalloc(sizeof(struct tg3_tx_ring_info) *
+					    TG3_TX_RING_SIZE, GFP_KERNEL);
+		if (!tnapi->tx_buffers)
+			goto err_out;
+
+		tnapi->tx_ring = dma_alloc_coherent(&tp->pdev->dev,
+						    TG3_TX_RING_BYTES,
+						    &tnapi->tx_desc_mapping,
+						    GFP_KERNEL);
+		if (!tnapi->tx_ring)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	tg3_mem_tx_release(tp);
+	return -ENOMEM;
+}
+
+static void tg3_mem_rx_release(struct tg3 *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->irq_max; i++) {
+		struct tg3_napi *tnapi = &tp->napi[i];
 
 		tg3_rx_prodring_fini(tp, &tnapi->prodring);
 
+		if (!tnapi->rx_rcb)
+			continue;
+
+		dma_free_coherent(&tp->pdev->dev,
+				  TG3_RX_RCB_RING_BYTES(tp),
+				  tnapi->rx_rcb,
+				  tnapi->rx_rcb_mapping);
+		tnapi->rx_rcb = NULL;
+	}
+}
+
+static int tg3_mem_rx_acquire(struct tg3 *tp)
+{
+	unsigned int i, limit;
+
+	limit = tp->rxq_cnt;
+
+	/* If RSS is enabled, we need a (dummy) producer ring
+	 * set on vector zero.  This is the true hw prodring.
+	 */
+	if (tg3_flag(tp, ENABLE_RSS))
+		limit++;
+
+	for (i = 0; i < limit; i++) {
+		struct tg3_napi *tnapi = &tp->napi[i];
+
+		if (tg3_rx_prodring_init(tp, &tnapi->prodring))
+			goto err_out;
+
+		/* If multivector RSS is enabled, vector 0
+		 * does not handle rx or tx interrupts.
+		 * Don't allocate any resources for it.
+		 */
+		if (!i && tg3_flag(tp, ENABLE_RSS))
+			continue;
+
+		tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev,
+						   TG3_RX_RCB_RING_BYTES(tp),
+						   &tnapi->rx_rcb_mapping,
+						   GFP_KERNEL);
+		if (!tnapi->rx_rcb)
+			goto err_out;
+
+		memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp));
+	}
+
+	return 0;
+
+err_out:
+	tg3_mem_rx_release(tp);
+	return -ENOMEM;
+}
+
+/*
+ * Must not be invoked with interrupt sources disabled and
+ * the hardware shut down.
+ */
+static void tg3_free_consistent(struct tg3 *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->irq_cnt; i++) {
+		struct tg3_napi *tnapi = &tp->napi[i];
+
 		if (tnapi->hw_status) {
 			dma_free_coherent(&tp->pdev->dev, TG3_HW_STATUS_SIZE,
 					  tnapi->hw_status,
@@ -7630,6 +7721,9 @@ static void tg3_free_consistent(struct tg3 *tp)
 		}
 	}
 
+	tg3_mem_rx_release(tp);
+	tg3_mem_tx_release(tp);
+
 	if (tp->hw_stats) {
 		dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats),
 				  tp->hw_stats, tp->stats_mapping);
@@ -7668,72 +7762,38 @@ static int tg3_alloc_consistent(struct tg3 *tp)
 		memset(tnapi->hw_status, 0, TG3_HW_STATUS_SIZE);
 		sblk = tnapi->hw_status;
 
-		if (tg3_rx_prodring_init(tp, &tnapi->prodring))
-			goto err_out;
+		if (tg3_flag(tp, ENABLE_RSS)) {
+			u16 *prodptr = NULL;
 
-		/* If multivector TSS is enabled, vector 0 does not handle
-		 * tx interrupts.  Don't allocate any resources for it.
-		 */
-		if ((!i && !tg3_flag(tp, ENABLE_TSS)) ||
-		    (i && tg3_flag(tp, ENABLE_TSS))) {
-			tnapi->tx_buffers = kzalloc(
-					       sizeof(struct tg3_tx_ring_info) *
-					       TG3_TX_RING_SIZE, GFP_KERNEL);
-			if (!tnapi->tx_buffers)
-				goto err_out;
-
-			tnapi->tx_ring = dma_alloc_coherent(&tp->pdev->dev,
-							    TG3_TX_RING_BYTES,
-							&tnapi->tx_desc_mapping,
-							    GFP_KERNEL);
-			if (!tnapi->tx_ring)
-				goto err_out;
-		}
-
-		/*
-		 * When RSS is enabled, the status block format changes
-		 * slightly.  The "rx_jumbo_consumer", "reserved",
-		 * and "rx_mini_consumer" members get mapped to the
-		 * other three rx return ring producer indexes.
-		 */
-		switch (i) {
-		default:
-			if (tg3_flag(tp, ENABLE_RSS)) {
-				tnapi->rx_rcb_prod_idx = NULL;
+			/*
+			 * When RSS is enabled, the status block format changes
+			 * slightly.  The "rx_jumbo_consumer", "reserved",
+			 * and "rx_mini_consumer" members get mapped to the
+			 * other three rx return ring producer indexes.
+			 */
+			switch (i) {
+			case 1:
+				prodptr = &sblk->idx[0].rx_producer;
+				break;
+			case 2:
+				prodptr = &sblk->rx_jumbo_consumer;
+				break;
+			case 3:
+				prodptr = &sblk->reserved;
+				break;
+			case 4:
+				prodptr = &sblk->rx_mini_consumer;
 				break;
 			}
-			/* Fall through */
-		case 1:
+			tnapi->rx_rcb_prod_idx = prodptr;
+		} else {
 			tnapi->rx_rcb_prod_idx = &sblk->idx[0].rx_producer;
-			break;
-		case 2:
-			tnapi->rx_rcb_prod_idx = &sblk->rx_jumbo_consumer;
-			break;
-		case 3:
-			tnapi->rx_rcb_prod_idx = &sblk->reserved;
-			break;
-		case 4:
-			tnapi->rx_rcb_prod_idx = &sblk->rx_mini_consumer;
-			break;
 		}
-
-		/*
-		 * If multivector RSS is enabled, vector 0 does not handle
-		 * rx or tx interrupts.  Don't allocate any resources for it.
-		 */
-		if (!i && tg3_flag(tp, ENABLE_RSS))
-			continue;
-
-		tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev,
-						   TG3_RX_RCB_RING_BYTES(tp),
-						   &tnapi->rx_rcb_mapping,
-						   GFP_KERNEL);
-		if (!tnapi->rx_rcb)
-			goto err_out;
-
-		memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp));
 	}
 
+	if (tg3_mem_tx_acquire(tp) || tg3_mem_rx_acquire(tp))
+		goto err_out;
+
 	return 0;
 
 err_out:
@@ -8247,9 +8307,10 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
 			      nic_addr);
 }
 
-static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
+static void tg3_coal_tx_init(struct tg3 *tp, struct ethtool_coalesce *ec)
 {
-	int i;
+	int i = 0;
 
 	if (!tg3_flag(tp, ENABLE_TSS)) {
 		tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs);
@@ -8259,31 +8320,43 @@ static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
 		tw32(HOSTCC_TXCOL_TICKS, 0);
 		tw32(HOSTCC_TXMAX_FRAMES, 0);
 		tw32(HOSTCC_TXCOAL_MAXF_INT, 0);
+
+		for (; i < tp->txq_cnt; i++) {
+			u32 reg;
+
+			reg = HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18;
+			tw32(reg, ec->tx_coalesce_usecs);
+			reg = HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18;
+			tw32(reg, ec->tx_max_coalesced_frames);
+			reg = HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18;
+			tw32(reg, ec->tx_max_coalesced_frames_irq);
+		}
 	}
 
+	for (; i < tp->irq_max - 1; i++) {
+		tw32(HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18, 0);
+		tw32(HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18, 0);
+		tw32(HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18, 0);
+	}
+}
+
+static void tg3_coal_rx_init(struct tg3 *tp, struct ethtool_coalesce *ec)
+{
+	int i = 0;
+	u32 limit = tp->rxq_cnt;
+
 	if (!tg3_flag(tp, ENABLE_RSS)) {
 		tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs);
 		tw32(HOSTCC_RXMAX_FRAMES, ec->rx_max_coalesced_frames);
 		tw32(HOSTCC_RXCOAL_MAXF_INT, ec->rx_max_coalesced_frames_irq);
+		limit--;
 	} else {
 		tw32(HOSTCC_RXCOL_TICKS, 0);
 		tw32(HOSTCC_RXMAX_FRAMES, 0);
 		tw32(HOSTCC_RXCOAL_MAXF_INT, 0);
 	}
 
-	if (!tg3_flag(tp, 5705_PLUS)) {
-		u32 val = ec->stats_block_coalesce_usecs;
-
-		tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq);
-		tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq);
-
-		if (!netif_carrier_ok(tp->dev))
-			val = 0;
-
-		tw32(HOSTCC_STAT_COAL_TICKS, val);
-	}
-
-	for (i = 0; i < tp->irq_cnt - 1; i++) {
+	for (; i < limit; i++) {
 		u32 reg;
 
 		reg = HOSTCC_RXCOL_TICKS_VEC1 + i * 0x18;
@@ -8292,27 +8365,30 @@ static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
 		tw32(reg, ec->rx_max_coalesced_frames);
 		reg = HOSTCC_RXCOAL_MAXF_INT_VEC1 + i * 0x18;
 		tw32(reg, ec->rx_max_coalesced_frames_irq);
-
-		if (tg3_flag(tp, ENABLE_TSS)) {
-			reg = HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18;
-			tw32(reg, ec->tx_coalesce_usecs);
-			reg = HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18;
-			tw32(reg, ec->tx_max_coalesced_frames);
-			reg = HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18;
-			tw32(reg, ec->tx_max_coalesced_frames_irq);
-		}
 	}
 
 	for (; i < tp->irq_max - 1; i++) {
 		tw32(HOSTCC_RXCOL_TICKS_VEC1 + i * 0x18, 0);
 		tw32(HOSTCC_RXMAX_FRAMES_VEC1 + i * 0x18, 0);
 		tw32(HOSTCC_RXCOAL_MAXF_INT_VEC1 + i * 0x18, 0);
+	}
+}
 
-		if (tg3_flag(tp, ENABLE_TSS)) {
-			tw32(HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18, 0);
-			tw32(HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18, 0);
-			tw32(HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18, 0);
-		}
+static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
+{
+	tg3_coal_tx_init(tp, ec);
+	tg3_coal_rx_init(tp, ec);
+
+	if (!tg3_flag(tp, 5705_PLUS)) {
+		u32 val = ec->stats_block_coalesce_usecs;
+
+		tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq);
+		tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq);
+
+		if (!netif_carrier_ok(tp->dev))
+			val = 0;
+
+		tw32(HOSTCC_STAT_COAL_TICKS, val);
 	}
 }
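Note: both helpers above address the per-vector coalescing registers with an i * 0x18 stride from the *_VEC1 bases, i.e. one 24-byte register block per MSI-X vector. A minimal sketch of the addressing, assuming the register layout implied by that stride:

	/* coalescing registers for MSI-X vector n (n >= 1) */
	u32 rx_ticks_reg = HOSTCC_RXCOL_TICKS_VEC1 + (n - 1) * 0x18;
	u32 tx_ticks_reg = HOSTCC_TXCOL_TICKS_VEC1 + (n - 1) * 0x18;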
 
@@ -8570,13 +8646,12 @@ static void __tg3_set_rx_mode(struct net_device *dev)
 	}
 }
 
-static void tg3_rss_init_dflt_indir_tbl(struct tg3 *tp)
+static void tg3_rss_init_dflt_indir_tbl(struct tg3 *tp, u32 qcnt)
 {
 	int i;
 
 	for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
-		tp->rss_ind_tbl[i] =
-			ethtool_rxfh_indir_default(i, tp->irq_cnt - 1);
+		tp->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, qcnt);
 }
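Note: ethtool_rxfh_indir_default() (include/linux/ethtool.h) spreads indirection-table slots round-robin across the rx queues, so passing qcnt explicitly decouples the table from the interrupt vector count. The loop above is equivalent to:

	for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
		tp->rss_ind_tbl[i] = i % qcnt;	/* round-robin over rx queues */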
 
 static void tg3_rss_check_indir_tbl(struct tg3 *tp)
@@ -8598,7 +8673,7 @@ static void tg3_rss_check_indir_tbl(struct tg3 *tp)
 	}
 
 	if (i != TG3_RSS_INDIR_TBL_SIZE)
-		tg3_rss_init_dflt_indir_tbl(tp);
+		tg3_rss_init_dflt_indir_tbl(tp, tp->rxq_cnt);
 }
 
 static void tg3_rss_write_indir_tbl(struct tg3 *tp)
@@ -9495,7 +9570,6 @@ static int tg3_init_hw(struct tg3 *tp, int reset_phy)
 	return tg3_reset_hw(tp, reset_phy);
 }
 
-#if IS_ENABLED(CONFIG_HWMON)
 static void tg3_sd_scan_scratchpad(struct tg3 *tp, struct tg3_ocir *ocir)
 {
 	int i;
@@ -9548,22 +9622,17 @@ static const struct attribute_group tg3_group = {
 	.attrs = tg3_attributes,
 };
 
-#endif
-
 static void tg3_hwmon_close(struct tg3 *tp)
 {
-#if IS_ENABLED(CONFIG_HWMON)
 	if (tp->hwmon_dev) {
 		hwmon_device_unregister(tp->hwmon_dev);
 		tp->hwmon_dev = NULL;
 		sysfs_remove_group(&tp->pdev->dev.kobj, &tg3_group);
 	}
-#endif
 }
 
 static void tg3_hwmon_open(struct tg3 *tp)
 {
-#if IS_ENABLED(CONFIG_HWMON)
 	int i, err;
 	u32 size = 0;
 	struct pci_dev *pdev = tp->pdev;
@@ -9595,7 +9664,6 @@ static void tg3_hwmon_open(struct tg3 *tp)
 		dev_err(&pdev->dev, "Cannot register hwmon device, aborting\n");
 		sysfs_remove_group(&pdev->dev.kobj, &tg3_group);
 	}
-#endif
 }
 
 
@@ -10119,21 +10187,43 @@ static int tg3_request_firmware(struct tg3 *tp)
 	return 0;
 }
 
-static bool tg3_enable_msix(struct tg3 *tp)
+static u32 tg3_irq_count(struct tg3 *tp)
 {
-	int i, rc;
-	struct msix_entry msix_ent[tp->irq_max];
+	u32 irq_cnt = max(tp->rxq_cnt, tp->txq_cnt);
 
-	tp->irq_cnt = netif_get_num_default_rss_queues();
-	if (tp->irq_cnt > 1) {
+	if (irq_cnt > 1) {
 		/* We want as many rx rings enabled as there are cpus.
 		 * In multiqueue MSI-X mode, the first MSI-X vector
 		 * only deals with link interrupts, etc, so we add
 		 * one to the number of vectors we are requesting.
 		 */
-		tp->irq_cnt = min_t(unsigned, tp->irq_cnt + 1, tp->irq_max);
+		irq_cnt = min_t(unsigned, irq_cnt + 1, tp->irq_max);
 	}
 
+	return irq_cnt;
+}
+
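Note: a worked example of the vector arithmetic above: with rxq_cnt = 4, txq_cnt = 1 and irq_max = 5 (TG3_IRQ_MAX_VECS_RSS), max(4, 1) = 4 exceeds 1, so one vector is added for link and error interrupts and the result is min(4 + 1, 5) = 5.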
+static bool tg3_enable_msix(struct tg3 *tp)
+{
+	int i, rc;
+	struct msix_entry msix_ent[tp->irq_max];
+
+	tp->txq_cnt = tp->txq_req;
+	tp->rxq_cnt = tp->rxq_req;
+	if (!tp->rxq_cnt)
+		tp->rxq_cnt = netif_get_num_default_rss_queues();
+	if (tp->rxq_cnt > tp->rxq_max)
+		tp->rxq_cnt = tp->rxq_max;
+
+	/* Disable multiple TX rings by default.  Simple round-robin hardware
+	 * scheduling of the TX rings can cause starvation of rings with
+	 * small packets when other rings have TSO or jumbo packets.
+	 */
+	if (!tp->txq_req)
+		tp->txq_cnt = 1;
+
+	tp->irq_cnt = tg3_irq_count(tp);
+
 	for (i = 0; i < tp->irq_max; i++) {
 		msix_ent[i].entry  = i;
 		msix_ent[i].vector = 0;
@@ -10148,27 +10238,28 @@ static bool tg3_enable_msix(struct tg3 *tp)
 		netdev_notice(tp->dev, "Requested %d MSI-X vectors, received %d\n",
 			      tp->irq_cnt, rc);
 		tp->irq_cnt = rc;
+		tp->rxq_cnt = max(rc - 1, 1);
+		if (tp->txq_cnt)
+			tp->txq_cnt = min(tp->rxq_cnt, tp->txq_max);
 	}
 
 	for (i = 0; i < tp->irq_max; i++)
 		tp->napi[i].irq_vec = msix_ent[i].vector;
 
-	netif_set_real_num_tx_queues(tp->dev, 1);
-	rc = tp->irq_cnt > 1 ? tp->irq_cnt - 1 : 1;
-	if (netif_set_real_num_rx_queues(tp->dev, rc)) {
+	if (netif_set_real_num_rx_queues(tp->dev, tp->rxq_cnt)) {
 		pci_disable_msix(tp->pdev);
 		return false;
 	}
 
-	if (tp->irq_cnt > 1) {
-		tg3_flag_set(tp, ENABLE_RSS);
+	if (tp->irq_cnt == 1)
+		return true;
 
-		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
-		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) {
-			tg3_flag_set(tp, ENABLE_TSS);
-			netif_set_real_num_tx_queues(tp->dev, tp->irq_cnt - 1);
-		}
-	}
+	tg3_flag_set(tp, ENABLE_RSS);
+
+	if (tp->txq_cnt > 1)
+		tg3_flag_set(tp, ENABLE_TSS);
+
+	netif_set_real_num_tx_queues(tp->dev, tp->txq_cnt);
 
 	return true;
 }
@@ -10202,6 +10293,11 @@ defcfg:
 	if (!tg3_flag(tp, USING_MSIX)) {
 		tp->irq_cnt = 1;
 		tp->napi[0].irq_vec = tp->pdev->irq;
+	}
+
+	if (tp->irq_cnt == 1) {
+		tp->txq_cnt = 1;
+		tp->rxq_cnt = 1;
 		netif_set_real_num_tx_queues(tp->dev, 1);
 		netif_set_real_num_rx_queues(tp->dev, 1);
 	}
@@ -10219,38 +10315,11 @@ static void tg3_ints_fini(struct tg3 *tp)
 	tg3_flag_clear(tp, ENABLE_TSS);
 }
 
-static int tg3_open(struct net_device *dev)
+static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq)
 {
-	struct tg3 *tp = netdev_priv(dev);
+	struct net_device *dev = tp->dev;
 	int i, err;
 
-	if (tp->fw_needed) {
-		err = tg3_request_firmware(tp);
-		if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) {
-			if (err)
-				return err;
-		} else if (err) {
-			netdev_warn(tp->dev, "TSO capability disabled\n");
-			tg3_flag_clear(tp, TSO_CAPABLE);
-		} else if (!tg3_flag(tp, TSO_CAPABLE)) {
-			netdev_notice(tp->dev, "TSO capability restored\n");
-			tg3_flag_set(tp, TSO_CAPABLE);
-		}
-	}
-
-	netif_carrier_off(tp->dev);
-
-	err = tg3_power_up(tp);
-	if (err)
-		return err;
-
-	tg3_full_lock(tp, 0);
-
-	tg3_disable_ints(tp);
-	tg3_flag_clear(tp, INIT_COMPLETE);
-
-	tg3_full_unlock(tp);
-
 	/*
 	 * Setup interrupts first so we know how
 	 * many NAPI resources to allocate
@@ -10284,7 +10353,7 @@ static int tg3_open(struct net_device *dev)
 
 	tg3_full_lock(tp, 0);
 
-	err = tg3_init_hw(tp, 1);
+	err = tg3_init_hw(tp, reset_phy);
 	if (err) {
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
 		tg3_free_rings(tp);
@@ -10295,7 +10364,7 @@ static int tg3_open(struct net_device *dev)
 	if (err)
 		goto err_out3;
 
-	if (tg3_flag(tp, USING_MSI)) {
+	if (test_irq && tg3_flag(tp, USING_MSI)) {
 		err = tg3_test_msi(tp);
 
 		if (err) {
@@ -10351,20 +10420,18 @@ err_out2:
 
 err_out1:
 	tg3_ints_fini(tp);
-	tg3_frob_aux_power(tp, false);
-	pci_set_power_state(tp->pdev, PCI_D3hot);
+
 	return err;
 }
 
-static int tg3_close(struct net_device *dev)
+static void tg3_stop(struct tg3 *tp)
 {
 	int i;
-	struct tg3 *tp = netdev_priv(dev);
 
 	tg3_napi_disable(tp);
 	tg3_reset_task_cancel(tp);
 
-	netif_tx_stop_all_queues(dev);
+	netif_tx_disable(tp->dev);
 
 	tg3_timer_stop(tp);
 
@@ -10389,13 +10456,60 @@ static int tg3_close(struct net_device *dev)
 
 	tg3_ints_fini(tp);
 
-	/* Clear stats across close / open calls */
-	memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
-	memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
-
 	tg3_napi_fini(tp);
 
 	tg3_free_consistent(tp);
+}
+
+static int tg3_open(struct net_device *dev)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	int err;
+
+	if (tp->fw_needed) {
+		err = tg3_request_firmware(tp);
+		if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) {
+			if (err)
+				return err;
+		} else if (err) {
+			netdev_warn(tp->dev, "TSO capability disabled\n");
+			tg3_flag_clear(tp, TSO_CAPABLE);
+		} else if (!tg3_flag(tp, TSO_CAPABLE)) {
+			netdev_notice(tp->dev, "TSO capability restored\n");
+			tg3_flag_set(tp, TSO_CAPABLE);
+		}
+	}
+
+	netif_carrier_off(tp->dev);
+
+	err = tg3_power_up(tp);
+	if (err)
+		return err;
+
+	tg3_full_lock(tp, 0);
+
+	tg3_disable_ints(tp);
+	tg3_flag_clear(tp, INIT_COMPLETE);
+
+	tg3_full_unlock(tp);
+
+	err = tg3_start(tp, true, true);
+	if (err) {
+		tg3_frob_aux_power(tp, false);
+		pci_set_power_state(tp->pdev, PCI_D3hot);
+	}
+	return err;
+}
+
+static int tg3_close(struct net_device *dev)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	tg3_stop(tp);
+
+	/* Clear stats across close / open calls */
+	memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
+	memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
 
 	tg3_power_down(tp);
 
@@ -11185,11 +11299,11 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
 		if (netif_running(tp->dev))
-			info->data = tp->irq_cnt;
+			info->data = tp->rxq_cnt;
 		else {
 			info->data = num_online_cpus();
-			if (info->data > TG3_IRQ_MAX_VECS_RSS)
-				info->data = TG3_IRQ_MAX_VECS_RSS;
+			if (info->data > TG3_RSS_MAX_NUM_QS)
+				info->data = TG3_RSS_MAX_NUM_QS;
 		}
 
 		/* The first interrupt vector only
@@ -11246,6 +11360,58 @@ static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir)
 	return 0;
 }
 
+static void tg3_get_channels(struct net_device *dev,
+			     struct ethtool_channels *channel)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	u32 deflt_qs = netif_get_num_default_rss_queues();
+
+	channel->max_rx = tp->rxq_max;
+	channel->max_tx = tp->txq_max;
+
+	if (netif_running(dev)) {
+		channel->rx_count = tp->rxq_cnt;
+		channel->tx_count = tp->txq_cnt;
+	} else {
+		if (tp->rxq_req)
+			channel->rx_count = tp->rxq_req;
+		else
+			channel->rx_count = min(deflt_qs, tp->rxq_max);
+
+		if (tp->txq_req)
+			channel->tx_count = tp->txq_req;
+		else
+			channel->tx_count = min(deflt_qs, tp->txq_max);
+	}
+}
+
+static int tg3_set_channels(struct net_device *dev,
+			    struct ethtool_channels *channel)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	if (!tg3_flag(tp, SUPPORT_MSIX))
+		return -EOPNOTSUPP;
+
+	if (channel->rx_count > tp->rxq_max ||
+	    channel->tx_count > tp->txq_max)
+		return -EINVAL;
+
+	tp->rxq_req = channel->rx_count;
+	tp->txq_req = channel->tx_count;
+
+	if (!netif_running(dev))
+		return 0;
+
+	tg3_stop(tp);
+
+	netif_carrier_off(dev);
+
+	tg3_start(tp, true, false);
+
+	return 0;
+}
+
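Note: with these two handlers wired into tg3_ethtool_ops below, the queue counts become run-time tunable from userspace via the standard ethtool channel commands, e.g. "ethtool -l ethX" to query the limits and "ethtool -L ethX rx 4 tx 1" to request four rx queues while keeping the single default tx queue (device name illustrative). Changing the counts on a running interface goes through the tg3_stop()/tg3_start() pair introduced earlier in this patch.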
 static void tg3_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	switch (stringset) {
@@ -12494,6 +12660,8 @@ static const struct ethtool_ops tg3_ethtool_ops = {
 	.get_rxfh_indir_size    = tg3_get_rxfh_indir_size,
 	.get_rxfh_indir		= tg3_get_rxfh_indir,
 	.set_rxfh_indir		= tg3_set_rxfh_indir,
+	.get_channels		= tg3_get_channels,
+	.set_channels		= tg3_set_channels,
 	.get_ts_info		= ethtool_op_get_ts_info,
 };
 
@@ -14510,10 +14678,20 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		if (tg3_flag(tp, 57765_PLUS)) {
 			tg3_flag_set(tp, SUPPORT_MSIX);
 			tp->irq_max = TG3_IRQ_MAX_VECS;
-			tg3_rss_init_dflt_indir_tbl(tp);
 		}
 	}
 
+	tp->txq_max = 1;
+	tp->rxq_max = 1;
+	if (tp->irq_max > 1) {
+		tp->rxq_max = TG3_RSS_MAX_NUM_QS;
+		tg3_rss_init_dflt_indir_tbl(tp, TG3_RSS_MAX_NUM_QS);
+
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
+		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720)
+			tp->txq_max = tp->irq_max - 1;
+	}
+
 	if (tg3_flag(tp, 5755_PLUS) ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
 		tg3_flag_set(tp, SHORT_DMA_BUG);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 6d52cb286826..d9308c32102e 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2860,7 +2860,8 @@ struct tg3_rx_prodring_set {
 	dma_addr_t			rx_jmb_mapping;
 };
 
-#define TG3_IRQ_MAX_VECS_RSS		5
+#define TG3_RSS_MAX_NUM_QS		4
+#define TG3_IRQ_MAX_VECS_RSS		(TG3_RSS_MAX_NUM_QS + 1)
 #define TG3_IRQ_MAX_VECS		TG3_IRQ_MAX_VECS_RSS
 
 struct tg3_napi {
@@ -3037,6 +3038,9 @@ struct tg3 {
 	void				(*write32_tx_mbox) (struct tg3 *, u32,
 							    u32);
 	u32				dma_limit;
+	u32				txq_req;
+	u32				txq_cnt;
+	u32				txq_max;
 
 	/* begin "rx thread" cacheline section */
 	struct tg3_napi			napi[TG3_IRQ_MAX_VECS];
@@ -3051,6 +3055,9 @@ struct tg3 {
 	u32				rx_std_max_post;
 	u32				rx_offset;
 	u32				rx_pkt_map_sz;
+	u32				rxq_req;
+	u32				rxq_cnt;
+	u32				rxq_max;
 	bool				rx_refill;
 
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index b441f33258e7..ce1eac529470 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -3268,6 +3268,7 @@ bnad_pci_probe(struct pci_dev *pdev,
 	 *	Output : using_dac = 1 for 64 bit DMA
 	 *			   = 0 for 32 bit DMA
 	 */
+	using_dac = false;
 	err = bnad_pci_init(bnad, pdev, &using_dac);
 	if (err)
 		goto unlock_mutex;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ec2dafe8ae5b..745a1f53361f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -67,12 +67,12 @@ enum {
 };
 
 enum {
-	MEMWIN0_APERTURE = 65536,
-	MEMWIN0_BASE     = 0x30000,
+	MEMWIN0_APERTURE = 2048,
+	MEMWIN0_BASE     = 0x1b800,
 	MEMWIN1_APERTURE = 32768,
 	MEMWIN1_BASE     = 0x28000,
-	MEMWIN2_APERTURE = 2048,
-	MEMWIN2_BASE     = 0x1b800,
+	MEMWIN2_APERTURE = 65536,
+	MEMWIN2_BASE     = 0x30000,
 };
 
 enum dev_master {
@@ -211,6 +211,9 @@ struct tp_err_stats {
 struct tp_params {
 	unsigned int ntxchan;        /* # of Tx channels */
 	unsigned int tre;            /* log2 of core clocks per TP tick */
+
+	uint32_t dack_re;            /* DACK timer resolution */
+	unsigned short tx_modq[NCHAN];	/* channel to modulation queue map */
 };
 
 struct vpd_params {
@@ -315,6 +318,10 @@ enum {                                 /* adapter flags */
 	USING_MSI          = (1 << 1),
 	USING_MSIX         = (1 << 2),
 	FW_OK              = (1 << 4),
+	RSS_TNLALLLOOKUP   = (1 << 5),
+	USING_SOFT_PARAMS  = (1 << 6),
+	MASTER_PF          = (1 << 7),
+	FW_OFLD_CONN       = (1 << 9),
 };
 
 struct rx_sw_desc;
@@ -467,6 +474,11 @@ struct sge {
 	u16 rdma_rxq[NCHAN];
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
+	u32 fl_pg_order;            /* large page allocation size */
+	u32 stat_len;               /* length of status page at ring end */
+	u32 pktshift;               /* padding between CPL & packet data */
+	u32 fl_align;               /* response queue message alignment */
+	u32 fl_starve_thres;        /* Free List starvation threshold */
 	unsigned int starve_thres;
 	u8 idma_state[2];
 	unsigned int egr_start;
@@ -511,6 +523,8 @@ struct adapter {
 	struct net_device *port[MAX_NPORTS];
 	u8 chan_map[NCHAN];                   /* channel -> port map */
 
+	unsigned int l2t_start;
+	unsigned int l2t_end;
 	struct l2t_data *l2t;
 	void *uld_handle[CXGB4_ULD_MAX];
 	struct list_head list_node;
@@ -619,7 +633,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 			  struct net_device *dev, unsigned int iqid);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
-void t4_sge_init(struct adapter *adap);
+int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
 extern int dbfifo_int_thresh;
@@ -638,6 +652,14 @@ static inline unsigned int us_to_core_ticks(const struct adapter *adap,
 	return (us * adap->params.vpd.cclk) / 1000;
 }
 
+static inline unsigned int core_ticks_to_us(const struct adapter *adapter,
+					    unsigned int ticks)
+{
+	/* add Core Clock / 2 to round ticks to nearest uS */
+	return ((ticks * 1000 + adapter->params.vpd.cclk/2) /
+		adapter->params.vpd.cclk);
+}
+
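Note: since us_to_core_ticks() above treats vpd.cclk as a frequency in kHz, this inverse rounds to the nearest microsecond. For example, with a 250 MHz core clock (cclk = 250000) and ticks = 625, (625 * 1000 + 125000) / 250000 = 3, i.e. 2.5 us rounded to the nearest microsecond.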
 void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask,
 		      u32 val);
 
@@ -656,6 +678,9 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd,
 	return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false);
 }
 
+void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
+		       unsigned int data_reg, const u32 *vals,
+		       unsigned int nregs, unsigned int start_idx);
 void t4_intr_enable(struct adapter *adapter);
 void t4_intr_disable(struct adapter *adapter);
 int t4_slow_intr_handler(struct adapter *adapter);
@@ -664,8 +689,12 @@ int t4_wait_dev_ready(struct adapter *adap);
 int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port,
 		  struct link_config *lc);
 int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port);
+int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len,
+		    __be32 *buf);
 int t4_seeprom_wp(struct adapter *adapter, bool enable);
+int get_vpd_params(struct adapter *adapter, struct vpd_params *p);
 int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size);
+unsigned int t4_flash_cfg_addr(struct adapter *adapter);
 int t4_check_fw_version(struct adapter *adapter);
 int t4_prep_adapter(struct adapter *adapter);
 int t4_port_init(struct adapter *adap, int mbox, int pf, int vf);
@@ -680,6 +709,8 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data,
 
 void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p);
 void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log);
+void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr,
+			    unsigned int mask, unsigned int val);
 void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
 			 struct tp_tcp_stats *v6);
 void t4_load_mtus(struct adapter *adap, const unsigned short *mtus,
@@ -695,6 +726,16 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 int t4_fw_bye(struct adapter *adap, unsigned int mbox);
 int t4_early_init(struct adapter *adap, unsigned int mbox);
 int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force);
+int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
+		  const u8 *fw_data, unsigned int size, int force);
+int t4_fw_config_file(struct adapter *adap, unsigned int mbox,
+		      unsigned int mtype, unsigned int maddr,
+		      u32 *finiver, u32 *finicsum, u32 *cfcsum);
+int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
+			  unsigned int cache_line_size);
+int t4_fw_initialize(struct adapter *adap, unsigned int mbox);
 int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf,
 		    unsigned int vf, unsigned int nparams, const u32 *params,
 		    u32 *val);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 933985420acb..6b9f6bb2f7ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -78,28 +78,45 @@
  */
 #define MAX_SGE_TIMERVAL 200U
 
-#ifdef CONFIG_PCI_IOV
-/*
- * Virtual Function provisioning constants.  We need two extra Ingress Queues
- * with Interrupt capability to serve as the VF's Firmware Event Queue and
- * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free
- * Lists associated with them).  For each Ethernet/Control Egress Queue and
- * for each Free List, we need an Egress Context.
- */
 enum {
+	/*
+	 * Physical Function provisioning constants.
+	 */
+	PFRES_NVI = 4,			/* # of Virtual Interfaces */
+	PFRES_NETHCTRL = 128,		/* # of EQs used for ETH or CTRL Qs */
+	PFRES_NIQFLINT = 128,		/* # of ingress Qs/w Free List(s)/intr
+					 */
+	PFRES_NEQ = 256,		/* # of egress queues */
+	PFRES_NIQ = 0,			/* # of ingress queues */
+	PFRES_TC = 0,			/* PCI-E traffic class */
+	PFRES_NEXACTF = 128,		/* # of exact MPS filters */
+
+	PFRES_R_CAPS = FW_CMD_CAP_PF,
+	PFRES_WX_CAPS = FW_CMD_CAP_PF,
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * Virtual Function provisioning constants.  We need two extra Ingress
+	 * Queues with Interrupt capability to serve as the VF's Firmware
+	 * Event Queue and Forwarded Interrupt Queue (when using MSI mode) --
+	 * neither will have Free Lists associated with them.  For each
+	 * Ethernet/Control Egress Queue and for each Free List, we need an
+	 * Egress Context.
+	 */
 	VFRES_NPORTS = 1,		/* # of "ports" per VF */
 	VFRES_NQSETS = 2,		/* # of "Queue Sets" per VF */
 
 	VFRES_NVI = VFRES_NPORTS,	/* # of Virtual Interfaces */
 	VFRES_NETHCTRL = VFRES_NQSETS,	/* # of EQs used for ETH or CTRL Qs */
 	VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
-	VFRES_NIQ = 0,			/* # of non-fl/int ingress queues */
 	VFRES_NEQ = VFRES_NQSETS*2,	/* # of egress queues */
+	VFRES_NIQ = 0,			/* # of non-fl/int ingress queues */
 	VFRES_TC = 0,			/* PCI-E traffic class */
 	VFRES_NEXACTF = 16,		/* # of exact MPS filters */
 
 	VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
 	VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
+#endif
 };
 
 /*
@@ -146,7 +163,6 @@ static unsigned int pfvfres_pmask(struct adapter *adapter,
 	}
 	/*NOTREACHED*/
 }
-#endif
 
 enum {
 	MAX_TXQ_ENTRIES      = 16384,
@@ -193,6 +209,7 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
 };
 
 #define FW_FNAME "cxgb4/t4fw.bin"
+#define FW_CFNAME "cxgb4/t4-config.txt"
 
 MODULE_DESCRIPTION(DRV_DESC);
 MODULE_AUTHOR("Chelsio Communications");
@@ -201,6 +218,28 @@ MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
 MODULE_FIRMWARE(FW_FNAME);
 
+/*
+ * Normally we're willing to become the firmware's Master PF but will be happy
+ * if another PF has already become the Master and initialized the adapter.
+ * Setting "force_init" will cause this driver to forcibly establish itself as
+ * the Master PF and initialize the adapter.
+ */
+static uint force_init;
+
+module_param(force_init, uint, 0644);
+MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter");
+
+/*
+ * Normally if the firmware we connect to has Configuration File support, we
+ * use that and only fall back to the old Driver-based initialization if the
+ * Configuration File fails for some reason.  If force_old_init is set, then
+ * we'll always use the old Driver-based initialization sequence.
+ */
+static uint force_old_init;
+
+module_param(force_old_init, uint, 0644);
+MODULE_PARM_DESC(force_old_init, "Force old initialization sequence");
+
 static int dflt_msg_enable = DFLT_MSG_ENABLE;
 
 module_param(dflt_msg_enable, int, 0644);
@@ -236,6 +275,20 @@ module_param_array(intr_cnt, uint, NULL, 0644);
 MODULE_PARM_DESC(intr_cnt,
 		 "thresholds 1..3 for queue interrupt packet counters");
 
+/*
+ * Normally we tell the chip to deliver Ingress Packets into our DMA buffers
+ * offset by 2 bytes in order to have the IP headers line up on 4-byte
+ * boundaries.  This is a requirement for many architectures which will throw
+ * a machine check fault if an attempt is made to access one of the 4-byte IP
+ * header fields on a non-4-byte boundary.  And it's a major performance issue
+ * even on some architectures which allow it, like some implementations of the
+ * x86 ISA.  However, some architectures don't mind this and for some very
+ * edge-case performance sensitive applications (like forwarding large volumes
+ * of small packets), setting this DMA offset to 0 will decrease the number of
+ * PCI-E Bus transfers enough to measurably affect performance.
+ */
+static int rx_dma_offset = 2;
+
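Note: the arithmetic behind the default of 2: an Ethernet header is 14 bytes, so a 2-byte DMA offset places the IP header at byte 16 of the buffer, a 4-byte boundary (the same reasoning as the kernel's NET_IP_ALIGN).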
 static bool vf_acls;
 
 #ifdef CONFIG_PCI_IOV
@@ -248,6 +301,30 @@ module_param_array(num_vf, uint, NULL, 0644);
 MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
 #endif
 
+/*
+ * The filter TCAM has a fixed portion and a variable portion.  The fixed
+ * portion can match on source/destination IP IPv4/IPv6 addresses and TCP/UDP
+ * ports.  The variable portion is 36 bits which can include things like Exact
+ * Match MAC Index (9 bits), Ether Type (16 bits), IP Protocol (8 bits),
+ * [Inner] VLAN Tag (17 bits), etc. which, if all were somehow selected, would
+ * far exceed the 36-bit budget for this "compressed" header portion of the
+ * filter.  Thus, we have a scarce resource which must be carefully managed.
+ *
+ * By default we set this up to mostly match the set of filter matching
+ * capabilities of T3 but with accommodations for some of T4's more
+ * interesting features:
+ *
+ *   { IP Fragment (1), MPS Match Type (3), IP Protocol (8),
+ *     [Inner] VLAN (17), Port (3), FCoE (1) }
+ */
+enum {
+	TP_VLAN_PRI_MAP_DEFAULT = HW_TPL_FR_MT_PR_IV_P_FC,
+	TP_VLAN_PRI_MAP_FIRST = FCOE_SHIFT,
+	TP_VLAN_PRI_MAP_LAST = FRAGMENTATION_SHIFT,
+};
+
+static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
+
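Note: the default selection fits the budget with room to spare: 1 (IP Fragment) + 3 (MPS Match Type) + 8 (IP Protocol) + 17 ([Inner] VLAN) + 3 (Port) + 1 (FCoE) = 33 of the 36 available bits, which is the same accounting the bits > 36 sanity check in adap_init0_no_config() performs for non-default selections.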
 static struct dentry *cxgb4_debugfs_root;
 
 static LIST_HEAD(adapter_list);
@@ -852,11 +929,25 @@ static int upgrade_fw(struct adapter *adap)
 	 */
 	if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
 	    vers > adap->params.fw_vers) {
-		ret = -t4_load_fw(adap, fw->data, fw->size);
+		dev_info(dev, "upgrading firmware ...\n");
+		ret = t4_fw_upgrade(adap, adap->mbox, fw->data, fw->size,
+				    /*force=*/false);
 		if (!ret)
-			dev_info(dev, "firmware upgraded to version %pI4 from "
-				 FW_FNAME "\n", &hdr->fw_ver);
+			dev_info(dev, "firmware successfully upgraded to "
+				 FW_FNAME " (%d.%d.%d.%d)\n",
+				 FW_HDR_FW_VER_MAJOR_GET(vers),
+				 FW_HDR_FW_VER_MINOR_GET(vers),
+				 FW_HDR_FW_VER_MICRO_GET(vers),
+				 FW_HDR_FW_VER_BUILD_GET(vers));
+		else
+			dev_err(dev, "firmware upgrade failed! err=%d\n", -ret);
+	} else {
+		/*
+		 * Tell our caller that we didn't upgrade the firmware.
+		 */
+		ret = -EINVAL;
 	}
+
 out:	release_firmware(fw);
 	return ret;
 }
@@ -2470,8 +2561,8 @@ int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
 		else
 			delta = size - hw_pidx + pidx;
 		wmb();
-		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
-			     V_QID(qid) | V_PIDX(delta));
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+			     QID(qid) | PIDX(delta));
 	}
 out:
 	return ret;
@@ -2579,8 +2670,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
 		else
 			delta = q->size - hw_pidx + q->db_pidx;
 		wmb();
-		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
-				V_QID(q->cntxt_id) | V_PIDX(delta));
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+			     QID(q->cntxt_id) | PIDX(delta));
 	}
 out:
 	q->db_disabled = 0;
@@ -2617,9 +2708,9 @@ static void process_db_full(struct work_struct *work)
 
 	notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
 	drain_db_fifo(adap, dbfifo_drain_delay);
-	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
-			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT,
-			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT);
+	t4_set_reg_field(adap, SGE_INT_ENABLE3,
+			 DBFIFO_HP_INT | DBFIFO_LP_INT,
+			 DBFIFO_HP_INT | DBFIFO_LP_INT);
 	notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
 }
 
@@ -2639,8 +2730,8 @@ static void process_db_drop(struct work_struct *work)
 
 void t4_db_full(struct adapter *adap)
 {
-	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
-			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0);
+	t4_set_reg_field(adap, SGE_INT_ENABLE3,
+			 DBFIFO_HP_INT | DBFIFO_LP_INT, 0);
 	queue_work(workq, &adap->db_full_task);
 }
 
@@ -3076,6 +3167,10 @@ static void setup_memwin(struct adapter *adap)
 	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
 		     (bar0 + MEMWIN2_BASE) | BIR(0) |
 		     WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
+}
+
+static void setup_memwin_rdma(struct adapter *adap)
+{
 	if (adap->vres.ocq.size) {
 		unsigned int start, sz_kb;
 
@@ -3155,6 +3250,488 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
 
 /*
  * Phase 0 of initialization: contact FW, obtain config, perform basic init.
+ *
+ * If the firmware we're dealing with has Configuration File support, then
+ * we use that to perform all configuration.
+ */
+
+/*
+ * Tweak configuration based on module parameters, etc.  Most of these have
+ * defaults assigned to them by Firmware Configuration Files (if we're using
+ * them) but need to be explicitly set if we're using hard-coded
+ * initialization.  But even in the case of using Firmware Configuration
+ * Files, we'd like to expose the ability to change these via module
+ * parameters so these are essentially common tweaks/settings for
+ * Configuration Files and hard-coded initialization ...
+ */
+static int adap_init0_tweaks(struct adapter *adapter)
+{
+	/*
+	 * Fix up various Host-Dependent Parameters like Page Size, Cache
+	 * Line Size, etc.  The firmware default is for a 4KB Page Size and
+	 * 64B Cache Line Size ...
+	 */
+	t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES);
+
+	/*
+	 * Process module parameters which affect early initialization.
+	 */
+	if (rx_dma_offset != 2 && rx_dma_offset != 0) {
+		dev_err(&adapter->pdev->dev,
+			"Ignoring illegal rx_dma_offset=%d, using 2\n",
+			rx_dma_offset);
+		rx_dma_offset = 2;
+	}
+	t4_set_reg_field(adapter, SGE_CONTROL,
+			 PKTSHIFT_MASK,
+			 PKTSHIFT(rx_dma_offset));
+
+	/*
+	 * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
+	 * adds the pseudo header itself.
+	 */
+	t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG,
+			       CSUM_HAS_PSEUDO_HDR, 0);
+
+	return 0;
+}
+
+/*
+ * Attempt to initialize the adapter via a Firmware Configuration File.
+ */
+static int adap_init0_config(struct adapter *adapter, int reset)
+{
+	struct fw_caps_config_cmd caps_cmd;
+	const struct firmware *cf;
+	unsigned long mtype = 0, maddr = 0;
+	u32 finiver, finicsum, cfcsum;
+	int ret, using_flash;
+
+	/*
+	 * Reset device if necessary.
+	 */
+	if (reset) {
+		ret = t4_fw_reset(adapter, adapter->mbox,
+				  PIORSTMODE | PIORST);
+		if (ret < 0)
+			goto bye;
+	}
+
+	/*
+	 * If we have a T4 configuration file under /lib/firmware/cxgb4/,
+	 * then use that.  Otherwise, use the configuration file stored
+	 * in the adapter flash ...
+	 */
+	ret = request_firmware(&cf, FW_CFNAME, adapter->pdev_dev);
+	if (ret < 0) {
+		using_flash = 1;
+		mtype = FW_MEMTYPE_CF_FLASH;
+		maddr = t4_flash_cfg_addr(adapter);
+	} else {
+		u32 params[7], val[7];
+
+		using_flash = 0;
+		if (cf->size >= FLASH_CFG_MAX_SIZE)
+			ret = -ENOMEM;
+		else {
+			params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
+			     FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF));
+			ret = t4_query_params(adapter, adapter->mbox,
+					      adapter->fn, 0, 1, params, val);
+			if (ret == 0) {
+				/*
+				 * For t4_memory_write() below addresses and
+				 * sizes have to be in terms of multiples of 4
+				 * bytes.  So, if the Configuration File isn't
+				 * a multiple of 4 bytes in length we'll have
+				 * to write that out separately since we can't
+				 * guarantee that the bytes following the
+				 * residual byte in the buffer returned by
+				 * request_firmware() are zeroed out ...
+				 */
+				size_t resid = cf->size & 0x3;
+				size_t size = cf->size & ~0x3;
+				__be32 *data = (__be32 *)cf->data;
+
+				mtype = FW_PARAMS_PARAM_Y_GET(val[0]);
+				maddr = FW_PARAMS_PARAM_Z_GET(val[0]) << 16;
+
+				ret = t4_memory_write(adapter, mtype, maddr,
+						      size, data);
+				if (ret == 0 && resid != 0) {
+					union {
+						__be32 word;
+						char buf[4];
+					} last;
+					int i;
+
+					last.word = data[size >> 2];
+					for (i = resid; i < 4; i++)
+						last.buf[i] = 0;
+					ret = t4_memory_write(adapter, mtype,
+							      maddr + size,
+							      4, &last.word);
+				}
+			}
+		}
+
+		release_firmware(cf);
+		if (ret)
+			goto bye;
+	}
+
+	/*
+	 * Issue a Capability Configuration command to the firmware to get it
+	 * to parse the Configuration File.  We don't use t4_fw_config_file()
+	 * because we want the ability to modify various features after we've
+	 * processed the configuration file ...
+	 */
+	memset(&caps_cmd, 0, sizeof(caps_cmd));
+	caps_cmd.op_to_write =
+		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+		      FW_CMD_REQUEST |
+		      FW_CMD_READ);
+	caps_cmd.retval_len16 =
+		htonl(FW_CAPS_CONFIG_CMD_CFVALID |
+		      FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
+		      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
+		      FW_LEN16(caps_cmd));
+	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
+			 &caps_cmd);
+	if (ret < 0)
+		goto bye;
+
+	finiver = ntohl(caps_cmd.finiver);
+	finicsum = ntohl(caps_cmd.finicsum);
+	cfcsum = ntohl(caps_cmd.cfcsum);
+	if (finicsum != cfcsum)
+		dev_warn(adapter->pdev_dev, "Configuration File checksum "
+			 "mismatch: [fini] csum=%#x, computed csum=%#x\n",
+			 finicsum, cfcsum);
+
+	/*
+	 * If we're a pure NIC driver then disable all offloading facilities.
+	 * This will allow the firmware to optimize aspects of the hardware
+	 * configuration which will result in improved performance.
+	 */
+	caps_cmd.ofldcaps = 0;
+	caps_cmd.iscsicaps = 0;
+	caps_cmd.rdmacaps = 0;
+	caps_cmd.fcoecaps = 0;
+
+	/*
+	 * And now tell the firmware to use the configuration we just loaded.
+	 */
+	caps_cmd.op_to_write =
+		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+		      FW_CMD_REQUEST |
+		      FW_CMD_WRITE);
+	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
+			 NULL);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Tweak configuration based on system architecture, module
+	 * parameters, etc.
+	 */
+	ret = adap_init0_tweaks(adapter);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * And finally tell the firmware to initialize itself using the
+	 * parameters from the Configuration File.
+	 */
+	ret = t4_fw_initialize(adapter, adapter->mbox);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Return successfully and note that we're operating with parameters
+	 * not supplied by the driver, rather than from hard-wired
+	 * supplied by the Firmware Configuration File rather than hard-wired
+	 * initialization constants buried in the driver.
+	adapter->flags |= USING_SOFT_PARAMS;
+	dev_info(adapter->pdev_dev, "Successfully configured using Firmware "
+		 "Configuration File %s, version %#x, computed checksum %#x\n",
+		 (using_flash
+		  ? "in device FLASH"
+		  : "/lib/firmware/" FW_CFNAME),
+		 finiver, cfcsum);
+	return 0;
+
+	/*
+	 * Something bad happened.  Return the error ...  (If the "error"
+	 * is that there's no Configuration File on the adapter we don't
+	 * want to issue a warning since this is fairly common.)
+	 */
+bye:
+	if (ret != -ENOENT)
+		dev_warn(adapter->pdev_dev, "Configuration file error %d\n",
+			 -ret);
+	return ret;
+}
+
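Note: a worked example of the 4-byte residual handling in adap_init0_config() above: for a hypothetical 4114-byte (0x1012) configuration file, size = 0x1010 and resid = 2, so the first t4_memory_write() covers 0x1010 bytes and a second call writes one final word whose last two bytes have been explicitly zeroed, since the bytes past cf->data + cf->size are not guaranteed to be zero.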
+/*
+ * Attempt to initialize the adapter via hard-coded, driver supplied
+ * parameters ...
+ */
+static int adap_init0_no_config(struct adapter *adapter, int reset)
+{
+	struct sge *s = &adapter->sge;
+	struct fw_caps_config_cmd caps_cmd;
+	u32 v;
+	int i, ret;
+
+	/*
+	 * Reset device if necessary
+	 */
+	if (reset) {
+		ret = t4_fw_reset(adapter, adapter->mbox,
+				  PIORSTMODE | PIORST);
+		if (ret < 0)
+			goto bye;
+	}
+
+	/*
+	 * Get device capabilities and select which we'll be using.
+	 */
+	memset(&caps_cmd, 0, sizeof(caps_cmd));
+	caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+				     FW_CMD_REQUEST | FW_CMD_READ);
+	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
+			 &caps_cmd);
+	if (ret < 0)
+		goto bye;
+
+#ifndef CONFIG_CHELSIO_T4_OFFLOAD
+	/*
+	 * If we're a pure NIC driver then disable all offloading facilities.
+	 * This will allow the firmware to optimize aspects of the hardware
+	 * configuration which will result in improved performance.
+	 */
+	caps_cmd.ofldcaps = 0;
+	caps_cmd.iscsicaps = 0;
+	caps_cmd.rdmacaps = 0;
+	caps_cmd.fcoecaps = 0;
+#endif
+
+	if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
+		if (!vf_acls)
+			caps_cmd.niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
+		else
+			caps_cmd.niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
+	} else if (vf_acls) {
+		dev_err(adapter->pdev_dev, "virtualization ACLs not supported");
+		goto bye;
+	}
+	caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+			      FW_CMD_REQUEST | FW_CMD_WRITE);
+	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
+			 NULL);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Tweak configuration based on system architecture, module
+	 * parameters, etc.
+	 */
+	ret = adap_init0_tweaks(adapter);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Select RSS Global Mode we want to use.  We use "Basic Virtual"
+	 * mode which maps each Virtual Interface to its own section of
+	 * the RSS Table and we turn on all map and hash enables ...
+	 */
+	adapter->flags |= RSS_TNLALLLOOKUP;
+	ret = t4_config_glbl_rss(adapter, adapter->mbox,
+				 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
+				 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
+				 FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ |
+				 ((adapter->flags & RSS_TNLALLLOOKUP) ?
+					FW_RSS_GLB_CONFIG_CMD_TNLALLLKP : 0));
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Set up our own fundamental resource provisioning ...
+	 */
+	ret = t4_cfg_pfvf(adapter, adapter->mbox, adapter->fn, 0,
+			  PFRES_NEQ, PFRES_NETHCTRL,
+			  PFRES_NIQFLINT, PFRES_NIQ,
+			  PFRES_TC, PFRES_NVI,
+			  FW_PFVF_CMD_CMASK_MASK,
+			  pfvfres_pmask(adapter, adapter->fn, 0),
+			  PFRES_NEXACTF,
+			  PFRES_R_CAPS, PFRES_WX_CAPS);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Perform low level SGE initialization.  We need to do this before we
+	 * send the firmware the INITIALIZE command because that will cause
+	 * any other PF Drivers which are waiting for the Master
+	 * Initialization to proceed forward.
+	 */
+	for (i = 0; i < SGE_NTIMERS - 1; i++)
+		s->timer_val[i] = min(intr_holdoff[i], MAX_SGE_TIMERVAL);
+	s->timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
+	s->counter_val[0] = 1;
+	for (i = 1; i < SGE_NCOUNTERS; i++)
+		s->counter_val[i] = min(intr_cnt[i - 1],
+					THRESHOLD_0_GET(THRESHOLD_0_MASK));
+	t4_sge_init(adapter);
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * Provision resource limits for Virtual Functions.  We currently
+	 * grant them all the same static resource limits except for the Port
+	 * Access Rights Mask which we're assigning based on the PF.  All of
+	 * the static provisioning stuff for both the PF and VF really needs
+	 * to be managed in a persistent manner for each device which the
+	 * firmware controls.
+	 */
+	{
+		int pf, vf;
+
+		for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
+			if (num_vf[pf] <= 0)
+				continue;
+
+			/* VF numbering starts at 1! */
+			for (vf = 1; vf <= num_vf[pf]; vf++) {
+				ret = t4_cfg_pfvf(adapter, adapter->mbox,
+						  pf, vf,
+						  VFRES_NEQ, VFRES_NETHCTRL,
+						  VFRES_NIQFLINT, VFRES_NIQ,
+						  VFRES_TC, VFRES_NVI,
+						  FW_PFVF_CMD_CMASK_GET(
+						  FW_PFVF_CMD_CMASK_MASK),
+						  pfvfres_pmask(
+						  adapter, pf, vf),
+						  VFRES_NEXACTF,
+						  VFRES_R_CAPS, VFRES_WX_CAPS);
+				if (ret < 0)
+					dev_warn(adapter->pdev_dev,
+						 "failed to "
+						 "provision pf/vf=%d/%d; "
+						 "err=%d\n", pf, vf, ret);
+			}
+		}
+	}
+#endif
+
+	/*
+	 * Set up the default filter mode.  Later we'll want to implement this
+	 * via a firmware command, etc. ...  This needs to be done before the
+	 * firmware initialization command ...  If the selected set of fields
+	 * isn't equal to the default value, we'll need to make sure that the
+	 * field selections will fit in the 36-bit budget.
+	 */
+	if (tp_vlan_pri_map != TP_VLAN_PRI_MAP_DEFAULT) {
+		int i, bits = 0;
+
+		for (i = TP_VLAN_PRI_MAP_FIRST; i <= TP_VLAN_PRI_MAP_LAST; i++)
+			switch (tp_vlan_pri_map & (1 << i)) {
+			case 0:
+				/* compressed filter field not enabled */
+				break;
+			case FCOE_MASK:
+				bits +=  1;
+				break;
+			case PORT_MASK:
+				bits +=  3;
+				break;
+			case VNIC_ID_MASK:
+				bits += 17;
+				break;
+			case VLAN_MASK:
+				bits += 17;
+				break;
+			case TOS_MASK:
+				bits +=  8;
+				break;
+			case PROTOCOL_MASK:
+				bits +=  8;
+				break;
+			case ETHERTYPE_MASK:
+				bits += 16;
+				break;
+			case MACMATCH_MASK:
+				bits +=  9;
+				break;
+			case MPSHITTYPE_MASK:
+				bits +=  3;
+				break;
+			case FRAGMENTATION_MASK:
+				bits +=  1;
+				break;
+			}
+
+		if (bits > 36) {
+			dev_err(adapter->pdev_dev,
+				"tp_vlan_pri_map=%#x needs %d bits > 36;"
+				" using %#x\n", tp_vlan_pri_map, bits,
+				TP_VLAN_PRI_MAP_DEFAULT);
+			tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
+		}
+	}
+	v = tp_vlan_pri_map;
+	t4_write_indirect(adapter, TP_PIO_ADDR, TP_PIO_DATA,
+			  &v, 1, TP_VLAN_PRI_MAP);
+
+	/*
+	 * We need Five Tuple Lookup mode to be set in TP_GLOBAL_CONFIG in order
+	 * to support any of the compressed filter fields above.  Newer
+	 * versions of the firmware do this automatically but it doesn't hurt
+	 * to set it here.  Meanwhile, we do _not_ need to set Lookup Every
+	 * Packet in TP_INGRESS_CONFIG to support matching non-TCP packets
+	 * since the firmware automatically turns this on and off when we have
+	 * a non-zero number of filters active (since it does have a
+	 * performance impact).
+	 */
+	if (tp_vlan_pri_map)
+		t4_set_reg_field(adapter, TP_GLOBAL_CONFIG,
+				 FIVETUPLELOOKUP_MASK,
+				 FIVETUPLELOOKUP_MASK);
+
+	/*
+	 * Tweak some settings.
+	 */
+	t4_write_reg(adapter, TP_SHIFT_CNT, SYNSHIFTMAX(6) |
+		     RXTSHIFTMAXR1(4) | RXTSHIFTMAXR2(15) |
+		     PERSHIFTBACKOFFMAX(8) | PERSHIFTMAX(8) |
+		     KEEPALIVEMAXR1(4) | KEEPALIVEMAXR2(9));
+
+	/*
+	 * Get basic stuff going by issuing the Firmware Initialize command.
+	 * Note that this _must_ be after all PFVF commands ...
+	 */
+	ret = t4_fw_initialize(adapter, adapter->mbox);
+	if (ret < 0)
+		goto bye;
+
+	/*
+	 * Return successfully!
+	 */
+	dev_info(adapter->pdev_dev, "Successfully configured using built-in "
+		 "driver parameters\n");
+	return 0;
+
+	/*
+	 * Something bad happened.  Return the error ...
+	 */
+bye:
+	return ret;
+}
+
+/*
+ * Phase 0 of initialization: contact FW, obtain config, perform basic init.
  */
 static int adap_init0(struct adapter *adap)
 {
@@ -3162,72 +3739,216 @@ static int adap_init0(struct adapter *adap)
 	u32 v, port_vec;
 	enum dev_state state;
 	u32 params[7], val[7];
-	struct fw_caps_config_cmd c;
-
-	ret = t4_check_fw_version(adap);
-	if (ret == -EINVAL || ret > 0) {
-		if (upgrade_fw(adap) >= 0)             /* recache FW version */
-			ret = t4_check_fw_version(adap);
-	}
-	if (ret < 0)
-		return ret;
+	int reset = 1, j;
 
-	/* contact FW, request master */
-	ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state);
+	/*
+	 * Contact FW, advertising Master capability (and potentially forcing
+	 * ourselves as the Master PF if our module parameter force_init is
+	 * set).
+	 */
+	ret = t4_fw_hello(adap, adap->mbox, adap->fn,
+			  force_init ? MASTER_MUST : MASTER_MAY,
+			  &state);
 	if (ret < 0) {
 		dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
 			ret);
 		return ret;
 	}
+	if (ret == adap->mbox)
+		adap->flags |= MASTER_PF;
+	if (force_init && state == DEV_STATE_INIT)
+		state = DEV_STATE_UNINIT;
 
-	/* reset device */
-	ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST);
-	if (ret < 0)
-		goto bye;
-
-	for (v = 0; v < SGE_NTIMERS - 1; v++)
-		adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL);
-	adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
-	adap->sge.counter_val[0] = 1;
-	for (v = 1; v < SGE_NCOUNTERS; v++)
-		adap->sge.counter_val[v] = min(intr_cnt[v - 1],
-					       THRESHOLD_3_MASK);
-#define FW_PARAM_DEV(param) \
-	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
-	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
+	/*
+	 * If we're the Master PF Driver and the device is uninitialized,
+	 * then let's consider upgrading the firmware ...  (We always want
+	 * to check the firmware version number in order to A. get it for
+	 * later reporting and B. warn if the currently loaded firmware
+	 * is excessively mismatched relative to the driver.)
+	 */
+	ret = t4_check_fw_version(adap);
+	if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) {
+		if (ret == -EINVAL || ret > 0) {
+			if (upgrade_fw(adap) >= 0) {
+				/*
+				 * Note that the chip was reset as part of the
+				 * firmware upgrade so we don't reset it again
+				 * below and grab the new firmware version.
+				 */
+				reset = 0;
+				ret = t4_check_fw_version(adap);
+			}
+		}
+		if (ret < 0)
+			return ret;
+	}
 
-	params[0] = FW_PARAM_DEV(CCLK);
-	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val);
+	/*
+	 * Grab VPD parameters.  This should be done after we establish a
+	 * connection to the firmware since some of the VPD parameters
+	 * (notably the Core Clock frequency) are retrieved via requests to
+	 * the firmware.  On the other hand, we need these fairly early on
+	 * so we do this right after getting ahold of the firmware.
+	 */
+	ret = get_vpd_params(adap, &adap->params.vpd);
 	if (ret < 0)
 		goto bye;
-	adap->params.vpd.cclk = val[0];
 
-	ret = adap_init1(adap, &c);
+	/*
+	 * Find out what ports are available to us.  Note that we need to do
+	 * this before calling adap_init0_no_config() since it needs nports
+	 * and portvec ...
+	 */
+	v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
+	    FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_PORTVEC);
+	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, &v, &port_vec);
 	if (ret < 0)
 		goto bye;
 
+	adap->params.nports = hweight32(port_vec);
+	adap->params.portvec = port_vec;
+
+	/*
+	 * If the firmware is initialized already (and we're not forcing a
+	 * master initialization), note that we're living with existing
+	 * adapter parameters.  Otherwise, it's time to try initializing the
+	 * adapter ...
+	 */
+	if (state == DEV_STATE_INIT) {
+		dev_info(adap->pdev_dev, "Coming up as %s: "
+			 "Adapter already initialized\n",
+			 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
+		adap->flags |= USING_SOFT_PARAMS;
+	} else {
+		dev_info(adap->pdev_dev, "Coming up as MASTER: "
+			 "Initializing adapter\n");
+
+		/*
+		 * If the firmware doesn't support Configuration
+		 * Files, warn the user.
+		 */
+		if (ret < 0)
+			dev_warn(adap->pdev_dev, "Firmware doesn't support "
+				 "configuration file.\n");
+		if (force_old_init)
+			ret = adap_init0_no_config(adap, reset);
+		else {
+			/*
+			 * Find out whether we're dealing with a version of
+			 * the firmware which has configuration file support.
+			 */
+			params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
+				     FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF));
+			ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1,
+					      params, val);
+
+			/*
+			 * If the firmware doesn't support Configuration
+			 * Files, use the old Driver-based, hard-wired
+			 * initialization.  Otherwise, try using the
+			 * Configuration File support and fall back to the
+			 * Driver-based initialization if there's no
+			 * Configuration File found.
+			 */
+			if (ret < 0)
+				ret = adap_init0_no_config(adap, reset);
+			else {
+				/*
+				 * The firmware provides us with a memory
+				 * buffer where we can load a Configuration
+				 * File from the host if we want to override
+				 * the Configuration File in flash.
+				 */
+
+				ret = adap_init0_config(adap, reset);
+				if (ret == -ENOENT) {
+					dev_info(adap->pdev_dev,
+					    "No Configuration File present "
+					    "on adapter.  Using hard-wired "
+					    "configuration parameters.\n");
+					ret = adap_init0_no_config(adap, reset);
+				}
+			}
+		}
+		if (ret < 0) {
+			dev_err(adap->pdev_dev,
+				"could not initialize adapter, error %d\n",
+				-ret);
+			goto bye;
+		}
+	}
+
+	/*
+	 * If we're living with non-hard-coded parameters (either from a
+	 * Firmware Configuration File or values programmed by a different PF
+	 * Driver), give the SGE code a chance to pull in anything that it
+	 * needs ...  Note that this must be called after we retrieve our VPD
+	 * parameters in order to know how to convert core ticks to seconds.
+	 */
+	if (adap->flags & USING_SOFT_PARAMS) {
+		ret = t4_sge_init(adap);
+		if (ret < 0)
+			goto bye;
+	}
+
+	/*
+	 * Grab some of our fundamental operating parameters.
+	 */
+#define FW_PARAM_DEV(param) \
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
+	FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
+
 #define FW_PARAM_PFVF(param) \
-	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
-	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
-	 FW_PARAMS_PARAM_Y(adap->fn))
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
+	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
+	 FW_PARAMS_PARAM_Y(0) | \
+	 FW_PARAMS_PARAM_Z(0))
 
-	params[0] = FW_PARAM_DEV(PORTVEC);
+	params[0] = FW_PARAM_PFVF(EQ_START);
 	params[1] = FW_PARAM_PFVF(L2T_START);
 	params[2] = FW_PARAM_PFVF(L2T_END);
 	params[3] = FW_PARAM_PFVF(FILTER_START);
 	params[4] = FW_PARAM_PFVF(FILTER_END);
 	params[5] = FW_PARAM_PFVF(IQFLINT_START);
-	params[6] = FW_PARAM_PFVF(EQ_START);
-	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 7, params, val);
+	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, val);
 	if (ret < 0)
 		goto bye;
-	port_vec = val[0];
+	adap->sge.egr_start = val[0];
+	adap->l2t_start = val[1];
+	adap->l2t_end = val[2];
 	adap->tids.ftid_base = val[3];
 	adap->tids.nftids = val[4] - val[3] + 1;
 	adap->sge.ingr_start = val[5];
-	adap->sge.egr_start = val[6];
 
-	if (c.ofldcaps) {
+	/* query params related to active filter region */
+	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
+	params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
+	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
+	/* If the Active filter region is non-empty, enable establishing
+	 * offload connections through firmware work requests.
+	 */
+	if (ret >= 0 && val[0] != val[1]) {
+		adap->flags |= FW_OFLD_CONN;
+		adap->tids.aftid_base = val[0];
+		adap->tids.aftid_end = val[1];
+	}
+
+#ifdef CONFIG_CHELSIO_T4_OFFLOAD
+	/*
+	 * Get device capabilities so we can determine what resources we need
+	 * to manage.
+	 */
+	memset(&caps_cmd, 0, sizeof(caps_cmd));
+	caps_cmd.op_to_write = htonl(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+				     FW_CMD_REQUEST | FW_CMD_READ);
+	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
+			 &caps_cmd);
+	if (ret < 0)
+		goto bye;
+
+	if (caps_cmd.ofldcaps) {
 		/* query offload-related parameters */
 		params[0] = FW_PARAM_DEV(NTID);
 		params[1] = FW_PARAM_PFVF(SERVER_START);
@@ -3235,28 +3956,55 @@ static int adap_init0(struct adapter *adap)
 		params[3] = FW_PARAM_PFVF(TDDP_START);
 		params[4] = FW_PARAM_PFVF(TDDP_END);
 		params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
-		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
-				      val);
+		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
+				      params, val);
 		if (ret < 0)
 			goto bye;
 		adap->tids.ntids = val[0];
 		adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
 		adap->tids.stid_base = val[1];
 		adap->tids.nstids = val[2] - val[1] + 1;
+		/*
+		 * Set up the server filter region.  Divide the available
+		 * filter region into two parts.  Regular filters get 1/3rd
+		 * and server filters get 2/3rds of it.  This is only enabled
+		 * if the workaround path is enabled.
+		 * 1. For regular filters.
+		 * 2. Server filters: these are special filters used to
+		 *    redirect SYN packets to the offload queue.
+		 */
+		if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) {
+			adap->tids.sftid_base = adap->tids.ftid_base +
+					DIV_ROUND_UP(adap->tids.nftids, 3);
+			adap->tids.nsftids = adap->tids.nftids -
+					 DIV_ROUND_UP(adap->tids.nftids, 3);
+			adap->tids.nftids = adap->tids.sftid_base -
+						adap->tids.ftid_base;
+		}
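+
+		/*
+		 * E.g. with nftids == 96 the split above yields
+		 * sftid_base == ftid_base + 32, nsftids == 64 and
+		 * nftids == 32.
+		 */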
 		adap->vres.ddp.start = val[3];
 		adap->vres.ddp.size = val[4] - val[3] + 1;
 		adap->params.ofldq_wr_cred = val[5];
+
+		params[0] = FW_PARAM_PFVF(ETHOFLD_START);
+		params[1] = FW_PARAM_PFVF(ETHOFLD_END);
+		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
+				      params, val);
+		if (ret >= 0 && val[0] != val[1]) {
+			adap->tids.uotid_base = val[0];
+			adap->tids.nuotids = val[1] - val[0] + 1;
+		}
+
 		adap->params.offload = 1;
 	}
-	if (c.rdmacaps) {
+	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
 		params[1] = FW_PARAM_PFVF(STAG_END);
 		params[2] = FW_PARAM_PFVF(RQ_START);
 		params[3] = FW_PARAM_PFVF(RQ_END);
 		params[4] = FW_PARAM_PFVF(PBL_START);
 		params[5] = FW_PARAM_PFVF(PBL_END);
-		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
-				      val);
+		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
+				      params, val);
 		if (ret < 0)
 			goto bye;
 		adap->vres.stag.start = val[0];
@@ -3272,8 +4020,7 @@ static int adap_init0(struct adapter *adap)
 		params[3] = FW_PARAM_PFVF(CQ_END);
 		params[4] = FW_PARAM_PFVF(OCQ_START);
 		params[5] = FW_PARAM_PFVF(OCQ_END);
-		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
-				      val);
+		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
+				      params, val);
 		if (ret < 0)
 			goto bye;
 		adap->vres.qp.start = val[0];
@@ -3283,11 +4030,11 @@ static int adap_init0(struct adapter *adap)
 		adap->vres.ocq.start = val[4];
 		adap->vres.ocq.size = val[5] - val[4] + 1;
 	}
-	if (c.iscsicaps) {
+	if (caps_cmd.iscsicaps) {
 		params[0] = FW_PARAM_PFVF(ISCSI_START);
 		params[1] = FW_PARAM_PFVF(ISCSI_END);
-		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params,
-				      val);
+		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
+				      params, val);
 		if (ret < 0)
 			goto bye;
 		adap->vres.iscsi.start = val[0];
@@ -3295,63 +4042,33 @@ static int adap_init0(struct adapter *adap)
 	}
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
+#endif /* CONFIG_CHELSIO_T4_OFFLOAD */
 
-	adap->params.nports = hweight32(port_vec);
-	adap->params.portvec = port_vec;
-	adap->flags |= FW_OK;
-
-	/* These are finalized by FW initialization, load their values now */
+	/*
+	 * These are finalized by FW initialization, load their values now.
+	 */
 	v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
 	adap->params.tp.tre = TIMERRESOLUTION_GET(v);
+	adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v);
 	t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
 	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
 		     adap->params.b_wnd);
 
-#ifdef CONFIG_PCI_IOV
-	/*
-	 * Provision resource limits for Virtual Functions.  We currently
-	 * grant them all the same static resource limits except for the Port
-	 * Access Rights Mask which we're assigning based on the PF.  All of
-	 * the static provisioning stuff for both the PF and VF really needs
-	 * to be managed in a persistent manner for each device which the
-	 * firmware controls.
-	 */
-	{
-		int pf, vf;
-
-		for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
-			if (num_vf[pf] <= 0)
-				continue;
+	/* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */
+	for (j = 0; j < NCHAN; j++)
+		adap->params.tp.tx_modq[j] = j;
 
-			/* VF numbering starts at 1! */
-			for (vf = 1; vf <= num_vf[pf]; vf++) {
-				ret = t4_cfg_pfvf(adap, adap->fn, pf, vf,
-						  VFRES_NEQ, VFRES_NETHCTRL,
-						  VFRES_NIQFLINT, VFRES_NIQ,
-						  VFRES_TC, VFRES_NVI,
-						  FW_PFVF_CMD_CMASK_MASK,
-						  pfvfres_pmask(adap, pf, vf),
-						  VFRES_NEXACTF,
-						  VFRES_R_CAPS, VFRES_WX_CAPS);
-				if (ret < 0)
-					dev_warn(adap->pdev_dev, "failed to "
-						 "provision pf/vf=%d/%d; "
-						 "err=%d\n", pf, vf, ret);
-			}
-		}
-	}
-#endif
-
-	setup_memwin(adap);
+	adap->flags |= FW_OK;
 	return 0;
 
 	/*
-	 * If a command timed out or failed with EIO FW does not operate within
-	 * its spec or something catastrophic happened to HW/FW, stop issuing
-	 * commands.
+	 * Something bad happened.  If a command timed out or failed with EIO
+	 * FW does not operate within its spec or something catastrophic
+	 * happened to HW/FW, stop issuing commands.
 	 */
-bye:	if (ret != -ETIMEDOUT && ret != -EIO)
-		t4_fw_bye(adap, adap->fn);
+bye:
+	if (ret != -ETIMEDOUT && ret != -EIO)
+		t4_fw_bye(adap, adap->mbox);
 	return ret;
 }
 
@@ -3806,7 +4523,9 @@ static int __devinit init_one(struct pci_dev *pdev,
 	err = t4_prep_adapter(adapter);
 	if (err)
 		goto out_unmap_bar;
+	setup_memwin(adapter);
 	err = adap_init0(adapter);
+	setup_memwin_rdma(adapter);
 	if (err)
 		goto out_unmap_bar;
 
@@ -3948,8 +4667,11 @@ static void __devexit remove_one(struct pci_dev *pdev)
 {
 	struct adapter *adapter = pci_get_drvdata(pdev);
 
+#ifdef CONFIG_PCI_IOV
 	pci_disable_sriov(pdev);
 
+#endif
+
 	if (adapter) {
 		int i;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index d79980c5fc63..1b899fea1a91 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -100,6 +100,8 @@ struct tid_info {
 
 	unsigned int nftids;
 	unsigned int ftid_base;
+	unsigned int aftid_base;
+	unsigned int aftid_end;
 
 	spinlock_t atid_lock ____cacheline_aligned_in_smp;
 	union aopen_entry *afree;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index d49933ed551f..3ecc087d732d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -68,9 +68,6 @@
  */
 #define RX_PKT_SKB_LEN   512
 
-/* Ethernet header padding prepended to RX_PKTs */
-#define RX_PKT_PAD 2
-
 /*
  * Max number of Tx descriptors we clean up at a time.  Should be modest as
  * freeing skbs isn't cheap and it happens while holding locks.  We just need
@@ -137,13 +134,6 @@
  */
 #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
 
-enum {
-	/* packet alignment in FL buffers */
-	FL_ALIGN = L1_CACHE_BYTES < 32 ? 32 : L1_CACHE_BYTES,
-	/* egress status entry size */
-	STAT_LEN = L1_CACHE_BYTES > 64 ? 128 : 64
-};
-
 struct tx_sw_desc {                /* SW state per Tx descriptor */
 	struct sk_buff *skb;
 	struct ulptx_sgl *sgl;
@@ -155,16 +145,57 @@ struct rx_sw_desc {                /* SW state per Rx descriptor */
 };
 
 /*
- * The low bits of rx_sw_desc.dma_addr have special meaning.
+ * Rx buffer sizes for "useskbs" Free List buffers (one ingress packet per skb
+ * buffer).  We currently only support two sizes for 1500- and 9000-byte MTUs.
+ * We could easily support more but there doesn't seem to be much need for
+ * that ...
+ */
+#define FL_MTU_SMALL 1500
+#define FL_MTU_LARGE 9000
+
+static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
+					  unsigned int mtu)
+{
+	struct sge *s = &adapter->sge;
+
+	return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align);
+}
+
+#define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL)
+#define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE)
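+
+/*
+ * For example, with the usual Packet Shift of 2 bytes and a 32-byte
+ * fl_align, FL_MTU_SMALL_BUFSIZE(adapter) works out to
+ * ALIGN(2 + 14 + 4 + 1500, 32) == 1536 bytes and FL_MTU_LARGE_BUFSIZE
+ * to ALIGN(2 + 14 + 4 + 9000, 32) == 9024 bytes.
+ */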
+
+/*
+ * Bits 0..3 of rx_sw_desc.dma_addr have special meaning.  The hardware uses
+ * these to specify the buffer size as an index into the SGE Free List Buffer
+ * Size register array.  We also use bit 4, when the buffer has been unmapped
+ * for DMA, but this is of course never sent to the hardware and is only used
+ * to prevent double unmappings.  All of the above requires that the Free List
+ * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
+ * aligned to 32 bytes or a larger power of 2.  Since the SGE's minimal
+ * Free List Buffer alignment is 32 bytes, this works out for us ...
  */
 enum {
-	RX_LARGE_BUF    = 1 << 0, /* buffer is larger than PAGE_SIZE */
-	RX_UNMAPPED_BUF = 1 << 1, /* buffer is not mapped */
+	RX_BUF_FLAGS     = 0x1f,   /* bottom five bits are special */
+	RX_BUF_SIZE      = 0x0f,   /* bottom four bits are for buf sizes */
+	RX_UNMAPPED_BUF  = 0x10,   /* buffer is not mapped */
+
+	/*
+	 * XXX We shouldn't depend on being able to use these indices.
+	 * XXX Especially when some other Master PF has initialized the
+	 * XXX adapter or we use the Firmware Configuration File.  We
+	 * XXX should really search through the Host Buffer Size register
+	 * XXX array for the appropriately sized buffer indices.
+	 */
+	RX_SMALL_PG_BUF  = 0x0,   /* small (PAGE_SIZE) page buffer */
+	RX_LARGE_PG_BUF  = 0x1,   /* large (FL_PG_ORDER) page buffer */
+
+	RX_SMALL_MTU_BUF = 0x2,   /* small MTU buffer */
+	RX_LARGE_MTU_BUF = 0x3,   /* large MTU buffer */
 };
 
 static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
 {
-	return d->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF);
+	return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS;
 }
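+
+/*
+ * Illustrative round trip: refill_fl() below tags each bus address with a
+ * size index ("mapping |= RX_LARGE_PG_BUF") before handing it to the
+ * hardware, and get_buf_addr()/get_buf_size() strip and decode those low
+ * bits again when the buffer comes back.
+ */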
 
 static inline bool is_buf_mapped(const struct rx_sw_desc *d)
@@ -392,14 +423,35 @@ static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 	}
 }
 
-static inline int get_buf_size(const struct rx_sw_desc *d)
+static inline int get_buf_size(struct adapter *adapter,
+			       const struct rx_sw_desc *d)
 {
-#if FL_PG_ORDER > 0
-	return (d->dma_addr & RX_LARGE_BUF) ? (PAGE_SIZE << FL_PG_ORDER) :
-					      PAGE_SIZE;
-#else
-	return PAGE_SIZE;
-#endif
+	struct sge *s = &adapter->sge;
+	unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
+	int buf_size;
+
+	switch (rx_buf_size_idx) {
+	case RX_SMALL_PG_BUF:
+		buf_size = PAGE_SIZE;
+		break;
+
+	case RX_LARGE_PG_BUF:
+		buf_size = PAGE_SIZE << s->fl_pg_order;
+		break;
+
+	case RX_SMALL_MTU_BUF:
+		buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
+		break;
+
+	case RX_LARGE_MTU_BUF:
+		buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
+		break;
+
+	default:
+		BUG_ON(1);
+	}
+
+	return buf_size;
 }
 
 /**
@@ -418,7 +470,8 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
 
 		if (is_buf_mapped(d))
 			dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
-				       get_buf_size(d), PCI_DMA_FROMDEVICE);
+				       get_buf_size(adap, d),
+				       PCI_DMA_FROMDEVICE);
 		put_page(d->page);
 		d->page = NULL;
 		if (++q->cidx == q->size)
@@ -444,7 +497,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
 
 	if (is_buf_mapped(d))
 		dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
-			       get_buf_size(d), PCI_DMA_FROMDEVICE);
+			       get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
 	d->page = NULL;
 	if (++q->cidx == q->size)
 		q->cidx = 0;
@@ -485,6 +538,7 @@ static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg,
 static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 			      gfp_t gfp)
 {
+	struct sge *s = &adap->sge;
 	struct page *pg;
 	dma_addr_t mapping;
 	unsigned int cred = q->avail;
@@ -493,25 +547,27 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 
 	gfp |= __GFP_NOWARN | __GFP_COLD;
 
-#if FL_PG_ORDER > 0
+	if (s->fl_pg_order == 0)
+		goto alloc_small_pages;
+
 	/*
 	 * Prefer large buffers
 	 */
 	while (n) {
-		pg = alloc_pages(gfp | __GFP_COMP, FL_PG_ORDER);
+		pg = alloc_pages(gfp | __GFP_COMP, s->fl_pg_order);
 		if (unlikely(!pg)) {
 			q->large_alloc_failed++;
 			break;       /* fall back to single pages */
 		}
 
 		mapping = dma_map_page(adap->pdev_dev, pg, 0,
-				       PAGE_SIZE << FL_PG_ORDER,
+				       PAGE_SIZE << s->fl_pg_order,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
-			__free_pages(pg, FL_PG_ORDER);
+			__free_pages(pg, s->fl_pg_order);
 			goto out;   /* do not try small pages for this error */
 		}
-		mapping |= RX_LARGE_BUF;
+		mapping |= RX_LARGE_PG_BUF;
 		*d++ = cpu_to_be64(mapping);
 
 		set_rx_sw_desc(sd, pg, mapping);
@@ -525,8 +581,8 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 		}
 		n--;
 	}
-#endif
 
+alloc_small_pages:
 	while (n--) {
 		pg = __skb_alloc_page(gfp, NULL);
 		if (unlikely(!pg)) {
@@ -769,8 +825,8 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 	wmb();            /* write descriptors before telling HW */
 	spin_lock(&q->db_lock);
 	if (!q->db_disabled) {
-		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
-			     V_QID(q->cntxt_id) | V_PIDX(n));
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+			     QID(q->cntxt_id) | PIDX(n));
 	}
 	q->db_pidx = q->pidx;
 	spin_unlock(&q->db_lock);
@@ -1519,6 +1575,8 @@ static noinline int handle_trace_pkt(struct adapter *adap,
 static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
 		   const struct cpl_rx_pkt *pkt)
 {
+	struct adapter *adapter = rxq->rspq.adap;
+	struct sge *s = &adapter->sge;
 	int ret;
 	struct sk_buff *skb;
 
@@ -1529,8 +1587,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
 		return;
 	}
 
-	copy_frags(skb, gl, RX_PKT_PAD);
-	skb->len = gl->tot_len - RX_PKT_PAD;
+	copy_frags(skb, gl, s->pktshift);
+	skb->len = gl->tot_len - s->pktshift;
 	skb->data_len = skb->len;
 	skb->truesize += skb->data_len;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1566,6 +1624,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	struct sk_buff *skb;
 	const struct cpl_rx_pkt *pkt;
 	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
+	struct sge *s = &q->adap->sge;
 
 	if (unlikely(*(u8 *)rsp == CPL_TRACE_PKT))
 		return handle_trace_pkt(q->adap, si);
@@ -1585,7 +1644,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 		return 0;
 	}
 
-	__skb_pull(skb, RX_PKT_PAD);      /* remove ethernet header padding */
+	__skb_pull(skb, s->pktshift);      /* remove ethernet header padding */
 	skb->protocol = eth_type_trans(skb, q->netdev);
 	skb_record_rx_queue(skb, q->idx);
 	if (skb->dev->features & NETIF_F_RXHASH)
@@ -1696,6 +1755,8 @@ static int process_responses(struct sge_rspq *q, int budget)
 	int budget_left = budget;
 	const struct rsp_ctrl *rc;
 	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
+	struct adapter *adapter = q->adap;
+	struct sge *s = &adapter->sge;
 
 	while (likely(budget_left)) {
 		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
@@ -1722,7 +1783,7 @@ static int process_responses(struct sge_rspq *q, int budget)
 			/* gather packet fragments */
 			for (frags = 0, fp = si.frags; ; frags++, fp++) {
 				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
-				bufsz = get_buf_size(rsd);
+				bufsz = get_buf_size(adapter, rsd);
 				fp->page = rsd->page;
 				fp->offset = q->offset;
 				fp->size = min(bufsz, len);
@@ -1747,7 +1808,7 @@ static int process_responses(struct sge_rspq *q, int budget)
 			si.nfrags = frags + 1;
 			ret = q->handler(q, q->cur_desc, &si);
 			if (likely(ret == 0))
-				q->offset += ALIGN(fp->size, FL_ALIGN);
+				q->offset += ALIGN(fp->size, s->fl_align);
 			else
 				restore_rx_bufs(&si, &rxq->fl, frags);
 		} else if (likely(rsp_type == RSP_TYPE_CPL)) {
@@ -1983,6 +2044,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 {
 	int ret, flsz = 0;
 	struct fw_iq_cmd c;
+	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
 
 	/* Size needs to be multiple of 16, including status entry. */
@@ -2015,11 +2077,11 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		fl->size = roundup(fl->size, 8);
 		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
 				      sizeof(struct rx_sw_desc), &fl->addr,
-				      &fl->sdesc, STAT_LEN, NUMA_NO_NODE);
+				      &fl->sdesc, s->stat_len, NUMA_NO_NODE);
 		if (!fl->desc)
 			goto fl_nomem;
 
-		flsz = fl->size / 8 + STAT_LEN / sizeof(struct tx_desc);
+		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
 		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN |
 					    FW_IQ_CMD_FL0FETCHRO(1) |
 					    FW_IQ_CMD_FL0DATARO(1) |
@@ -2096,14 +2158,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 {
 	int ret, nentries;
 	struct fw_eq_eth_cmd c;
+	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
 
 	/* Add status entries */
-	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
+	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
 			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN,
+			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
 			netdev_queue_numa_node_read(netdevq));
 	if (!txq->q.desc)
 		return -ENOMEM;
@@ -2149,10 +2212,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 {
 	int ret, nentries;
 	struct fw_eq_ctrl_cmd c;
+	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
 
 	/* Add status entries */
-	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
+	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
 				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
@@ -2200,14 +2264,15 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 {
 	int ret, nentries;
 	struct fw_eq_ofld_cmd c;
+	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
 
 	/* Add status entries */
-	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
+	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
 			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN,
+			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
 			NUMA_NO_NODE);
 	if (!txq->q.desc)
 		return -ENOMEM;
@@ -2251,8 +2316,10 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 
 static void free_txq(struct adapter *adap, struct sge_txq *q)
 {
+	struct sge *s = &adap->sge;
+
 	dma_free_coherent(adap->pdev_dev,
-			  q->size * sizeof(struct tx_desc) + STAT_LEN,
+			  q->size * sizeof(struct tx_desc) + s->stat_len,
 			  q->desc, q->phys_addr);
 	q->cntxt_id = 0;
 	q->sdesc = NULL;
@@ -2262,6 +2329,7 @@ static void free_txq(struct adapter *adap, struct sge_txq *q)
 static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
 			 struct sge_fl *fl)
 {
+	struct sge *s = &adap->sge;
 	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
 
 	adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL;
@@ -2276,7 +2344,7 @@ static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
 
 	if (fl) {
 		free_rx_bufs(adap, fl, fl->avail);
-		dma_free_coherent(adap->pdev_dev, fl->size * 8 + STAT_LEN,
+		dma_free_coherent(adap->pdev_dev, fl->size * 8 + s->stat_len,
 				  fl->desc, fl->addr);
 		kfree(fl->sdesc);
 		fl->sdesc = NULL;
@@ -2408,18 +2476,112 @@ void t4_sge_stop(struct adapter *adap)
  *	Performs SGE initialization needed every time after a chip reset.
  *	We do not initialize any of the queues here, instead the driver
  *	top-level must request them individually.
+ *
+ *	Called in two different modes:
+ *
+ *	 1. Perform actual hardware initialization and record hard-coded
+ *	    parameters which were used.  This gets used when we're the
+ *	    Master PF and the Firmware Configuration File support didn't
+ *	    work for some reason.
+ *
+ *	 2. We're not the Master PF or initialization was performed with
+ *	    a Firmware Configuration File.  In this case we need to grab
+ *	    any of the SGE operating parameters that we need to have in
+ *	    order to do our job and make sure we can live with them ...
  */
-void t4_sge_init(struct adapter *adap)
+
+static int t4_sge_init_soft(struct adapter *adap)
 {
-	unsigned int i, v;
 	struct sge *s = &adap->sge;
-	unsigned int fl_align_log = ilog2(FL_ALIGN);
+	u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu;
+	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
+	u32 ingress_rx_threshold;
 
-	t4_set_reg_field(adap, SGE_CONTROL, PKTSHIFT_MASK |
-			 INGPADBOUNDARY_MASK | EGRSTATUSPAGESIZE,
-			 INGPADBOUNDARY(fl_align_log - 5) | PKTSHIFT(2) |
-			 RXPKTCPLMODE |
-			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));
+	/*
+	 * Verify that CPL messages are going to the Ingress Queue for
+	 * process_responses() and that only packet data is going to the
+	 * Free Lists.
+	 */
+	if ((t4_read_reg(adap, SGE_CONTROL) & RXPKTCPLMODE_MASK) !=
+	    RXPKTCPLMODE(X_RXPKTCPLMODE_SPLIT)) {
+		dev_err(adap->pdev_dev, "bad SGE CPL MODE\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Validate the Host Buffer Register Array indices that we want to
+	 * use ...
+	 *
+	 * XXX Note that we should really read through the Host Buffer Size
+	 * XXX register array and find the indices of the Buffer Sizes which
+	 * XXX meet our needs!
+	 */
+	#define READ_FL_BUF(x) \
+		t4_read_reg(adap, SGE_FL_BUFFER_SIZE0 + (x) * sizeof(u32))
+
+	fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF);
+	fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF);
+	fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
+	fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);
+
+	#undef READ_FL_BUF
+
+	if (fl_small_pg != PAGE_SIZE ||
+	    (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg ||
+				  (fl_large_pg & (fl_large_pg-1)) != 0))) {
+		dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n",
+			fl_small_pg, fl_large_pg);
+		return -EINVAL;
+	}
+	if (fl_large_pg)
+		s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT;
+
+	if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap) ||
+	    fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) {
+		dev_err(adap->pdev_dev, "bad SGE FL MTU sizes [%d, %d]\n",
+			fl_small_mtu, fl_large_mtu);
+		return -EINVAL;
+	}
+
+	/*
+	 * Retrieve our RX interrupt holdoff timer values and counter
+	 * threshold values from the SGE parameters.
+	 */
+	timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1);
+	timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3);
+	timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5);
+	s->timer_val[0] = core_ticks_to_us(adap,
+		TIMERVALUE0_GET(timer_value_0_and_1));
+	s->timer_val[1] = core_ticks_to_us(adap,
+		TIMERVALUE1_GET(timer_value_0_and_1));
+	s->timer_val[2] = core_ticks_to_us(adap,
+		TIMERVALUE2_GET(timer_value_2_and_3));
+	s->timer_val[3] = core_ticks_to_us(adap,
+		TIMERVALUE3_GET(timer_value_2_and_3));
+	s->timer_val[4] = core_ticks_to_us(adap,
+		TIMERVALUE4_GET(timer_value_4_and_5));
+	s->timer_val[5] = core_ticks_to_us(adap,
+		TIMERVALUE5_GET(timer_value_4_and_5));
+
+	ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD);
+	s->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold);
+	s->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold);
+	s->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold);
+	s->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold);
+
+	return 0;
+}
+
+static int t4_sge_init_hard(struct adapter *adap)
+{
+	struct sge *s = &adap->sge;
+
+	/*
+	 * Set up our basic SGE mode to deliver CPL messages to our Ingress
+	 * Queue and Packet Data to the Free Lists.
+	 */
+	t4_set_reg_field(adap, SGE_CONTROL, RXPKTCPLMODE_MASK,
+			 RXPKTCPLMODE_MASK);
 
 	/*
 	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
@@ -2433,13 +2595,24 @@ void t4_sge_init(struct adapter *adap)
 	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP,
 			F_ENABLE_DROP);
 
-	for (i = v = 0; i < 32; i += 4)
-		v |= (PAGE_SHIFT - 10) << i;
-	t4_write_reg(adap, SGE_HOST_PAGE_SIZE, v);
-	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, PAGE_SIZE);
-#if FL_PG_ORDER > 0
-	t4_write_reg(adap, SGE_FL_BUFFER_SIZE1, PAGE_SIZE << FL_PG_ORDER);
-#endif
+	/*
+	 * SGE_FL_BUFFER_SIZE0 (RX_SMALL_PG_BUF) is set up by
+	 * t4_fixup_host_params().
+	 */
+	s->fl_pg_order = FL_PG_ORDER;
+	if (s->fl_pg_order)
+		t4_write_reg(adap,
+			     SGE_FL_BUFFER_SIZE0+RX_LARGE_PG_BUF*sizeof(u32),
+			     PAGE_SIZE << FL_PG_ORDER);
+	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_SMALL_MTU_BUF*sizeof(u32),
+		     FL_MTU_SMALL_BUFSIZE(adap));
+	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_LARGE_MTU_BUF*sizeof(u32),
+		     FL_MTU_LARGE_BUFSIZE(adap));
+
+	/*
+	 * Note that the SGE Ingress Packet Count Interrupt Threshold and
+	 * Timer Holdoff values must be supplied by our caller.
+	 */
 	t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD,
 		     THRESHOLD_0(s->counter_val[0]) |
 		     THRESHOLD_1(s->counter_val[1]) |
@@ -2449,14 +2622,54 @@ void t4_sge_init(struct adapter *adap)
 		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) |
 		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1])));
 	t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3,
-		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[2])) |
-		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[3])));
+		     TIMERVALUE2(us_to_core_ticks(adap, s->timer_val[2])) |
+		     TIMERVALUE3(us_to_core_ticks(adap, s->timer_val[3])));
 	t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5,
-		     TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[4])) |
-		     TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[5])));
+		     TIMERVALUE4(us_to_core_ticks(adap, s->timer_val[4])) |
+		     TIMERVALUE5(us_to_core_ticks(adap, s->timer_val[5])));
+
+	return 0;
+}
+
+int t4_sge_init(struct adapter *adap)
+{
+	struct sge *s = &adap->sge;
+	u32 sge_control;
+	int ret;
+
+	/*
+	 * Ingress Padding Boundary and Egress Status Page Size are set up by
+	 * t4_fixup_host_params().
+	 */
+	sge_control = t4_read_reg(adap, SGE_CONTROL);
+	s->pktshift = PKTSHIFT_GET(sge_control);
+	s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64;
+	s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) +
+			    X_INGPADBOUNDARY_SHIFT);
+
+	if (adap->flags & USING_SOFT_PARAMS)
+		ret = t4_sge_init_soft(adap);
+	else
+		ret = t4_sge_init_hard(adap);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * A FL with <= fl_starve_thres buffers is starving and a periodic
+	 * timer will attempt to refill it.  This needs to be larger than the
+	 * SGE's Egress Congestion Threshold.  If it isn't, then we can get
+	 * stuck waiting for new packets while the SGE is waiting for us to
+	 * give it more Free List entries.  (Note that the SGE's Egress
+	 * Congestion Threshold is in units of 2 Free List pointers.)
+	 */
+	s->fl_starve_thres =
+		EGRTHRESHOLD_GET(t4_read_reg(adap, SGE_CONM_CTRL)) * 2 + 1;
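+	/*
+	 * E.g. an EGRTHRESHOLD of 64 (i.e. 128 Free List pointers) yields
+	 * an fl_starve_thres of 64 * 2 + 1 == 129 buffers.
+	 */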
+
 	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
 	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
 	s->starve_thres = core_ticks_per_usec(adap) * 1000000;  /* 1 s */
 	s->idma_state[0] = s->idma_state[1] = 0;
 	spin_lock_init(&s->intrq_lock);
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index af1601323173..35b81d8b59e9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -120,6 +120,28 @@ static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg,
 	}
 }
 
+/**
+ *	t4_write_indirect - write indirectly addressed registers
+ *	@adap: the adapter
+ *	@addr_reg: register holding the indirect addresses
+ *	@data_reg: register holding the value for the indirect registers
+ *	@vals: values to write
+ *	@nregs: how many indirect registers to write
+ *	@start_idx: address of first indirect register to write
+ *
+ *	Writes a sequential block of registers that are accessed indirectly
+ *	through an address/data register pair.
+ */
+void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
+		       unsigned int data_reg, const u32 *vals,
+		       unsigned int nregs, unsigned int start_idx)
+{
+	while (nregs--) {
+		t4_write_reg(adap, addr_reg, start_idx++);
+		t4_write_reg(adap, data_reg, *vals++);
+	}
+}
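+
+/*
+ * For example, the driver's TP_VLAN_PRI_MAP update goes through the TP PIO
+ * address/data register pair:
+ *
+ *	u32 v = tp_vlan_pri_map;
+ *
+ *	t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, &v, 1,
+ *			  TP_VLAN_PRI_MAP);
+ */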
+
 /*
  * Get the reply to a mailbox command and store it in @rpl in big-endian order.
  */
@@ -330,6 +352,143 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc)
 	return 0;
 }
 
+/**
+ *	t4_mem_win_rw - read/write memory through PCIE memory window
+ *	@adap: the adapter
+ *	@addr: address of first byte requested
+ *	@data: MEMWIN0_APERTURE bytes of data containing the requested address
+ *	@dir: direction of transfer 1 => read, 0 => write
+ *
+ *	Read/write MEMWIN0_APERTURE bytes of data from MC starting at a
+ *	MEMWIN0_APERTURE-byte-aligned address that covers the requested
+ *	address @addr.
+ */
+static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir)
+{
+	int i;
+
+	/*
+	 * Setup offset into PCIE memory window.  Address must be a
+	 * MEMWIN0_APERTURE-byte-aligned address.  (Read back MA register to
+	 * ensure that changes propagate before we attempt to use the new
+	 * values.)
+	 */
+	t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET,
+		     addr & ~(MEMWIN0_APERTURE - 1));
+	t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET);
+
+	/* Collect data 4 bytes at a time up to MEMWIN0_APERTURE */
+	for (i = 0; i < MEMWIN0_APERTURE; i = i+0x4) {
+		if (dir)
+			*data++ = t4_read_reg(adap, (MEMWIN0_BASE + i));
+		else
+			t4_write_reg(adap, (MEMWIN0_BASE + i), *data++);
+	}
+
+	return 0;
+}
+
+/**
+ *	t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
+ *	@adap: the adapter
+ *	@mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ *	@addr: address within indicated memory type
+ *	@len: amount of memory to transfer
+ *	@buf: host memory buffer
+ *	@dir: direction of transfer 1 => read, 0 => write
+ *
+ *	Reads/writes an [almost] arbitrary memory region in the firmware: the
+ *	firmware memory address, length and host buffer must be aligned on
+ *	32-bit boundaries.  The memory is transferred as a raw byte sequence
+ *	from/to the firmware's memory.  If this memory contains data
+ *	structures which contain multi-byte integers, it's the caller's
+ *	responsibility to perform appropriate byte order conversions.
+ */
+static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len,
+			__be32 *buf, int dir)
+{
+	u32 pos, start, end, offset, memoffset;
+	int ret;
+
+	/*
+	 * Argument sanity checks ...
+	 */
+	if ((addr & 0x3) || (len & 0x3))
+		return -EINVAL;
+
+	/*
+	 * Offset into the region of memory which is being accessed
+	 * MEM_EDC0 = 0
+	 * MEM_EDC1 = 1
+	 * MEM_MC   = 2
+	 */
+	memoffset = (mtype * (5 * 1024 * 1024));
+
+	/* Determine the PCIE_MEM_ACCESS_OFFSET */
+	addr = addr + memoffset;
+
+	/*
+	 * The underlying EDC/MC read routines read MEMWIN0_APERTURE bytes
+	 * at a time so we need to round down the start and round up the end.
+	 * We'll start copying out of the first line at (addr - start) a word
+	 * at a time.
+	 */
+	start = addr & ~(MEMWIN0_APERTURE-1);
+	end = (addr + len + MEMWIN0_APERTURE-1) & ~(MEMWIN0_APERTURE-1);
+	offset = (addr - start)/sizeof(__be32);
+
+	for (pos = start; pos < end; pos += MEMWIN0_APERTURE, offset = 0) {
+		__be32 data[MEMWIN0_APERTURE/sizeof(__be32)];
+
+		/*
+		 * If we're writing, copy the data from the caller's memory
+		 * buffer
+		 */
+		if (!dir) {
+			/*
+			 * If we're doing a partial write, then we need to do
+			 * a read-modify-write ...
+			 */
+			if (offset || len < MEMWIN0_APERTURE) {
+				ret = t4_mem_win_rw(adap, pos, data, 1);
+				if (ret)
+					return ret;
+			}
+			while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) &&
+			       len > 0) {
+				data[offset++] = *buf++;
+				len -= sizeof(__be32);
+			}
+		}
+
+		/*
+		 * Transfer a block of memory and bail if there's an error.
+		 */
+		ret = t4_mem_win_rw(adap, pos, data, dir);
+		if (ret)
+			return ret;
+
+		/*
+		 * If we're reading, copy the data into the caller's memory
+		 * buffer.
+		 */
+		if (dir)
+			while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) &&
+			       len > 0) {
+				*buf++ = data[offset++];
+				len -= sizeof(__be32);
+			}
+	}
+
+	return 0;
+}
+
+int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len,
+		    __be32 *buf)
+{
+	return t4_memory_rw(adap, mtype, addr, len, buf, 0);
+}
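+
+/*
+ * Minimal usage sketch (the buffer contents and target address are purely
+ * illustrative): write one 32-bit-aligned block to the start of MC:
+ *
+ *	__be32 buf[256];
+ *
+ *	ret = t4_memory_write(adap, MEM_MC, 0, sizeof(buf), buf);
+ */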
+
 #define EEPROM_STAT_ADDR   0x7bfc
 #define VPD_BASE           0
 #define VPD_LEN            512
@@ -355,8 +514,9 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable)
  *
  *	Reads card parameters stored in VPD EEPROM.
  */
-static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
+int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
 {
+	u32 cclk_param, cclk_val;
 	int i, ret;
 	int ec, sn;
 	u8 vpd[VPD_LEN], csum;
@@ -418,6 +578,19 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
 	i = pci_vpd_info_field_size(vpd + sn - PCI_VPD_INFO_FLD_HDR_SIZE);
 	memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN));
 	strim(p->sn);
+
+	/*
+	 * Ask firmware for the Core Clock since it knows how to translate the
+	 * Reference Clock ('V2') VPD field into a Core Clock value ...
+	 */
+	cclk_param = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
+		      FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK));
+	ret = t4_query_params(adapter, adapter->mbox, 0, 0,
+			      1, &cclk_param, &cclk_val);
+	if (ret)
+		return ret;
+	p->cclk = cclk_val;
+
 	return 0;
 }
 
@@ -718,6 +891,77 @@ static int t4_flash_erase_sectors(struct adapter *adapter, int start, int end)
 }
 
 /**
+ *	t4_flash_cfg_addr - return the address of the flash configuration file
+ *	@adapter: the adapter
+ *
+ *	Return the address within the flash where the Firmware Configuration
+ *	File is stored.
+ */
+unsigned int t4_flash_cfg_addr(struct adapter *adapter)
+{
+	if (adapter->params.sf_size == 0x100000)
+		return FLASH_FPGA_CFG_START;
+	else
+		return FLASH_CFG_START;
+}
+
+/**
+ *	t4_load_cfg - download config file
+ *	@adap: the adapter
+ *	@cfg_data: the cfg text file to write
+ *	@size: text file size
+ *
+ *	Write the supplied config text file to the card's serial flash.
+ */
+int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+{
+	int ret, i, n;
+	unsigned int addr;
+	unsigned int flash_cfg_start_sec;
+	unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
+
+	addr = t4_flash_cfg_addr(adap);
+	flash_cfg_start_sec = addr / SF_SEC_SIZE;
+
+	if (size > FLASH_CFG_MAX_SIZE) {
+		dev_err(adap->pdev_dev, "cfg file too large, max is %u bytes\n",
+			FLASH_CFG_MAX_SIZE);
+		return -EFBIG;
+	}
+
+	i = DIV_ROUND_UP(FLASH_CFG_MAX_SIZE,	/* # of sectors spanned */
+			 sf_sec_size);
+	ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec,
+				     flash_cfg_start_sec + i - 1);
+	/*
+	 * If size == 0 then we're simply erasing the FLASH sectors associated
+	 * with the on-adapter Firmware Configuration File.
+	 */
+	if (ret || size == 0)
+		goto out;
+
+	/* this will write to the flash up to SF_PAGE_SIZE at a time */
+	for (i = 0; i < size; i += SF_PAGE_SIZE) {
+		if ((size - i) < SF_PAGE_SIZE)
+			n = size - i;
+		else
+			n = SF_PAGE_SIZE;
+		ret = t4_write_flash(adap, addr, n, cfg_data);
+		if (ret)
+			goto out;
+
+		addr += SF_PAGE_SIZE;
+		cfg_data += SF_PAGE_SIZE;
+	}
+
+out:
+	if (ret)
+		dev_err(adap->pdev_dev, "config file %s failed, error %d\n",
+			(size == 0 ? "clear" : "download"), ret);
+	return ret;
+}
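+
+/*
+ * A typical caller fetches the Configuration File with request_firmware()
+ * and hands it to t4_load_cfg(); e.g. (error handling elided, FW_CFNAME
+ * being the driver's config file name):
+ *
+ *	const struct firmware *cf;
+ *
+ *	if (request_firmware(&cf, FW_CFNAME, adap->pdev_dev) == 0) {
+ *		ret = t4_load_cfg(adap, cf->data, cf->size);
+ *		release_firmware(cf);
+ *	}
+ */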
+
+/**
  *	t4_load_fw - download firmware
  *	@adap: the adapter
  *	@fw_data: the firmware image to write
@@ -1018,9 +1262,9 @@ static void sge_intr_handler(struct adapter *adapter)
 		{ ERR_INVALID_CIDX_INC,
 		  "SGE GTS CIDX increment too large", -1, 0 },
 		{ ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 },
-		{ F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full },
-		{ F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full },
-		{ F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped },
+		{ DBFIFO_LP_INT, NULL, -1, 0, t4_db_full },
+		{ DBFIFO_HP_INT, NULL, -1, 0, t4_db_full },
+		{ ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped },
 		{ ERR_DATA_CPL_ON_HIGH_QID1 | ERR_DATA_CPL_ON_HIGH_QID0,
 		  "SGE IQID > 1023 received CPL for FL", -1, 0 },
 		{ ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1,
@@ -1520,7 +1764,7 @@ void t4_intr_enable(struct adapter *adapter)
 		     ERR_BAD_DB_PIDX2 | ERR_BAD_DB_PIDX1 |
 		     ERR_BAD_DB_PIDX0 | ERR_ING_CTXT_PRIO |
 		     ERR_EGR_CTXT_PRIO | INGRESS_SIZE_ERR |
-		     F_DBFIFO_HP_INT | F_DBFIFO_LP_INT |
+		     DBFIFO_HP_INT | DBFIFO_LP_INT |
 		     EGRESS_SIZE_ERR);
 	t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE), PF_INTR_MASK);
 	t4_set_reg_field(adapter, PL_INT_MAP0, 0, 1 << pf);
@@ -1717,6 +1961,23 @@ void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log)
 }
 
 /**
+ *	t4_tp_wr_bits_indirect - set/clear bits in an indirect TP register
+ *	@adap: the adapter
+ *	@addr: the indirect TP register address
+ *	@mask: specifies the field within the register to modify
+ *	@val: new value for the field
+ *
+ *	Sets a field of an indirect TP register to the given value.
+ */
+void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr,
+			    unsigned int mask, unsigned int val)
+{
+	t4_write_reg(adap, TP_PIO_ADDR, addr);
+	val |= t4_read_reg(adap, TP_PIO_DATA) & ~mask;
+	t4_write_reg(adap, TP_PIO_DATA, val);
+}
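+
+/*
+ * Illustrative call only -- SOME_TP_REG, SOME_FIELD_MASK and SOME_FIELD_VAL
+ * are placeholders, not real register definitions:
+ *
+ *	t4_tp_wr_bits_indirect(adap, SOME_TP_REG, SOME_FIELD_MASK,
+ *			       SOME_FIELD_VAL);
+ */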
+
+/**
  *	init_cong_ctrl - initialize congestion control parameters
  *	@a: the alpha values for congestion control
  *	@b: the beta values for congestion control
@@ -2000,9 +2261,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
 	struct fw_ldst_cmd c;
 
 	memset(&c, 0, sizeof(c));
-	c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST |
-			    F_FW_CMD_WRITE |
-			    V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE));
+	c.op_to_addrspace = htonl(FW_CMD_OP(FW_LDST_CMD) | FW_CMD_REQUEST |
+			    FW_CMD_WRITE |
+			    FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE));
 	c.cycles_to_len16 = htonl(FW_LEN16(c));
 	c.u.addrval.addr = htonl(addr);
 	c.u.addrval.val = htonl(val);
@@ -2033,8 +2294,8 @@ int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len)
 	if ((addr & 3) || (len + off) > MEMWIN0_APERTURE)
 		return -EINVAL;
 
-	t4_write_reg(adap, A_PCIE_MEM_ACCESS_OFFSET, addr & ~15);
-	t4_read_reg(adap, A_PCIE_MEM_ACCESS_OFFSET);
+	t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, addr & ~15);
+	t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET);
 
 	for (i = 0; i < len; i += 4)
 		*data++ = t4_read_reg(adap, (MEMWIN0_BASE + off + i));
@@ -2102,39 +2363,129 @@ int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr,
 }
 
 /**
- *	t4_fw_hello - establish communication with FW
- *	@adap: the adapter
- *	@mbox: mailbox to use for the FW command
- *	@evt_mbox: mailbox to receive async FW events
- *	@master: specifies the caller's willingness to be the device master
- *	@state: returns the current device state
+ *	t4_fw_hello - establish communication with FW
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *	@evt_mbox: mailbox to receive async FW events
+ *	@master: specifies the caller's willingness to be the device master
+ *	@state: returns the current device state (if non-NULL)
  *
- *	Issues a command to establish communication with FW.
+ *	Issues a command to establish communication with FW.  Returns either
+ *	an error (negative integer) or the mailbox of the Master PF.
  */
 int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 		enum dev_master master, enum dev_state *state)
 {
 	int ret;
 	struct fw_hello_cmd c;
+	u32 v;
+	unsigned int master_mbox;
+	int retries = FW_CMD_HELLO_RETRIES;
 
+retry:
+	memset(&c, 0, sizeof(c));
 	INIT_CMD(c, HELLO, WRITE);
 	c.err_to_mbasyncnot = htonl(
 		FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) |
 		FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) |
-		FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox : 0xff) |
-		FW_HELLO_CMD_MBASYNCNOT(evt_mbox));
+		FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox :
+				      FW_HELLO_CMD_MBMASTER_MASK) |
+		FW_HELLO_CMD_MBASYNCNOT(evt_mbox) |
+		FW_HELLO_CMD_STAGE(fw_hello_cmd_stage_os) |
+		FW_HELLO_CMD_CLEARINIT);
 
+	/*
+	 * Issue the HELLO command to the firmware.  If it's not successful
+	 * but indicates that we got a "busy" or "timeout" condition, retry
+	 * the HELLO until we exhaust our retry limit.
+	 */
 	ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
-	if (ret == 0 && state) {
-		u32 v = ntohl(c.err_to_mbasyncnot);
-		if (v & FW_HELLO_CMD_INIT)
-			*state = DEV_STATE_INIT;
-		else if (v & FW_HELLO_CMD_ERR)
+	if (ret < 0) {
+		if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0)
+			goto retry;
+		return ret;
+	}
+
+	v = ntohl(c.err_to_mbasyncnot);
+	master_mbox = FW_HELLO_CMD_MBMASTER_GET(v);
+	if (state) {
+		if (v & FW_HELLO_CMD_ERR)
 			*state = DEV_STATE_ERR;
+		else if (v & FW_HELLO_CMD_INIT)
+			*state = DEV_STATE_INIT;
 		else
 			*state = DEV_STATE_UNINIT;
 	}
-	return ret;
+
+	/*
+	 * If we're not the Master PF then we need to wait around for the
+	 * Master PF Driver to finish setting up the adapter.
+	 *
+	 * Note that we also do this wait if we're a non-Master-capable PF and
+	 * there is no current Master PF; a Master PF may show up momentarily
+	 * and we wouldn't want to fail pointlessly.  (This can happen when an
+	 * OS loads lots of different drivers rapidly at the same time).  In
+	 * this case, the Master PF returned by the firmware will be
+	 * FW_PCIE_FW_MASTER_MASK so the test below will work ...
+	 */
+	if ((v & (FW_HELLO_CMD_ERR|FW_HELLO_CMD_INIT)) == 0 &&
+	    master_mbox != mbox) {
+		int waiting = FW_CMD_HELLO_TIMEOUT;
+
+		/*
+		 * Wait for the firmware to either indicate an error or
+		 * initialized state.  If we see either of these we bail out
+		 * and report the issue to the caller.  If we exhaust the
+		 * "hello timeout" and we haven't exhausted our retries, try
+		 * again.  Otherwise bail with a timeout error.
+		 */
+		for (;;) {
+			u32 pcie_fw;
+
+			msleep(50);
+			waiting -= 50;
+
+			/*
+			 * If neither Error nor Initialized are indicated
+			 * by the firmware, keep waiting till we exhaust our
+			 * timeout ... and then retry if we haven't exhausted
+			 * our retries ...
+			 */
+			pcie_fw = t4_read_reg(adap, MA_PCIE_FW);
+			if (!(pcie_fw & (FW_PCIE_FW_ERR|FW_PCIE_FW_INIT))) {
+				if (waiting <= 0) {
+					if (retries-- > 0)
+						goto retry;
+
+					return -ETIMEDOUT;
+				}
+				continue;
+			}
+
+			/*
+			 * We either have an Error or Initialized condition;
+			 * report errors preferentially.
+			 */
+			if (state) {
+				if (pcie_fw & FW_PCIE_FW_ERR)
+					*state = DEV_STATE_ERR;
+				else if (pcie_fw & FW_PCIE_FW_INIT)
+					*state = DEV_STATE_INIT;
+			}
+
+			/*
+			 * If we arrived before a Master PF was selected and
+			 * there's now a valid Master PF, grab its identity
+			 * for our caller.
+			 */
+			if (master_mbox == FW_PCIE_FW_MASTER_MASK &&
+			    (pcie_fw & FW_PCIE_FW_MASTER_VLD))
+				master_mbox = FW_PCIE_FW_MASTER_GET(pcie_fw);
+			break;
+		}
+	}
+
+	return master_mbox;
 }
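+
+/*
+ * Callers compare the return value against their own mailbox to learn
+ * whether they were elected Master PF, as adap_init0() does:
+ *
+ *	ret = t4_fw_hello(adap, adap->mbox, adap->fn, MASTER_MAY, &state);
+ *	if (ret == adap->mbox)
+ *		adap->flags |= MASTER_PF;
+ */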
 
 /**
@@ -2186,6 +2537,334 @@ int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset)
 }
 
 /**
+ *	t4_fw_halt - issue a reset/halt to FW and put uP into RESET
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW RESET command (if desired)
+ *	@force: force uP into RESET even if FW RESET command fails
+ *
+ *	Issues a RESET command to firmware (if desired) with a HALT indication
+ *	and then puts the microprocessor into RESET state.  The RESET command
+ *	will only be issued if a legitimate mailbox is provided (mbox <=
+ *	FW_PCIE_FW_MASTER_MASK).
+ *
+ *	This is generally used in order for the host to safely manipulate the
+ *	adapter without fear of conflicting with whatever the firmware might
+ *	be doing.  The only way out of this state is to RESTART the firmware
+ *	...
+ */
+int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force)
+{
+	int ret = 0;
+
+	/*
+	 * If a legitimate mailbox is provided, issue a RESET command
+	 * with a HALT indication.
+	 */
+	if (mbox <= FW_PCIE_FW_MASTER_MASK) {
+		struct fw_reset_cmd c;
+
+		memset(&c, 0, sizeof(c));
+		INIT_CMD(c, RESET, WRITE);
+		c.val = htonl(PIORST | PIORSTMODE);
+		c.halt_pkd = htonl(FW_RESET_CMD_HALT(1U));
+		ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+	}
+
+	/*
+	 * Normally we won't complete the operation if the firmware RESET
+	 * command fails but if our caller insists we'll go ahead and put the
+	 * uP into RESET.  This can be useful if the firmware is hung or even
+	 * missing ...  We'll have to take the risk of putting the uP into
+	 * RESET without the cooperation of firmware in that case.
+	 *
+	 * We also force the firmware's HALT flag to be on in case we bypassed
+	 * the firmware RESET command above or we're dealing with old firmware
+	 * which doesn't have the HALT capability.  This will serve as a flag
+	 * for the incoming firmware to know that it's coming out of a HALT
+	 * rather than a RESET ... if it's new enough to understand that ...
+	 */
+	if (ret == 0 || force) {
+		t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, UPCRST);
+		t4_set_reg_field(adap, PCIE_FW, FW_PCIE_FW_HALT,
+				 FW_PCIE_FW_HALT);
+	}
+
+	/*
+	 * And we always return the result of the firmware RESET command
+	 * even when we force the uP into RESET ...
+	 */
+	return ret;
+}
+
+/**
+ *	t4_fw_restart - restart the firmware by taking the uP out of RESET
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW RESET command (if desired)
+ *	@reset: if we want to do a RESET to restart things
+ *
+ *	Restart firmware previously halted by t4_fw_halt().  On successful
+ *	return the previous PF Master remains as the new PF Master and there
+ *	is no need to issue a new HELLO command, etc.
+ *
+ *	We do this in two ways:
+ *
+ *	 1. If we're dealing with newer firmware we'll simply want to take
+ *	    the chip's microprocessor out of RESET.  This will cause the
+ *	    firmware to start up from its start vector.  And then we'll loop
+ *	    until the firmware indicates it's started again (PCIE_FW.HALT
+ *	    reset to 0) or we timeout.
+ *
+ *	 2. If we're dealing with older firmware then we'll need to RESET
+ *	    the chip since older firmware won't recognize the PCIE_FW.HALT
+ *	    flag and automatically RESET itself on startup.
+ */
+int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset)
+{
+	if (reset) {
+		/*
+		 * Since we're directing the RESET instead of the firmware
+		 * doing it automatically, we need to clear the PCIE_FW.HALT
+		 * bit.
+		 */
+		t4_set_reg_field(adap, PCIE_FW, FW_PCIE_FW_HALT, 0);
+
+		/*
+		 * If we've been given a valid mailbox, first try to get the
+		 * firmware to do the RESET.  If that works, great and we can
+		 * return success.  Otherwise, if we haven't been given a
+		 * valid mailbox or the RESET command failed, fall back to
+		 * hitting the chip with a hammer.
+		 */
+		if (mbox <= FW_PCIE_FW_MASTER_MASK) {
+			t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, 0);
+			msleep(100);
+			if (t4_fw_reset(adap, mbox,
+					PIORST | PIORSTMODE) == 0)
+				return 0;
+		}
+
+		t4_write_reg(adap, PL_RST, PIORST | PIORSTMODE);
+		msleep(2000);
+	} else {
+		int ms;
+
+		t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, 0);
+		for (ms = 0; ms < FW_CMD_MAX_TIMEOUT; ) {
+			if (!(t4_read_reg(adap, PCIE_FW) & FW_PCIE_FW_HALT))
+				return 0;
+			msleep(100);
+			ms += 100;
+		}
+		return -ETIMEDOUT;
+	}
+	return 0;
+}
+
+/**
+ *	t4_fw_upgrade - perform all of the steps necessary to upgrade FW
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW RESET command (if desired)
+ *	@fw_data: the firmware image to write
+ *	@size: image size
+ *	@force: force upgrade even if firmware doesn't cooperate
+ *
+ *	Perform all of the steps necessary for upgrading an adapter's
+ *	firmware image.  Normally this requires the cooperation of the
+ *	existing firmware in order to halt all existing activities
+ *	but if an invalid mailbox token is passed in we skip that step
+ *	(though we'll still put the adapter microprocessor into RESET in
+ *	that case).
+ *
+ *	On successful return the new firmware will have been loaded and
+ *	the adapter will have been fully RESET losing all previous setup
+ *	state.  On unsuccessful return the adapter may be completely hosed ...
+ *	positive errno indicates that the adapter is ~probably~ intact, a
+ *	negative errno indicates that things are looking bad ...
+ */
+int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
+		  const u8 *fw_data, unsigned int size, int force)
+{
+	const struct fw_hdr *fw_hdr = (const struct fw_hdr *)fw_data;
+	int reset, ret;
+
+	ret = t4_fw_halt(adap, mbox, force);
+	if (ret < 0 && !force)
+		return ret;
+
+	ret = t4_load_fw(adap, fw_data, size);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Older versions of the firmware don't understand the new
+	 * PCIE_FW.HALT flag and so won't know to perform a RESET when they
+	 * restart.  So for newly loaded older firmware we'll have to do the
+	 * RESET for it so it starts up on a clean slate.  We can tell if
+	 * the newly loaded firmware will handle this right by checking
+	 * its header flags to see if it advertises the capability.
+	 */
+	reset = ((ntohl(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0);
+	return t4_fw_restart(adap, mbox, reset);
+}
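
For illustration, a typical caller obtains the image via request_firmware()
and passes its own PF's mailbox.  A minimal sketch (the firmware file name
and the use of adap->fn as the mailbox are assumptions, not part of this
patch):

	/* Illustrative only: load a new image and upgrade with force=0,
	 * i.e. requiring the running firmware's cooperation.
	 */
	const struct firmware *fw;
	int ret;

	ret = request_firmware(&fw, "cxgb4/t4fw.bin", adap->pdev_dev);
	if (ret < 0)
		return ret;
	ret = t4_fw_upgrade(adap, adap->fn, fw->data, fw->size, 0);
	release_firmware(fw);
	return ret;
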
+
+
+/**
+ *	t4_fw_config_file - setup an adapter via a Configuration File
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *	@mtype: the memory type where the Configuration File is located
+ *	@maddr: the memory address where the Configuration File is located
+ *	@finiver: return value for CF [fini] version
+ *	@finicsum: return value for CF [fini] checksum
+ *	@cfcsum: return value for CF computed checksum
+ *
+ *	Issue a command to get the firmware to process the Configuration
+ *	File located at the specified mtype/maddress.  If the Configuration
+ *	File is processed successfully and return value pointers are
+ *	provided, the Configuration File "[fini]" section version and
+ *	checksum values will be returned along with the computed checksum.
+ *	It's up to the caller to decide how it wants to respond to the
+ *	checksums not matching, but it is recommended that a prominent
+ *	warning be emitted in order to help people rapidly identify changed
+ *	or corrupted Configuration Files.
+ *
+ *	Also note that it's possible to modify things like "niccaps",
+ *	"toecaps", etc. between processing the Configuration File and telling
+ *	the firmware to use the new configuration.  Callers that want to do
+ *	this will need to "hand-roll" their own CAPS_CONFIG commands.
+ */
+int t4_fw_config_file(struct adapter *adap, unsigned int mbox,
+		      unsigned int mtype, unsigned int maddr,
+		      u32 *finiver, u32 *finicsum, u32 *cfcsum)
+{
+	struct fw_caps_config_cmd caps_cmd;
+	int ret;
+
+	/*
+	 * Tell the firmware to process the indicated Configuration File.
+	 * If there are no errors and the caller has provided return value
+	 * pointers for the [fini] section version, checksum and computed
+	 * checksum, pass those back to the caller.
+	 */
+	memset(&caps_cmd, 0, sizeof(caps_cmd));
+	caps_cmd.op_to_write =
+		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+		      FW_CMD_REQUEST |
+		      FW_CMD_READ);
+	caps_cmd.retval_len16 =
+		htonl(FW_CAPS_CONFIG_CMD_CFVALID |
+		      FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
+		      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
+		      FW_LEN16(caps_cmd));
+	ret = t4_wr_mbox(adap, mbox, &caps_cmd, sizeof(caps_cmd), &caps_cmd);
+	if (ret < 0)
+		return ret;
+
+	if (finiver)
+		*finiver = ntohl(caps_cmd.finiver);
+	if (finicsum)
+		*finicsum = ntohl(caps_cmd.finicsum);
+	if (cfcsum)
+		*cfcsum = ntohl(caps_cmd.cfcsum);
+
+	/*
+	 * And now tell the firmware to use the configuration we just loaded.
+	 */
+	caps_cmd.op_to_write =
+		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+		      FW_CMD_REQUEST |
+		      FW_CMD_WRITE);
+	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	return t4_wr_mbox(adap, mbox, &caps_cmd, sizeof(caps_cmd), NULL);
+}
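
As the comment above recommends, a caller should compare the returned [fini]
checksum against the computed one and warn loudly on a mismatch.  A minimal
sketch (using adap->pdev_dev for logging is an assumption here):

	u32 finiver, finicsum, cfcsum;
	int ret;

	ret = t4_fw_config_file(adap, mbox, mtype, maddr,
				&finiver, &finicsum, &cfcsum);
	if (ret == 0 && finicsum != cfcsum)
		dev_warn(adap->pdev_dev,
			 "Configuration File checksum mismatch: [fini] %#x, computed %#x\n",
			 finicsum, cfcsum);
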
+
+/**
+ *	t4_fixup_host_params - fix up host-dependent parameters
+ *	@adap: the adapter
+ *	@page_size: the host's Base Page Size
+ *	@cache_line_size: the host's Cache Line Size
+ *
+ *	Various registers in T4 contain values which are dependent on the
+ *	host's Base Page and Cache Line Sizes.  This function will fix all of
+ *	those registers with the appropriate values as passed in ...
+ */
+int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
+			 unsigned int cache_line_size)
+{
+	unsigned int page_shift = fls(page_size) - 1;
+	unsigned int sge_hps = page_shift - 10;
+	unsigned int stat_len = cache_line_size > 64 ? 128 : 64;
+	unsigned int fl_align = cache_line_size < 32 ? 32 : cache_line_size;
+	unsigned int fl_align_log = fls(fl_align) - 1;
+
+	t4_write_reg(adap, SGE_HOST_PAGE_SIZE,
+		     HOSTPAGESIZEPF0(sge_hps) |
+		     HOSTPAGESIZEPF1(sge_hps) |
+		     HOSTPAGESIZEPF2(sge_hps) |
+		     HOSTPAGESIZEPF3(sge_hps) |
+		     HOSTPAGESIZEPF4(sge_hps) |
+		     HOSTPAGESIZEPF5(sge_hps) |
+		     HOSTPAGESIZEPF6(sge_hps) |
+		     HOSTPAGESIZEPF7(sge_hps));
+
+	t4_set_reg_field(adap, SGE_CONTROL,
+			 INGPADBOUNDARY(INGPADBOUNDARY_MASK) |
+			 EGRSTATUSPAGESIZE_MASK,
+			 INGPADBOUNDARY(fl_align_log - 5) |
+			 EGRSTATUSPAGESIZE(stat_len != 64));
+
+	/*
+	 * Adjust various SGE Free List Host Buffer Sizes.
+	 *
+	 * This is something of a crock since we're using fixed indices into
+	 * the array which are also known by the sge.c code and the T4
+	 * Firmware Configuration File.  We need to come up with a much better
+	 * approach to managing this array.  For now, the first four entries
+	 * are:
+	 *
+	 *   0: Host Page Size
+	 *   1: 64KB
+	 *   2: Buffer size corresponding to 1500 byte MTU (unpacked mode)
+	 *   3: Buffer size corresponding to 9000 byte MTU (unpacked mode)
+	 *
+	 * For the single-MTU buffers in unpacked mode we need to include
+	 * space for the SGE Control Packet Shift, 14-byte Ethernet header,
+	 * possible 4-byte VLAN tag, all rounded up to the next Ingress Packet
+	 * Padding boundary.  All of these are accommodated in the Factory
+	 * Default Firmware Configuration File but we need to adjust it for
+	 * this host's cache line size.
+	 */
+	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, page_size);
+	t4_write_reg(adap, SGE_FL_BUFFER_SIZE2,
+		     (t4_read_reg(adap, SGE_FL_BUFFER_SIZE2) + fl_align-1)
+		     & ~(fl_align-1));
+	t4_write_reg(adap, SGE_FL_BUFFER_SIZE3,
+		     (t4_read_reg(adap, SGE_FL_BUFFER_SIZE3) + fl_align-1)
+		     & ~(fl_align-1));
+
+	t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(page_shift - 12));
+
+	return 0;
+}
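
The expected call site simply passes the kernel's constants for this host,
e.g. (a sketch; error handling elided):

	/* Fix up registers that depend on host geometry; on x86-64 this is
	 * typically PAGE_SIZE = 4096 and L1_CACHE_BYTES = 64.
	 */
	t4_fixup_host_params(adap, PAGE_SIZE, L1_CACHE_BYTES);
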
+
+/**
+ *	t4_fw_initialize - ask FW to initialize the device
+ *	@adap: the adapter
+ *	@mbox: mailbox to use for the FW command
+ *
+ *	Issues a command to FW to partially initialize the device.  This
+ *	performs initialization that generally doesn't depend on user input.
+ */
+int t4_fw_initialize(struct adapter *adap, unsigned int mbox)
+{
+	struct fw_initialize_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	INIT_CMD(c, INITIALIZE, WRITE);
+	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+}
+
+/**
  *	t4_query_params - query FW or device parameters
  *	@adap: the adapter
  *	@mbox: mailbox to use for the FW command
@@ -2835,10 +3514,6 @@ int __devinit t4_prep_adapter(struct adapter *adapter)
 		return ret;
 	}
 
-	ret = get_vpd_params(adapter, &adapter->params.vpd);
-	if (ret < 0)
-		return ret;
-
 	init_cong_ctrl(adapter->params.a_wnd, adapter->params.b_wnd);
 
 	/*
@@ -2846,6 +3521,7 @@ int __devinit t4_prep_adapter(struct adapter *adapter)
 	 */
 	adapter->params.nports = 1;
 	adapter->params.portvec = 1;
+	adapter->params.vpd.cclk = 50000;
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index c26b455f37de..f534ed7e10e9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -58,6 +58,7 @@ enum {
 
 enum {
 	SF_PAGE_SIZE = 256,           /* serial flash page size */
+	SF_SEC_SIZE = 64 * 1024,      /* serial flash sector size */
 };
 
 enum { RSP_TYPE_FLBUF, RSP_TYPE_CPL, RSP_TYPE_INTR }; /* response entry types */
@@ -137,4 +138,83 @@ struct rsp_ctrl {
 #define QINTR_CNT_EN       0x1
 #define QINTR_TIMER_IDX(x) ((x) << 1)
 #define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7)
+
+/*
+ * Flash layout.
+ */
+#define FLASH_START(start)	((start) * SF_SEC_SIZE)
+#define FLASH_MAX_SIZE(nsecs)	((nsecs) * SF_SEC_SIZE)
+
+enum {
+	/*
+	 * Various Expansion-ROM boot images, etc.
+	 */
+	FLASH_EXP_ROM_START_SEC = 0,
+	FLASH_EXP_ROM_NSECS = 6,
+	FLASH_EXP_ROM_START = FLASH_START(FLASH_EXP_ROM_START_SEC),
+	FLASH_EXP_ROM_MAX_SIZE = FLASH_MAX_SIZE(FLASH_EXP_ROM_NSECS),
+
+	/*
+	 * iSCSI Boot Firmware Table (iBFT) and other driver-related
+	 * parameters ...
+	 */
+	FLASH_IBFT_START_SEC = 6,
+	FLASH_IBFT_NSECS = 1,
+	FLASH_IBFT_START = FLASH_START(FLASH_IBFT_START_SEC),
+	FLASH_IBFT_MAX_SIZE = FLASH_MAX_SIZE(FLASH_IBFT_NSECS),
+
+	/*
+	 * Boot configuration data.
+	 */
+	FLASH_BOOTCFG_START_SEC = 7,
+	FLASH_BOOTCFG_NSECS = 1,
+	FLASH_BOOTCFG_START = FLASH_START(FLASH_BOOTCFG_START_SEC),
+	FLASH_BOOTCFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_BOOTCFG_NSECS),
+
+	/*
+	 * Location of firmware image in FLASH.
+	 */
+	FLASH_FW_START_SEC = 8,
+	FLASH_FW_NSECS = 8,
+	FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC),
+	FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS),
+
+	/*
+	 * iSCSI persistent/crash information.
+	 */
+	FLASH_ISCSI_CRASH_START_SEC = 29,
+	FLASH_ISCSI_CRASH_NSECS = 1,
+	FLASH_ISCSI_CRASH_START = FLASH_START(FLASH_ISCSI_CRASH_START_SEC),
+	FLASH_ISCSI_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_ISCSI_CRASH_NSECS),
+
+	/*
+	 * FCoE persistent/crash information.
+	 */
+	FLASH_FCOE_CRASH_START_SEC = 30,
+	FLASH_FCOE_CRASH_NSECS = 1,
+	FLASH_FCOE_CRASH_START = FLASH_START(FLASH_FCOE_CRASH_START_SEC),
+	FLASH_FCOE_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FCOE_CRASH_NSECS),
+
+	/*
+	 * Location of Firmware Configuration File in FLASH.  Since the FPGA
+	 * "FLASH" is smaller we need to store the Configuration File in a
+	 * different location -- which will overlap the end of the firmware
+	 * image if firmware ever gets that large ...
+	 */
+	FLASH_CFG_START_SEC = 31,
+	FLASH_CFG_NSECS = 1,
+	FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC),
+	FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS),
+
+	FLASH_FPGA_CFG_START_SEC = 15,
+	FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC),
+
+	/*
+	 * Sectors 32-63 are reserved for FLASH failover.
+	 */
+};
+
+#undef FLASH_START
+#undef FLASH_MAX_SIZE
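
Concretely, with SF_SEC_SIZE = 64 KB the firmware image starts at sector 8,
i.e. 8 * 65536 = 0x80000, and may occupy up to 512 KB; the Configuration
File sector 31 sits at 0x1f0000, while the FPGA variant at sector 15
(0xf0000) explains the possible overlap noted above.  A trivial helper one
could write against this layout (illustrative, not part of the patch):

	/* Map a flash byte offset back to its containing 64 KB sector,
	 * e.g. FLASH_FW_START (0x80000) -> sector 8.
	 */
	static inline unsigned int flash_offset_to_sector(unsigned int offset)
	{
		return offset / SF_SEC_SIZE;
	}
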
+
 #endif /* __T4_HW_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 111fc323f155..a1a8b57200f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -86,10 +86,17 @@
 #define  CIDXINC_SHIFT     0
 #define  CIDXINC(x)        ((x) << CIDXINC_SHIFT)
 
+#define X_RXPKTCPLMODE_SPLIT     1
+#define X_INGPADBOUNDARY_SHIFT 5
+
 #define SGE_CONTROL 0x1008
 #define  DCASYSTYPE             0x00080000U
-#define  RXPKTCPLMODE           0x00040000U
-#define  EGRSTATUSPAGESIZE      0x00020000U
+#define  RXPKTCPLMODE_MASK      0x00040000U
+#define  RXPKTCPLMODE_SHIFT     18
+#define  RXPKTCPLMODE(x)        ((x) << RXPKTCPLMODE_SHIFT)
+#define  EGRSTATUSPAGESIZE_MASK  0x00020000U
+#define  EGRSTATUSPAGESIZE_SHIFT 17
+#define  EGRSTATUSPAGESIZE(x)    ((x) << EGRSTATUSPAGESIZE_SHIFT)
 #define  PKTSHIFT_MASK          0x00001c00U
 #define  PKTSHIFT_SHIFT         10
 #define  PKTSHIFT(x)            ((x) << PKTSHIFT_SHIFT)
@@ -108,6 +115,35 @@
 #define  GLOBALENABLE           0x00000001U
 
 #define SGE_HOST_PAGE_SIZE 0x100c
+
+#define  HOSTPAGESIZEPF7_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF7_SHIFT  28
+#define  HOSTPAGESIZEPF7(x)     ((x) << HOSTPAGESIZEPF7_SHIFT)
+
+#define  HOSTPAGESIZEPF6_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF6_SHIFT  24
+#define  HOSTPAGESIZEPF6(x)     ((x) << HOSTPAGESIZEPF6_SHIFT)
+
+#define  HOSTPAGESIZEPF5_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF5_SHIFT  20
+#define  HOSTPAGESIZEPF5(x)     ((x) << HOSTPAGESIZEPF5_SHIFT)
+
+#define  HOSTPAGESIZEPF4_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF4_SHIFT  16
+#define  HOSTPAGESIZEPF4(x)     ((x) << HOSTPAGESIZEPF4_SHIFT)
+
+#define  HOSTPAGESIZEPF3_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF3_SHIFT  12
+#define  HOSTPAGESIZEPF3(x)     ((x) << HOSTPAGESIZEPF3_SHIFT)
+
+#define  HOSTPAGESIZEPF2_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF2_SHIFT  8
+#define  HOSTPAGESIZEPF2(x)     ((x) << HOSTPAGESIZEPF2_SHIFT)
+
+#define  HOSTPAGESIZEPF1_MASK   0x0000000fU
+#define  HOSTPAGESIZEPF1_SHIFT  4
+#define  HOSTPAGESIZEPF1(x)     ((x) << HOSTPAGESIZEPF1_SHIFT)
+
 #define  HOSTPAGESIZEPF0_MASK   0x0000000fU
 #define  HOSTPAGESIZEPF0_SHIFT  0
 #define  HOSTPAGESIZEPF0(x)     ((x) << HOSTPAGESIZEPF0_SHIFT)
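
Each PF's 4-bit field holds log2(page size) - 10, matching the sge_hps
computation in t4_fixup_host_params() above.  An encoding sketch (the helper
name is illustrative):

	/* 4 KB pages -> fls(4096) - 1 - 10 = 2; 64 KB pages -> 6. */
	static inline u32 encode_hostpagesize_pf0(unsigned int page_size)
	{
		return HOSTPAGESIZEPF0(fls(page_size) - 1 - 10);
	}
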
@@ -155,6 +191,8 @@
 #define SGE_INT_ENABLE3 0x1040
 #define SGE_FL_BUFFER_SIZE0 0x1044
 #define SGE_FL_BUFFER_SIZE1 0x1048
+#define SGE_FL_BUFFER_SIZE2 0x104c
+#define SGE_FL_BUFFER_SIZE3 0x1050
 #define SGE_INGRESS_RX_THRESHOLD 0x10a0
 #define  THRESHOLD_0_MASK   0x3f000000U
 #define  THRESHOLD_0_SHIFT  24
@@ -173,6 +211,12 @@
 #define  THRESHOLD_3(x)     ((x) << THRESHOLD_3_SHIFT)
 #define  THRESHOLD_3_GET(x) (((x) & THRESHOLD_3_MASK) >> THRESHOLD_3_SHIFT)
 
+#define SGE_CONM_CTRL 0x1094
+#define  EGRTHRESHOLD_MASK   0x00003f00U
+#define  EGRTHRESHOLDshift   8
+#define  EGRTHRESHOLD(x)     ((x) << EGRTHRESHOLDshift)
+#define  EGRTHRESHOLD_GET(x) (((x) & EGRTHRESHOLD_MASK) >> EGRTHRESHOLDshift)
+
 #define SGE_TIMER_VALUE_0_AND_1 0x10b8
 #define  TIMERVALUE0_MASK   0xffff0000U
 #define  TIMERVALUE0_SHIFT  16
@@ -184,64 +228,54 @@
 #define  TIMERVALUE1_GET(x) (((x) & TIMERVALUE1_MASK) >> TIMERVALUE1_SHIFT)
 
 #define SGE_TIMER_VALUE_2_AND_3 0x10bc
+#define  TIMERVALUE2_MASK   0xffff0000U
+#define  TIMERVALUE2_SHIFT  16
+#define  TIMERVALUE2(x)     ((x) << TIMERVALUE2_SHIFT)
+#define  TIMERVALUE2_GET(x) (((x) & TIMERVALUE2_MASK) >> TIMERVALUE2_SHIFT)
+#define  TIMERVALUE3_MASK   0x0000ffffU
+#define  TIMERVALUE3_SHIFT  0
+#define  TIMERVALUE3(x)     ((x) << TIMERVALUE3_SHIFT)
+#define  TIMERVALUE3_GET(x) (((x) & TIMERVALUE3_MASK) >> TIMERVALUE3_SHIFT)
+
 #define SGE_TIMER_VALUE_4_AND_5 0x10c0
+#define  TIMERVALUE4_MASK   0xffff0000U
+#define  TIMERVALUE4_SHIFT  16
+#define  TIMERVALUE4(x)     ((x) << TIMERVALUE4_SHIFT)
+#define  TIMERVALUE4_GET(x) (((x) & TIMERVALUE4_MASK) >> TIMERVALUE4_SHIFT)
+#define  TIMERVALUE5_MASK   0x0000ffffU
+#define  TIMERVALUE5_SHIFT  0
+#define  TIMERVALUE5(x)     ((x) << TIMERVALUE5_SHIFT)
+#define  TIMERVALUE5_GET(x) (((x) & TIMERVALUE5_MASK) >> TIMERVALUE5_SHIFT)
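
Each of these registers packs two 16-bit timer values; decoding follows the
usual _GET pattern, e.g. (sketch):

	u32 v = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3);
	unsigned int tv2 = TIMERVALUE2_GET(v);	/* bits 31:16 */
	unsigned int tv3 = TIMERVALUE3_GET(v);	/* bits 15:0  */
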
+
 #define SGE_DEBUG_INDEX 0x10cc
 #define SGE_DEBUG_DATA_HIGH 0x10d0
 #define SGE_DEBUG_DATA_LOW 0x10d4
 #define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4
 
-#define S_LP_INT_THRESH    12
-#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH)
 #define S_HP_INT_THRESH    28
+#define M_HP_INT_THRESH 0xfU
 #define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH)
+#define M_HP_COUNT 0x7ffU
+#define S_HP_COUNT 16
+#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT)
+#define S_LP_INT_THRESH    12
+#define M_LP_INT_THRESH 0xfU
+#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH)
+#define M_LP_COUNT 0x7ffU
+#define S_LP_COUNT 0
+#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT)
 #define A_SGE_DBFIFO_STATUS 0x10a4
 
 #define S_ENABLE_DROP    13
 #define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP)
 #define F_ENABLE_DROP    V_ENABLE_DROP(1U)
-#define A_SGE_DOORBELL_CONTROL 0x10a8
-
-#define A_SGE_CTXT_CMD 0x11fc
-#define A_SGE_DBQ_CTXT_BADDR 0x1084
-
-#define A_SGE_PF_KDOORBELL 0x0
-
-#define S_QID 15
-#define V_QID(x) ((x) << S_QID)
-
-#define S_PIDX 0
-#define V_PIDX(x) ((x) << S_PIDX)
-
-#define M_LP_COUNT 0x7ffU
-#define S_LP_COUNT 0
-#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT)
-
-#define M_HP_COUNT 0x7ffU
-#define S_HP_COUNT 16
-#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT)
-
-#define A_SGE_INT_ENABLE3 0x1040
-
-#define S_DBFIFO_HP_INT 8
-#define V_DBFIFO_HP_INT(x) ((x) << S_DBFIFO_HP_INT)
-#define F_DBFIFO_HP_INT V_DBFIFO_HP_INT(1U)
-
-#define S_DBFIFO_LP_INT 7
-#define V_DBFIFO_LP_INT(x) ((x) << S_DBFIFO_LP_INT)
-#define F_DBFIFO_LP_INT V_DBFIFO_LP_INT(1U)
-
 #define S_DROPPED_DB 0
 #define V_DROPPED_DB(x) ((x) << S_DROPPED_DB)
 #define F_DROPPED_DB V_DROPPED_DB(1U)
+#define A_SGE_DOORBELL_CONTROL 0x10a8
 
-#define S_ERR_DROPPED_DB 18
-#define V_ERR_DROPPED_DB(x) ((x) << S_ERR_DROPPED_DB)
-#define F_ERR_DROPPED_DB V_ERR_DROPPED_DB(1U)
-
-#define A_PCIE_MEM_ACCESS_OFFSET 0x306c
-
-#define M_HP_INT_THRESH 0xfU
-#define M_LP_INT_THRESH 0xfU
+#define A_SGE_CTXT_CMD 0x11fc
+#define A_SGE_DBQ_CTXT_BADDR 0x1084
 
 #define PCIE_PF_CLI 0x44
 #define PCIE_INT_CAUSE 0x3004
@@ -287,6 +321,8 @@
 #define  WINDOW(x)       ((x) << WINDOW_SHIFT)
 #define PCIE_MEM_ACCESS_OFFSET 0x306c
 
+#define PCIE_FW 0x30b8
+
 #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS 0x5908
 #define  RNPP 0x80000000U
 #define  RPCP 0x20000000U
@@ -364,7 +400,7 @@
 #define  MEM_WRAP_CLIENT_NUM_MASK   0x0000000fU
 #define  MEM_WRAP_CLIENT_NUM_SHIFT  0
 #define  MEM_WRAP_CLIENT_NUM_GET(x) (((x) & MEM_WRAP_CLIENT_NUM_MASK) >> MEM_WRAP_CLIENT_NUM_SHIFT)
-
+#define MA_PCIE_FW 0x30b8
 #define MA_PARITY_ERROR_STATUS 0x77f4
 
 #define EDC_0_BASE_ADDR 0x7900
@@ -385,6 +421,7 @@
 
 #define CIM_BOOT_CFG 0x7b00
 #define  BOOTADDR_MASK 0xffffff00U
+#define  UPCRST        0x1U
 
 #define CIM_PF_MAILBOX_DATA 0x240
 #define CIM_PF_MAILBOX_CTRL 0x280
@@ -457,6 +494,13 @@
 #define  VLANEXTENABLE_MASK  0x0000f000U
 #define  VLANEXTENABLE_SHIFT 12
 
+#define TP_GLOBAL_CONFIG 0x7d08
+#define  FIVETUPLELOOKUP_SHIFT  17
+#define  FIVETUPLELOOKUP_MASK   0x00060000U
+#define  FIVETUPLELOOKUP(x)     ((x) << FIVETUPLELOOKUP_SHIFT)
+#define  FIVETUPLELOOKUP_GET(x) (((x) & FIVETUPLELOOKUP_MASK) >> \
+				FIVETUPLELOOKUP_SHIFT)
+
 #define TP_PARA_REG2 0x7d68
 #define  MAXRXDATA_MASK    0xffff0000U
 #define  MAXRXDATA_SHIFT   16
@@ -466,8 +510,47 @@
 #define  TIMERRESOLUTION_MASK   0x00ff0000U
 #define  TIMERRESOLUTION_SHIFT  16
 #define  TIMERRESOLUTION_GET(x) (((x) & TIMERRESOLUTION_MASK) >> TIMERRESOLUTION_SHIFT)
+#define  DELAYEDACKRESOLUTION_MASK 0x000000ffU
+#define  DELAYEDACKRESOLUTION_SHIFT     0
+#define  DELAYEDACKRESOLUTION_GET(x) \
+	(((x) & DELAYEDACKRESOLUTION_MASK) >> DELAYEDACKRESOLUTION_SHIFT)
 
 #define TP_SHIFT_CNT 0x7dc0
+#define  SYNSHIFTMAX_SHIFT         24
+#define  SYNSHIFTMAX_MASK          0xff000000U
+#define  SYNSHIFTMAX(x)            ((x) << SYNSHIFTMAX_SHIFT)
+#define  SYNSHIFTMAX_GET(x)        (((x) & SYNSHIFTMAX_MASK) >> \
+				   SYNSHIFTMAX_SHIFT)
+#define  RXTSHIFTMAXR1_SHIFT       20
+#define  RXTSHIFTMAXR1_MASK        0x00f00000U
+#define  RXTSHIFTMAXR1(x)          ((x) << RXTSHIFTMAXR1_SHIFT)
+#define  RXTSHIFTMAXR1_GET(x)      (((x) & RXTSHIFTMAXR1_MASK) >> \
+				   RXTSHIFTMAXR1_SHIFT)
+#define  RXTSHIFTMAXR2_SHIFT       16
+#define  RXTSHIFTMAXR2_MASK        0x000f0000U
+#define  RXTSHIFTMAXR2(x)          ((x) << RXTSHIFTMAXR2_SHIFT)
+#define  RXTSHIFTMAXR2_GET(x)      (((x) & RXTSHIFTMAXR2_MASK) >> \
+				   RXTSHIFTMAXR2_SHIFT)
+#define  PERSHIFTBACKOFFMAX_SHIFT  12
+#define  PERSHIFTBACKOFFMAX_MASK   0x0000f000U
+#define  PERSHIFTBACKOFFMAX(x)     ((x) << PERSHIFTBACKOFFMAX_SHIFT)
+#define  PERSHIFTBACKOFFMAX_GET(x) (((x) & PERSHIFTBACKOFFMAX_MASK) >> \
+				   PERSHIFTBACKOFFMAX_SHIFT)
+#define  PERSHIFTMAX_SHIFT         8
+#define  PERSHIFTMAX_MASK          0x00000f00U
+#define  PERSHIFTMAX(x)            ((x) << PERSHIFTMAX_SHIFT)
+#define  PERSHIFTMAX_GET(x)        (((x) & PERSHIFTMAX_MASK) >> \
+				   PERSHIFTMAX_SHIFT)
+#define  KEEPALIVEMAXR1_SHIFT      4
+#define  KEEPALIVEMAXR1_MASK       0x000000f0U
+#define  KEEPALIVEMAXR1(x)         ((x) << KEEPALIVEMAXR1_SHIFT)
+#define  KEEPALIVEMAXR1_GET(x)     (((x) & KEEPALIVEMAXR1_MASK) >> \
+				   KEEPALIVEMAXR1_SHIFT)
+#define KEEPALIVEMAXR2_SHIFT       0
+#define KEEPALIVEMAXR2_MASK        0x0000000fU
+#define KEEPALIVEMAXR2(x)          ((x) << KEEPALIVEMAXR2_SHIFT)
+#define KEEPALIVEMAXR2_GET(x)      (((x) & KEEPALIVEMAXR2_MASK) >> \
+				   KEEPALIVEMAXR2_SHIFT)
 
 #define TP_CCTRL_TABLE 0x7ddc
 #define TP_MTU_TABLE 0x7de4
@@ -501,6 +584,20 @@
 #define TP_INT_CAUSE 0x7e74
 #define  FLMTXFLSTEMPTY 0x40000000U
 
+#define TP_VLAN_PRI_MAP 0x140
+#define  FRAGMENTATION_SHIFT 9
+#define  FRAGMENTATION_MASK  0x00000200U
+#define  MPSHITTYPE_MASK     0x00000100U
+#define  MACMATCH_MASK       0x00000080U
+#define  ETHERTYPE_MASK      0x00000040U
+#define  PROTOCOL_MASK       0x00000020U
+#define  TOS_MASK            0x00000010U
+#define  VLAN_MASK           0x00000008U
+#define  VNIC_ID_MASK        0x00000004U
+#define  PORT_MASK           0x00000002U
+#define  FCOE_SHIFT          0
+#define  FCOE_MASK           0x00000001U
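
These single-bit masks describe which header fields participate in the
compressed filter tuple.  Given a pri_map value previously fetched through
the TP indirect-access registers (TP_VLAN_PRI_MAP is not directly readable),
a test looks like this sketch:

	/* Does VLAN participate in the compressed filter tuple? */
	bool vlan_in_tuple = (pri_map & VLAN_MASK) != 0;
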
+
 #define TP_INGRESS_CONFIG 0x141
 #define  VNIC                0x00000800U
 #define  CSUM_HAS_PSEUDO_HDR 0x00000400U
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ad53f796b574..a6364632b490 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -79,6 +79,8 @@ struct fw_wr_hdr {
 #define FW_WR_FLOWID(x)	((x) << 8)
 #define FW_WR_LEN16(x)	((x) << 0)
 
+#define HW_TPL_FR_MT_PR_IV_P_FC         0x32B
+
 struct fw_ulptx_wr {
 	__be32 op_to_compl;
 	__be32 flowid_len16;
@@ -155,6 +157,17 @@ struct fw_eth_tx_pkt_vm_wr {
 
 #define FW_CMD_MAX_TIMEOUT 3000
 
+/*
+ * If a host driver does a HELLO and discovers that there's already a MASTER
+ * selected, we may have to wait for that MASTER to finish issuing RESET,
+ * configuration and INITIALIZE commands.  Also, there's a possibility that
+ * our own HELLO may get lost if it happens right as the MASTER is issuing a
+ * RESET command, so we need to be willing to make a few retries of our HELLO.
+ */
+#define FW_CMD_HELLO_TIMEOUT	(3 * FW_CMD_MAX_TIMEOUT)
+#define FW_CMD_HELLO_RETRIES	3
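
A caller honoring these constants retries its HELLO a few times before
giving up, e.g. (sketch; t4_fw_hello()'s exact signature is an assumption
here):

	int retries, ret;

	for (retries = FW_CMD_HELLO_RETRIES; retries > 0; retries--) {
		ret = t4_fw_hello(adap, mbox, evt_mbox, master, &state);
		if (ret >= 0)
			break;
	}
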
+
 enum fw_cmd_opcodes {
 	FW_LDST_CMD                    = 0x01,
 	FW_RESET_CMD                   = 0x03,
@@ -304,7 +317,17 @@ struct fw_reset_cmd {
 	__be32 op_to_write;
 	__be32 retval_len16;
 	__be32 val;
-	__be32 r3;
+	__be32 halt_pkd;
+};
+
+#define FW_RESET_CMD_HALT_SHIFT    31
+#define FW_RESET_CMD_HALT_MASK     0x1
+#define FW_RESET_CMD_HALT(x)       ((x) << FW_RESET_CMD_HALT_SHIFT)
+#define FW_RESET_CMD_HALT_GET(x)  \
+	(((x) >> FW_RESET_CMD_HALT_SHIFT) & FW_RESET_CMD_HALT_MASK)
+
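The paired macros follow the shift-and-mask convention used throughout this
header; for example (sketch, mirroring the use in t4_fw_halt() in t4_hw.c):

	struct fw_reset_cmd c;
	int halted;

	c.val = htonl(PIORST | PIORSTMODE);
	c.halt_pkd = htonl(FW_RESET_CMD_HALT(1U));
	/* ... the flag can later be recovered from the captured word: */
	halted = FW_RESET_CMD_HALT_GET(ntohl(c.halt_pkd));
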
+enum fw_hellow_cmd {
+	fw_hello_cmd_stage_os		= 0x0
 };
 
 struct fw_hello_cmd {
@@ -315,8 +338,14 @@ struct fw_hello_cmd {
 #define FW_HELLO_CMD_INIT	    (1U << 30)
 #define FW_HELLO_CMD_MASTERDIS(x)   ((x) << 29)
 #define FW_HELLO_CMD_MASTERFORCE(x) ((x) << 28)
-#define FW_HELLO_CMD_MBMASTER(x)    ((x) << 24)
+#define FW_HELLO_CMD_MBMASTER_MASK   0xfU
+#define FW_HELLO_CMD_MBMASTER_SHIFT  24
+#define FW_HELLO_CMD_MBMASTER(x)     ((x) << FW_HELLO_CMD_MBMASTER_SHIFT)
+#define FW_HELLO_CMD_MBMASTER_GET(x) \
+	(((x) >> FW_HELLO_CMD_MBMASTER_SHIFT) & FW_HELLO_CMD_MBMASTER_MASK)
 #define FW_HELLO_CMD_MBASYNCNOT(x)  ((x) << 20)
+#define FW_HELLO_CMD_STAGE(x)       ((x) << 17)
+#define FW_HELLO_CMD_CLEARINIT      (1U << 16)
 	__be32 fwrev;
 };
 
@@ -401,6 +430,14 @@ enum fw_caps_config_fcoe {
 	FW_CAPS_CONFIG_FCOE_TARGET	= 0x00000002,
 };
 
+enum fw_memtype_cf {
+	FW_MEMTYPE_CF_EDC0		= 0x0,
+	FW_MEMTYPE_CF_EDC1		= 0x1,
+	FW_MEMTYPE_CF_EXTMEM		= 0x2,
+	FW_MEMTYPE_CF_FLASH		= 0x4,
+	FW_MEMTYPE_CF_INTERNAL		= 0x5,
+};
+
 struct fw_caps_config_cmd {
 	__be32 op_to_write;
 	__be32 retval_len16;
@@ -416,10 +453,15 @@ struct fw_caps_config_cmd {
 	__be16 r4;
 	__be16 iscsicaps;
 	__be16 fcoecaps;
-	__be32 r5;
-	__be64 r6;
+	__be32 cfcsum;
+	__be32 finiver;
+	__be32 finicsum;
 };
 
+#define FW_CAPS_CONFIG_CMD_CFVALID          (1U << 27)
+#define FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x)    ((x) << 24)
+#define FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) ((x) << 16)
+
 /*
  * params command mnemonics
  */
@@ -451,6 +493,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_INTVER_FCOE = 0x0A,
 	FW_PARAMS_PARAM_DEV_FWREV = 0x0B,
 	FW_PARAMS_PARAM_DEV_TPREV = 0x0C,
+	FW_PARAMS_PARAM_DEV_CF = 0x0D,
 };
 
 /*
@@ -492,6 +535,8 @@ enum fw_params_param_pfvf {
 	FW_PARAMS_PARAM_PFVF_IQFLINT_END = 0x2A,
 	FW_PARAMS_PARAM_PFVF_EQ_START	= 0x2B,
 	FW_PARAMS_PARAM_PFVF_EQ_END	= 0x2C,
+	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D,
+	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E
 };
 
 /*
@@ -507,8 +552,16 @@ enum fw_params_param_dmaq {
 
 #define FW_PARAMS_MNEM(x)      ((x) << 24)
 #define FW_PARAMS_PARAM_X(x)   ((x) << 16)
-#define FW_PARAMS_PARAM_Y(x)   ((x) << 8)
-#define FW_PARAMS_PARAM_Z(x)   ((x) << 0)
+#define FW_PARAMS_PARAM_Y_SHIFT  8
+#define FW_PARAMS_PARAM_Y_MASK   0xffU
+#define FW_PARAMS_PARAM_Y(x)     ((x) << FW_PARAMS_PARAM_Y_SHIFT)
+#define FW_PARAMS_PARAM_Y_GET(x) (((x) >> FW_PARAMS_PARAM_Y_SHIFT) &\
+		FW_PARAMS_PARAM_Y_MASK)
+#define FW_PARAMS_PARAM_Z_SHIFT  0
+#define FW_PARAMS_PARAM_Z_MASK   0xffU
+#define FW_PARAMS_PARAM_Z(x)     ((x) << FW_PARAMS_PARAM_Z_SHIFT)
+#define FW_PARAMS_PARAM_Z_GET(x) (((x) >> FW_PARAMS_PARAM_Z_SHIFT) &\
+		FW_PARAMS_PARAM_Z_MASK)
 #define FW_PARAMS_PARAM_XYZ(x) ((x) << 0)
 #define FW_PARAMS_PARAM_YZ(x)  ((x) << 0)
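
A parameter identifier is composed by OR-ing these fields together, e.g. to
query the new device Configuration File parameter (sketch; assumes the
existing FW_PARAMS_MNEM_DEV mnemonic and the t4_query_params() helper):

	u32 param = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
		    FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF);
	u32 val;

	ret = t4_query_params(adap, mbox, pf, vf, 1, &param, &val);
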
 
@@ -1599,6 +1652,16 @@ struct fw_debug_cmd {
 	} u;
 };
 
+#define FW_PCIE_FW_ERR           (1U << 31)
+#define FW_PCIE_FW_INIT          (1U << 30)
+#define FW_PCIE_FW_HALT          (1U << 29)
+#define FW_PCIE_FW_MASTER_VLD    (1U << 15)
+#define FW_PCIE_FW_MASTER_MASK   0x7
+#define FW_PCIE_FW_MASTER_SHIFT  12
+#define FW_PCIE_FW_MASTER(x)     ((x) << FW_PCIE_FW_MASTER_SHIFT)
+#define FW_PCIE_FW_MASTER_GET(x) (((x) >> FW_PCIE_FW_MASTER_SHIFT) & \
+				 FW_PCIE_FW_MASTER_MASK)
+
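These fields decode the PCIE_FW firmware-status register; a sketch of the
typical checks (mirroring how t4_fw_restart() polls FW_PCIE_FW_HALT, with
adap->pdev_dev for logging assumed):

	u32 pcie_fw = t4_read_reg(adap, PCIE_FW);

	if (pcie_fw & FW_PCIE_FW_ERR)
		dev_err(adap->pdev_dev, "firmware reports error\n");
	if (pcie_fw & FW_PCIE_FW_MASTER_VLD)
		master_pf = FW_PCIE_FW_MASTER_GET(pcie_fw);
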
 struct fw_hdr {
 	u8 ver;
 	u8 reserved1;
@@ -1613,7 +1676,11 @@ struct fw_hdr {
 	u8 intfver_iscsi;
 	u8 intfver_fcoe;
 	u8 reserved2;
-	__be32  reserved3[27];
+	__u32   reserved3;
+	__u32   reserved4;
+	__u32   reserved5;
+	__be32  flags;
+	__be32  reserved6[23];
 };
 
 #define FW_HDR_FW_VER_MAJOR_GET(x) (((x) >> 24) & 0xff)
@@ -1621,18 +1688,8 @@ struct fw_hdr {
 #define FW_HDR_FW_VER_MICRO_GET(x) (((x) >> 8) & 0xff)
 #define FW_HDR_FW_VER_BUILD_GET(x) (((x) >> 0) & 0xff)
 
-#define S_FW_CMD_OP 24
-#define V_FW_CMD_OP(x) ((x) << S_FW_CMD_OP)
-
-#define S_FW_CMD_REQUEST 23
-#define V_FW_CMD_REQUEST(x) ((x) << S_FW_CMD_REQUEST)
-#define F_FW_CMD_REQUEST V_FW_CMD_REQUEST(1U)
-
-#define S_FW_CMD_WRITE 21
-#define V_FW_CMD_WRITE(x) ((x) << S_FW_CMD_WRITE)
-#define F_FW_CMD_WRITE V_FW_CMD_WRITE(1U)
-
-#define S_FW_LDST_CMD_ADDRSPACE 0
-#define V_FW_LDST_CMD_ADDRSPACE(x) ((x) << S_FW_LDST_CMD_ADDRSPACE)
+enum fw_hdr_flags {
+	FW_HDR_FLAGS_RESET_HALT = 0x00000001,
+};
 
 #endif /* _T4FW_INTERFACE_H_ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 8877fbfefb63..f16745f4b36b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2421,7 +2421,7 @@ int t4vf_sge_init(struct adapter *adapter)
 			fl0, fl1);
 		return -EINVAL;
 	}
-	if ((sge_params->sge_control & RXPKTCPLMODE) == 0) {
+	if ((sge_params->sge_control & RXPKTCPLMODE_MASK) == 0) {
 		dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n");
 		return -EINVAL;
 	}
@@ -2431,7 +2431,8 @@ int t4vf_sge_init(struct adapter *adapter)
 	 */
 	if (fl1)
 		FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT;
-	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64);
+	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK)
+		    ? 128 : 64);
 	PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control);
 	FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
 			 SGE_INGPADBOUNDARY_SHIFT);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index d266c86a53f7..cf4c05bdf5fe 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -110,6 +110,7 @@ static inline char *nic_name(struct pci_dev *pdev)
 #define MAX_RX_POST		BE_NAPI_WEIGHT /* Frags posted at a time */
 #define RX_FRAGS_REFILL_WM	(RX_Q_LEN - MAX_RX_POST)
 
+#define MAX_VFS			30 /* Max VFs supported by BE3 FW */
 #define FW_VER_LEN		32
 
 struct be_dma_mem {
@@ -336,7 +337,6 @@ struct phy_info {
 	u16 auto_speeds_supported;
 	u16 fixed_speeds_supported;
 	int link_speed;
-	int forced_port_speed;
 	u32 dac_cable_len;
 	u32 advertising;
 	u32 supported;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 8c63d06ab12b..af60bb26e330 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -120,7 +120,7 @@ static int be_mcc_compl_process(struct be_adapter *adapter,
 
 		if (compl_status == MCC_STATUS_UNAUTHORIZED_REQUEST) {
 			dev_warn(&adapter->pdev->dev,
-				 "opcode %d-%d is not permitted\n",
+				 "VF is not privileged to issue opcode %d-%d\n",
 				 opcode, subsystem);
 		} else {
 			extd_status = (compl->status >> CQE_STATUS_EXTD_SHIFT) &
@@ -165,14 +165,13 @@ static void be_async_grp5_cos_priority_process(struct be_adapter *adapter,
 	}
 }
 
-/* Grp5 QOS Speed evt */
+/* Grp5 QOS Speed evt: qos_link_speed is in units of 10 Mbps */
 static void be_async_grp5_qos_speed_process(struct be_adapter *adapter,
 		struct be_async_event_grp5_qos_link_speed *evt)
 {
-	if (evt->physical_port == adapter->port_num) {
-		/* qos_link_speed is in units of 10 Mbps */
-		adapter->phy.link_speed = evt->qos_link_speed * 10;
-	}
+	if (adapter->phy.link_speed >= 0 &&
+	    evt->physical_port == adapter->port_num)
+		adapter->phy.link_speed = le16_to_cpu(evt->qos_link_speed) * 10;
 }
 
 /*Grp5 PVID evt*/
@@ -717,7 +716,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
 
 /* Use MCC */
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
-			u8 type, bool permanent, u32 if_handle, u32 pmac_id)
+			  bool permanent, u32 if_handle, u32 pmac_id)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_mac_query *req;
@@ -734,7 +733,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
 		OPCODE_COMMON_NTWK_MAC_QUERY, sizeof(*req), wrb, NULL);
-	req->type = type;
+	req->type = MAC_ADDRESS_TYPE_NETWORK;
 	if (permanent) {
 		req->permanent = 1;
 	} else {
@@ -1326,9 +1325,28 @@ err:
 	return status;
 }
 
-/* Uses synchronous mcc */
-int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
-			     u16 *link_speed, u8 *link_status, u32 dom)
+static int be_mac_to_link_speed(int mac_speed)
+{
+	switch (mac_speed) {
+	case PHY_LINK_SPEED_ZERO:
+		return 0;
+	case PHY_LINK_SPEED_10MBPS:
+		return 10;
+	case PHY_LINK_SPEED_100MBPS:
+		return 100;
+	case PHY_LINK_SPEED_1GBPS:
+		return 1000;
+	case PHY_LINK_SPEED_10GBPS:
+		return 10000;
+	}
+	return 0;
+}
+
+/* Uses synchronous mcc
+ * Returns link_speed in Mbps
+ */
+int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
+			     u8 *link_status, u32 dom)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_link_status *req;
@@ -1357,11 +1375,13 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
 		struct be_cmd_resp_link_status *resp = embedded_payload(wrb);
-		if (resp->mac_speed != PHY_LINK_SPEED_ZERO) {
-			if (link_speed)
-				*link_speed = le16_to_cpu(resp->link_speed);
-			if (mac_speed)
-				*mac_speed = resp->mac_speed;
+		if (link_speed) {
+			*link_speed = resp->link_speed ?
+				      le16_to_cpu(resp->link_speed) * 10 :
+				      be_mac_to_link_speed(resp->mac_speed);
+
+			if (!resp->logical_link_status)
+				*link_speed = 0;
 		}
 		if (link_status)
 			*link_status = resp->logical_link_status;
@@ -2405,6 +2425,9 @@ int be_cmd_req_native_mode(struct be_adapter *adapter)
 		struct be_cmd_resp_set_func_cap *resp = embedded_payload(wrb);
 		adapter->be3_native = le32_to_cpu(resp->cap_flags) &
 					CAPABILITY_BE3_NATIVE_ERX_API;
+		if (!adapter->be3_native)
+			dev_warn(&adapter->pdev->dev,
+				 "adapter not in advanced mode\n");
 	}
 err:
 	mutex_unlock(&adapter->mbox_lock);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 250f19b5f7b6..0936e21e3cff 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1687,7 +1687,7 @@ struct be_cmd_req_set_ext_fat_caps {
 extern int be_pci_fnum_get(struct be_adapter *adapter);
 extern int be_fw_wait_ready(struct be_adapter *adapter);
 extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
-			u8 type, bool permanent, u32 if_handle, u32 pmac_id);
+				 bool permanent, u32 if_handle, u32 pmac_id);
 extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
 			u32 if_id, u32 *pmac_id, u32 domain);
 extern int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id,
@@ -1714,8 +1714,8 @@ extern int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
 			int type);
 extern int be_cmd_rxq_destroy(struct be_adapter *adapter,
 			struct be_queue_info *q);
-extern int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
-				    u16 *link_speed, u8 *link_status, u32 dom);
+extern int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
+				    u8 *link_status, u32 dom);
 extern int be_cmd_reset(struct be_adapter *adapter);
 extern int be_cmd_get_stats(struct be_adapter *adapter,
 			struct be_dma_mem *nonemb_cmd);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index c0e700653f96..8e6fb0ba6aa9 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -512,28 +512,6 @@ static u32 convert_to_et_setting(u32 if_type, u32 if_speeds)
 	return val;
 }
 
-static int convert_to_et_speed(u32 be_speed)
-{
-	int et_speed = SPEED_10000;
-
-	switch (be_speed) {
-	case PHY_LINK_SPEED_10MBPS:
-		et_speed = SPEED_10;
-		break;
-	case PHY_LINK_SPEED_100MBPS:
-		et_speed = SPEED_100;
-		break;
-	case PHY_LINK_SPEED_1GBPS:
-		et_speed = SPEED_1000;
-		break;
-	case PHY_LINK_SPEED_10GBPS:
-		et_speed = SPEED_10000;
-		break;
-	}
-
-	return et_speed;
-}
-
 bool be_pause_supported(struct be_adapter *adapter)
 {
 	return (adapter->phy.interface_type == PHY_TYPE_SFP_PLUS_10GB ||
@@ -544,27 +522,16 @@ bool be_pause_supported(struct be_adapter *adapter)
 static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	u8 port_speed = 0;
-	u16 link_speed = 0;
 	u8 link_status;
-	u32 et_speed = 0;
+	u16 link_speed = 0;
 	int status;
 
-	if (adapter->phy.link_speed < 0 || !(netdev->flags & IFF_UP)) {
-		if (adapter->phy.forced_port_speed < 0) {
-			status = be_cmd_link_status_query(adapter, &port_speed,
-						&link_speed, &link_status, 0);
-			if (!status)
-				be_link_status_update(adapter, link_status);
-			if (link_speed)
-				et_speed = link_speed * 10;
-			else if (link_status)
-				et_speed = convert_to_et_speed(port_speed);
-		} else {
-			et_speed = adapter->phy.forced_port_speed;
-		}
-
-		ethtool_cmd_speed_set(ecmd, et_speed);
+	if (adapter->phy.link_speed < 0) {
+		status = be_cmd_link_status_query(adapter, &link_speed,
+						  &link_status, 0);
+		if (!status)
+			be_link_status_update(adapter, link_status);
+		ethtool_cmd_speed_set(ecmd, link_speed);
 
 		status = be_cmd_get_phy_info(adapter);
 		if (status)
@@ -773,8 +740,8 @@ static void
 be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	u8 mac_speed = 0;
-	u16 qos_link_speed = 0;
+	int status;
+	u8 link_status = 0;
 
 	memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
 
@@ -798,11 +765,11 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
 		test->flags |= ETH_TEST_FL_FAILED;
 	}
 
-	if (be_cmd_link_status_query(adapter, &mac_speed,
-				     &qos_link_speed, NULL, 0) != 0) {
+	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
+	if (status) {
 		test->flags |= ETH_TEST_FL_FAILED;
 		data[4] = -1;
-	} else if (!mac_speed) {
+	} else if (!link_status) {
 		test->flags |= ETH_TEST_FL_FAILED;
 		data[4] = 1;
 	}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 95d10472f236..eb3f2cb3b93b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -20,6 +20,7 @@
 #include "be.h"
 #include "be_cmds.h"
 #include <asm/div64.h>
+#include <linux/aer.h>
 
 MODULE_VERSION(DRV_VER);
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
@@ -240,9 +241,8 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	status = be_cmd_mac_addr_query(adapter, current_mac,
-				MAC_ADDRESS_TYPE_NETWORK, false,
-				adapter->if_handle, 0);
+	status = be_cmd_mac_addr_query(adapter, current_mac, false,
+				       adapter->if_handle, 0);
 	if (status)
 		goto err;
 
@@ -1075,7 +1075,7 @@ static int be_set_vf_tx_rate(struct net_device *netdev,
 static int be_find_vfs(struct be_adapter *adapter, int vf_state)
 {
 	struct pci_dev *dev, *pdev = adapter->pdev;
-	int vfs = 0, assigned_vfs = 0, pos, vf_fn;
+	int vfs = 0, assigned_vfs = 0, pos;
 	u16 offset, stride;
 
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
@@ -1086,9 +1086,7 @@ static int be_find_vfs(struct be_adapter *adapter, int vf_state)
 
 	dev = pci_get_device(pdev->vendor, PCI_ANY_ID, NULL);
 	while (dev) {
-		vf_fn = (pdev->devfn + offset + stride * vfs) & 0xFFFF;
-		if (dev->is_virtfn && dev->devfn == vf_fn &&
-			dev->bus->number == pdev->bus->number) {
+		if (dev->is_virtfn && pci_physfn(dev) == pdev) {
 			vfs++;
 			if (dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
 				assigned_vfs++;
@@ -1896,6 +1894,8 @@ static int be_tx_qs_create(struct be_adapter *adapter)
 			return status;
 	}
 
+	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
+		 adapter->num_tx_qs);
 	return 0;
 }
 
@@ -1946,10 +1946,9 @@ static int be_rx_cqs_create(struct be_adapter *adapter)
 			return rc;
 	}
 
-	if (adapter->num_rx_qs != MAX_RX_QS)
-		dev_info(&adapter->pdev->dev,
-			"Created only %d receive queues\n", adapter->num_rx_qs);
-
+	dev_info(&adapter->pdev->dev,
+		 "created %d RSS queue(s) and 1 default RX queue\n",
+		 adapter->num_rx_qs - 1);
 	return 0;
 }
 
@@ -2176,8 +2175,7 @@ static uint be_num_rss_want(struct be_adapter *adapter)
 {
 	u32 num = 0;
 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
-	     !sriov_want(adapter) && be_physfn(adapter) &&
-	     !be_is_mc(adapter)) {
+	     !sriov_want(adapter) && be_physfn(adapter)) {
 		num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
 		num = min_t(u32, num, (u32)netif_get_num_default_rss_queues());
 	}
@@ -2188,6 +2186,7 @@ static void be_msix_enable(struct be_adapter *adapter)
 {
 #define BE_MIN_MSIX_VECTORS		1
 	int i, status, num_vec, num_roce_vec = 0;
+	struct device *dev = &adapter->pdev->dev;
 
 	/* If RSS queues are not used, need a vec for default RX Q */
 	num_vec = min(be_num_rss_want(adapter), num_online_cpus());
@@ -2212,6 +2211,8 @@ static void be_msix_enable(struct be_adapter *adapter)
 				num_vec) == 0)
 			goto done;
 	}
+
+	dev_warn(dev, "MSIx enable failed\n");
 	return;
 done:
 	if (be_roce_supported(adapter)) {
@@ -2225,6 +2226,7 @@ done:
 		}
 	} else
 		adapter->num_msix_vec = num_vec;
+	dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec);
 	return;
 }
 
@@ -2441,8 +2443,7 @@ static int be_open(struct net_device *netdev)
 		be_eq_notify(adapter, eqo->q.id, true, false, 0);
 	}
 
-	status = be_cmd_link_status_query(adapter, NULL, NULL,
-					  &link_status, 0);
+	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
 	if (!status)
 		be_link_status_update(adapter, link_status);
 
@@ -2646,8 +2647,8 @@ static int be_vf_setup(struct be_adapter *adapter)
 	}
 
 	for_all_vfs(adapter, vf_cfg, vf) {
-		status = be_cmd_link_status_query(adapter, NULL, &lnk_speed,
-						  NULL, vf + 1);
+		lnk_speed = 1000;
+		status = be_cmd_set_qos(adapter, lnk_speed, vf + 1);
 		if (status)
 			goto err;
 		vf_cfg->tx_rate = lnk_speed * 10;
@@ -2671,7 +2672,6 @@ static void be_setup_init(struct be_adapter *adapter)
 	adapter->be3_native = false;
 	adapter->promiscuous = false;
 	adapter->eq_next_idx = 0;
-	adapter->phy.forced_port_speed = -1;
 }
 
 static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle,
@@ -2693,21 +2693,16 @@ static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle,
 		status = be_cmd_get_mac_from_list(adapter, mac,
 						  active_mac, pmac_id, 0);
 		if (*active_mac) {
-			status = be_cmd_mac_addr_query(adapter, mac,
-						       MAC_ADDRESS_TYPE_NETWORK,
-						       false, if_handle,
-						       *pmac_id);
+			status = be_cmd_mac_addr_query(adapter, mac, false,
+						       if_handle, *pmac_id);
 		}
 	} else if (be_physfn(adapter)) {
 		/* For BE3, for PF get permanent MAC */
-		status = be_cmd_mac_addr_query(adapter, mac,
-					       MAC_ADDRESS_TYPE_NETWORK, true,
-					       0, 0);
+		status = be_cmd_mac_addr_query(adapter, mac, true, 0, 0);
 		*active_mac = false;
 	} else {
 		/* For BE3, for VF get soft MAC assigned by PF*/
-		status = be_cmd_mac_addr_query(adapter, mac,
-					       MAC_ADDRESS_TYPE_NETWORK, false,
+		status = be_cmd_mac_addr_query(adapter, mac, false,
 					       if_handle, 0);
 		*active_mac = true;
 	}
@@ -2724,6 +2719,8 @@ static int be_get_config(struct be_adapter *adapter)
 	if (pos) {
 		pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF,
 				     &dev_num_vfs);
+		if (!lancer_chip(adapter))
+			dev_num_vfs = min_t(u16, dev_num_vfs, MAX_VFS);
 		adapter->dev_num_vfs = dev_num_vfs;
 	}
 	return 0;
@@ -3437,6 +3434,7 @@ static void be_ctrl_cleanup(struct be_adapter *adapter)
 	if (mem->va)
 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 				  mem->dma);
+	kfree(adapter->pmac_id);
 }
 
 static int be_ctrl_init(struct be_adapter *adapter)
@@ -3473,6 +3471,12 @@ static int be_ctrl_init(struct be_adapter *adapter)
 	}
 	memset(rx_filter->va, 0, rx_filter->size);
 
+	/* primary mac needs 1 pmac entry */
+	adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1,
+				   sizeof(*adapter->pmac_id), GFP_KERNEL);
+	if (!adapter->pmac_id)
+		return -ENOMEM;
+
 	mutex_init(&adapter->mbox_lock);
 	spin_lock_init(&adapter->mcc_lock);
 	spin_lock_init(&adapter->mcc_cq_lock);
@@ -3543,6 +3547,8 @@ static void __devexit be_remove(struct pci_dev *pdev)
 
 	be_ctrl_cleanup(adapter);
 
+	pci_disable_pcie_error_reporting(pdev);
+
 	pci_set_drvdata(pdev, NULL);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
@@ -3609,12 +3615,6 @@ static int be_get_initial_config(struct be_adapter *adapter)
 	else
 		adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT;
 
-	/* primary mac needs 1 pmac entry */
-	adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1,
-				  sizeof(u32), GFP_KERNEL);
-	if (!adapter->pmac_id)
-		return -ENOMEM;
-
 	status = be_cmd_get_cntl_attributes(adapter);
 	if (status)
 		return status;
@@ -3800,6 +3800,23 @@ static bool be_reset_required(struct be_adapter *adapter)
 	return be_find_vfs(adapter, ENABLED) > 0 ? false : true;
 }
 
+static char *mc_name(struct be_adapter *adapter)
+{
+	if (adapter->function_mode & FLEX10_MODE)
+		return "FLEX10";
+	else if (adapter->function_mode & VNIC_MODE)
+		return "vNIC";
+	else if (adapter->function_mode & UMC_ENABLED)
+		return "UMC";
+	else
+		return "";
+}
+
+static inline char *func_name(struct be_adapter *adapter)
+{
+	return be_physfn(adapter) ? "PF" : "VF";
+}
+
 static int __devinit be_probe(struct pci_dev *pdev,
 			const struct pci_device_id *pdev_id)
 {
@@ -3844,6 +3861,10 @@ static int __devinit be_probe(struct pci_dev *pdev,
 		}
 	}
 
+	status = pci_enable_pcie_error_reporting(pdev);
+	if (status)
+		dev_err(&pdev->dev, "Could not use PCIe error reporting\n");
+
 	status = be_ctrl_init(adapter);
 	if (status)
 		goto free_netdev;
@@ -3886,7 +3907,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	status = be_setup(adapter);
 	if (status)
-		goto msix_disable;
+		goto stats_clean;
 
 	be_netdev_init(netdev);
 	status = register_netdev(netdev);
@@ -3900,15 +3921,13 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	be_cmd_query_port_name(adapter, &port_name);
 
-	dev_info(&pdev->dev, "%s: %s port %c\n", netdev->name, nic_name(pdev),
-		 port_name);
+	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
+		 func_name(adapter), mc_name(adapter), port_name);
 
 	return 0;
 
 unsetup:
 	be_clear(adapter);
-msix_disable:
-	be_msix_disable(adapter);
 stats_clean:
 	be_stats_cleanup(adapter);
 ctrl_clean:
@@ -4066,6 +4085,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
 	if (status)
 		return PCI_ERS_RESULT_DISCONNECT;
 
+	pci_cleanup_aer_uncorrect_error_status(pdev);
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 3574e1499dfc..feff51664dcf 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -62,6 +62,13 @@ config FSL_PQ_MDIO
 	---help---
 	  This driver supports the MDIO bus used by the gianfar and UCC drivers.
 
+config FSL_XGMAC_MDIO
+	tristate "Freescale XGMAC MDIO"
+	depends on FSL_SOC
+	select PHYLIB
+	---help---
+	  This driver supports the MDIO bus on the Fman 10G Ethernet MACs.
+
 config UCC_GETH
 	tristate "Freescale QE Gigabit Ethernet"
 	depends on QUICC_ENGINE
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 1752488c9ee5..3d1839afff65 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -9,6 +9,7 @@ ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y)
 endif
 obj-$(CONFIG_FS_ENET) += fs_enet/
 obj-$(CONFIG_FSL_PQ_MDIO) += fsl_pq_mdio.o
+obj-$(CONFIG_FSL_XGMAC_MDIO) += xgmac_mdio.o
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
 obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o
 gianfar_driver-objs := gianfar.o \
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 9527b28d70d1..c93a05654b46 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -19,54 +19,90 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/unistd.h>
 #include <linux/slab.h>
-#include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/crc32.h>
 #include <linux/mii.h>
-#include <linux/phy.h>
-#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/of_platform.h>
+#include <linux/of_device.h>
 
 #include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/uaccess.h>
-#include <asm/ucc.h>
+#include <asm/ucc.h>	/* for ucc_set_qe_mux_mii_mng() */
 
 #include "gianfar.h"
-#include "fsl_pq_mdio.h"
+
+#define MIIMIND_BUSY		0x00000001
+#define MIIMIND_NOTVALID	0x00000004
+#define MIIMCFG_INIT_VALUE	0x00000007
+#define MIIMCFG_RESET		0x80000000
+
+#define MII_READ_COMMAND	0x00000001
+
+struct fsl_pq_mii {
+	u32 miimcfg;	/* MII management configuration reg */
+	u32 miimcom;	/* MII management command reg */
+	u32 miimadd;	/* MII management address reg */
+	u32 miimcon;	/* MII management control reg */
+	u32 miimstat;	/* MII management status reg */
+	u32 miimind;	/* MII management indication reg */
+};
+
+struct fsl_pq_mdio {
+	u8 res1[16];
+	u32 ieventm;	/* MDIO Interrupt event register (for etsec2)*/
+	u32 imaskm;	/* MDIO Interrupt mask register (for etsec2)*/
+	u8 res2[4];
+	u32 emapm;	/* MDIO Event mapping register (for etsec2)*/
+	u8 res3[1280];
+	struct fsl_pq_mii mii;
+	u8 res4[28];
+	u32 utbipar;	/* TBI phy address reg (only on UCC) */
+	u8 res5[2728];
+} __packed;
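
The reserved padding makes the offsets line up with the hardware map: the
MII block lands at offset 0x520 (16 + 4 + 4 + 4 + 4 + 1280 bytes) and the
whole structure spans exactly 4 KB.  Compile-time checks one could add
(illustrative only, not in this patch):

	static inline void fsl_pq_mdio_layout_check(void)
	{
		BUILD_BUG_ON(offsetof(struct fsl_pq_mdio, mii) != 0x520);
		BUILD_BUG_ON(sizeof(struct fsl_pq_mdio) != 0x1000);
	}
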
 
 /* Number of microseconds to wait for an MII register to respond */
 #define MII_TIMEOUT	1000
 
 struct fsl_pq_mdio_priv {
 	void __iomem *map;
-	struct fsl_pq_mdio __iomem *regs;
+	struct fsl_pq_mii __iomem *regs;
+	int irqs[PHY_MAX_ADDR];
+};
+
+/*
+ * Per-device-type data.  Each type of device tree node that we support gets
+ * one of these.
+ *
+ * @mii_offset: the offset of the MII registers within the memory map of the
+ * node.  Some nodes define only the MII registers, and some define the whole
+ * MAC (which includes the MII registers).
+ *
+ * @get_tbipa: determines the address of the TBIPA register
+ *
+ * @ucc_configure: a special function for extra QE configuration
+ */
+struct fsl_pq_mdio_data {
+	unsigned int mii_offset;	/* offset of the MII registers */
+	uint32_t __iomem * (*get_tbipa)(void __iomem *p);
+	void (*ucc_configure)(phys_addr_t start, phys_addr_t end);
 };
 
 /*
- * Write value to the PHY at mii_id at register regnum,
- * on the bus attached to the local interface, which may be different from the
- * generic mdio bus (tied to a single interface), waiting until the write is
- * done before returning. This is helpful in programming interfaces like
- * the TBI which control interfaces like onchip SERDES and are always tied to
- * the local mdio pins, which may not be the same as system mdio bus, used for
+ * Write value to the PHY at mii_id at register regnum, on the bus attached
+ * to the local interface, which may be different from the generic mdio bus
+ * (tied to a single interface), waiting until the write is done before
+ * returning. This is helpful in programming interfaces like the TBI which
+ * control interfaces like onchip SERDES and are always tied to the local
+ * mdio pins, which may not be the same as system mdio bus, used for
  * controlling the external PHYs, for example.
  */
-int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id,
-		int regnum, u16 value)
+static int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
+		u16 value)
 {
+	struct fsl_pq_mdio_priv *priv = bus->priv;
+	struct fsl_pq_mii __iomem *regs = priv->regs;
 	u32 status;
 
 	/* Set the PHY address and the register address we want to write */
@@ -83,20 +119,21 @@ int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id,
 }
 
 /*
- * Read the bus for PHY at addr mii_id, register regnum, and
- * return the value.  Clears miimcom first.  All PHY operation
- * done on the bus attached to the local interface,
- * which may be different from the generic mdio bus
- * This is helpful in programming interfaces like
- * the TBI which, in turn, control interfaces like onchip SERDES
- * and are always tied to the local mdio pins, which may not be the
+ * Read the bus for PHY at addr mii_id, register regnum, and return the value.
+ * Clears miimcom first.
+ *
+ * All PHY operation done on the bus attached to the local interface, which
+ * may be different from the generic mdio bus.  This is helpful in programming
+ * interfaces like the TBI which, in turn, control interfaces like on-chip
+ * SERDES and are always tied to the local mdio pins, which may not be the
  * same as system mdio bus, used for controlling the external PHYs, for eg.
  */
-int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs,
-		int mii_id, int regnum)
+static int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
-	u16 value;
+	struct fsl_pq_mdio_priv *priv = bus->priv;
+	struct fsl_pq_mii __iomem *regs = priv->regs;
 	u32 status;
+	u16 value;
 
 	/* Set the PHY address and the register address we want to read */
 	out_be32(&regs->miimadd, (mii_id << 8) | regnum);
@@ -115,44 +152,15 @@ int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs,
 	/* Grab the value of the register from miimstat */
 	value = in_be32(&regs->miimstat);
 
+	dev_dbg(&bus->dev, "read %04x from address %x/%x\n", value, mii_id, regnum);
 	return value;
 }
 
-static struct fsl_pq_mdio __iomem *fsl_pq_mdio_get_regs(struct mii_bus *bus)
-{
-	struct fsl_pq_mdio_priv *priv = bus->priv;
-
-	return priv->regs;
-}
-
-/*
- * Write value to the PHY at mii_id at register regnum,
- * on the bus, waiting until the write is done before returning.
- */
-int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value)
-{
-	struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus);
-
-	/* Write to the local MII regs */
-	return fsl_pq_local_mdio_write(regs, mii_id, regnum, value);
-}
-
-/*
- * Read the bus for PHY at addr mii_id, register regnum, and
- * return the value.  Clears miimcom first.
- */
-int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
-{
-	struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus);
-
-	/* Read the local MII regs */
-	return fsl_pq_local_mdio_read(regs, mii_id, regnum);
-}
-
 /* Reset the MIIM registers, and wait for the bus to free */
 static int fsl_pq_mdio_reset(struct mii_bus *bus)
 {
-	struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus);
+	struct fsl_pq_mdio_priv *priv = bus->priv;
+	struct fsl_pq_mii __iomem *regs = priv->regs;
 	u32 status;
 
 	mutex_lock(&bus->mdio_lock);
@@ -170,234 +178,291 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus)
 	mutex_unlock(&bus->mdio_lock);
 
 	if (!status) {
-		printk(KERN_ERR "%s: The MII Bus is stuck!\n",
-				bus->name);
+		dev_err(&bus->dev, "timeout waiting for MII bus\n");
 		return -EBUSY;
 	}
 
 	return 0;
 }
 
-void fsl_pq_mdio_bus_name(char *name, struct device_node *np)
+#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+/*
+ * This is mildly evil, but so is our hardware for doing this.
+ * Also, we have to cast back to struct gfar because of
+ * definition weirdness done in gianfar.h.
+ */
+static uint32_t __iomem *get_gfar_tbipa(void __iomem *p)
 {
-	const u32 *addr;
-	u64 taddr = OF_BAD_ADDR;
-
-	addr = of_get_address(np, 0, NULL, NULL);
-	if (addr)
-		taddr = of_translate_address(np, addr);
+	struct gfar __iomem *enet_regs = p;
 
-	snprintf(name, MII_BUS_ID_SIZE, "%s@%llx", np->name,
-		(unsigned long long)taddr);
+	return &enet_regs->tbipa;
 }
-EXPORT_SYMBOL_GPL(fsl_pq_mdio_bus_name);
 
+/*
+ * Return the TBIPAR address for an eTSEC2 node
+ */
+static uint32_t __iomem *get_etsec_tbipa(void __iomem *p)
+{
+	return p;
+}
+#endif
 
-static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np)
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+/*
+ * Return the TBIPAR address for a QE MDIO node
+ */
+static uint32_t __iomem *get_ucc_tbipa(void __iomem *p)
 {
-#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
-	struct gfar __iomem *enet_regs;
+	struct fsl_pq_mdio __iomem *mdio = p;
 
-	/*
-	 * This is mildly evil, but so is our hardware for doing this.
-	 * Also, we have to cast back to struct gfar because of
-	 * definition weirdness done in gianfar.h.
-	 */
-	if(of_device_is_compatible(np, "fsl,gianfar-mdio") ||
-		of_device_is_compatible(np, "fsl,gianfar-tbi") ||
-		of_device_is_compatible(np, "gianfar")) {
-		enet_regs = (struct gfar __iomem *)regs;
-		return &enet_regs->tbipa;
-	} else if (of_device_is_compatible(np, "fsl,etsec2-mdio") ||
-			of_device_is_compatible(np, "fsl,etsec2-tbi")) {
-		return of_iomap(np, 1);
-	}
-#endif
-	return NULL;
+	return &mdio->utbipar;
 }
 
-
-static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id)
+/*
+ * Find the UCC node that controls the given MDIO node
+ *
+ * For some reason, the QE MDIO nodes are not children of the UCC devices
+ * that control them.  Therefore, we need to scan all UCC nodes looking for
+ * the one that encompases the given MDIO node.  We do this by comparing
+ * physical addresses.  The 'start' and 'end' addresses of the MDIO node are
+ * passed, and the correct UCC node will cover the entire address range.
+ *
+ * This assumes that there is only one QE MDIO node in the entire device tree.
+ */
+static void ucc_configure(phys_addr_t start, phys_addr_t end)
 {
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+	static bool found_mii_master;
 	struct device_node *np = NULL;
-	int err = 0;
 
-	for_each_compatible_node(np, NULL, "ucc_geth") {
-		struct resource tempres;
+	if (found_mii_master)
+		return;
 
-		err = of_address_to_resource(np, 0, &tempres);
-		if (err)
+	for_each_compatible_node(np, NULL, "ucc_geth") {
+		struct resource res;
+		const uint32_t *iprop;
+		uint32_t id;
+		int ret;
+
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret < 0) {
+			pr_debug("fsl-pq-mdio: no address range in node %s\n",
+				 np->full_name);
 			continue;
+		}
 
 		/* if our mdio regs fall within this UCC regs range */
-		if ((start >= tempres.start) && (end <= tempres.end)) {
-			/* Find the id of the UCC */
-			const u32 *id;
-
-			id = of_get_property(np, "cell-index", NULL);
-			if (!id) {
-				id = of_get_property(np, "device-id", NULL);
-				if (!id)
-					continue;
+		if ((start < res.start) || (end > res.end))
+			continue;
+
+		iprop = of_get_property(np, "cell-index", NULL);
+		if (!iprop) {
+			iprop = of_get_property(np, "device-id", NULL);
+			if (!iprop) {
+				pr_debug("fsl-pq-mdio: no UCC ID in node %s\n",
+					 np->full_name);
+				continue;
 			}
+		}
 
-			*ucc_id = *id;
+		id = be32_to_cpup(iprop);
 
-			return 0;
+		/*
+		 * cell-index and device-id for QE nodes are
+		 * numbered from 1, not 0.
+		 */
+		if (ucc_set_qe_mux_mii_mng(id - 1) < 0) {
+			pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n",
+				 np->full_name);
+			continue;
 		}
+
+		pr_debug("fsl-pq-mdio: setting node UCC%u to MII master\n", id);
+		found_mii_master = true;
 	}
+}
 
-	if (err)
-		return err;
-	else
-		return -EINVAL;
-#else
-	return -ENODEV;
 #endif
-}
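
The match table that follows attaches a per-compatible configuration to each
binding. Its type is defined earlier in the patch, outside this hunk; a
minimal sketch of its shape, inferred only from the fields used below, would
be:

	/* Inferred shape -- field names taken from their uses in the table */
	struct fsl_pq_mdio_data {
		unsigned int mii_offset;	/* offset of MII regs in the map */
		uint32_t __iomem *(*get_tbipa)(void __iomem *p);
		void (*ucc_configure)(phys_addr_t start, phys_addr_t end);
	};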
 
-static int fsl_pq_mdio_probe(struct platform_device *ofdev)
+static struct of_device_id fsl_pq_mdio_match[] = {
+#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+	{
+		.compatible = "fsl,gianfar-tbi",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = 0,
+			.get_tbipa = get_gfar_tbipa,
+		},
+	},
+	{
+		.compatible = "fsl,gianfar-mdio",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = 0,
+			.get_tbipa = get_gfar_tbipa,
+		},
+	},
+	{
+		.type = "mdio",
+		.compatible = "gianfar",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = offsetof(struct fsl_pq_mdio, mii),
+			.get_tbipa = get_gfar_tbipa,
+		},
+	},
+	{
+		.compatible = "fsl,etsec2-tbi",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = offsetof(struct fsl_pq_mdio, mii),
+			.get_tbipa = get_etsec_tbipa,
+		},
+	},
+	{
+		.compatible = "fsl,etsec2-mdio",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = offsetof(struct fsl_pq_mdio, mii),
+			.get_tbipa = get_etsec_tbipa,
+		},
+	},
+#endif
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+	{
+		.compatible = "fsl,ucc-mdio",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = 0,
+			.get_tbipa = get_ucc_tbipa,
+			.ucc_configure = ucc_configure,
+		},
+	},
+	{
+		/* Legacy UCC MDIO node */
+		.type = "mdio",
+		.compatible = "ucc_geth_phy",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = 0,
+			.get_tbipa = get_ucc_tbipa,
+			.ucc_configure = ucc_configure,
+		},
+	},
+#endif
+	/* No Kconfig option for Fman support yet */
+	{
+		.compatible = "fsl,fman-mdio",
+		.data = &(struct fsl_pq_mdio_data) {
+			.mii_offset = 0,
+			/* Fman TBI operations are handled elsewhere */
+		},
+	},
+
+	{},
+};
+MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match);
+
+static int fsl_pq_mdio_probe(struct platform_device *pdev)
 {
-	struct device_node *np = ofdev->dev.of_node;
+	const struct of_device_id *id =
+		of_match_device(fsl_pq_mdio_match, &pdev->dev);
+	const struct fsl_pq_mdio_data *data = id->data;
+	struct device_node *np = pdev->dev.of_node;
+	struct resource res;
 	struct device_node *tbi;
 	struct fsl_pq_mdio_priv *priv;
-	struct fsl_pq_mdio __iomem *regs = NULL;
-	void __iomem *map;
-	u32 __iomem *tbipa;
 	struct mii_bus *new_bus;
-	int tbiaddr = -1;
-	const u32 *addrp;
-	u64 addr = 0, size = 0;
 	int err;
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible);
 
-	new_bus = mdiobus_alloc();
-	if (!new_bus) {
-		err = -ENOMEM;
-		goto err_free_priv;
-	}
+	new_bus = mdiobus_alloc_size(sizeof(*priv));
+	if (!new_bus)
+		return -ENOMEM;
 
+	priv = new_bus->priv;
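
One allocation detail worth noting here: mdiobus_alloc_size() allocates the
mii_bus and the requested extra bytes in one block and points bus->priv at
the trailing area, which is why priv no longer needs its own
kzalloc()/kfree() pair.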
 	new_bus->name = "Freescale PowerQUICC MII Bus",
-	new_bus->read = &fsl_pq_mdio_read,
-	new_bus->write = &fsl_pq_mdio_write,
-	new_bus->reset = &fsl_pq_mdio_reset,
-	new_bus->priv = priv;
-	fsl_pq_mdio_bus_name(new_bus->id, np);
-
-	addrp = of_get_address(np, 0, &size, NULL);
-	if (!addrp) {
-		err = -EINVAL;
-		goto err_free_bus;
+	new_bus->read = &fsl_pq_mdio_read;
+	new_bus->write = &fsl_pq_mdio_write;
+	new_bus->reset = &fsl_pq_mdio_reset;
+	new_bus->irq = priv->irqs;
+
+	err = of_address_to_resource(np, 0, &res);
+	if (err < 0) {
+		dev_err(&pdev->dev, "could not obtain address information\n");
+		goto error;
 	}
 
-	/* Set the PHY base address */
-	addr = of_translate_address(np, addrp);
-	if (addr == OF_BAD_ADDR) {
-		err = -EINVAL;
-		goto err_free_bus;
-	}
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s@%llx", np->name,
+		(unsigned long long)res.start);
 
-	map = ioremap(addr, size);
-	if (!map) {
+	priv->map = of_iomap(np, 0);
+	if (!priv->map) {
 		err = -ENOMEM;
-		goto err_free_bus;
+		goto error;
 	}
-	priv->map = map;
-
-	if (of_device_is_compatible(np, "fsl,gianfar-mdio") ||
-			of_device_is_compatible(np, "fsl,gianfar-tbi") ||
-			of_device_is_compatible(np, "fsl,ucc-mdio") ||
-			of_device_is_compatible(np, "ucc_geth_phy"))
-		map -= offsetof(struct fsl_pq_mdio, miimcfg);
-	regs = map;
-	priv->regs = regs;
-
-	new_bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
 
-	if (NULL == new_bus->irq) {
-		err = -ENOMEM;
-		goto err_unmap_regs;
+	/*
+	 * Some device tree nodes represent only the MII registers, and
+	 * others represent the MAC and MII registers.  The 'mii_offset' field
+	 * contains the offset of the MII registers inside the mapped register
+	 * space.
+	 */
+	if (data->mii_offset > resource_size(&res)) {
+		dev_err(&pdev->dev, "invalid register map\n");
+		err = -EINVAL;
+		goto error;
 	}
+	priv->regs = priv->map + data->mii_offset;
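
As a worked example of the offset math, going by the match table above: a
legacy "gianfar" node maps the whole eTSEC block, so mii_offset is
offsetof(struct fsl_pq_mdio, mii) and priv->regs lands on the MII management
registers inside it, while "fsl,gianfar-mdio" nodes map the MII block
directly and use mii_offset = 0.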
 
-	new_bus->parent = &ofdev->dev;
-	dev_set_drvdata(&ofdev->dev, new_bus);
-
-	if (of_device_is_compatible(np, "fsl,gianfar-mdio") ||
-			of_device_is_compatible(np, "fsl,gianfar-tbi") ||
-			of_device_is_compatible(np, "fsl,etsec2-mdio") ||
-			of_device_is_compatible(np, "fsl,etsec2-tbi") ||
-			of_device_is_compatible(np, "gianfar")) {
-		tbipa = get_gfar_tbipa(regs, np);
-		if (!tbipa) {
-			err = -EINVAL;
-			goto err_free_irqs;
-		}
-	} else if (of_device_is_compatible(np, "fsl,ucc-mdio") ||
-			of_device_is_compatible(np, "ucc_geth_phy")) {
-		u32 id;
-		static u32 mii_mng_master;
-
-		tbipa = &regs->utbipar;
-
-		if ((err = get_ucc_id_for_range(addr, addr + size, &id)))
-			goto err_free_irqs;
+	new_bus->parent = &pdev->dev;
+	dev_set_drvdata(&pdev->dev, new_bus);
 
-		if (!mii_mng_master) {
-			mii_mng_master = id;
-			ucc_set_qe_mux_mii_mng(id - 1);
+	if (data->get_tbipa) {
+		for_each_child_of_node(np, tbi) {
+			if (strcmp(tbi->type, "tbi-phy") == 0) {
+				dev_dbg(&pdev->dev, "found TBI PHY node %s\n",
+					strrchr(tbi->full_name, '/') + 1);
+				break;
+			}
 		}
-	} else {
-		err = -ENODEV;
-		goto err_free_irqs;
-	}
 
-	for_each_child_of_node(np, tbi) {
-		if (!strncmp(tbi->type, "tbi-phy", 8))
-			break;
-	}
+		if (tbi) {
+			const u32 *prop = of_get_property(tbi, "reg", NULL);
+			uint32_t __iomem *tbipa;
 
-	if (tbi) {
-		const u32 *prop = of_get_property(tbi, "reg", NULL);
+			if (!prop) {
+				dev_err(&pdev->dev,
+					"missing 'reg' property in node %s\n",
+					tbi->full_name);
+				err = -EBUSY;
+				goto error;
+			}
 
-		if (prop)
-			tbiaddr = *prop;
+			tbipa = data->get_tbipa(priv->map);
 
-		if (tbiaddr == -1) {
-			err = -EBUSY;
-			goto err_free_irqs;
-		} else {
-			out_be32(tbipa, tbiaddr);
+			out_be32(tbipa, be32_to_cpup(prop));
 		}
 	}
 
+	if (data->ucc_configure)
+		data->ucc_configure(res.start, res.end);
+
 	err = of_mdiobus_register(new_bus, np);
 	if (err) {
-		printk (KERN_ERR "%s: Cannot register as MDIO bus\n",
-				new_bus->name);
-		goto err_free_irqs;
+		dev_err(&pdev->dev, "cannot register %s as MDIO bus\n",
+			new_bus->name);
+		goto error;
 	}
 
 	return 0;
 
-err_free_irqs:
-	kfree(new_bus->irq);
-err_unmap_regs:
-	iounmap(priv->map);
-err_free_bus:
+error:
+	if (priv->map)
+		iounmap(priv->map);
+
 	kfree(new_bus);
-err_free_priv:
-	kfree(priv);
+
 	return err;
 }
 
 
-static int fsl_pq_mdio_remove(struct platform_device *ofdev)
+static int fsl_pq_mdio_remove(struct platform_device *pdev)
 {
-	struct device *device = &ofdev->dev;
+	struct device *device = &pdev->dev;
 	struct mii_bus *bus = dev_get_drvdata(device);
 	struct fsl_pq_mdio_priv *priv = bus->priv;
 
@@ -406,41 +471,11 @@ static int fsl_pq_mdio_remove(struct platform_device *ofdev)
 	dev_set_drvdata(device, NULL);
 
 	iounmap(priv->map);
-	bus->priv = NULL;
 	mdiobus_free(bus);
-	kfree(priv);
 
 	return 0;
 }
 
-static struct of_device_id fsl_pq_mdio_match[] = {
-	{
-		.type = "mdio",
-		.compatible = "ucc_geth_phy",
-	},
-	{
-		.type = "mdio",
-		.compatible = "gianfar",
-	},
-	{
-		.compatible = "fsl,ucc-mdio",
-	},
-	{
-		.compatible = "fsl,gianfar-tbi",
-	},
-	{
-		.compatible = "fsl,gianfar-mdio",
-	},
-	{
-		.compatible = "fsl,etsec2-tbi",
-	},
-	{
-		.compatible = "fsl,etsec2-mdio",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match);
-
 static struct platform_driver fsl_pq_mdio_driver = {
 	.driver = {
 		.name = "fsl-pq_mdio",
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.h b/drivers/net/ethernet/freescale/fsl_pq_mdio.h
deleted file mode 100644
index bd17a2a0139b..000000000000
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Freescale PowerQUICC MDIO Driver -- MII Management Bus Implementation
- * Driver for the MDIO bus controller on Freescale PowerQUICC processors
- *
- * Author: Andy Fleming
- * Modifier: Sandeep Gopalpet
- *
- * Copyright 2002-2004, 2008-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-#ifndef __FSL_PQ_MDIO_H
-#define __FSL_PQ_MDIO_H
-
-#define MIIMIND_BUSY            0x00000001
-#define MIIMIND_NOTVALID        0x00000004
-#define MIIMCFG_INIT_VALUE	0x00000007
-#define MIIMCFG_RESET           0x80000000
-
-#define MII_READ_COMMAND       0x00000001
-
-struct fsl_pq_mdio {
-	u8 res1[16];
-	u32 ieventm;	/* MDIO Interrupt event register (for etsec2)*/
-	u32 imaskm;	/* MDIO Interrupt mask register (for etsec2)*/
-	u8 res2[4];
-	u32 emapm;	/* MDIO Event mapping register (for etsec2)*/
-	u8 res3[1280];
-	u32 miimcfg;		/* MII management configuration reg */
-	u32 miimcom;		/* MII management command reg */
-	u32 miimadd;		/* MII management address reg */
-	u32 miimcon;		/* MII management control reg */
-	u32 miimstat;		/* MII management status reg */
-	u32 miimind;		/* MII management indication reg */
-	u8 reserved[28];	/* Space holder */
-	u32 utbipar;		/* TBI phy address reg (only on UCC) */
-	u8 res4[2728];
-} __packed;
-
-int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum);
-int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
-int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id,
-			  int regnum, u16 value);
-int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, int mii_id, int regnum);
-int __init fsl_pq_mdio_init(void);
-void fsl_pq_mdio_exit(void);
-void fsl_pq_mdio_bus_name(char *name, struct device_node *np);
-#endif /* FSL_PQ_MDIO_H */
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index d3233f59a82e..a1b52ec3b930 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -100,7 +100,6 @@
 #include <linux/of_net.h>
 
 #include "gianfar.h"
-#include "fsl_pq_mdio.h"
 
 #define TX_TIMEOUT      (1*HZ)
 
@@ -395,7 +394,13 @@ static void gfar_init_mac(struct net_device *ndev)
 	if (ndev->features & NETIF_F_IP_CSUM)
 		tctrl |= TCTRL_INIT_CSUM;
 
-	tctrl |= TCTRL_TXSCHED_PRIO;
+	if (priv->prio_sched_en)
+		tctrl |= TCTRL_TXSCHED_PRIO;
+	else {
+		tctrl |= TCTRL_TXSCHED_WRRS;
+		gfar_write(&regs->tr03wt, DEFAULT_WRRS_WEIGHT);
+		gfar_write(&regs->tr47wt, DEFAULT_WRRS_WEIGHT);
+	}
 
 	gfar_write(&regs->tctrl, tctrl);
 
@@ -1161,6 +1166,9 @@ static int gfar_probe(struct platform_device *ofdev)
 	priv->rx_filer_enable = 1;
 	/* Enable most messages by default */
 	priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
+	/* use priority h/w Tx queue scheduling for single-queue devices */
+	if (priv->num_tx_queues == 1)
+		priv->prio_sched_en = 1;
 
 	/* Carrier starts down, phylib will bring it up */
 	netif_carrier_off(dev);
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 2136c7ff5e6d..4141ef2ddafc 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -301,8 +301,16 @@ extern const char gfar_driver_version[];
 #define TCTRL_TFCPAUSE		0x00000008
 #define TCTRL_TXSCHED_MASK	0x00000006
 #define TCTRL_TXSCHED_INIT	0x00000000
+/* priority scheduling */
 #define TCTRL_TXSCHED_PRIO	0x00000002
+/* weighted round-robin scheduling (WRRS) */
 #define TCTRL_TXSCHED_WRRS	0x00000004
+/* default WRRS weight and policy setting,
+ * tailored to the tr03wt and tr47wt registers:
+ * equal weight for all Tx Qs, measured in 64-byte units
+ * (each weight byte is 0x18 = 24, i.e. 1536 bytes per queue)
+ */
+#define DEFAULT_WRRS_WEIGHT	0x18181818
+
 #define TCTRL_INIT_CSUM		(TCTRL_TUCSEN | TCTRL_IPCSEN)
 
 #define IEVENT_INIT_CLEAR	0xffffffff
@@ -1098,7 +1106,8 @@ struct gfar_private {
 		extended_hash:1,
 		bd_stash_en:1,
 		rx_filer_enable:1,
-		wol_en:1; /* Wake-on-LAN enabled */
+		wol_en:1, /* Wake-on-LAN enabled */
+		prio_sched_en:1; /* Enable priority-based Tx scheduling in HW */
 	unsigned short padding;
 
 	/* PHY stuff */
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 0daa66b8eca0..b9db0e040563 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -510,7 +510,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
 
 	spin_unlock_irqrestore(&etsects->lock, flags);
 
-	etsects->clock = ptp_clock_register(&etsects->caps);
+	etsects->clock = ptp_clock_register(&etsects->caps, &dev->dev);
 	if (IS_ERR(etsects->clock)) {
 		err = PTR_ERR(etsects->clock);
 		goto no_clock;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 21c6574c5f15..164288439220 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -42,7 +42,6 @@
 #include <asm/machdep.h>
 
 #include "ucc_geth.h"
-#include "fsl_pq_mdio.h"
 
 #undef DEBUG
 
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
new file mode 100644
index 000000000000..1afb5ea2a984
--- /dev/null
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -0,0 +1,274 @@
+/*
+ * QorIQ 10G MDIO Controller
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Authors: Andy Fleming <afleming@freescale.com>
+ *          Timur Tabi <timur@freescale.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/mdio.h>
+#include <linux/of_platform.h>
+#include <linux/of_mdio.h>
+
+/* Number of microseconds to wait for a register to respond */
+#define TIMEOUT	1000
+
+struct tgec_mdio_controller {
+	__be32	reserved[12];
+	__be32	mdio_stat;	/* MDIO configuration and status */
+	__be32	mdio_ctl;	/* MDIO control */
+	__be32	mdio_data;	/* MDIO data */
+	__be32	mdio_addr;	/* MDIO address */
+} __packed;
+
+#define MDIO_STAT_CLKDIV(x)	(((x>>1) & 0xff) << 8)
+#define MDIO_STAT_BSY		(1 << 0)
+#define MDIO_STAT_RD_ER		(1 << 1)
+#define MDIO_CTL_DEV_ADDR(x)	(x & 0x1f)
+#define MDIO_CTL_PORT_ADDR(x)	((x & 0x1f) << 5)
+#define MDIO_CTL_PRE_DIS	(1 << 10)
+#define MDIO_CTL_SCAN_EN	(1 << 11)
+#define MDIO_CTL_POST_INC	(1 << 14)
+#define MDIO_CTL_READ		(1 << 15)
+
+#define MDIO_DATA(x)		(x & 0xffff)
+#define MDIO_DATA_BSY		(1 << 31)
+
+/*
+ * Wait until the MDIO bus is free
+ */
+static int xgmac_wait_until_free(struct device *dev,
+				 struct tgec_mdio_controller __iomem *regs)
+{
+	uint32_t status;
+
+	/* Wait till the bus is free */
+	status = spin_event_timeout(
+		!((in_be32(&regs->mdio_stat)) & MDIO_STAT_BSY), TIMEOUT, 0);
+	if (!status) {
+		dev_err(dev, "timeout waiting for bus to be free\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait till the MDIO read or write operation is complete
+ */
+static int xgmac_wait_until_done(struct device *dev,
+				 struct tgec_mdio_controller __iomem *regs)
+{
+	uint32_t status;
+
+	/* Wait till the MDIO write is complete */
+	status = spin_event_timeout(
+		!((in_be32(&regs->mdio_data)) & MDIO_DATA_BSY), TIMEOUT, 0);
+	if (!status) {
+		dev_err(dev, "timeout waiting for operation to complete\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Write 'value' to register 'regnum' of the PHY at 'phy_id', waiting
+ * until the write completes before returning.
+ */
+static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
+{
+	struct tgec_mdio_controller __iomem *regs = bus->priv;
+	uint16_t dev_addr = regnum >> 16;
+	int ret;
+
+	/* Setup the MII Mgmt clock speed */
+	out_be32(&regs->mdio_stat, MDIO_STAT_CLKDIV(100));
+
+	ret = xgmac_wait_until_free(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	/* Set the port and dev addr */
+	out_be32(&regs->mdio_ctl,
+		 MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr));
+
+	/* Set the register address */
+	out_be32(&regs->mdio_addr, regnum & 0xffff);
+
+	ret = xgmac_wait_until_free(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	/* Write the value to the register */
+	out_be32(&regs->mdio_data, MDIO_DATA(value));
+
+	ret = xgmac_wait_until_done(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Read register 'regnum' of the PHY at 'phy_id' and return its value.
+ */
+static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+{
+	struct tgec_mdio_controller __iomem *regs = bus->priv;
+	uint16_t dev_addr = regnum >> 16;
+	uint32_t mdio_ctl;
+	uint16_t value;
+	int ret;
+
+	/* Setup the MII Mgmt clock speed */
+	out_be32(&regs->mdio_stat, MDIO_STAT_CLKDIV(100));
+
+	ret = xgmac_wait_until_free(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	/* Set the Port and Device Addrs */
+	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
+	out_be32(&regs->mdio_ctl, mdio_ctl);
+
+	/* Set the register address */
+	out_be32(&regs->mdio_addr, regnum & 0xffff);
+
+	ret = xgmac_wait_until_free(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	/* Initiate the read */
+	out_be32(&regs->mdio_ctl, mdio_ctl | MDIO_CTL_READ);
+
+	ret = xgmac_wait_until_done(&bus->dev, regs);
+	if (ret)
+		return ret;
+
+	/* Return all Fs if nothing was there */
+	if (in_be32(&regs->mdio_stat) & MDIO_STAT_RD_ER) {
+		dev_err(&bus->dev, "MDIO read error\n");
+		return 0xffff;
+	}
+
+	value = in_be32(&regs->mdio_data) & 0xffff;
+	dev_dbg(&bus->dev, "read %04x\n", value);
+
+	return value;
+}
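
The two accessors above take the clause-45 MMD device address from the upper
bits of regnum ('regnum >> 16') and the register number from the lower 16
bits. A hypothetical caller, shown only to illustrate the encoding (the
helper name and register choice are invented):

	/* Read register 0 of MMD 1 (PMA/PMD) on the PHY at port address 4 */
	static int example_read_pma_ctrl(struct mii_bus *bus)
	{
		return xgmac_mdio_read(bus, 4, (1 << 16) | 0);
	}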
+
+/* Reset the MIIM registers, and wait for the bus to free */
+static int xgmac_mdio_reset(struct mii_bus *bus)
+{
+	struct tgec_mdio_controller __iomem *regs = bus->priv;
+	int ret;
+
+	mutex_lock(&bus->mdio_lock);
+
+	/* Setup the MII Mgmt clock speed */
+	out_be32(&regs->mdio_stat, MDIO_STAT_CLKDIV(100));
+
+	ret = xgmac_wait_until_free(&bus->dev, regs);
+
+	mutex_unlock(&bus->mdio_lock);
+
+	return ret;
+}
+
+static int __devinit xgmac_mdio_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct mii_bus *bus;
+	struct resource res;
+	int ret;
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret) {
+		dev_err(&pdev->dev, "could not obtain address\n");
+		return ret;
+	}
+
+	bus = mdiobus_alloc_size(PHY_MAX_ADDR * sizeof(int));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "Freescale XGMAC MDIO Bus";
+	bus->read = xgmac_mdio_read;
+	bus->write = xgmac_mdio_write;
+	bus->reset = xgmac_mdio_reset;
+	bus->irq = bus->priv;
+	bus->parent = &pdev->dev;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start);
+
+	/* Set the PHY base address */
+	bus->priv = of_iomap(np, 0);
+	if (!bus->priv) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
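
The interplay of bus->irq and bus->priv above is deliberate:
mdiobus_alloc_size() pointed bus->priv at the extra
PHY_MAX_ADDR * sizeof(int) bytes, bus->irq is aimed at that scratch area to
serve as the interrupt table, and bus->priv is then repurposed to carry the
mapped controller registers that the read/write/reset handlers cast back
out.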
+
+	ret = of_mdiobus_register(bus, np);
+	if (ret) {
+		dev_err(&pdev->dev, "cannot register MDIO bus\n");
+		goto err_registration;
+	}
+
+	dev_set_drvdata(&pdev->dev, bus);
+
+	return 0;
+
+err_registration:
+	iounmap(bus->priv);
+
+err_ioremap:
+	mdiobus_free(bus);
+
+	return ret;
+}
+
+static int __devexit xgmac_mdio_remove(struct platform_device *pdev)
+{
+	struct mii_bus *bus = dev_get_drvdata(&pdev->dev);
+
+	mdiobus_unregister(bus);
+	iounmap(bus->priv);
+	mdiobus_free(bus);
+
+	return 0;
+}
+
+static struct of_device_id xgmac_mdio_match[] = {
+	{
+		.compatible = "fsl,fman-xmdio",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgmac_mdio_match);
+
+static struct platform_driver xgmac_mdio_driver = {
+	.driver = {
+		.name = "fsl-fman_xmdio",
+		.of_match_table = xgmac_mdio_match,
+	},
+	.probe = xgmac_mdio_probe,
+	.remove = xgmac_mdio_remove,
+};
+
+module_platform_driver(xgmac_mdio_driver);
+
+MODULE_DESCRIPTION("Freescale QorIQ 10G MDIO Controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig
index fed5080a6b62..959faf7388e2 100644
--- a/drivers/net/ethernet/i825xx/Kconfig
+++ b/drivers/net/ethernet/i825xx/Kconfig
@@ -150,7 +150,7 @@ config SUN3_82586
 
 config ZNET
 	tristate "Zenith Z-Note support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && ISA_DMA_API
+	depends on EXPERIMENTAL && ISA_DMA_API && X86
 	---help---
 	  The Zenith Z-Note notebook computer has a built-in network
 	  (Ethernet) card, and this is the Linux driver for it. Note that the
diff --git a/drivers/net/ethernet/i825xx/znet.c b/drivers/net/ethernet/i825xx/znet.c
index ba4e0cea3506..c9479e081b8a 100644
--- a/drivers/net/ethernet/i825xx/znet.c
+++ b/drivers/net/ethernet/i825xx/znet.c
@@ -865,14 +865,14 @@ static void hardware_init(struct net_device *dev)
 	disable_dma(znet->rx_dma); 		/* reset by an interrupting task. */
 	clear_dma_ff(znet->rx_dma);
 	set_dma_mode(znet->rx_dma, DMA_RX_MODE);
-	set_dma_addr(znet->rx_dma, (unsigned int) znet->rx_start);
+	set_dma_addr(znet->rx_dma, isa_virt_to_bus(znet->rx_start));
 	set_dma_count(znet->rx_dma, RX_BUF_SIZE);
 	enable_dma(znet->rx_dma);
 	/* Now set up the Tx channel. */
 	disable_dma(znet->tx_dma);
 	clear_dma_ff(znet->tx_dma);
 	set_dma_mode(znet->tx_dma, DMA_TX_MODE);
-	set_dma_addr(znet->tx_dma, (unsigned int) znet->tx_start);
+	set_dma_addr(znet->tx_dma, isa_virt_to_bus(znet->tx_start));
 	set_dma_count(znet->tx_dma, znet->tx_buf_len<<1);
 	enable_dma(znet->tx_dma);
 	release_dma_lock(flags);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 736a7d987db5..9089d00f1421 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -174,6 +174,20 @@ static int e1000_get_settings(struct net_device *netdev,
 
 	ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) ||
 			 hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* MDI-X => 1; MDI => 0 */
+	if ((hw->media_type == e1000_media_type_copper) &&
+	    netif_carrier_ok(netdev))
+		ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ?
+							ETH_TP_MDI_X :
+							ETH_TP_MDI);
+	else
+		ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->mdix == AUTO_ALL_MODES)
+		ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		ecmd->eth_tp_mdix_ctrl = hw->mdix;
 	return 0;
 }
 
@@ -183,6 +197,22 @@ static int e1000_set_settings(struct net_device *netdev,
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 
+	/*
+	 * MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		if (hw->media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (ecmd->autoneg != AUTONEG_ENABLE)) {
+			e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
 		msleep(1);
 
@@ -199,12 +229,21 @@ static int e1000_set_settings(struct net_device *netdev,
 		ecmd->advertising = hw->autoneg_advertised;
 	} else {
 		u32 speed = ethtool_cmd_speed(ecmd);
+		/* calling this overrides forced MDI setting */
 		if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
 			clear_bit(__E1000_RESETTING, &adapter->flags);
 			return -EINVAL;
 		}
 	}
 
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->mdix = AUTO_ALL_MODES;
+		else
+			hw->mdix = ecmd->eth_tp_mdix_ctrl;
+	}
+
 	/* reset the link */
 
 	if (netif_running(adapter->netdev)) {
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index f3f9aeb7d1e1..222bfaff4622 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -2014,6 +2014,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
 		e1000_unmap_and_free_tx_resource(adapter, buffer_info);
 	}
 
+	netdev_reset_queue(adapter->netdev);
 	size = sizeof(struct e1000_buffer) * tx_ring->count;
 	memset(tx_ring->buffer_info, 0, size);
 
@@ -3273,6 +3274,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 	                     nr_frags, mss);
 
 	if (count) {
+		netdev_sent_queue(netdev, skb->len);
 		skb_tx_timestamp(skb);
 
 		e1000_tx_queue(adapter, tx_ring, tx_flags, count);
@@ -3860,6 +3862,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 	unsigned int i, eop;
 	unsigned int count = 0;
 	unsigned int total_tx_bytes=0, total_tx_packets=0;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
 
 	i = tx_ring->next_to_clean;
 	eop = tx_ring->buffer_info[i].next_to_watch;
@@ -3877,6 +3880,11 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 			if (cleaned) {
 				total_tx_packets += buffer_info->segs;
 				total_tx_bytes += buffer_info->bytecount;
+				if (buffer_info->skb) {
+					bytes_compl += buffer_info->skb->len;
+					pkts_compl++;
+				}
+
 			}
 			e1000_unmap_and_free_tx_resource(adapter, buffer_info);
 			tx_desc->upper.data = 0;
@@ -3890,6 +3898,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 
 	tx_ring->next_to_clean = i;
 
+	netdev_completed_queue(netdev, pkts_compl, bytes_compl);
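
The netdev_*_queue() calls added to this file wire up Byte Queue Limits; the
pattern, in outline (a sketch of the convention, not additional driver
code):

	netdev_sent_queue(netdev, skb->len);	/* per frame, at enqueue time */
	netdev_completed_queue(netdev, pkts_compl, bytes_compl); /* per clean */
	netdev_reset_queue(netdev);	/* when the Tx ring is emptied */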
+
 #define TX_WAKE_THRESHOLD 32
 	if (unlikely(count && netif_carrier_ok(netdev) &&
 		     E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) {
@@ -4950,6 +4960,10 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
 	default:
 		goto err_inval;
 	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	hw->mdix = AUTO_ALL_MODES;
+
 	return 0;
 
 err_inval:
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 080c89093feb..c98586408005 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -653,7 +653,7 @@ static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
  **/
 static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
 {
-	u16 data = er32(POEMB);
+	u32 data = er32(POEMB);
 
 	if (active)
 		data |= E1000_PHY_CTRL_D0A_LPLU;
@@ -677,7 +677,7 @@ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
  **/
 static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active)
 {
-	u16 data = er32(POEMB);
+	u32 data = er32(POEMB);
 
 	if (!active) {
 		data &= ~E1000_PHY_CTRL_NOND0A_LPLU;
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 0349e2478df8..c11ac2756667 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -199,6 +199,11 @@ static int e1000_get_settings(struct net_device *netdev,
 	else
 		ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
 
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
 	return 0;
 }
 
@@ -241,6 +246,10 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
 	default:
 		goto err_inval;
 	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
 	return 0;
 
 err_inval:
@@ -264,6 +273,22 @@ static int e1000_set_settings(struct net_device *netdev,
 		return -EINVAL;
 	}
 
+	/*
+	 * MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		if (hw->phy.media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (ecmd->autoneg != AUTONEG_ENABLE)) {
+			e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
 		usleep_range(1000, 2000);
 
@@ -282,20 +307,32 @@ static int e1000_set_settings(struct net_device *netdev,
 			hw->fc.requested_mode = e1000_fc_default;
 	} else {
 		u32 speed = ethtool_cmd_speed(ecmd);
+		/* calling this overrides forced MDI setting */
 		if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
 			clear_bit(__E1000_RESETTING, &adapter->state);
 			return -EINVAL;
 		}
 	}
 
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		/*
+		 * fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+	}
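
Putting the two encodings side by side (an illustrative helper -- the name
is invented, the values follow the ethtool UAPI and the driver's
AUTO_ALL_MODES convention):

	static u8 example_mdix_from_ethtool(u8 eth_tp_mdix_ctrl)
	{
		/* ETH_TP_MDI (1) and ETH_TP_MDI_X (2) pass through unchanged;
		 * ETH_TP_MDI_AUTO (3) becomes AUTO_ALL_MODES (0), the value
		 * the PHY code treats as "auto".
		 */
		return (eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) ? AUTO_ALL_MODES
							     : eth_tp_mdix_ctrl;
	}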
+
 	/* reset the link */
 
 	if (netif_running(adapter->netdev)) {
 		e1000e_down(adapter);
 		e1000e_up(adapter);
-	} else {
+	} else
 		e1000e_reset(adapter);
-	}
 
 	clear_bit(__E1000_RESETTING, &adapter->state);
 	return 0;
@@ -1905,7 +1942,8 @@ static int e1000_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 
 	if (ec->rx_coalesce_usecs == 4) {
-		adapter->itr = adapter->itr_setting = 4;
+		adapter->itr_setting = 4;
+		adapter->itr = adapter->itr_setting;
 	} else if (ec->rx_coalesce_usecs <= 3) {
 		adapter->itr = 20000;
 		adapter->itr_setting = ec->rx_coalesce_usecs;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3f0223ac4c7c..fb659dd8db03 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -56,7 +56,7 @@
 
 #define DRV_EXTRAVERSION "-k"
 
-#define DRV_VERSION "2.0.0" DRV_EXTRAVERSION
+#define DRV_VERSION "2.1.4" DRV_EXTRAVERSION
 char e1000e_driver_name[] = "e1000e";
 const char e1000e_driver_version[] = DRV_VERSION;
 
@@ -3446,7 +3446,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 			/*
 			 * if short on Rx space, Rx wins and must trump Tx
-			 * adjustment or use Early Receive if available
+			 * adjustment
 			 */
 			if (pba < min_rx_space)
 				pba = min_rx_space;
@@ -3755,6 +3755,10 @@ static irqreturn_t e1000_intr_msi_test(int irq, void *data)
 	e_dbg("icr is %08X\n", icr);
 	if (icr & E1000_ICR_RXSEQ) {
 		adapter->flags &= ~FLAG_MSI_TEST_FAILED;
+		/*
+		 * Force memory writes to complete before acknowledging the
+		 * interrupt is handled.
+		 */
 		wmb();
 	}
 
@@ -3796,6 +3800,10 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
 		goto msi_test_failed;
 	}
 
+	/*
+	 * Force memory writes to complete before enabling and firing an
+	 * interrupt.
+	 */
 	wmb();
 
 	e1000_irq_enable(adapter);
@@ -3807,7 +3815,7 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
 
 	e1000_irq_disable(adapter);
 
-	rmb();
+	rmb();			/* read flags after interrupt has been fired */
 
 	if (adapter->flags & FLAG_MSI_TEST_FAILED) {
 		adapter->int_mode = E1000E_INT_MODE_LEGACY;
@@ -4670,7 +4678,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
 	struct e1000_buffer *buffer_info;
 	unsigned int i;
 	u32 cmd_length = 0;
-	u16 ipcse = 0, tucse, mss;
+	u16 ipcse = 0, mss;
 	u8 ipcss, ipcso, tucss, tucso, hdr_len;
 
 	if (!skb_is_gso(skb))
@@ -4704,7 +4712,6 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
 	ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 	tucss = skb_transport_offset(skb);
 	tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
-	tucse = 0;
 
 	cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
 	               E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
@@ -4718,7 +4725,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
 	context_desc->lower_setup.ip_fields.ipcse  = cpu_to_le16(ipcse);
 	context_desc->upper_setup.tcp_fields.tucss = tucss;
 	context_desc->upper_setup.tcp_fields.tucso = tucso;
-	context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
+	context_desc->upper_setup.tcp_fields.tucse = 0;
 	context_desc->tcp_seg_setup.fields.mss     = cpu_to_le16(mss);
 	context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
 	context_desc->cmd_and_length = cpu_to_le32(cmd_length);
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index b860d4f7ea2a..fc62a3f3a5be 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -84,8 +84,9 @@ static const u16 e1000_igp_2_cable_length_table[] = {
 #define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200
 
 /* I82577 PHY Control 2 */
-#define I82577_PHY_CTRL2_AUTO_MDIX        0x0400
-#define I82577_PHY_CTRL2_FORCE_MDI_MDIX   0x0200
+#define I82577_PHY_CTRL2_MANUAL_MDIX      0x0200
+#define I82577_PHY_CTRL2_AUTO_MDI_MDIX    0x0400
+#define I82577_PHY_CTRL2_MDIX_CFG_MASK    0x0600
 
 /* I82577 PHY Diagnostics Status */
 #define I82577_DSTATUS_CABLE_LENGTH       0x03FC
@@ -702,6 +703,32 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
 	if (ret_val)
 		return ret_val;
 
+	/* Set MDI/MDIX mode */
+	ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		return ret_val;
+	phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
+	/*
+	 * Options:
+	 *   0 - Auto (default)
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 */
+	switch (hw->phy.mdix) {
+	case 1:
+		break;
+	case 2:
+		phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
+		break;
+	case 0:
+	default:
+		phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
+		break;
+	}
+	ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data);
+	if (ret_val)
+		return ret_val;
+
 	return e1000_set_master_slave_mode(hw);
 }
 
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index ba994fb4cec6..ca4641e2f748 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -2223,11 +2223,10 @@ out:
 s32 igb_set_eee_i350(struct e1000_hw *hw)
 {
 	s32 ret_val = 0;
-	u32 ipcnfg, eeer, ctrl_ext;
+	u32 ipcnfg, eeer;
 
-	ctrl_ext = rd32(E1000_CTRL_EXT);
-	if ((hw->mac.type != e1000_i350) ||
-	    (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK))
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
 		goto out;
 	ipcnfg = rd32(E1000_IPCNFG);
 	eeer = rd32(E1000_EEER);
@@ -2240,6 +2239,14 @@ s32 igb_set_eee_i350(struct e1000_hw *hw)
 			E1000_EEER_RX_LPI_EN |
 			E1000_EEER_LPI_FC);
 
+		/* keep the LPI clock running before EEE is enabled */
+		if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+			u32 eee_su;
+			eee_su = rd32(E1000_EEE_SU);
+			eee_su &= ~E1000_EEE_SU_LPI_CLK_STP;
+			wr32(E1000_EEE_SU, eee_su);
+		}
+
 	} else {
 		ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN |
 			E1000_IPCNFG_EEE_100M_AN);
@@ -2249,6 +2256,8 @@ s32 igb_set_eee_i350(struct e1000_hw *hw)
 	}
 	wr32(E1000_IPCNFG, ipcnfg);
 	wr32(E1000_EEER, eeer);
+	rd32(E1000_IPCNFG);
+	rd32(E1000_EEER);
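
The two reads added at the end appear intended as read-back flushes, the
usual idiom for forcing the posted writes to E1000_IPCNFG and E1000_EEER out
to the device before returning.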
 out:
 
 	return ret_val;
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index ec7e4fe3e3ee..de4b41ec3c40 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -322,6 +322,9 @@
 #define E1000_FCRTC_RTH_COAL_SHIFT      4
 #define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision */
 
+/* Timestamp in Rx buffer */
+#define E1000_RXPBS_CFG_TS_EN           0x80000000
+
 /* SerDes Control */
 #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
 
@@ -360,6 +363,7 @@
 #define E1000_ICR_RXDMT0        0x00000010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXT0          0x00000080 /* rx timer intr (ring 0) */
 #define E1000_ICR_VMMB          0x00000100 /* VM MB event */
+#define E1000_ICR_TS            0x00080000 /* Time Sync Interrupt */
 #define E1000_ICR_DRSTA         0x40000000 /* Device Reset Asserted */
 /* If this bit asserted, the driver should claim the interrupt */
 #define E1000_ICR_INT_ASSERTED  0x80000000
@@ -399,6 +403,7 @@
 #define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
 #define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_IMS_VMMB      E1000_ICR_VMMB      /* Mail box activity */
+#define E1000_IMS_TS        E1000_ICR_TS        /* Time Sync Interrupt */
 #define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* rx sequence error */
 #define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min. threshold */
 #define E1000_IMS_RXT0      E1000_ICR_RXT0      /* rx timer intr */
@@ -510,6 +515,9 @@
 
 #define E1000_TIMINCA_16NS_SHIFT 24
 
+#define E1000_TSICR_TXTS 0x00000002
+#define E1000_TSIM_TXTS 0x00000002
+
 #define E1000_MDICNFG_EXT_MDIO    0x80000000      /* MDI ext/int destination */
 #define E1000_MDICNFG_COM_MDIO    0x40000000      /* MDI shared w/ lan 0 */
 #define E1000_MDICNFG_PHY_MASK    0x03E00000
@@ -849,8 +857,9 @@
 #define E1000_IPCNFG_EEE_100M_AN     0x00000004  /* EEE Enable 100M AN */
 #define E1000_EEER_TX_LPI_EN         0x00010000  /* EEE Tx LPI Enable */
 #define E1000_EEER_RX_LPI_EN         0x00020000  /* EEE Rx LPI Enable */
-#define E1000_EEER_FRC_AN            0x10000000 /* Enable EEE in loopback */
+#define E1000_EEER_FRC_AN            0x10000000  /* Enable EEE in loopback */
 #define E1000_EEER_LPI_FC            0x00040000  /* EEE Enable on FC */
+#define E1000_EEE_SU_LPI_CLK_STP     0x00800000  /* EEE LPI Clock Stop */
 
 /* SerDes Control */
 #define E1000_GEN_CTL_READY             0x80000000
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 7be98b6f1052..3404bc79f4ca 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -464,6 +464,32 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw)
 	phy_data |= I82580_CFG_ENABLE_DOWNSHIFT;
 
 	ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Set MDI/MDIX mode */
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		goto out;
+	phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
+	/*
+	 * Options:
+	 *   0 - Auto (default)
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 */
+	switch (hw->phy.mdix) {
+	case 1:
+		break;
+	case 2:
+		phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX;
+		break;
+	case 0:
+	default:
+		phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX;
+		break;
+	}
+	ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
 
 out:
 	return ret_val;
@@ -2246,8 +2272,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw)
 	if (ret_val)
 		goto out;
 
-	phy_data &= ~I82580_PHY_CTRL2_AUTO_MDIX;
-	phy_data &= ~I82580_PHY_CTRL2_FORCE_MDI_MDIX;
+	phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
 
 	ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
 	if (ret_val)
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 34e40619f16b..6ac3299bfcb9 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -111,8 +111,9 @@ s32  igb_check_polarity_m88(struct e1000_hw *hw);
 #define I82580_PHY_STATUS2_SPEED_100MBPS  0x0100
 
 /* I82580 PHY Control 2 */
-#define I82580_PHY_CTRL2_AUTO_MDIX        0x0400
-#define I82580_PHY_CTRL2_FORCE_MDI_MDIX   0x0200
+#define I82580_PHY_CTRL2_MANUAL_MDIX      0x0200
+#define I82580_PHY_CTRL2_AUTO_MDI_MDIX    0x0400
+#define I82580_PHY_CTRL2_MDIX_CFG_MASK    0x0600
 
 /* I82580 PHY Diagnostics Status */
 #define I82580_DSTATUS_CABLE_LENGTH       0x03FC
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 28394bea5253..e5db48594e8a 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -91,6 +91,8 @@
 #define E1000_TIMINCA    0x0B608 /* Increment attributes register - RW */
 #define E1000_TSAUXC     0x0B640 /* Timesync Auxiliary Control register */
 #define E1000_SYSTIMR    0x0B6F8 /* System time register Residue */
+#define E1000_TSICR      0x0B66C /* Interrupt Cause Register */
+#define E1000_TSIM       0x0B674 /* Interrupt Mask Register */
 
 /* Filtering Registers */
 #define E1000_SAQF(_n) (0x5980 + 4 * (_n))
@@ -347,6 +349,7 @@
 /* Energy Efficient Ethernet "EEE" register */
 #define E1000_IPCNFG  0x0E38  /* Internal PHY Configuration */
 #define E1000_EEER    0x0E30  /* Energy Efficient Ethernet */
+#define E1000_EEE_SU  0x0E34  /* EEE Setup */
 
 /* Thermal Sensor Register */
 #define E1000_THSTAT    0x08110 /* Thermal Sensor Status */
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 9e572dd29ab2..8aad230c0592 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -34,9 +34,11 @@
 #include "e1000_mac.h"
 #include "e1000_82575.h"
 
+#ifdef CONFIG_IGB_PTP
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#endif /* CONFIG_IGB_PTP */
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
 
@@ -99,7 +101,6 @@ struct vf_data_storage {
 	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
 	u16 pf_qos;
 	u16 tx_rate;
-	struct pci_dev *vfdev;
 };
 
 #define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
@@ -131,9 +132,9 @@ struct vf_data_storage {
 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
 
 /* Supported Rx Buffer Sizes */
-#define IGB_RXBUFFER_512   512
+#define IGB_RXBUFFER_256   256
 #define IGB_RXBUFFER_16384 16384
-#define IGB_RX_HDR_LEN     IGB_RXBUFFER_512
+#define IGB_RX_HDR_LEN     IGB_RXBUFFER_256
 
 /* How many Tx Descriptors do we need to call netif_wake_queue ? */
 #define IGB_TX_QUEUE_WAKE	16
@@ -167,8 +168,8 @@ struct igb_tx_buffer {
 	unsigned int bytecount;
 	u16 gso_segs;
 	__be16 protocol;
-	dma_addr_t dma;
-	u32 length;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
 	u32 tx_flags;
 };
 
@@ -212,7 +213,6 @@ struct igb_q_vector {
 	struct igb_ring_container rx, tx;
 
 	struct napi_struct napi;
-	int numa_node;
 
 	u16 itr_val;
 	u8 set_itr;
@@ -257,7 +257,6 @@ struct igb_ring {
 	};
 	/* Items past this point are only used during ring alloc / free */
 	dma_addr_t dma;                /* phys address of the ring */
-	int numa_node;                  /* node to alloc ring memory on */
 };
 
 enum e1000_ring_flags_t {
@@ -342,7 +341,6 @@ struct igb_adapter {
 
 	/* OS defined structs */
 	struct pci_dev *pdev;
-	struct hwtstamp_config hwtstamp_config;
 
 	spinlock_t stats64_lock;
 	struct rtnl_link_stats64 stats64;
@@ -373,15 +371,19 @@ struct igb_adapter {
 	int vf_rate_link_speed;
 	u32 rss_queues;
 	u32 wvbr;
-	int node;
 	u32 *shadow_vfta;
 
+#ifdef CONFIG_IGB_PTP
 	struct ptp_clock *ptp_clock;
-	struct ptp_clock_info caps;
-	struct delayed_work overflow_work;
+	struct ptp_clock_info ptp_caps;
+	struct delayed_work ptp_overflow_work;
+	struct work_struct ptp_tx_work;
+	struct sk_buff *ptp_tx_skb;
 	spinlock_t tmreg_lock;
 	struct cyclecounter cc;
 	struct timecounter tc;
+#endif /* CONFIG_IGB_PTP */
+
 	char fw_version[32];
 };
 
@@ -390,6 +392,7 @@ struct igb_adapter {
 #define IGB_FLAG_QUAD_PORT_A       (1 << 2)
 #define IGB_FLAG_QUEUE_PAIRS       (1 << 3)
 #define IGB_FLAG_DMAC              (1 << 4)
+#define IGB_FLAG_PTP               (1 << 5)
 
 /* DMA Coalescing defines */
 #define IGB_MIN_TXPBSIZE           20408
@@ -435,13 +438,17 @@ extern void igb_power_up_link(struct igb_adapter *);
 extern void igb_set_fw_version(struct igb_adapter *);
 #ifdef CONFIG_IGB_PTP
 extern void igb_ptp_init(struct igb_adapter *adapter);
-extern void igb_ptp_remove(struct igb_adapter *adapter);
-
-extern void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
-				   struct skb_shared_hwtstamps *hwtstamps,
-				   u64 systim);
+extern void igb_ptp_stop(struct igb_adapter *adapter);
+extern void igb_ptp_reset(struct igb_adapter *adapter);
+extern void igb_ptp_tx_work(struct work_struct *work);
+extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
+extern void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector,
+				union e1000_adv_rx_desc *rx_desc,
+				struct sk_buff *skb);
+extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+				  struct ifreq *ifr, int cmd);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
 static inline s32 igb_reset_phy(struct e1000_hw *hw)
 {
 	if (hw->phy.ops.reset)
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 70591117051b..2ea012849825 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -148,9 +148,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 				   SUPPORTED_100baseT_Full |
 				   SUPPORTED_1000baseT_Full|
 				   SUPPORTED_Autoneg |
-				   SUPPORTED_TP);
-		ecmd->advertising = (ADVERTISED_TP |
-				     ADVERTISED_Pause);
+				   SUPPORTED_TP |
+				   SUPPORTED_Pause);
+		ecmd->advertising = ADVERTISED_TP;
 
 		if (hw->mac.autoneg == 1) {
 			ecmd->advertising |= ADVERTISED_Autoneg;
@@ -158,6 +158,21 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 			ecmd->advertising |= hw->phy.autoneg_advertised;
 		}
 
+		if (hw->mac.autoneg != 1)
+			ecmd->advertising &= ~(ADVERTISED_Pause |
+					       ADVERTISED_Asym_Pause);
+
+		if (hw->fc.requested_mode == e1000_fc_full)
+			ecmd->advertising |= ADVERTISED_Pause;
+		else if (hw->fc.requested_mode == e1000_fc_rx_pause)
+			ecmd->advertising |= (ADVERTISED_Pause |
+					      ADVERTISED_Asym_Pause);
+		else if (hw->fc.requested_mode == e1000_fc_tx_pause)
+			ecmd->advertising |=  ADVERTISED_Asym_Pause;
+		else
+			ecmd->advertising &= ~(ADVERTISED_Pause |
+					       ADVERTISED_Asym_Pause);
+
 		ecmd->port = PORT_TP;
 		ecmd->phy_address = hw->phy.addr;
 	} else {
@@ -198,6 +213,19 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 	}
 
 	ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI => 1; Invalid => 0 */
+	if (hw->phy.media_type == e1000_media_type_copper)
+		ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
 	return 0;
 }
 
@@ -214,6 +242,22 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 		return -EINVAL;
 	}
 
+	/*
+	 * MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		if (hw->phy.media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (ecmd->autoneg != AUTONEG_ENABLE)) {
+			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
 		msleep(1);
 
@@ -227,12 +271,25 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 			hw->fc.requested_mode = e1000_fc_default;
 	} else {
 		u32 speed = ethtool_cmd_speed(ecmd);
+		/* calling this overrides forced MDI setting */
 		if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) {
 			clear_bit(__IGB_RESETTING, &adapter->state);
 			return -EINVAL;
 		}
 	}
 
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		/*
+		 * fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+	}
+
 	/* reset the link */
 	if (netif_running(adapter->netdev)) {
 		igb_down(adapter);
@@ -1469,33 +1526,22 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ctrl_reg = 0;
-	u16 phy_reg = 0;
 
 	hw->mac.autoneg = false;
 
-	switch (hw->phy.type) {
-	case e1000_phy_m88:
-		/* Auto-MDI/MDIX Off */
-		igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
-		/* reset to update Auto-MDI/MDIX */
-		igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
-		/* autoneg off */
-		igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
-		break;
-	case e1000_phy_82580:
-		/* enable MII loopback */
-		igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041);
-		break;
-	case e1000_phy_i210:
-		/* set loopback speed in PHY */
-		igb_read_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-					&phy_reg);
-		phy_reg |= GS40G_MAC_SPEED_1G;
-		igb_write_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-					phy_reg);
-		ctrl_reg = rd32(E1000_CTRL_EXT);
-	default:
-		break;
+	if (hw->phy.type == e1000_phy_m88) {
+		if (hw->phy.id != I210_I_PHY_ID) {
+			/* Auto-MDI/MDIX Off */
+			igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+			/* reset to update Auto-MDI/MDIX */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+			/* autoneg off */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+		} else {
+			/* force 1000, set loopback  */
+			igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+		}
 	}
 
 	/* add small delay to avoid loopback test failure */
@@ -1513,7 +1559,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 		     E1000_CTRL_FD |	 /* Force Duplex to FULL */
 		     E1000_CTRL_SLU);	 /* Set link up enable bit */
 
-	if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+	if (hw->phy.type == e1000_phy_m88)
 		ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
 
 	wr32(E1000_CTRL, ctrl_reg);
@@ -1521,11 +1567,10 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 	/* Disable the receiver on the PHY so when a cable is plugged in, the
 	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
 	 */
-	if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+	if (hw->phy.type == e1000_phy_m88)
 		igb_phy_disable_receiver(adapter);
 
-	udelay(500);
-
+	mdelay(500);
 	return 0;
 }
 
@@ -1785,13 +1830,6 @@ static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
 		*data = 0;
 		goto out;
 	}
-	if ((adapter->hw.mac.type == e1000_i210)
-		|| (adapter->hw.mac.type == e1000_i211)) {
-		dev_err(&adapter->pdev->dev,
-			"Loopback test not supported on this part at this time.\n");
-		*data = 0;
-		goto out;
-	}
 	*data = igb_setup_desc_rings(adapter);
 	if (*data)
 		goto out;
@@ -2257,6 +2295,54 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	}
 }
 
+static int igb_get_ts_info(struct net_device *dev,
+			   struct ethtool_ts_info *info)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	switch (adapter->hw.mac.type) {
+#ifdef CONFIG_IGB_PTP
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i210:
+	case e1000_i211:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_HARDWARE |
+			SOF_TIMESTAMPING_RX_HARDWARE |
+			SOF_TIMESTAMPING_RAW_HARDWARE;
+
+		if (adapter->ptp_clock)
+			info->phc_index = ptp_clock_index(adapter->ptp_clock);
+		else
+			info->phc_index = -1;
+
+		info->tx_types =
+			(1 << HWTSTAMP_TX_OFF) |
+			(1 << HWTSTAMP_TX_ON);
+
+		info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
+
+		/* 82576 does not support timestamping all packets. */
+		if (adapter->hw.mac.type >= e1000_82580)
+			info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL;
+		else
+			info->rx_filters |=
+				(1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+		return 0;
+#endif /* CONFIG_IGB_PTP */
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int igb_ethtool_begin(struct net_device *netdev)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
@@ -2270,38 +2356,6 @@ static void igb_ethtool_complete(struct net_device *netdev)
 	pm_runtime_put(&adapter->pdev->dev);
 }
 
-#ifdef CONFIG_IGB_PTP
-static int igb_ethtool_get_ts_info(struct net_device *dev,
-				   struct ethtool_ts_info *info)
-{
-	struct igb_adapter *adapter = netdev_priv(dev);
-
-	info->so_timestamping =
-		SOF_TIMESTAMPING_TX_HARDWARE |
-		SOF_TIMESTAMPING_RX_HARDWARE |
-		SOF_TIMESTAMPING_RAW_HARDWARE;
-
-	if (adapter->ptp_clock)
-		info->phc_index = ptp_clock_index(adapter->ptp_clock);
-	else
-		info->phc_index = -1;
-
-	info->tx_types =
-		(1 << HWTSTAMP_TX_OFF) |
-		(1 << HWTSTAMP_TX_ON);
-
-	info->rx_filters =
-		(1 << HWTSTAMP_FILTER_NONE) |
-		(1 << HWTSTAMP_FILTER_ALL) |
-		(1 << HWTSTAMP_FILTER_SOME) |
-		(1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
-		(1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
-		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
-
-	return 0;
-}
-
-#endif
 static const struct ethtool_ops igb_ethtool_ops = {
 	.get_settings           = igb_get_settings,
 	.set_settings           = igb_set_settings,
@@ -2328,11 +2382,9 @@ static const struct ethtool_ops igb_ethtool_ops = {
 	.get_ethtool_stats      = igb_get_ethtool_stats,
 	.get_coalesce           = igb_get_coalesce,
 	.set_coalesce           = igb_set_coalesce,
+	.get_ts_info            = igb_get_ts_info,
 	.begin			= igb_ethtool_begin,
 	.complete		= igb_ethtool_complete,
-#ifdef CONFIG_IGB_PTP
-	.get_ts_info		= igb_ethtool_get_ts_info,
-#endif
 };
 
 void igb_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f88c822e57a6..e1ceb37ef12e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -172,8 +172,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *);
 
 #ifdef CONFIG_PCI_IOV
 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
-static int igb_find_enabled_vfs(struct igb_adapter *adapter);
-static int igb_check_vf_assignment(struct igb_adapter *adapter);
+static bool igb_vfs_are_assigned(struct igb_adapter *adapter);
 #endif
 
 #ifdef CONFIG_PM
@@ -404,8 +403,8 @@ static void igb_dump(struct igb_adapter *adapter)
 		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
 		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
 			n, tx_ring->next_to_use, tx_ring->next_to_clean,
-			(u64)buffer_info->dma,
-			buffer_info->length,
+			(u64)dma_unmap_addr(buffer_info, dma),
+			dma_unmap_len(buffer_info, len),
 			buffer_info->next_to_watch,
 			(u64)buffer_info->time_stamp);
 	}
@@ -456,8 +455,8 @@ static void igb_dump(struct igb_adapter *adapter)
 				" %04X  %p %016llX %p%s\n", i,
 				le64_to_cpu(u0->a),
 				le64_to_cpu(u0->b),
-				(u64)buffer_info->dma,
-				buffer_info->length,
+				(u64)dma_unmap_addr(buffer_info, dma),
+				dma_unmap_len(buffer_info, len),
 				buffer_info->next_to_watch,
 				(u64)buffer_info->time_stamp,
 				buffer_info->skb, next_desc);
@@ -466,7 +465,8 @@ static void igb_dump(struct igb_adapter *adapter)
 				print_hex_dump(KERN_INFO, "",
 					DUMP_PREFIX_ADDRESS,
 					16, 1, buffer_info->skb->data,
-					buffer_info->length, true);
+					dma_unmap_len(buffer_info, len),
+					true);
 		}
 	}
 
@@ -683,52 +683,29 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 {
 	struct igb_ring *ring;
 	int i;
-	int orig_node = adapter->node;
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
-		if (orig_node == -1) {
-			int cur_node = next_online_node(adapter->node);
-			if (cur_node == MAX_NUMNODES)
-				cur_node = first_online_node;
-			adapter->node = cur_node;
-		}
-		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
-				    adapter->node);
-		if (!ring)
-			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
+		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 		if (!ring)
 			goto err;
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = i;
 		ring->dev = &adapter->pdev->dev;
 		ring->netdev = adapter->netdev;
-		ring->numa_node = adapter->node;
 		/* For 82575, context index must be unique per ring. */
 		if (adapter->hw.mac.type == e1000_82575)
 			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
 		adapter->tx_ring[i] = ring;
 	}
-	/* Restore the adapter's original node */
-	adapter->node = orig_node;
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
-		if (orig_node == -1) {
-			int cur_node = next_online_node(adapter->node);
-			if (cur_node == MAX_NUMNODES)
-				cur_node = first_online_node;
-			adapter->node = cur_node;
-		}
-		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
-				    adapter->node);
-		if (!ring)
-			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
+		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 		if (!ring)
 			goto err;
 		ring->count = adapter->rx_ring_count;
 		ring->queue_index = i;
 		ring->dev = &adapter->pdev->dev;
 		ring->netdev = adapter->netdev;
-		ring->numa_node = adapter->node;
 		/* set flag indicating ring supports SCTP checksum offload */
 		if (adapter->hw.mac.type >= e1000_82576)
 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
@@ -742,16 +719,12 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 
 		adapter->rx_ring[i] = ring;
 	}
-	/* Restore the adapter's original node */
-	adapter->node = orig_node;
 
 	igb_cache_ring_register(adapter);
 
 	return 0;
 
 err:
-	/* Restore the adapter's original node */
-	adapter->node = orig_node;
 	igb_free_queues(adapter);
 
 	return -ENOMEM;
@@ -1117,24 +1090,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter)
 	struct igb_q_vector *q_vector;
 	struct e1000_hw *hw = &adapter->hw;
 	int v_idx;
-	int orig_node = adapter->node;
 
 	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
-		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
-						adapter->num_tx_queues)) &&
-		    (adapter->num_rx_queues == v_idx))
-			adapter->node = orig_node;
-		if (orig_node == -1) {
-			int cur_node = next_online_node(adapter->node);
-			if (cur_node == MAX_NUMNODES)
-				cur_node = first_online_node;
-			adapter->node = cur_node;
-		}
-		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
-					adapter->node);
-		if (!q_vector)
-			q_vector = kzalloc(sizeof(struct igb_q_vector),
-					   GFP_KERNEL);
+		q_vector = kzalloc(sizeof(struct igb_q_vector),
+				   GFP_KERNEL);
 		if (!q_vector)
 			goto err_out;
 		q_vector->adapter = adapter;
@@ -1143,14 +1102,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter)
 		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
 		adapter->q_vector[v_idx] = q_vector;
 	}
-	/* Restore the adapter's original node */
-	adapter->node = orig_node;
 
 	return 0;
 
 err_out:
-	/* Restore the adapter's original node */
-	adapter->node = orig_node;
 	igb_free_q_vectors(adapter);
 	return -ENOMEM;
 }
@@ -1751,6 +1706,11 @@ void igb_reset(struct igb_adapter *adapter)
 	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
 	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
 
+#ifdef CONFIG_IGB_PTP
+	/* Re-enable PTP, where applicable. */
+	igb_ptp_reset(adapter);
+#endif /* CONFIG_IGB_PTP */
+
 	igb_get_phy_info(hw);
 }
 
@@ -2180,11 +2140,12 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	}
 
 #endif
+
 #ifdef CONFIG_IGB_PTP
 	/* do hw tstamp init after resetting */
 	igb_ptp_init(adapter);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
 	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
 	/* print bus type/speed/width info */
 	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
@@ -2259,9 +2220,9 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 
 	pm_runtime_get_noresume(&pdev->dev);
 #ifdef CONFIG_IGB_PTP
-	igb_ptp_remove(adapter);
+	igb_ptp_stop(adapter);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
 	/*
 	 * The watchdog timer may be rescheduled, so explicitly
 	 * disable watchdog from being rescheduled.
@@ -2294,11 +2255,11 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 	/* reclaim resources allocated to VFs */
 	if (adapter->vf_data) {
 		/* disable iov and allow time for transactions to clear */
-		if (!igb_check_vf_assignment(adapter)) {
+		if (igb_vfs_are_assigned(adapter)) {
+			dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n");
+		} else {
 			pci_disable_sriov(pdev);
 			msleep(500);
-		} else {
-			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
 		}
 
 		kfree(adapter->vf_data);
@@ -2338,7 +2299,7 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
 #ifdef CONFIG_PCI_IOV
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_hw *hw = &adapter->hw;
-	int old_vfs = igb_find_enabled_vfs(adapter);
+	int old_vfs = pci_num_vf(adapter->pdev);
 	int i;
 
 	/* Virtualization features not supported on i210 family. */
@@ -2418,8 +2379,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
 				  VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
-	adapter->node = -1;
-
 	spin_lock_init(&adapter->stats64_lock);
 #ifdef CONFIG_PCI_IOV
 	switch (hw->mac.type) {
@@ -2666,13 +2625,11 @@ static int igb_close(struct net_device *netdev)
 int igb_setup_tx_resources(struct igb_ring *tx_ring)
 {
 	struct device *dev = tx_ring->dev;
-	int orig_node = dev_to_node(dev);
 	int size;
 
 	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
-	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
-	if (!tx_ring->tx_buffer_info)
-		tx_ring->tx_buffer_info = vzalloc(size);
+
+	tx_ring->tx_buffer_info = vzalloc(size);
 	if (!tx_ring->tx_buffer_info)
 		goto err;
 
@@ -2680,18 +2637,10 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
 	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 
-	set_dev_node(dev, tx_ring->numa_node);
 	tx_ring->desc = dma_alloc_coherent(dev,
 					   tx_ring->size,
 					   &tx_ring->dma,
 					   GFP_KERNEL);
-	set_dev_node(dev, orig_node);
-	if (!tx_ring->desc)
-		tx_ring->desc = dma_alloc_coherent(dev,
-						   tx_ring->size,
-						   &tx_ring->dma,
-						   GFP_KERNEL);
-
 	if (!tx_ring->desc)
 		goto err;
 
@@ -2702,8 +2651,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
 
 err:
 	vfree(tx_ring->tx_buffer_info);
-	dev_err(dev,
-		"Unable to allocate memory for the transmit descriptor ring\n");
+	tx_ring->tx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
 	return -ENOMEM;
 }
 
@@ -2820,34 +2769,23 @@ static void igb_configure_tx(struct igb_adapter *adapter)
 int igb_setup_rx_resources(struct igb_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	int orig_node = dev_to_node(dev);
-	int size, desc_len;
+	int size;
 
 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
-	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
-	if (!rx_ring->rx_buffer_info)
-		rx_ring->rx_buffer_info = vzalloc(size);
+
+	rx_ring->rx_buffer_info = vzalloc(size);
 	if (!rx_ring->rx_buffer_info)
 		goto err;
 
-	desc_len = sizeof(union e1000_adv_rx_desc);
 
 	/* Round up to nearest 4K */
-	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
 	rx_ring->size = ALIGN(rx_ring->size, 4096);
 
-	set_dev_node(dev, rx_ring->numa_node);
 	rx_ring->desc = dma_alloc_coherent(dev,
 					   rx_ring->size,
 					   &rx_ring->dma,
 					   GFP_KERNEL);
-	set_dev_node(dev, orig_node);
-	if (!rx_ring->desc)
-		rx_ring->desc = dma_alloc_coherent(dev,
-						   rx_ring->size,
-						   &rx_ring->dma,
-						   GFP_KERNEL);
-
 	if (!rx_ring->desc)
 		goto err;
 
@@ -2859,8 +2797,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
 err:
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
-	dev_err(dev, "Unable to allocate memory for the receive descriptor"
-		" ring\n");
+	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
 	return -ENOMEM;
 }
 
@@ -2898,57 +2835,48 @@ static void igb_setup_mrqc(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 mrqc, rxcsum;
-	u32 j, num_rx_queues, shift = 0, shift2 = 0;
-	union e1000_reta {
-		u32 dword;
-		u8  bytes[4];
-	} reta;
-	static const u8 rsshash[40] = {
-		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
-		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
-		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
-		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
+	u32 j, num_rx_queues, shift = 0;
+	static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
+					0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
+					0xA32DCB77, 0x0CF23080, 0x3BB7426A,
+					0xFA01ACBE };
 
 	/* Fill out hash function seeds */
-	for (j = 0; j < 10; j++) {
-		u32 rsskey = rsshash[(j * 4)];
-		rsskey |= rsshash[(j * 4) + 1] << 8;
-		rsskey |= rsshash[(j * 4) + 2] << 16;
-		rsskey |= rsshash[(j * 4) + 3] << 24;
-		array_wr32(E1000_RSSRK(0), j, rsskey);
-	}
+	for (j = 0; j < 10; j++)
+		wr32(E1000_RSSRK(j), rsskey[j]);
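
/*
 * Sanity note (not part of the patch): each u32 above is four bytes of
 * the old seed packed little-endian, exactly what the removed per-byte
 * loop assembled; e.g. the first bytes 6d 5a 56 da become 0xDA565A6D.
 */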
 
 	num_rx_queues = adapter->rss_queues;
 
-	if (adapter->vfs_allocated_count) {
-		/* 82575 and 82576 supports 2 RSS queues for VMDq */
-		switch (hw->mac.type) {
-		case e1000_i350:
-		case e1000_82580:
-			num_rx_queues = 1;
-			shift = 0;
-			break;
-		case e1000_82576:
+	switch (hw->mac.type) {
+	case e1000_82575:
+		shift = 6;
+		break;
+	case e1000_82576:
+		/* 82576 supports 2 RSS queues for SR-IOV */
+		if (adapter->vfs_allocated_count) {
 			shift = 3;
 			num_rx_queues = 2;
-			break;
-		case e1000_82575:
-			shift = 2;
-			shift2 = 6;
-		default:
-			break;
 		}
-	} else {
-		if (hw->mac.type == e1000_82575)
-			shift = 6;
+		break;
+	default:
+		break;
 	}
 
-	for (j = 0; j < (32 * 4); j++) {
-		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
-		if (shift2)
-			reta.bytes[j & 3] |= num_rx_queues << shift2;
-		if ((j & 3) == 3)
-			wr32(E1000_RETA(j >> 2), reta.dword);
+	/*
+	 * Populate the indirection table 4 entries at a time.  To do this
+	 * we are generating the results for n and n+2 and then interleaving
+	 * those with the results for n+1 and n+3.
+	 */
+	for (j = 0; j < 32; j++) {
+		/* first pass generates n and n+2 */
+		u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
+		u32 reta = (base & 0x07800780) >> (7 - shift);
+
+		/* second pass generates n+1 and n+3 */
+		base += 0x00010001 * num_rx_queues;
+		reta |= (base & 0x07800780) << (1 + shift);
+
+		wr32(E1000_RETA(j), reta);
 	}
 
 	/*
@@ -3184,8 +3112,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #endif
 	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+#ifdef CONFIG_IGB_PTP
 	if (hw->mac.type >= e1000_82580)
 		srrctl |= E1000_SRRCTL_TIMESTAMP;
+#endif /* CONFIG_IGB_PTP */
 	/* Only set Drop Enable if we are supporting multiple queues */
 	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
 		srrctl |= E1000_SRRCTL_DROP_EN;
@@ -3269,20 +3199,20 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
 {
 	if (tx_buffer->skb) {
 		dev_kfree_skb_any(tx_buffer->skb);
-		if (tx_buffer->dma)
+		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
-					 tx_buffer->dma,
-					 tx_buffer->length,
+					 dma_unmap_addr(tx_buffer, dma),
+					 dma_unmap_len(tx_buffer, len),
 					 DMA_TO_DEVICE);
-	} else if (tx_buffer->dma) {
+	} else if (dma_unmap_len(tx_buffer, len)) {
 		dma_unmap_page(ring->dev,
-			       tx_buffer->dma,
-			       tx_buffer->length,
+			       dma_unmap_addr(tx_buffer, dma),
+			       dma_unmap_len(tx_buffer, len),
 			       DMA_TO_DEVICE);
 	}
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
-	tx_buffer->dma = 0;
+	dma_unmap_len_set(tx_buffer, len, 0);
 	/* buffer_info must be completely set up in the transmit path */
 }
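
/*
 * The dma_unmap_addr()/dma_unmap_len() accessors used above assume the
 * buffer-info structure declares its fields through the wrappers from
 * <linux/dma-mapping.h>, roughly as in this sketch (the real layout is
 * struct igb_tx_buffer in igb.h). On configurations that do not set
 * CONFIG_NEED_DMA_MAP_STATE the fields and accessors compile away.
 */
struct example_tx_buffer {
	struct sk_buff *skb;
	DEFINE_DMA_UNMAP_ADDR(dma);
	DEFINE_DMA_UNMAP_LEN(len);
};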
 
@@ -4229,9 +4159,11 @@ static __le32 igb_tx_cmd_type(u32 tx_flags)
 	if (tx_flags & IGB_TX_FLAGS_VLAN)
 		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
 
+#ifdef CONFIG_IGB_PTP
 	/* set timestamp bit if present */
-	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
+	if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP))
 		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
+#endif /* CONFIG_IGB_PTP */
 
 	/* set segmentation bits for TSO */
 	if (tx_flags & IGB_TX_FLAGS_TSO)
@@ -4275,7 +4207,7 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 		       const u8 hdr_len)
 {
 	struct sk_buff *skb = first->skb;
-	struct igb_tx_buffer *tx_buffer_info;
+	struct igb_tx_buffer *tx_buffer;
 	union e1000_adv_tx_desc *tx_desc;
 	dma_addr_t dma;
 	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
@@ -4296,8 +4228,8 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 		goto dma_error;
 
 	/* record length, and DMA address */
-	first->length = size;
-	first->dma = dma;
+	dma_unmap_len_set(first, len, size);
+	dma_unmap_addr_set(first, dma, dma);
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
 	for (;;) {
@@ -4339,9 +4271,9 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 		if (dma_mapping_error(tx_ring->dev, dma))
 			goto dma_error;
 
-		tx_buffer_info = &tx_ring->tx_buffer_info[i];
-		tx_buffer_info->length = size;
-		tx_buffer_info->dma = dma;
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
 
 		tx_desc->read.olinfo_status = 0;
 		tx_desc->read.buffer_addr = cpu_to_le64(dma);
@@ -4392,9 +4324,9 @@ dma_error:
 
 	/* clear dma mappings for failed tx_buffer_info map */
 	for (;;) {
-		tx_buffer_info = &tx_ring->tx_buffer_info[i];
-		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
-		if (tx_buffer_info == first)
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		if (tx_buffer == first)
 			break;
 		if (i == 0)
 			i = tx_ring->count;
@@ -4440,6 +4372,9 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 				struct igb_ring *tx_ring)
 {
+#ifdef CONFIG_IGB_PTP
+	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+#endif /* CONFIG_IGB_PTP */
 	struct igb_tx_buffer *first;
 	int tso;
 	u32 tx_flags = 0;
@@ -4462,10 +4397,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 	first->bytecount = skb->len;
 	first->gso_segs = 1;
 
-	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+#ifdef CONFIG_IGB_PTP
+	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+		     !(adapter->ptp_tx_skb))) {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		tx_flags |= IGB_TX_FLAGS_TSTAMP;
+
+		adapter->ptp_tx_skb = skb_get(skb);
+		if (adapter->hw.mac.type == e1000_82576)
+			schedule_work(&adapter->ptp_tx_work);
 	}
+#endif /* CONFIG_IGB_PTP */
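
/*
 * For context (a sketch, not part of the patch): SKBTX_HW_TSTAMP is
 * set on an skb when the sending socket requested hardware Tx
 * timestamps, e.g. from userspace:
 */
#include <sys/socket.h>
#include <linux/net_tstamp.h>

static int enable_tx_hwtstamp(int sock_fd)
{
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	/* after a send, the timestamp comes back as a
	 * struct scm_timestamping cmsg via recvmsg(MSG_ERRQUEUE) */
	return setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}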
 
 	if (vlan_tx_tag_present(skb)) {
 		tx_flags |= IGB_TX_FLAGS_VLAN;
@@ -4661,11 +4603,13 @@ void igb_update_stats(struct igb_adapter *adapter,
 	bytes = 0;
 	packets = 0;
 	for (i = 0; i < adapter->num_rx_queues; i++) {
-		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
+		u32 rqdpc = rd32(E1000_RQDPC(i));
 		struct igb_ring *ring = adapter->rx_ring[i];
 
-		ring->rx_stats.drops += rqdpc_tmp;
-		net_stats->rx_fifo_errors += rqdpc_tmp;
+		if (rqdpc) {
+			ring->rx_stats.drops += rqdpc;
+			net_stats->rx_fifo_errors += rqdpc;
+		}
 
 		do {
 			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
@@ -4755,7 +4699,11 @@ void igb_update_stats(struct igb_adapter *adapter,
 	reg = rd32(E1000_CTRL_EXT);
 	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
 		adapter->stats.rxerrc += rd32(E1000_RXERRC);
-		adapter->stats.tncrs += rd32(E1000_TNCRS);
+
+		/* this stat has invalid values on i210/i211 */
+		if ((hw->mac.type != e1000_i210) &&
+		    (hw->mac.type != e1000_i211))
+			adapter->stats.tncrs += rd32(E1000_TNCRS);
 	}
 
 	adapter->stats.tsctc += rd32(E1000_TSCTC);
@@ -4852,6 +4800,19 @@ static irqreturn_t igb_msix_other(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
+#ifdef CONFIG_IGB_PTP
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = rd32(E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			wr32(E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* CONFIG_IGB_PTP */
+
 	wr32(E1000_EIMS, adapter->eims_other);
 
 	return IRQ_HANDLED;
@@ -5002,102 +4963,43 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
 {
 	unsigned char mac_addr[ETH_ALEN];
-	struct pci_dev *pdev = adapter->pdev;
-	struct e1000_hw *hw = &adapter->hw;
-	struct pci_dev *pvfdev;
-	unsigned int device_id;
-	u16 thisvf_devfn;
 
 	eth_random_addr(mac_addr);
 	igb_set_vf_mac(adapter, vf, mac_addr);
 
-	switch (adapter->hw.mac.type) {
-	case e1000_82576:
-		device_id = IGB_82576_VF_DEV_ID;
-		/* VF Stride for 82576 is 2 */
-		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
-			(pdev->devfn & 1);
-		break;
-	case e1000_i350:
-		device_id = IGB_I350_VF_DEV_ID;
-		/* VF Stride for I350 is 4 */
-		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
-				(pdev->devfn & 3);
-		break;
-	default:
-		device_id = 0;
-		thisvf_devfn = 0;
-		break;
-	}
-
-	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
-	while (pvfdev) {
-		if (pvfdev->devfn == thisvf_devfn)
-			break;
-		pvfdev = pci_get_device(hw->vendor_id,
-					device_id, pvfdev);
-	}
-
-	if (pvfdev)
-		adapter->vf_data[vf].vfdev = pvfdev;
-	else
-		dev_err(&pdev->dev,
-			"Couldn't find pci dev ptr for VF %4.4x\n",
-			thisvf_devfn);
-	return pvfdev != NULL;
+	return 0;
 }
 
-static int igb_find_enabled_vfs(struct igb_adapter *adapter)
+static bool igb_vfs_are_assigned(struct igb_adapter *adapter)
 {
-	struct e1000_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
-	struct pci_dev *pvfdev;
-	u16 vf_devfn = 0;
-	u16 vf_stride;
-	unsigned int device_id;
-	int vfs_found = 0;
+	struct pci_dev *vfdev;
+	int dev_id;
 
 	switch (adapter->hw.mac.type) {
 	case e1000_82576:
-		device_id = IGB_82576_VF_DEV_ID;
-		/* VF Stride for 82576 is 2 */
-		vf_stride = 2;
+		dev_id = IGB_82576_VF_DEV_ID;
 		break;
 	case e1000_i350:
-		device_id = IGB_I350_VF_DEV_ID;
-		/* VF Stride for I350 is 4 */
-		vf_stride = 4;
+		dev_id = IGB_I350_VF_DEV_ID;
 		break;
 	default:
-		device_id = 0;
-		vf_stride = 0;
-		break;
-	}
-
-	vf_devfn = pdev->devfn + 0x80;
-	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
-	while (pvfdev) {
-		if (pvfdev->devfn == vf_devfn &&
-		    (pvfdev->bus->number >= pdev->bus->number))
-			vfs_found++;
-		vf_devfn += vf_stride;
-		pvfdev = pci_get_device(hw->vendor_id,
-					device_id, pvfdev);
+		return false;
 	}
 
-	return vfs_found;
-}
-
-static int igb_check_vf_assignment(struct igb_adapter *adapter)
-{
-	int i;
-	for (i = 0; i < adapter->vfs_allocated_count; i++) {
-		if (adapter->vf_data[i].vfdev) {
-			if (adapter->vf_data[i].vfdev->dev_flags &
-			    PCI_DEV_FLAGS_ASSIGNED)
+	/* loop through all the VFs to see if we own any that are assigned */
+	vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
+	while (vfdev) {
+		/* if we don't own it we don't care */
+		if (vfdev->is_virtfn && vfdev->physfn == pdev) {
+			/* if it is assigned we cannot release it */
+			if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
 				return true;
 		}
+
+		vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev);
 	}
+
 	return false;
 }
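
/*
 * Reference note (not part of the patch): pci_get_device() returns the
 * next matching device with its reference count raised and drops the
 * reference on the cursor device passed in, so a complete walk like
 * the loop above cleans up after itself; returning early leaves one
 * reference held on the device the walk stopped at.
 */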
 
@@ -5643,6 +5545,19 @@ static irqreturn_t igb_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
+#ifdef CONFIG_IGB_PTP
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = rd32(E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			wr32(E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* CONFIG_IGB_PTP */
+
 	napi_schedule(&q_vector->napi);
 
 	return IRQ_HANDLED;
@@ -5684,6 +5599,19 @@ static irqreturn_t igb_intr(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
+#ifdef CONFIG_IGB_PTP
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = rd32(E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			wr32(E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* CONFIG_IGB_PTP */
+
 	napi_schedule(&q_vector->napi);
 
 	return IRQ_HANDLED;
@@ -5743,37 +5671,6 @@ static int igb_poll(struct napi_struct *napi, int budget)
 	return 0;
 }
 
-#ifdef CONFIG_IGB_PTP
-/**
- * igb_tx_hwtstamp - utility function which checks for TX time stamp
- * @q_vector: pointer to q_vector containing needed info
- * @buffer: pointer to igb_tx_buffer structure
- *
- * If we were asked to do hardware stamping and such a time stamp is
- * available, then it must have been for this skb here because we only
- * allow only one such packet into the queue.
- */
-static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
-			    struct igb_tx_buffer *buffer_info)
-{
-	struct igb_adapter *adapter = q_vector->adapter;
-	struct e1000_hw *hw = &adapter->hw;
-	struct skb_shared_hwtstamps shhwtstamps;
-	u64 regval;
-
-	/* if skb does not support hw timestamp or TX stamp not valid exit */
-	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
-	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
-		return;
-
-	regval = rd32(E1000_TXSTMPL);
-	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
-
-	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
-	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
-}
-
-#endif
 /**
  * igb_clean_tx_irq - Reclaim resources after transmit completes
  * @q_vector: pointer to q_vector containing needed info
@@ -5785,7 +5682,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct igb_ring *tx_ring = q_vector->tx.ring;
 	struct igb_tx_buffer *tx_buffer;
-	union e1000_adv_tx_desc *tx_desc, *eop_desc;
+	union e1000_adv_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
 	unsigned int budget = q_vector->tx.work_limit;
 	unsigned int i = tx_ring->next_to_clean;
@@ -5797,16 +5694,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 	tx_desc = IGB_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
-	for (; budget; budget--) {
-		eop_desc = tx_buffer->next_to_watch;
-
-		/* prevent any other reads prior to eop_desc */
-		rmb();
+	do {
+		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
 
 		/* if next_to_watch is not set then there is no work pending */
 		if (!eop_desc)
 			break;
 
+		/* prevent any other reads prior to eop_desc */
+		rmb();
+
 		/* if DD is not set pending work has not been completed */
 		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
 			break;
@@ -5818,25 +5715,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
 
-#ifdef CONFIG_IGB_PTP
-		/* retrieve hardware timestamp */
-		igb_tx_hwtstamp(q_vector, tx_buffer);
-
-#endif
 		/* free the skb */
 		dev_kfree_skb_any(tx_buffer->skb);
-		tx_buffer->skb = NULL;
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
-				 tx_buffer->dma,
-				 tx_buffer->length,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
 				 DMA_TO_DEVICE);
 
+		/* clear tx_buffer data */
+		tx_buffer->skb = NULL;
+		dma_unmap_len_set(tx_buffer, len, 0);
+
 		/* clear last DMA location and unmap remaining buffers */
 		while (tx_desc != eop_desc) {
-			tx_buffer->dma = 0;
-
 			tx_buffer++;
 			tx_desc++;
 			i++;
@@ -5847,17 +5740,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 			}
 
 			/* unmap any remaining paged data */
-			if (tx_buffer->dma) {
+			if (dma_unmap_len(tx_buffer, len)) {
 				dma_unmap_page(tx_ring->dev,
-					       tx_buffer->dma,
-					       tx_buffer->length,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
 					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
 			}
 		}
 
-		/* clear last DMA location */
-		tx_buffer->dma = 0;
-
 		/* move us one more past the eop_desc for start of next pkt */
 		tx_buffer++;
 		tx_desc++;
@@ -5867,7 +5758,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 			tx_buffer = tx_ring->tx_buffer_info;
 			tx_desc = IGB_TX_DESC(tx_ring, 0);
 		}
-	}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
 
 	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
@@ -5883,12 +5780,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
 		struct e1000_hw *hw = &adapter->hw;
 
-		eop_desc = tx_buffer->next_to_watch;
-
 		/* Detect a transmit hang in hardware, this serializes the
 		 * check with the clearing of time_stamp and movement of i */
 		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
-		if (eop_desc &&
+		if (tx_buffer->next_to_watch &&
 		    time_after(jiffies, tx_buffer->time_stamp +
 			       (adapter->tx_timeout_factor * HZ)) &&
 		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
@@ -5912,9 +5807,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 				tx_ring->next_to_use,
 				tx_ring->next_to_clean,
 				tx_buffer->time_stamp,
-				eop_desc,
+				tx_buffer->next_to_watch,
 				jiffies,
-				eop_desc->wb.status);
+				tx_buffer->next_to_watch->wb.status);
 			netif_stop_subqueue(tx_ring->netdev,
 					    tx_ring->queue_index);
 
@@ -5994,47 +5889,6 @@ static inline void igb_rx_hash(struct igb_ring *ring,
 		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
 }
 
-#ifdef CONFIG_IGB_PTP
-static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
-			    union e1000_adv_rx_desc *rx_desc,
-			    struct sk_buff *skb)
-{
-	struct igb_adapter *adapter = q_vector->adapter;
-	struct e1000_hw *hw = &adapter->hw;
-	u64 regval;
-
-	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
-				       E1000_RXDADV_STAT_TS))
-		return;
-
-	/*
-	 * If this bit is set, then the RX registers contain the time stamp. No
-	 * other packet will be time stamped until we read these registers, so
-	 * read the registers to make them available again. Because only one
-	 * packet can be time stamped at a time, we know that the register
-	 * values must belong to this one here and therefore we don't need to
-	 * compare any of the additional attributes stored for it.
-	 *
-	 * If nothing went wrong, then it should have a shared tx_flags that we
-	 * can turn into a skb_shared_hwtstamps.
-	 */
-	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-		u32 *stamp = (u32 *)skb->data;
-		regval = le32_to_cpu(*(stamp + 2));
-		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
-		skb_pull(skb, IGB_TS_HDR_LEN);
-	} else {
-		if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
-			return;
-
-		regval = rd32(E1000_RXSTMPL);
-		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
-	}
-
-	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
-}
-
-#endif
 static void igb_rx_vlan(struct igb_ring *ring,
 			union e1000_adv_rx_desc *rx_desc,
 			struct sk_buff *skb)
@@ -6146,8 +6000,8 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		}
 
 #ifdef CONFIG_IGB_PTP
-		igb_rx_hwtstamp(q_vector, rx_desc, skb);
-#endif
+		igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb);
+#endif /* CONFIG_IGB_PTP */
 		igb_rx_hash(rx_ring, rx_desc, skb);
 		igb_rx_checksum(rx_ring, rx_desc, skb);
 		igb_rx_vlan(rx_ring, rx_desc, skb);
@@ -6341,181 +6195,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * igb_hwtstamp_ioctl - control hardware time stamping
- * @netdev:
- * @ifreq:
- * @cmd:
- *
- * Outgoing time stamping can be enabled and disabled. Play nice and
- * disable it when requested, although it shouldn't case any overhead
- * when no packet needs it. At most one packet in the queue may be
- * marked for time stamping, otherwise it would be impossible to tell
- * for sure to which packet the hardware time stamp belongs.
- *
- * Incoming time stamping has to be configured via the hardware
- * filters. Not all combinations are supported, in particular event
- * type has to be specified. Matching the kind of event packet is
- * not supported, with the exception of "all V2 events regardless of
- * level 2 or 4".
- *
- **/
-static int igb_hwtstamp_ioctl(struct net_device *netdev,
-			      struct ifreq *ifr, int cmd)
-{
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct e1000_hw *hw = &adapter->hw;
-	struct hwtstamp_config config;
-	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
-	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-	u32 tsync_rx_cfg = 0;
-	bool is_l4 = false;
-	bool is_l2 = false;
-	u32 regval;
-
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
-	/* reserved for future extensions */
-	if (config.flags)
-		return -EINVAL;
-
-	switch (config.tx_type) {
-	case HWTSTAMP_TX_OFF:
-		tsync_tx_ctl = 0;
-	case HWTSTAMP_TX_ON:
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	switch (config.rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-		tsync_rx_ctl = 0;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_ALL:
-		/*
-		 * register TSYNCRXCFG must be set, therefore it is not
-		 * possible to time stamp both Sync and Delay_Req messages
-		 * => fall back to time stamping all packets
-		 */
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-		config.rx_filter = HWTSTAMP_FILTER_ALL;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
-		is_l4 = true;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
-		is_l4 = true;
-		break;
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
-		is_l2 = true;
-		is_l4 = true;
-		config.rx_filter = HWTSTAMP_FILTER_SOME;
-		break;
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
-		is_l2 = true;
-		is_l4 = true;
-		config.rx_filter = HWTSTAMP_FILTER_SOME;
-		break;
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
-		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-		is_l2 = true;
-		is_l4 = true;
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	if (hw->mac.type == e1000_82575) {
-		if (tsync_rx_ctl | tsync_tx_ctl)
-			return -EINVAL;
-		return 0;
-	}
-
-	/*
-	 * Per-packet timestamping only works if all packets are
-	 * timestamped, so enable timestamping in all packets as
-	 * long as one rx filter was configured.
-	 */
-	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
-		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-	}
-
-	/* enable/disable TX */
-	regval = rd32(E1000_TSYNCTXCTL);
-	regval &= ~E1000_TSYNCTXCTL_ENABLED;
-	regval |= tsync_tx_ctl;
-	wr32(E1000_TSYNCTXCTL, regval);
-
-	/* enable/disable RX */
-	regval = rd32(E1000_TSYNCRXCTL);
-	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
-	regval |= tsync_rx_ctl;
-	wr32(E1000_TSYNCRXCTL, regval);
-
-	/* define which PTP packets are time stamped */
-	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
-
-	/* define ethertype filter for timestamped packets */
-	if (is_l2)
-		wr32(E1000_ETQF(3),
-		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
-		                 E1000_ETQF_1588 | /* enable timestamping */
-		                 ETH_P_1588));     /* 1588 eth protocol type */
-	else
-		wr32(E1000_ETQF(3), 0);
-
-#define PTP_PORT 319
-	/* L4 Queue Filter[3]: filter by destination port and protocol */
-	if (is_l4) {
-		u32 ftqf = (IPPROTO_UDP /* UDP */
-			| E1000_FTQF_VF_BP /* VF not compared */
-			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
-			| E1000_FTQF_MASK); /* mask all inputs */
-		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
-
-		wr32(E1000_IMIR(3), htons(PTP_PORT));
-		wr32(E1000_IMIREXT(3),
-		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
-		if (hw->mac.type == e1000_82576) {
-			/* enable source port check */
-			wr32(E1000_SPQF(3), htons(PTP_PORT));
-			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
-		}
-		wr32(E1000_FTQF(3), ftqf);
-	} else {
-		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
-	}
-	wrfl();
-
-	adapter->hwtstamp_config = config;
-
-	/* clear TX/RX time stamp registers, just to be sure */
-	regval = rd32(E1000_TXSTMPH);
-	regval = rd32(E1000_RXSTMPH);
-
-	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-		-EFAULT : 0;
-}
-
-/**
  * igb_ioctl -
  * @netdev:
  * @ifreq:
@@ -6528,8 +6207,10 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
 		return igb_mii_ioctl(netdev, ifr, cmd);
+#ifdef CONFIG_IGB_PTP
 	case SIOCSHWTSTAMP:
-		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
+		return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
+#endif /* CONFIG_IGB_PTP */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -6667,6 +6348,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
 	default:
 		goto err_inval;
 	}
+
+	/* clear MDI, MDI(-X) override; it is only allowed when autoneg is enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
 	return 0;
 
 err_inval:
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index c846ea9131a3..ee21445157a3 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -69,22 +69,22 @@
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
  */
 
-#define IGB_OVERFLOW_PERIOD	(HZ * 60 * 9)
-#define INCPERIOD_82576		(1 << E1000_TIMINCA_16NS_SHIFT)
-#define INCVALUE_82576_MASK	((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
-#define INCVALUE_82576		(16 << IGB_82576_TSYNC_SHIFT)
-#define IGB_NBITS_82580		40
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 9)
+#define INCPERIOD_82576			(1 << E1000_TIMINCA_16NS_SHIFT)
+#define INCVALUE_82576_MASK		((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
+#define INCVALUE_82576			(16 << IGB_82576_TSYNC_SHIFT)
+#define IGB_NBITS_82580			40
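
/*
 * Reference note (not part of the patch): HZ * 60 * 9 runs the check
 * every 9 minutes, i.e. about twice per 18.3-minute wrap quoted above,
 * so the timecounter is refreshed before SYSTIM can overflow unseen.
 */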
 
 /*
  * SYSTIM read access for the 82576
  */
 
-static cycle_t igb_82576_systim_read(const struct cyclecounter *cc)
+static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
 {
-	u64 val;
-	u32 lo, hi;
 	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
 	struct e1000_hw *hw = &igb->hw;
+	u64 val;
+	u32 lo, hi;
 
 	lo = rd32(E1000_SYSTIML);
 	hi = rd32(E1000_SYSTIMH);
@@ -99,12 +99,12 @@ static cycle_t igb_82576_systim_read(const struct cyclecounter *cc)
  * SYSTIM read access for the 82580
  */
 
-static cycle_t igb_82580_systim_read(const struct cyclecounter *cc)
+static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
 {
-	u64 val;
-	u32 lo, hi, jk;
 	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
 	struct e1000_hw *hw = &igb->hw;
+	u64 val;
+	u32 lo, hi, jk;
 
 	/*
 	 * The timestamp latches on lowest register read. For the 82580
@@ -122,16 +122,101 @@ static cycle_t igb_82580_systim_read(const struct cyclecounter *cc)
 }
 
 /*
+ * SYSTIM read access for I210/I211
+ */
+
+static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 sec, nsec, jk;
+
+	/*
+	 * The timestamp latches on lowest register read. For I210/I211, the
+	 * lowest register is SYSTIMR. Since we only need to provide nanosecond
+	 * resolution, we can ignore it.
+	 */
+	jk = rd32(E1000_SYSTIMR);
+	nsec = rd32(E1000_SYSTIML);
+	sec = rd32(E1000_SYSTIMH);
+
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static void igb_ptp_write_i210(struct igb_adapter *adapter,
+			       const struct timespec *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/*
+	 * Writing the SYSTIMR register is not necessary as it only provides
+	 * sub-nanosecond resolution.
+	 */
+	wr32(E1000_SYSTIML, ts->tv_nsec);
+	wr32(E1000_SYSTIMH, ts->tv_sec);
+}
+
+/**
+ * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value.
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * The 'tmreg_lock' spinlock is used to protect the consistency of the
+ * system time value. This is needed because reading the 64 bit time
+ * value involves reading two (or three) 32 bit registers. The first
+ * read latches the value. Ditto for writing.
+ *
+ * In addition, we have extended the system time with an overflow
+ * counter in software.
+ **/
+static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
+				       struct skb_shared_hwtstamps *hwtstamps,
+				       u64 systim)
+{
+	unsigned long flags;
+	u64 ns;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+		ns = timecounter_cyc2time(&adapter->tc, systim);
+
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		hwtstamps->hwtstamp = ns_to_ktime(ns);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		/* Upper 32 bits contain s, lower 32 bits contain ns. */
+		hwtstamps->hwtstamp = ktime_set(systim >> 32,
+						systim & 0xFFFFFFFF);
+		break;
+	default:
+		break;
+	}
+}
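
/*
 * Conversion model behind timecounter_cyc2time() above (a sketch, not
 * part of the patch): the cyclecounter scales a raw SYSTIM delta as
 *
 *	ns = (delta * cc.mult) >> cc.shift;
 *
 * and the timecounter adds the result to the last recorded nanosecond
 * value. With mult = 1, as set up in igb_ptp_init() below, SYSTIM acts
 * as a fixed-point nanosecond count with cc.shift fractional bits.
 */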
+
+/*
  * PTP clock operations
  */
 
-static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
 {
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	int neg_adj = 0;
 	u64 rate;
 	u32 incvalue;
-	int neg_adj = 0;
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps);
-	struct e1000_hw *hw = &igb->hw;
 
 	if (ppb < 0) {
 		neg_adj = 1;
@@ -153,13 +238,14 @@ static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 	return 0;
 }
 
-static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
 {
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	int neg_adj = 0;
 	u64 rate;
 	u32 inca;
-	int neg_adj = 0;
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps);
-	struct e1000_hw *hw = &igb->hw;
 
 	if (ppb < 0) {
 		neg_adj = 1;
@@ -178,11 +264,12 @@ static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 	return 0;
 }
 
-static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta)
+static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
 {
-	s64 now;
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
 	unsigned long flags;
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps);
+	s64 now;
 
 	spin_lock_irqsave(&igb->tmreg_lock, flags);
 
@@ -195,12 +282,32 @@ static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	return 0;
 }
 
-static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
 {
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	struct timespec now, then = ns_to_timespec(delta);
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_read_i210(igb, &now);
+	now = timespec_add(now, then);
+	igb_ptp_write_i210(igb, (const struct timespec *)&now);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
+				 struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
 	u64 ns;
 	u32 remainder;
-	unsigned long flags;
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps);
 
 	spin_lock_irqsave(&igb->tmreg_lock, flags);
 
@@ -214,11 +321,29 @@ static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
 	return 0;
 }
 
-static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts)
+static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
+				struct timespec *ts)
 {
-	u64 ns;
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
 	unsigned long flags;
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps);
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_read_i210(igb, ts);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
+				 const struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	u64 ns;
 
 	ns = ts->tv_sec * 1000000000ULL;
 	ns += ts->tv_nsec;
@@ -232,77 +357,369 @@ static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts)
 	return 0;
 }
 
-static int ptp_82576_enable(struct ptp_clock_info *ptp,
-			    struct ptp_clock_request *rq, int on)
+static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
+				const struct timespec *ts)
 {
-	return -EOPNOTSUPP;
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_write_i210(igb, ts);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
 }
 
-static int ptp_82580_enable(struct ptp_clock_info *ptp,
-			    struct ptp_clock_request *rq, int on)
+static int igb_ptp_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
 {
 	return -EOPNOTSUPP;
 }
 
-static void igb_overflow_check(struct work_struct *work)
+/**
+ * igb_ptp_tx_work - poll for a valid Tx timestamp
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.
+ */
+void igb_ptp_tx_work(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
+						   ptp_tx_work);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsynctxctl;
+
+	if (!adapter->ptp_tx_skb)
+		return;
+
+	tsynctxctl = rd32(E1000_TSYNCTXCTL);
+	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
+		igb_ptp_tx_hwtstamp(adapter);
+	else
+		/* reschedule to check later */
+		schedule_work(&adapter->ptp_tx_work);
+}
+
+static void igb_ptp_overflow_check(struct work_struct *work)
 {
-	struct timespec ts;
 	struct igb_adapter *igb =
-		container_of(work, struct igb_adapter, overflow_work.work);
+		container_of(work, struct igb_adapter, ptp_overflow_work.work);
+	struct timespec ts;
 
-	igb_gettime(&igb->caps, &ts);
+	igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
 
 	pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
 
-	schedule_delayed_work(&igb->overflow_work, IGB_OVERFLOW_PERIOD);
+	schedule_delayed_work(&igb->ptp_overflow_work,
+			      IGB_SYSTIM_OVERFLOW_PERIOD);
+}
+
+/**
+ * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure.
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here, because we
+ * allow only one such packet into the queue.
+ */
+void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct skb_shared_hwtstamps shhwtstamps;
+	u64 regval;
+
+	regval = rd32(E1000_TXSTMPL);
+	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
+
+	igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
+	dev_kfree_skb_any(adapter->ptp_tx_skb);
+	adapter->ptp_tx_skb = NULL;
+}
+
+void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector,
+			 union e1000_adv_rx_desc *rx_desc,
+			 struct sk_buff *skb)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	u64 regval;
+
+	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
+				       E1000_RXDADV_STAT_TS))
+		return;
+
+	/*
+	 * If this bit is set, then the RX registers contain the time stamp. No
+	 * other packet will be time stamped until we read these registers, so
+	 * read the registers to make them available again. Because only one
+	 * packet can be time stamped at a time, we know that the register
+	 * values must belong to this one here and therefore we don't need to
+	 * compare any of the additional attributes stored for it.
+	 *
+	 * If nothing went wrong, then it should have a shared tx_flags that we
+	 * can turn into a skb_shared_hwtstamps.
+	 */
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+		u32 *stamp = (u32 *)skb->data;
+		regval = le32_to_cpu(*(stamp + 2));
+		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
+		skb_pull(skb, IGB_TS_HDR_LEN);
+	} else {
+		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
+			return;
+
+		regval = rd32(E1000_RXSTMPL);
+		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
+	}
+
+	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
+/**
+ * igb_ptp_hwtstamp_ioctl - control hardware time stamping
+ * @netdev: pointer to the net device structure
+ * @ifr: interface request containing the hwtstamp configuration
+ * @cmd: ioctl command (SIOCSHWTSTAMP)
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+			   struct ifreq *ifr, int cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct hwtstamp_config config;
+	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_cfg = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
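+		/* fall through */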
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+		/*
+		 * register TSYNCRXCFG must be set, therefore it is not
+		 * possible to time stamp both Sync and Delay_Req messages
+		 * => fall back to time stamping all packets
+		 */
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
+		is_l2 = true;
+		is_l4 = true;
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == e1000_82575) {
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -EINVAL;
+		return 0;
+	}
+
+	/*
+	 * Per-packet timestamping only works if all packets are
+	 * timestamped, so enable timestamping in all packets as
+	 * long as one rx filter was configured.
+	 */
+	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
+		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+
+		if ((hw->mac.type == e1000_i210) ||
+		    (hw->mac.type == e1000_i211)) {
+			regval = rd32(E1000_RXPBS);
+			regval |= E1000_RXPBS_CFG_TS_EN;
+			wr32(E1000_RXPBS, regval);
+		}
+	}
+
+	/* enable/disable TX */
+	regval = rd32(E1000_TSYNCTXCTL);
+	regval &= ~E1000_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	wr32(E1000_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = rd32(E1000_TSYNCRXCTL);
+	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	wr32(E1000_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+	/* define ethertype filter for timestamped packets */
+	if (is_l2)
+		wr32(E1000_ETQF(3),
+		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
+		      E1000_ETQF_1588 | /* enable timestamping */
+		      ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		wr32(E1000_ETQF(3), 0);
+
+#define PTP_PORT 319
+	/* L4 Queue Filter[3]: filter by destination port and protocol */
+	if (is_l4) {
+		u32 ftqf = (IPPROTO_UDP /* UDP */
+			| E1000_FTQF_VF_BP /* VF not compared */
+			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
+			| E1000_FTQF_MASK); /* mask all inputs */
+		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
+
+		wr32(E1000_IMIR(3), htons(PTP_PORT));
+		wr32(E1000_IMIREXT(3),
+		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+		if (hw->mac.type == e1000_82576) {
+			/* enable source port check */
+			wr32(E1000_SPQF(3), htons(PTP_PORT));
+			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
+		}
+		wr32(E1000_FTQF(3), ftqf);
+	} else {
+		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
+	}
+	wrfl();
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	regval = rd32(E1000_TXSTMPL);
+	regval = rd32(E1000_TXSTMPH);
+	regval = rd32(E1000_RXSTMPL);
+	regval = rd32(E1000_RXSTMPH);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
 }
 
 void igb_ptp_init(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
 
 	switch (hw->mac.type) {
-	case e1000_i210:
-	case e1000_i211:
-	case e1000_i350:
+	case e1000_82576:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 1000000000;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+		adapter->ptp_caps.settime = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->cc.read = igb_ptp_read_82576;
+		adapter->cc.mask = CLOCKSOURCE_MASK(64);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
+		/* Dial the nominal frequency. */
+		wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+		break;
 	case e1000_82580:
-		adapter->caps.owner	= THIS_MODULE;
-		strcpy(adapter->caps.name, "igb-82580");
-		adapter->caps.max_adj	= 62499999;
-		adapter->caps.n_ext_ts	= 0;
-		adapter->caps.pps	= 0;
-		adapter->caps.adjfreq	= ptp_82580_adjfreq;
-		adapter->caps.adjtime	= igb_adjtime;
-		adapter->caps.gettime	= igb_gettime;
-		adapter->caps.settime	= igb_settime;
-		adapter->caps.enable	= ptp_82580_enable;
-		adapter->cc.read	= igb_82580_systim_read;
-		adapter->cc.mask	= CLOCKSOURCE_MASK(IGB_NBITS_82580);
-		adapter->cc.mult	= 1;
-		adapter->cc.shift	= 0;
+	case e1000_i350:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+		adapter->ptp_caps.settime = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->cc.read = igb_ptp_read_82580;
+		adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = 0;
 		/* Enable the timer functions by clearing bit 31. */
 		wr32(E1000_TSAUXC, 0x0);
 		break;
-
-	case e1000_82576:
-		adapter->caps.owner	= THIS_MODULE;
-		strcpy(adapter->caps.name, "igb-82576");
-		adapter->caps.max_adj	= 1000000000;
-		adapter->caps.n_ext_ts	= 0;
-		adapter->caps.pps	= 0;
-		adapter->caps.adjfreq	= ptp_82576_adjfreq;
-		adapter->caps.adjtime	= igb_adjtime;
-		adapter->caps.gettime	= igb_gettime;
-		adapter->caps.settime	= igb_settime;
-		adapter->caps.enable	= ptp_82576_enable;
-		adapter->cc.read	= igb_82576_systim_read;
-		adapter->cc.mask	= CLOCKSOURCE_MASK(64);
-		adapter->cc.mult	= 1;
-		adapter->cc.shift	= IGB_82576_TSYNC_SHIFT;
-		/* Dial the nominal frequency. */
-		wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+	case e1000_i210:
+	case e1000_i211:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
+		adapter->ptp_caps.settime = igb_ptp_settime_i210;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		/* Enable the timer functions by clearing bit 31. */
+		wr32(E1000_TSAUXC, 0x0);
 		break;
-
 	default:
 		adapter->ptp_clock = NULL;
 		return;
@@ -310,86 +727,114 @@ void igb_ptp_init(struct igb_adapter *adapter)
 
 	wrfl();
 
-	timecounter_init(&adapter->tc, &adapter->cc,
-			 ktime_to_ns(ktime_get_real()));
+	spin_lock_init(&adapter->tmreg_lock);
+	INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+	/* Initialize the clock and overflow work for devices that need it. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+		struct timespec ts = ktime_to_timespec(ktime_get_real());
 
-	INIT_DELAYED_WORK(&adapter->overflow_work, igb_overflow_check);
+		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
 
-	spin_lock_init(&adapter->tmreg_lock);
+		INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+				  igb_ptp_overflow_check);
 
-	schedule_delayed_work(&adapter->overflow_work, IGB_OVERFLOW_PERIOD);
+		schedule_delayed_work(&adapter->ptp_overflow_work,
+				      IGB_SYSTIM_OVERFLOW_PERIOD);
+	}
+
+	/* Initialize the time sync interrupts for devices that support it. */
+	if (hw->mac.type >= e1000_82580) {
+		wr32(E1000_TSIM, E1000_TSIM_TXTS);
+		wr32(E1000_IMS, E1000_IMS_TS);
+	}
 
-	adapter->ptp_clock = ptp_clock_register(&adapter->caps);
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
-	} else
+	} else {
 		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
 			 adapter->netdev->name);
+		adapter->flags |= IGB_FLAG_PTP;
+	}
 }
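
The cc.read/mask/mult/shift fields programmed above feed the generic timecounter code, which converts raw SYSTIM cycles to nanoseconds as ns = (cycles * mult) >> shift. A self-contained sketch of that conversion (illustrative constants, not the igb hardware values):

#include <stdio.h>

/*
 * Toy model of the kernel's cyclecounter math: the driver supplies
 * mult and shift, and the core computes ns = (cycles * mult) >> shift.
 * The constants below are illustrative, not the igb hardware values.
 */
static unsigned long long cyc2ns(unsigned long long cycles,
				 unsigned int mult, unsigned int shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	/* 2^20 cycles with mult = 1, shift = 4 -> 65536 ns */
	printf("%llu ns\n", cyc2ns(1ULL << 20, 1, 4));
	return 0;
}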
 
-void igb_ptp_remove(struct igb_adapter *adapter)
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
 {
 	switch (adapter->hw.mac.type) {
-	case e1000_i211:
-	case e1000_i210:
-	case e1000_i350:
-	case e1000_82580:
 	case e1000_82576:
-		cancel_delayed_work_sync(&adapter->overflow_work);
+	case e1000_82580:
+	case e1000_i350:
+		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		/* No delayed work to cancel. */
 		break;
 	default:
 		return;
 	}
 
+	cancel_work_sync(&adapter->ptp_tx_work);
+
 	if (adapter->ptp_clock) {
 		ptp_clock_unregister(adapter->ptp_clock);
 		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
 			 adapter->netdev->name);
+		adapter->flags &= ~IGB_FLAG_PTP;
 	}
 }
 
 /**
- * igb_systim_to_hwtstamp - convert system time value to hw timestamp
- * @adapter: board private structure
- * @hwtstamps: timestamp structure to update
- * @systim: unsigned 64bit system time value.
- *
- * We need to convert the system time value stored in the RX/TXSTMP registers
- * into a hwtstamp which can be used by the upper level timestamping functions.
+ * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
  *
- * The 'tmreg_lock' spinlock is used to protect the consistency of the
- * system time value. This is needed because reading the 64 bit time
- * value involves reading two (or three) 32 bit registers. The first
- * read latches the value. Ditto for writing.
- *
- * In addition, here have extended the system time with an overflow
- * counter in software.
+ * This function handles the reset work required to re-enable the PTP device.
  **/
-void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
-			    struct skb_shared_hwtstamps *hwtstamps,
-			    u64 systim)
+void igb_ptp_reset(struct igb_adapter *adapter)
 {
-	u64 ns;
-	unsigned long flags;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (!(adapter->flags & IGB_FLAG_PTP))
+		return;
 
 	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		/* Dial the nominal frequency. */
+		wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+		break;
+	case e1000_82580:
+	case e1000_i350:
 	case e1000_i210:
 	case e1000_i211:
-	case e1000_i350:
-	case e1000_82580:
-	case e1000_82576:
+		/* Enable the timer functions and interrupts. */
+		wr32(E1000_TSAUXC, 0x0);
+		wr32(E1000_TSIM, E1000_TSIM_TXTS);
+		wr32(E1000_IMS, E1000_IMS_TS);
 		break;
 	default:
+		/* No work to do. */
 		return;
 	}
 
-	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	/* Re-initialize the timer. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+		struct timespec ts = ktime_to_timespec(ktime_get_real());
 
-	ns = timecounter_cyc2time(&adapter->tc, systim);
-
-	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
-	memset(hwtstamps, 0, sizeof(*hwtstamps));
-	hwtstamps->hwtstamp = ns_to_ktime(ns);
+		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
+	}
 }
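
Once ptp_clock_register() has succeeded, the PHC is visible to userspace as a character device backing a dynamic posix clock. A minimal reader, assuming the clock appears as /dev/ptp0:

#include <fcntl.h>
#include <stdio.h>
#include <time.h>

/* Standard posix dynamic-clock encoding, as used by the kernel's testptp. */
#define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | 3)

int main(void)
{
	struct timespec ts;
	int fd = open("/dev/ptp0", O_RDONLY); /* assumed device node */

	if (fd < 0 || clock_gettime(FD_TO_CLOCKID(fd), &ts))
		return 1;
	printf("PHC time: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}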
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 5fd5d04c26c9..89f40e51fc13 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -32,7 +32,7 @@
 
 obj-$(CONFIG_IXGBE) += ixgbe.o
 
-ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
+ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o \
               ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \
               ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index b9623e9ea895..5bd26763554c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -78,6 +78,9 @@
 
 /* Supported Rx Buffer Sizes */
 #define IXGBE_RXBUFFER_256    256  /* Used for skb receive header */
+#define IXGBE_RXBUFFER_2K    2048
+#define IXGBE_RXBUFFER_3K    3072
+#define IXGBE_RXBUFFER_4K    4096
 #define IXGBE_MAX_RXBUFFER  16384  /* largest size for a single descriptor */
 
 /*
@@ -104,6 +107,7 @@
 #define IXGBE_TX_FLAGS_FSO		(u32)(1 << 6)
 #define IXGBE_TX_FLAGS_TXSW		(u32)(1 << 7)
 #define IXGBE_TX_FLAGS_TSTAMP		(u32)(1 << 8)
+#define IXGBE_TX_FLAGS_NO_IFCS		(u32)(1 << 9)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT  29
@@ -293,16 +297,25 @@ struct ixgbe_ring_feature {
  * this is twice the size of a half page we need to double the page order
  * for FCoE enabled Rx queues.
  */
-#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192)
-static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring)
 {
-	return test_bit(__IXGBE_RX_FCOE, &ring->state) ? 1 : 0;
+#ifdef IXGBE_FCOE
+	if (test_bit(__IXGBE_RX_FCOE, &ring->state))
+		return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K :
+					    IXGBE_RXBUFFER_3K;
+#endif
+	return IXGBE_RXBUFFER_2K;
 }
-#else
-#define ixgbe_rx_pg_order(_ring) 0
+
+static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+{
+#ifdef IXGBE_FCOE
+	if (test_bit(__IXGBE_RX_FCOE, &ring->state))
+		return (PAGE_SIZE < 8192) ? 1 : 0;
 #endif
+	return 0;
+}
 #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
-#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring))
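(In practice: on 4K-page systems an FCoE-tagged ring now gets 4K buffers carved from order-1, 8K pages, while every other ring uses 2K buffers from order-0 pages; on 8K-or-larger pages even FCoE fits its 3K buffer in an order-0 page.)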
 
 struct ixgbe_ring_container {
 	struct ixgbe_ring *ring;	/* pointer to linked list of rings */
@@ -584,6 +597,9 @@ struct ixgbe_adapter {
 #ifdef CONFIG_IXGBE_HWMON
 	struct hwmon_buff ixgbe_hwmon_buff;
 #endif /* CONFIG_IXGBE_HWMON */
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *ixgbe_dbg_adapter;
+#endif /* CONFIG_DEBUG_FS */
 };
 
 struct ixgbe_fdir_filter {
@@ -712,7 +728,12 @@ extern int ixgbe_fcoe_get_hbainfo(struct net_device *netdev,
 				  struct netdev_fcoe_hbainfo *info);
 extern u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter);
 #endif /* IXGBE_FCOE */
-
+#ifdef CONFIG_DEBUG_FS
+extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter);
+extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter);
+extern void ixgbe_dbg_init(void);
+extern void ixgbe_dbg_exit(void);
+#endif /* CONFIG_DEBUG_FS */
 static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring)
 {
 	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
new file mode 100644
index 000000000000..8d3a21889099
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
@@ -0,0 +1,300 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "ixgbe.h"
+
+static struct dentry *ixgbe_dbg_root;
+
+static char ixgbe_dbg_reg_ops_buf[256] = "";
+
+/**
+ * ixgbe_dbg_reg_ops_open - prep the debugfs pokee data item when opened
+ * @inode: inode that was opened
+ * @filp:  file info
+ *
+ * Stash the adapter pointer hiding in the inode into the file pointer where
+ * we can find it later in the read and write calls
+ **/
+static int ixgbe_dbg_reg_ops_open(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+/**
+ * ixgbe_dbg_reg_ops_read - read for reg_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	char buf[256];
+	int bytes_not_copied;
+	int len;
+
+	/* don't allow partial reads */
+	if (*ppos != 0)
+		return 0;
+
+	len = snprintf(buf, sizeof(buf), "%s: %s\n",
+		       adapter->netdev->name, ixgbe_dbg_reg_ops_buf);
+	if (count < len)
+		return -ENOSPC;
+	bytes_not_copied = copy_to_user(buffer, buf, len);
+	if (bytes_not_copied)
+		return -EFAULT;
+
+	*ppos = len;
+	return len;
+}
+
+/**
+ * ixgbe_dbg_reg_ops_write - write into reg_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_reg_ops_write(struct file *filp,
+				     const char __user *buffer,
+				     size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	int bytes_not_copied;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+	if (count >= sizeof(ixgbe_dbg_reg_ops_buf))
+		return -ENOSPC;
+
+	bytes_not_copied = copy_from_user(ixgbe_dbg_reg_ops_buf, buffer, count);
+	if (bytes_not_copied)
+		return -EFAULT;
+	ixgbe_dbg_reg_ops_buf[count] = '\0';
+
+	if (strncmp(ixgbe_dbg_reg_ops_buf, "write", 5) == 0) {
+		u32 reg, value;
+		int cnt;
+		cnt = sscanf(&ixgbe_dbg_reg_ops_buf[5], "%x %x", &reg, &value);
+		if (cnt == 2) {
+			IXGBE_WRITE_REG(&adapter->hw, reg, value);
+			value = IXGBE_READ_REG(&adapter->hw, reg);
+			e_dev_info("write: 0x%08x = 0x%08x\n", reg, value);
+		} else {
+			e_dev_info("write <reg> <value>\n");
+		}
+	} else if (strncmp(ixgbe_dbg_reg_ops_buf, "read", 4) == 0) {
+		u32 reg, value;
+		int cnt;
+		cnt = sscanf(&ixgbe_dbg_reg_ops_buf[4], "%x", &reg);
+		if (cnt == 1) {
+			value = IXGBE_READ_REG(&adapter->hw, reg);
+			e_dev_info("read 0x%08x = 0x%08x\n", reg, value);
+		} else {
+			e_dev_info("read <reg>\n");
+		}
+	} else {
+		e_dev_info("Unknown command %s\n", ixgbe_dbg_reg_ops_buf);
+		e_dev_info("Available commands:\n");
+		e_dev_info("   read <reg>\n");
+		e_dev_info("   write <reg> <value>\n");
+	}
+	return count;
+}
+
+static const struct file_operations ixgbe_dbg_reg_ops_fops = {
+	.owner = THIS_MODULE,
+	.open =  ixgbe_dbg_reg_ops_open,
+	.read =  ixgbe_dbg_reg_ops_read,
+	.write = ixgbe_dbg_reg_ops_write,
+};
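
With debugfs mounted at the usual /sys/kernel/debug, this file can be driven from a shell, for example (the PCI address and register offset here are assumptions, not values from this patch): echo "read 0x042A4" > /sys/kernel/debug/ixgbe/0000:01:00.0/reg_ops. Note that the register value is reported via e_dev_info() in the kernel log; reading the file back merely echoes the last command string.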
+
+static char ixgbe_dbg_netdev_ops_buf[256] = "";
+
+/**
+ * ixgbe_dbg_netdev_ops_open - prep the debugfs netdev_ops data item
+ * @inode: inode that was opened
+ * @filp: file info
+ *
+ * Stash the adapter pointer hiding in the inode into the file pointer
+ * where we can find it later in the read and write calls
+ **/
+static int ixgbe_dbg_netdev_ops_open(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+/**
+ * ixgbe_dbg_netdev_ops_read - read for netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp,
+					 char __user *buffer,
+					 size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	char buf[256];
+	int bytes_not_copied;
+	int len;
+
+	/* don't allow partial reads */
+	if (*ppos != 0)
+		return 0;
+
+	len = snprintf(buf, sizeof(buf), "%s: %s\n",
+		       adapter->netdev->name, ixgbe_dbg_netdev_ops_buf);
+	if (count < len)
+		return -ENOSPC;
+	bytes_not_copied = copy_to_user(buffer, buf, len);
+	if (bytes_not_copied)
+		return -EFAULT;
+
+	*ppos = len;
+	return len;
+}
+
+/**
+ * ixgbe_dbg_netdev_ops_write - write into netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp,
+					  const char __user *buffer,
+					  size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	int bytes_not_copied;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+	if (count >= sizeof(ixgbe_dbg_netdev_ops_buf))
+		return -ENOSPC;
+
+	bytes_not_copied = copy_from_user(ixgbe_dbg_netdev_ops_buf,
+					  buffer, count);
+	if (bytes_not_copied)
+		return -EFAULT;
+	ixgbe_dbg_netdev_ops_buf[count] = '\0';
+
+	if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
+		adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev);
+		e_dev_info("tx_timeout called\n");
+	} else {
+		e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf);
+		e_dev_info("Available commands:\n");
+		e_dev_info("    tx_timeout\n");
+	}
+	return count;
+}
+
+static const struct file_operations ixgbe_dbg_netdev_ops_fops = {
+	.owner = THIS_MODULE,
+	.open = ixgbe_dbg_netdev_ops_open,
+	.read = ixgbe_dbg_netdev_ops_read,
+	.write = ixgbe_dbg_netdev_ops_write,
+};
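
Usage mirrors reg_ops: writing the string "tx_timeout" to this file (under the same per-adapter debugfs directory) invokes the driver's ndo_tx_timeout handler on demand, making it possible to exercise the reset path without provoking a real transmit hang.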
+
+/**
+ * ixgbe_dbg_adapter_init - setup the debugfs directory for the adapter
+ * @adapter: the adapter that is starting up
+ **/
+void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter)
+{
+	const char *name = pci_name(adapter->pdev);
+	struct dentry *pfile;
+	adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root);
+	if (adapter->ixgbe_dbg_adapter) {
+		pfile = debugfs_create_file("reg_ops", 0600,
+					    adapter->ixgbe_dbg_adapter, adapter,
+					    &ixgbe_dbg_reg_ops_fops);
+		if (!pfile)
+			e_dev_err("debugfs reg_ops for %s failed\n", name);
+		pfile = debugfs_create_file("netdev_ops", 0600,
+					    adapter->ixgbe_dbg_adapter, adapter,
+					    &ixgbe_dbg_netdev_ops_fops);
+		if (!pfile)
+			e_dev_err("debugfs netdev_ops for %s failed\n", name);
+	} else {
+		e_dev_err("debugfs entry for %s failed\n", name);
+	}
+}
+
+/**
+ * ixgbe_dbg_adapter_exit - clear out the adapter's debugfs entries
+ * @adapter: the adapter that is stopping
+ **/
+void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter)
+{
+	if (adapter->ixgbe_dbg_adapter)
+		debugfs_remove_recursive(adapter->ixgbe_dbg_adapter);
+	adapter->ixgbe_dbg_adapter = NULL;
+}
+
+/**
+ * ixgbe_dbg_init - start up debugfs for the driver
+ **/
+void ixgbe_dbg_init(void)
+{
+	ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL);
+	if (ixgbe_dbg_root == NULL)
+		pr_err("init of debugfs failed\n");
+}
+
+/**
+ * ixgbe_dbg_exit - clean out the driver's debugfs entries
+ **/
+void ixgbe_dbg_exit(void)
+{
+	debugfs_remove_recursive(ixgbe_dbg_root);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ee61819d6088..868af6938219 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1167,7 +1167,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
 	}
 
 	bi->dma = dma;
-	bi->page_offset ^= ixgbe_rx_bufsz(rx_ring);
+	bi->page_offset = 0;
 
 	return true;
 }
@@ -1320,29 +1320,6 @@ static unsigned int ixgbe_get_headlen(unsigned char *data,
 		return max_len;
 }
 
-static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring,
-			      union ixgbe_adv_rx_desc *rx_desc,
-			      struct sk_buff *skb)
-{
-	__le32 rsc_enabled;
-	u32 rsc_cnt;
-
-	if (!ring_is_rsc_enabled(rx_ring))
-		return;
-
-	rsc_enabled = rx_desc->wb.lower.lo_dword.data &
-		      cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
-
-	/* If this is an RSC frame rsc_cnt should be non-zero */
-	if (!rsc_enabled)
-		return;
-
-	rsc_cnt = le32_to_cpu(rsc_enabled);
-	rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
-
-	IXGBE_CB(skb)->append_cnt += rsc_cnt - 1;
-}
-
 static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
 				   struct sk_buff *skb)
 {
@@ -1440,16 +1417,28 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring,
 
 	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
 
-	if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
-		return false;
+	/* update RSC append count if present */
+	if (ring_is_rsc_enabled(rx_ring)) {
+		__le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data &
+				     cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
+
+		if (unlikely(rsc_enabled)) {
+			u32 rsc_cnt = le32_to_cpu(rsc_enabled);
+
+			rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
+			IXGBE_CB(skb)->append_cnt += rsc_cnt - 1;
 
-	/* append_cnt indicates packet is RSC, if so fetch nextp */
-	if (IXGBE_CB(skb)->append_cnt) {
-		ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
-		ntc &= IXGBE_RXDADV_NEXTP_MASK;
-		ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
+			/* update ntc based on RSC value */
+			ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
+			ntc &= IXGBE_RXDADV_NEXTP_MASK;
+			ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
+		}
 	}
 
+	/* if we are the last buffer then there is nothing else to do */
+	if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
+		return false;
+
 	/* place skb in next buffer to be received */
 	rx_ring->rx_buffer_info[ntc].skb = skb;
 	rx_ring->rx_stats.non_eop_descs++;
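
Concretely: if a descriptor reports an RSCCNT of 4, three coalesced frames are folded into append_cnt, and the NEXTP field, rather than the ring's natural next index, names the descriptor where the buffer chain continues, which is why ntc is overwritten before the EOP check.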
@@ -1458,6 +1447,78 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring,
 }
 
 /**
+ * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ixgbe specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
+			    struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/*
+	 * it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/*
+	 * we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes, so that skb_pad() has linear data to work with when
+	 * skb->len is less than 60.
+	 */
+	pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
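
As a worked example: for a 54-byte Ethernet/IPv4/TCP header, ixgbe_get_headlen() returns 54, but ALIGN(54, sizeof(long)) copies 56 bytes on a 64-bit build; the frag and tail pointers are still advanced by the true 54-byte pull length, so the two extra bytes copied are harmless scratch.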
+
+/**
+ * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being updated
+ *
+ * This function provides a basic DMA sync up for the first fragment of an
+ * skb.  The reason for doing this is that the first fragment cannot be
+ * unmapped until we have reached the end of packet descriptor for a buffer
+ * chain.
+ */
+static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
+				struct sk_buff *skb)
+{
+	/* if the page was released unmap it, else just sync our portion */
+	if (unlikely(IXGBE_CB(skb)->page_released)) {
+		dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma,
+			       ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
+		IXGBE_CB(skb)->page_released = false;
+	} else {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      IXGBE_CB(skb)->dma,
+					      frag->page_offset,
+					      ixgbe_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+	}
+	IXGBE_CB(skb)->dma = 0;
+}
+
+/**
  * ixgbe_cleanup_headers - Correct corrupted or empty headers
  * @rx_ring: rx descriptor ring packet is being transacted on
  * @rx_desc: pointer to the EOP Rx descriptor
@@ -1479,24 +1540,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
 				  union ixgbe_adv_rx_desc *rx_desc,
 				  struct sk_buff *skb)
 {
-	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 	struct net_device *netdev = rx_ring->netdev;
-	unsigned char *va;
-	unsigned int pull_len;
-
-	/* if the page was released unmap it, else just sync our portion */
-	if (unlikely(IXGBE_CB(skb)->page_released)) {
-		dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma,
-			       ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
-		IXGBE_CB(skb)->page_released = false;
-	} else {
-		dma_sync_single_range_for_cpu(rx_ring->dev,
-					      IXGBE_CB(skb)->dma,
-					      frag->page_offset,
-					      ixgbe_rx_bufsz(rx_ring),
-					      DMA_FROM_DEVICE);
-	}
-	IXGBE_CB(skb)->dma = 0;
 
 	/* verify that the packet does not have any known errors */
 	if (unlikely(ixgbe_test_staterr(rx_desc,
@@ -1506,40 +1550,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
 		return true;
 	}
 
-	/*
-	 * it is valid to use page_address instead of kmap since we are
-	 * working with pages allocated out of the lomem pool per
-	 * alloc_page(GFP_ATOMIC)
-	 */
-	va = skb_frag_address(frag);
-
-	/*
-	 * we need the header to contain the greater of either ETH_HLEN or
-	 * 60 bytes if the skb->len is less than 60 for skb_pad.
-	 */
-	pull_len = skb_frag_size(frag);
-	if (pull_len > IXGBE_RX_HDR_SIZE)
-		pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE);
-
-	/* align pull length to size of long to optimize memcpy performance */
-	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
-
-	/* update all of the pointers */
-	skb_frag_size_sub(frag, pull_len);
-	frag->page_offset += pull_len;
-	skb->data_len -= pull_len;
-	skb->tail += pull_len;
-
-	/*
-	 * if we sucked the frag empty then we should free it,
-	 * if there are other frags here something is screwed up in hardware
-	 */
-	if (skb_frag_size(frag) == 0) {
-		BUG_ON(skb_shinfo(skb)->nr_frags != 1);
-		skb_shinfo(skb)->nr_frags = 0;
-		__skb_frag_unref(frag);
-		skb->truesize -= ixgbe_rx_bufsz(rx_ring);
-	}
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		ixgbe_pull_tail(rx_ring, skb);
 
 #ifdef IXGBE_FCOE
 	/* do not attempt to pad FCoE Frames as this will disrupt DDP */
@@ -1560,33 +1573,17 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
 }
 
 /**
- * ixgbe_can_reuse_page - determine if we can reuse a page
- * @rx_buffer: pointer to rx_buffer containing the page we want to reuse
- *
- * Returns true if page can be reused in another Rx buffer
- **/
-static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer)
-{
-	struct page *page = rx_buffer->page;
-
-	/* if we are only owner of page and it is local we can reuse it */
-	return likely(page_count(page) == 1) &&
-	       likely(page_to_nid(page) == numa_node_id());
-}
-
-/**
  * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring
  * @rx_ring: rx descriptor ring to store buffers on
  * @old_buff: donor buffer to have page reused
  *
- * Syncronizes page for reuse by the adapter
+ * Synchronizes page for reuse by the adapter
  **/
 static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
 				struct ixgbe_rx_buffer *old_buff)
 {
 	struct ixgbe_rx_buffer *new_buff;
 	u16 nta = rx_ring->next_to_alloc;
-	u16 bufsz = ixgbe_rx_bufsz(rx_ring);
 
 	new_buff = &rx_ring->rx_buffer_info[nta];
 
@@ -1597,17 +1594,13 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
 	/* transfer page from old buffer to new buffer */
 	new_buff->page = old_buff->page;
 	new_buff->dma = old_buff->dma;
-
-	/* flip page offset to other buffer and store to new_buff */
-	new_buff->page_offset = old_buff->page_offset ^ bufsz;
+	new_buff->page_offset = old_buff->page_offset;
 
 	/* sync the buffer for use by the device */
 	dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
-					 new_buff->page_offset, bufsz,
+					 new_buff->page_offset,
+					 ixgbe_rx_bufsz(rx_ring),
 					 DMA_FROM_DEVICE);
-
-	/* bump ref count on page before it is given to the stack */
-	get_page(new_buff->page);
 }
 
 /**
@@ -1617,20 +1610,159 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
  * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buff to place the data into
  *
- * This function is based on skb_add_rx_frag.  I would have used that
- * function however it doesn't handle the truesize case correctly since we
- * are allocating more memory than might be used for a single receive.
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
  **/
-static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
+static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
 			      struct ixgbe_rx_buffer *rx_buffer,
-			      struct sk_buff *skb, int size)
+			      union ixgbe_adv_rx_desc *rx_desc,
+			      struct sk_buff *skb)
 {
-	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-			   rx_buffer->page, rx_buffer->page_offset,
-			   size);
-	skb->len += size;
-	skb->data_len += size;
-	skb->truesize += ixgbe_rx_bufsz(rx_ring);
+	struct page *page = rx_buffer->page;
+	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbe_rx_bufsz(rx_ring);
+#else
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+	unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) -
+				   ixgbe_rx_bufsz(rx_ring);
+#endif
+
+	if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
+		unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* we can reuse buffer as-is, just make sure it is local */
+		if (likely(page_to_nid(page) == numa_node_id()))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		put_page(page);
+		return false;
+	}
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			rx_buffer->page_offset, size, truesize);
+
+	/* avoid re-using remote pages */
+	if (unlikely(page_to_nid(page) != numa_node_id()))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= truesize;
+
+	/*
+	 * since we are the only owner of the page and we need to
+	 * increment it, just set the value to 2 in order to avoid
+	 * an unnecessary locked operation
+	 */
+	atomic_set(&page->_count, 2);
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+
+	if (rx_buffer->page_offset > last_offset)
+		return false;
+
+	/* bump ref count on page before it is given to the stack */
+	get_page(page);
+#endif
+
+	return true;
+}
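
Two details worth noting. Frames of at most IXGBE_RX_HDR_SIZE (256) bytes are copied whole into the linear area, so the buffer can be recycled without taking a page reference at all. For larger frames on 4K-page systems, truesize is the fixed 2K buffer size, so page_offset ^= truesize ping-pongs between the two halves of the page, and the atomic_set() to 2 stands in for exactly one reference owned by the stack (the frag just attached) plus one retained by the ring, sparing a locked increment on the hot path.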
+
+static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring,
+					     union ixgbe_adv_rx_desc *rx_desc)
+{
+	struct ixgbe_rx_buffer *rx_buffer;
+	struct sk_buff *skb;
+	struct page *page;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	page = rx_buffer->page;
+	prefetchw(page);
+
+	skb = rx_buffer->skb;
+
+	if (likely(!skb)) {
+		void *page_addr = page_address(page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						IXGBE_RX_HDR_SIZE);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			return NULL;
+		}
+
+		/*
+		 * we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+
+		/*
+		 * Delay unmapping of the first packet.  It carries the
+		 * header information; HW may still access the header
+		 * after the writeback.  Only unmap it when EOP is
+		 * reached.
+		 */
+		if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
+			goto dma_sync;
+
+		IXGBE_CB(skb)->dma = rx_buffer->dma;
+	} else {
+		if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
+			ixgbe_dma_sync_frag(rx_ring, skb);
+
+dma_sync:
+		/* we are reusing so sync this buffer for CPU use */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_buffer->dma,
+					      rx_buffer->page_offset,
+					      ixgbe_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+	}
+
+	/* pull page into skb */
+	if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		ixgbe_reuse_rx_page(rx_ring, rx_buffer);
+	} else if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
+		/* the page has been released from the ring */
+		IXGBE_CB(skb)->page_released = true;
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+			       ixgbe_rx_pg_size(rx_ring),
+			       DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of buffer_info */
+	rx_buffer->skb = NULL;
+	rx_buffer->dma = 0;
+	rx_buffer->page = NULL;
+
+	return skb;
 }
 
 /**
@@ -1653,16 +1785,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 #ifdef IXGBE_FCOE
 	struct ixgbe_adapter *adapter = q_vector->adapter;
-	int ddp_bytes = 0;
+	int ddp_bytes;
+	unsigned int mss = 0;
 #endif /* IXGBE_FCOE */
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
 
 	do {
-		struct ixgbe_rx_buffer *rx_buffer;
 		union ixgbe_adv_rx_desc *rx_desc;
 		struct sk_buff *skb;
-		struct page *page;
-		u16 ntc;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
@@ -1670,9 +1800,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 			cleaned_count = 0;
 		}
 
-		ntc = rx_ring->next_to_clean;
-		rx_desc = IXGBE_RX_DESC(rx_ring, ntc);
-		rx_buffer = &rx_ring->rx_buffer_info[ntc];
+		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
 		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
 			break;
@@ -1684,75 +1812,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		 */
 		rmb();
 
-		page = rx_buffer->page;
-		prefetchw(page);
-
-		skb = rx_buffer->skb;
-
-		if (likely(!skb)) {
-			void *page_addr = page_address(page) +
-					  rx_buffer->page_offset;
-
-			/* prefetch first cache line of first page */
-			prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
-			prefetch(page_addr + L1_CACHE_BYTES);
-#endif
+		/* retrieve a buffer from the ring */
+		skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
 
-			/* allocate a skb to store the frags */
-			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-							IXGBE_RX_HDR_SIZE);
-			if (unlikely(!skb)) {
-				rx_ring->rx_stats.alloc_rx_buff_failed++;
-				break;
-			}
-
-			/*
-			 * we will be copying header into skb->data in
-			 * pskb_may_pull so it is in our interest to prefetch
-			 * it now to avoid a possible cache miss
-			 */
-			prefetchw(skb->data);
-
-			/*
-			 * Delay unmapping of the first packet. It carries the
-			 * header information, HW may still access the header
-			 * after the writeback.  Only unmap it when EOP is
-			 * reached
-			 */
-			IXGBE_CB(skb)->dma = rx_buffer->dma;
-		} else {
-			/* we are reusing so sync this buffer for CPU use */
-			dma_sync_single_range_for_cpu(rx_ring->dev,
-						      rx_buffer->dma,
-						      rx_buffer->page_offset,
-						      ixgbe_rx_bufsz(rx_ring),
-						      DMA_FROM_DEVICE);
-		}
-
-		/* pull page into skb */
-		ixgbe_add_rx_frag(rx_ring, rx_buffer, skb,
-				  le16_to_cpu(rx_desc->wb.upper.length));
-
-		if (ixgbe_can_reuse_page(rx_buffer)) {
-			/* hand second half of page back to the ring */
-			ixgbe_reuse_rx_page(rx_ring, rx_buffer);
-		} else if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
-			/* the page has been released from the ring */
-			IXGBE_CB(skb)->page_released = true;
-		} else {
-			/* we are not reusing the buffer so unmap it */
-			dma_unmap_page(rx_ring->dev, rx_buffer->dma,
-				       ixgbe_rx_pg_size(rx_ring),
-				       DMA_FROM_DEVICE);
-		}
-
-		/* clear contents of buffer_info */
-		rx_buffer->skb = NULL;
-		rx_buffer->dma = 0;
-		rx_buffer->page = NULL;
-
-		ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb);
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
 
 		cleaned_count++;
 
@@ -1775,6 +1840,20 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		/* if ddp, not passing to ULD unless for FCP_RSP or error */
 		if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) {
 			ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb);
+			/* include DDPed FCoE data */
+			if (ddp_bytes > 0) {
+				if (!mss) {
+					mss = rx_ring->netdev->mtu -
+						sizeof(struct fcoe_hdr) -
+						sizeof(struct fc_frame_header) -
+						sizeof(struct fcoe_crc_eof);
+					if (mss > 512)
+						mss &= ~511;
+				}
+				total_rx_bytes += ddp_bytes;
+				total_rx_packets += DIV_ROUND_UP(ddp_bytes,
+								 mss);
+			}
 			if (!ddp_bytes) {
 				dev_kfree_skb_any(skb);
 				continue;
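
As a worked example (assuming the usual 14-, 24- and 8-byte sizes of struct fcoe_hdr, struct fc_frame_header and struct fcoe_crc_eof): a 2500-byte MTU gives mss = 2500 - 14 - 24 - 8 = 2454, which the & ~511 mask rounds down to 2048, so 8192 bytes of DDPed data are accounted as DIV_ROUND_UP(8192, 2048) = 4 packets.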
@@ -1788,21 +1867,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		budget--;
 	} while (likely(budget));
 
-#ifdef IXGBE_FCOE
-	/* include DDPed FCoE data */
-	if (ddp_bytes > 0) {
-		unsigned int mss;
-
-		mss = rx_ring->netdev->mtu - sizeof(struct fcoe_hdr) -
-			sizeof(struct fc_frame_header) -
-			sizeof(struct fcoe_crc_eof);
-		if (mss > 512)
-			mss &= ~511;
-		total_rx_bytes += ddp_bytes;
-		total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss);
-	}
-
-#endif /* IXGBE_FCOE */
 	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->stats.packets += total_rx_packets;
 	rx_ring->stats.bytes += total_rx_bytes;
@@ -2868,11 +2932,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
 	srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
 
 	/* configure the packet buffer length */
-#if PAGE_SIZE > IXGBE_MAX_RXBUFFER
-	srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-#else
 	srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-#endif
 
 	/* configure descriptor type */
 	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
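
With the buffer size now fixed at 2K this field evaluates to 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT = 2 (the shift is assumed to be 10, so the field is in 1 KB units), which is why the removed PAGE_SIZE special case had become dead code.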
@@ -2980,13 +3040,7 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
 	 * total size of max desc * buf_len is not greater
 	 * than 65536
 	 */
-#if (PAGE_SIZE <= 8192)
 	rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
-#elif (PAGE_SIZE <= 16384)
-	rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
-#else
-	rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
-#endif
 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl);
 }
 
@@ -3606,8 +3660,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		netif_set_gso_max_size(adapter->netdev, 32768);
 
-	hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
-
 #ifdef IXGBE_FCOE
 	if (adapter->netdev->features & NETIF_F_FCOE_MTU)
 		max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
@@ -3807,6 +3859,11 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 #ifdef CONFIG_IXGBE_DCB
 	ixgbe_configure_dcb(adapter);
 #endif
+	/*
+	 * We must restore virtualization before VLANs or else
+	 * the VLVF registers will not be populated
+	 */
+	ixgbe_configure_virtualization(adapter);
 
 	ixgbe_set_rx_mode(adapter->netdev);
 	ixgbe_restore_vlan(adapter);
@@ -3838,8 +3895,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 		break;
 	}
 
-	ixgbe_configure_virtualization(adapter);
-
 #ifdef IXGBE_FCOE
 	/* configure FCoE L2 filters, redirection table, and Rx control */
 	ixgbe_configure_fcoe(adapter);
@@ -4130,27 +4185,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
 }
 
 /**
- * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers
- * @rx_ring: ring to setup
- *
- * On many IA platforms the L1 cache has a critical stride of 4K, this
- * results in each receive buffer starting in the same cache set.  To help
- * reduce the pressure on this cache set we can interleave the offsets so
- * that only every other buffer will be in the same cache set.
- **/
-static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring)
-{
-	struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info;
-	u16 i;
-
-	for (i = 0; i < rx_ring->count; i += 2) {
-		rx_buffer[0].page_offset = 0;
-		rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring);
-		rx_buffer = &rx_buffer[2];
-	}
-}
-
-/**
  * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
  * @rx_ring: ring to free buffers from
  **/
@@ -4195,8 +4229,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 	memset(rx_ring->rx_buffer_info, 0, size);
 
-	ixgbe_init_rx_page_offset(rx_ring);
-
 	/* Zero out the descriptor ring */
 	memset(rx_ring->desc, 0, rx_ring->size);
 
@@ -4646,8 +4678,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 
-	ixgbe_init_rx_page_offset(rx_ring);
-
 	return 0;
 err:
 	vfree(rx_ring->rx_buffer_info);
@@ -5530,8 +5560,9 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter)
 {
 	u32 ssvpc;
 
-	/* Do not perform spoof check for 82598 */
-	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+	/* Do not perform spoof check for 82598 or if not in IOV mode */
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
+	    adapter->num_vfs == 0)
 		return;
 
 	ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC);
@@ -5543,7 +5574,7 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter)
 	if (!ssvpc)
 		return;
 
-	e_warn(drv, "%d Spoofed packets detected\n", ssvpc);
+	e_warn(drv, "%u Spoofed packets detected\n", ssvpc);
 }
 
 /**
@@ -5874,9 +5905,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) &&
-		    !(first->tx_flags & IXGBE_TX_FLAGS_TXSW))
-			return;
+		if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN)) {
+			if (unlikely(skb->no_fcs))
+				first->tx_flags |= IXGBE_TX_FLAGS_NO_IFCS;
+			if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW))
+				return;
+		}
 	} else {
 		u8 l4_hdr = 0;
 		switch (first->protocol) {
@@ -5938,7 +5972,6 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags)
 {
 	/* set type for advanced descriptor with frame checksum insertion */
 	__le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA |
-				      IXGBE_ADVTXD_DCMD_IFCS |
 				      IXGBE_ADVTXD_DCMD_DEXT);
 
 	/* set HW vlan bit if vlan is present */
@@ -5958,6 +5991,10 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags)
 #endif
 		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE);
 
+	/* insert frame checksum */
+	if (!(tx_flags & IXGBE_TX_FLAGS_NO_IFCS))
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS);
+
 	return cmd_type;
 }
 
@@ -6063,8 +6100,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 		if (likely(!data_len))
 			break;
 
-		if (unlikely(skb->no_fcs))
-			cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS));
 		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
 
 		i++;
@@ -6854,9 +6889,9 @@ static int ixgbe_set_features(struct net_device *netdev,
 	return 0;
 }
 
-static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
+static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
-			     unsigned char *addr,
+			     const unsigned char *addr,
 			     u16 flags)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
@@ -6893,7 +6928,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
 
 static int ixgbe_ndo_fdb_del(struct ndmsg *ndm,
 			     struct net_device *dev,
-			     unsigned char *addr)
+			     const unsigned char *addr)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	int err = -EOPNOTSUPP;
@@ -7136,11 +7171,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 		goto err_ioremap;
 	}
 
-	for (i = 1; i <= 5; i++) {
-		if (pci_resource_len(pdev, i) == 0)
-			continue;
-	}
-
 	netdev->netdev_ops = &ixgbe_netdev_ops;
 	ixgbe_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
@@ -7419,6 +7449,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 		e_err(probe, "failed to allocate sysfs resources\n");
 #endif /* CONFIG_IXGBE_HWMON */
 
+#ifdef CONFIG_DEBUG_FS
+	ixgbe_dbg_adapter_init(adapter);
+#endif /* CONFIG_DEBUG_FS */
+
 	return 0;
 
 err_register:
@@ -7453,6 +7487,10 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev)
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	struct net_device *netdev = adapter->netdev;
 
+#ifdef CONFIG_DEBUG_FS
+	ixgbe_dbg_adapter_exit(adapter);
+#endif /*CONFIG_DEBUG_FS */
+
 	set_bit(__IXGBE_DOWN, &adapter->state);
 	cancel_work_sync(&adapter->service_task);
 
@@ -7708,6 +7746,10 @@ static int __init ixgbe_init_module(void)
 	pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
 	pr_info("%s\n", ixgbe_copyright);
 
+#ifdef CONFIG_DEBUG_FS
+	ixgbe_dbg_init();
+#endif /* CONFIG_DEBUG_FS */
+
 #ifdef CONFIG_IXGBE_DCA
 	dca_register_notify(&dca_notifier);
 #endif
@@ -7730,6 +7772,11 @@ static void __exit ixgbe_exit_module(void)
 	dca_unregister_notify(&dca_notifier);
 #endif
 	pci_unregister_driver(&ixgbe_driver);
+
+#ifdef CONFIG_DEBUG_FS
+	ixgbe_dbg_exit();
+#endif /* CONFIG_DEBUG_FS */
+
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 3456d5617143..39881cb17a4b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -960,7 +960,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
 	/* (Re)start the overflow check */
 	adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED;
 
-	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps);
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		e_dev_err("ptp_clock_register failed\n");
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 4fea8716ab64..dce48bf64d96 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -346,6 +346,10 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
 			     u32 vf)
 {
+	/* VLAN 0 is a special case, don't allow it to be removed */
+	if (!vid && !add)
+		return 0;
+
 	return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add);
 }
 
@@ -414,6 +418,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 				  VLAN_PRIO_SHIFT)), vf);
 		ixgbe_set_vmolr(hw, vf, false);
 	} else {
+		ixgbe_set_vf_vlan(adapter, true, 0, vf);
 		ixgbe_set_vmvir(adapter, 0, vf);
 		ixgbe_set_vmolr(hw, vf, true);
 	}
@@ -810,9 +815,9 @@ out:
        return err;
 }
 
-static int ixgbe_link_mbps(int internal_link_speed)
+static int ixgbe_link_mbps(struct ixgbe_adapter *adapter)
 {
-	switch (internal_link_speed) {
+	switch (adapter->link_speed) {
 	case IXGBE_LINK_SPEED_100_FULL:
 		return 100;
 	case IXGBE_LINK_SPEED_1GB_FULL:
@@ -824,27 +829,30 @@ static int ixgbe_link_mbps(int internal_link_speed)
 	}
 }
 
-static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate,
-				    int link_speed)
+static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 {
-	int rf_dec, rf_int;
-	u32 bcnrc_val;
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bcnrc_val = 0;
+	u16 queue, queues_per_pool;
+	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
+
+	if (tx_rate) {
+		/* start with base link speed value */
+		bcnrc_val = adapter->vf_rate_link_speed;
 
-	if (tx_rate != 0) {
 		/* Calculate the rate factor values to set */
-		rf_int = link_speed / tx_rate;
-		rf_dec = (link_speed - (rf_int * tx_rate));
-		rf_dec = (rf_dec * (1<<IXGBE_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
-
-		bcnrc_val = IXGBE_RTTBCNRC_RS_ENA;
-		bcnrc_val |= ((rf_int<<IXGBE_RTTBCNRC_RF_INT_SHIFT) &
-		               IXGBE_RTTBCNRC_RF_INT_MASK);
-		bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK);
-	} else {
-		bcnrc_val = 0;
+		bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
+		bcnrc_val /= tx_rate;
+
+		/* clear everything but the rate factor */
+		bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
+			     IXGBE_RTTBCNRC_RF_DEC_MASK;
+
+		/* enable the rate scheduler */
+		bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
 	}
 
-	IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, 2*vf); /* vf Y uses queue 2*Y */
 	/*
 	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
 	 * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported
@@ -861,53 +869,68 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate,
 		break;
 	}
 
-	IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+	/* determine how many queues per pool based on VMDq mask */
+	queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+
+	/* write value for all Tx queues belonging to VF */
+	for (queue = 0; queue < queues_per_pool; queue++) {
+		unsigned int reg_idx = (vf * queues_per_pool) + queue;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx);
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+	}
 }
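
For example, at a 10GbE link with a 250 Mbps cap, bcnrc_val = (10000 << 14) / 250 = 40 << 14, i.e. a rate factor of 40.0 in the register's fixed-point format (assuming IXGBE_RTTBCNRC_RF_INT_SHIFT is 14); the value is then replicated across every queue in the VF's pool instead of only queue 2*vf as before.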
 
 void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 {
-	int actual_link_speed, i;
-	bool reset_rate = false;
+	int i;
 
 	/* VF Tx rate limit was not set */
-	if (adapter->vf_rate_link_speed == 0)
+	if (!adapter->vf_rate_link_speed)
 		return;
 
-	actual_link_speed = ixgbe_link_mbps(adapter->link_speed);
-	if (actual_link_speed != adapter->vf_rate_link_speed) {
-		reset_rate = true;
+	if (ixgbe_link_mbps(adapter) != adapter->vf_rate_link_speed) {
 		adapter->vf_rate_link_speed = 0;
 		dev_info(&adapter->pdev->dev,
-		         "Link speed has been changed. VF Transmit rate "
-		         "is disabled\n");
+			 "Link speed has been changed. VF Transmit rate is disabled\n");
 	}
 
 	for (i = 0; i < adapter->num_vfs; i++) {
-		if (reset_rate)
+		if (!adapter->vf_rate_link_speed)
 			adapter->vfinfo[i].tx_rate = 0;
 
-		ixgbe_set_vf_rate_limit(&adapter->hw, i,
-					adapter->vfinfo[i].tx_rate,
-					actual_link_speed);
+		ixgbe_set_vf_rate_limit(adapter, i);
 	}
 }
 
 int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	struct ixgbe_hw *hw = &adapter->hw;
-	int actual_link_speed;
+	int link_speed;
+
+	/* verify VF is active */
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
 
-	actual_link_speed = ixgbe_link_mbps(adapter->link_speed);
-	if ((vf >= adapter->num_vfs) || (!adapter->link_up) ||
-	    (tx_rate > actual_link_speed) || (actual_link_speed != 10000) ||
-	    ((tx_rate != 0) && (tx_rate <= 10)))
-	    /* rate limit cannot be set to 10Mb or less in 10Gb adapters */
+	/* verify link is up */
+	if (!adapter->link_up)
 		return -EINVAL;
 
-	adapter->vf_rate_link_speed = actual_link_speed;
-	adapter->vfinfo[vf].tx_rate = (u16)tx_rate;
-	ixgbe_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+	/* verify we are linked at 10Gbps */
+	link_speed = ixgbe_link_mbps(adapter);
+	if (link_speed != 10000)
+		return -EINVAL;
+
+	/* rate limit cannot be set to 10 Mbps or less, nor above the link speed */
+	if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed)))
+		return -EINVAL;
+
+	/* store values */
+	adapter->vf_rate_link_speed = link_speed;
+	adapter->vfinfo[vf].tx_rate = tx_rate;
+
+	/* update hardware configuration */
+	ixgbe_set_vf_rate_limit(adapter, vf);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 418af827b230..da17ccf5c09d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -272,5 +272,6 @@ struct ixgbe_adv_tx_context_desc {
 /* Error Codes */
 #define IXGBE_ERR_INVALID_MAC_ADDR              -1
 #define IXGBE_ERR_RESET_FAILED                  -2
+#define IXGBE_ERR_INVALID_ARGUMENT              -3
 
 #endif /* _IXGBEVF_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 98cadb0c4dab..383b4e1cd175 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -101,7 +101,9 @@ struct ixgbevf_ring {
 
 /* Supported Rx Buffer Sizes */
 #define IXGBEVF_RXBUFFER_256   256    /* Used for packet split */
-#define IXGBEVF_RXBUFFER_2048  2048
+#define IXGBEVF_RXBUFFER_3K    3072
+#define IXGBEVF_RXBUFFER_7K    7168
+#define IXGBEVF_RXBUFFER_15K   15360
 #define IXGBEVF_MAX_RXBUFFER   16384  /* largest size for single descriptor */
 
 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256
@@ -259,6 +261,11 @@ enum ixbgevf_state_t {
 	__IXGBEVF_DOWN
 };
 
+struct ixgbevf_cb {
+	struct sk_buff *prev;
+};
+#define IXGBE_CB(skb) ((struct ixgbevf_cb *)(skb)->cb)
+
 enum ixgbevf_boards {
 	board_82599_vf,
 	board_X540_vf,
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 6647383c4ddc..0ee9bd4819f4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -263,6 +263,8 @@ cont_loop:
 	tx_ring->total_bytes += total_bytes;
 	tx_ring->total_packets += total_packets;
 	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
 
 	return count < tx_ring->count;
 }
@@ -272,12 +274,10 @@ cont_loop:
  * @q_vector: structure containing interrupt and ring information
  * @skb: packet to send up
  * @status: hardware indication of status of receive
- * @rx_ring: rx descriptor ring (for a specific queue) to setup
  * @rx_desc: rx descriptor
  **/
 static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector,
 				struct sk_buff *skb, u8 status,
-				struct ixgbevf_ring *ring,
 				union ixgbe_adv_rx_desc *rx_desc)
 {
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
@@ -433,11 +433,21 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 
 		if (!(staterr & IXGBE_RXD_STAT_EOP)) {
 			skb->next = next_buffer->skb;
-			skb->next->prev = skb;
+			IXGBE_CB(skb->next)->prev = skb;
 			adapter->non_eop_descs++;
 			goto next_desc;
 		}
 
+		/* we should not be chaining buffers; if we did, drop the skb */
+		if (IXGBE_CB(skb)->prev) {
+			do {
+				struct sk_buff *this = skb;
+				skb = IXGBE_CB(skb)->prev;
+				dev_kfree_skb(this);
+			} while (skb);
+			goto next_desc;
+		}
+
 		/* ERR_MASK will only have valid bits if EOP set */
 		if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) {
 			dev_kfree_skb_irq(skb);
@@ -461,7 +471,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		}
 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
-		ixgbevf_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc);
+		ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc);
 
 next_desc:
 		rx_desc->wb.upper.status_error = 0;
@@ -490,6 +500,8 @@ next_desc:
 	rx_ring->total_packets += total_rx_packets;
 	rx_ring->total_bytes += total_rx_bytes;
 	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
 
 	return !!budget;
 }
@@ -716,40 +728,15 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector)
 	}
 }
 
-static irqreturn_t ixgbevf_msix_mbx(int irq, void *data)
+static irqreturn_t ixgbevf_msix_other(int irq, void *data)
 {
 	struct ixgbevf_adapter *adapter = data;
 	struct ixgbe_hw *hw = &adapter->hw;
-	u32 msg;
-	bool got_ack = false;
-
-	if (!hw->mbx.ops.check_for_ack(hw))
-		got_ack = true;
 
-	if (!hw->mbx.ops.check_for_msg(hw)) {
-		hw->mbx.ops.read(hw, &msg, 1);
+	hw->mac.get_link_status = 1;
 
-		if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG)
-			mod_timer(&adapter->watchdog_timer,
-				  round_jiffies(jiffies + 1));
-
-		if (msg & IXGBE_VT_MSGTYPE_NACK)
-			pr_warn("Last Request of type %2.2x to PF Nacked\n",
-				msg & 0xFF);
-		/*
-		 * Restore the PFSTS bit in case someone is polling for a
-		 * return message from the PF
-		 */
-		hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFSTS;
-	}
-
-	/*
-	 * checking for the ack clears the PFACK bit.  Place
-	 * it back in the v2p_mailbox cache so that anyone
-	 * polling for an ack will not miss it
-	 */
-	if (got_ack)
-		hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK;
+	if (!test_bit(__IXGBEVF_DOWN, &adapter->state))
+		mod_timer(&adapter->watchdog_timer, jiffies);
 
 	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other);
 
@@ -899,10 +886,10 @@ static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter)
 	}
 
 	err = request_irq(adapter->msix_entries[vector].vector,
-			  &ixgbevf_msix_mbx, 0, netdev->name, adapter);
+			  &ixgbevf_msix_other, 0, netdev->name, adapter);
 	if (err) {
 		hw_dbg(&adapter->hw,
-		       "request_irq for msix_mbx failed: %d\n", err);
+		       "request_irq for msix_other failed: %d\n", err);
 		goto free_queue_irqs;
 	}
 
@@ -1057,15 +1044,46 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
 
 	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
-	if (rx_ring->rx_buf_len == MAXIMUM_ETHERNET_VLAN_SIZE)
-		srrctl |= IXGBEVF_RXBUFFER_2048 >>
-			IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-	else
-		srrctl |= rx_ring->rx_buf_len >>
-			IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >>
+		  IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+
 	IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
 }
 
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+	int i;
+	u16 rx_buf_len;
+
+	/* notify the PF of our intent to use this size of frame */
+	ixgbevf_rlpml_set_vf(hw, max_frame);
+
+	/* the PF will allow an extra 4 bytes past this for VLAN tagged frames */
+	max_frame += VLAN_HLEN;
+
+	/*
+	 * Make best use of the allocation: each buffer length is 1K
+	 * short of a power of 2, leaving headroom so the power-of-2
+	 * allocation backing skb->head is not pushed to the next size.
+	 */
+	if ((hw->mac.type == ixgbe_mac_X540_vf) &&
+	    (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE))
+		rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE;
+	else if (max_frame <= IXGBEVF_RXBUFFER_3K)
+		rx_buf_len = IXGBEVF_RXBUFFER_3K;
+	else if (max_frame <= IXGBEVF_RXBUFFER_7K)
+		rx_buf_len = IXGBEVF_RXBUFFER_7K;
+	else if (max_frame <= IXGBEVF_RXBUFFER_15K)
+		rx_buf_len = IXGBEVF_RXBUFFER_15K;
+	else
+		rx_buf_len = IXGBEVF_MAX_RXBUFFER;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		adapter->rx_ring[i].rx_buf_len = rx_buf_len;
+}
+
 /**
  * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
  * @adapter: board private structure
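
Two details in the hunk above are worth spelling out. The buffer tiers (3K/7K/15K) deliberately sit 1K under a power of two so the full allocation, buffer plus skb overhead, stays inside a 4K/8K/16K slab object, as the in-code comment says. And the SRRCTL write works because the hardware's BSIZEPKT field counts receive-buffer size in 1 KB units; IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10, so rounding up with ALIGN(len, 1024) and shifting yields the encoded value. A standalone sketch of the encoding:

/* BSIZEPKT is programmed in 1 KB units; ALIGN rounds len up to the
 * next multiple of 1024, then the shift converts bytes to KB.
 * The shift value mirrors IXGBE_SRRCTL_BSIZEPKT_SHIFT (10).
 */
#define BSIZEPKT_SHIFT 10

static unsigned int srrctl_bsizepkt(unsigned int rx_buf_len)
{
	unsigned int aligned = (rx_buf_len + 1023) & ~1023u; /* ALIGN(len, 1024) */

	return aligned >> BSIZEPKT_SHIFT;	/* e.g. 3072 -> 3 (3 KB) */
}
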
@@ -1076,18 +1094,14 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 {
 	u64 rdba;
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct net_device *netdev = adapter->netdev;
-	int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
 	int i, j;
 	u32 rdlen;
-	int rx_buf_len;
 
 	/* PSRTYPE must be initialized in 82599 */
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, 0);
-	if (netdev->mtu <= ETH_DATA_LEN)
-		rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE;
-	else
-		rx_buf_len = ALIGN(max_frame, 1024);
+
+	/* set_rx_buffer_len must be called before ring initialization */
+	ixgbevf_set_rx_buffer_len(adapter);
 
 	rdlen = adapter->rx_ring[0].count * sizeof(union ixgbe_adv_rx_desc);
 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
@@ -1103,7 +1117,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(j), 0);
 		adapter->rx_ring[i].head = IXGBE_VFRDH(j);
 		adapter->rx_ring[i].tail = IXGBE_VFRDT(j);
-		adapter->rx_ring[i].rx_buf_len = rx_buf_len;
 
 		ixgbevf_configure_srrctl(adapter, j);
 	}
@@ -1113,36 +1126,47 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	if (!hw->mac.ops.set_vfta)
+		return -EOPNOTSUPP;
 
 	spin_lock(&adapter->mbx_lock);
 
 	/* add VID to filter table */
-	if (hw->mac.ops.set_vfta)
-		hw->mac.ops.set_vfta(hw, vid, 0, true);
+	err = hw->mac.ops.set_vfta(hw, vid, 0, true);
 
 	spin_unlock(&adapter->mbx_lock);
 
+	/* translate error return types so error makes sense */
+	if (err == IXGBE_ERR_MBX)
+		return -EIO;
+
+	if (err == IXGBE_ERR_INVALID_ARGUMENT)
+		return -EACCES;
+
 	set_bit(vid, adapter->active_vlans);
 
-	return 0;
+	return err;
 }
 
 static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
+	int err = -EOPNOTSUPP;
 
 	spin_lock(&adapter->mbx_lock);
 
 	/* remove VID from filter table */
 	if (hw->mac.ops.set_vfta)
-		hw->mac.ops.set_vfta(hw, vid, 0, false);
+		err = hw->mac.ops.set_vfta(hw, vid, 0, false);
 
 	spin_unlock(&adapter->mbx_lock);
 
 	clear_bit(vid, adapter->active_vlans);
 
-	return 0;
+	return err;
 }
 
 static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter)
@@ -1308,6 +1332,25 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
 	adapter->stats.base_vfmprc = adapter->stats.last_vfmprc;
 }
 
+static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int api[] = { ixgbe_mbox_api_10,
+		      ixgbe_mbox_api_unknown };
+	int err = 0, idx = 0;
+
+	spin_lock(&adapter->mbx_lock);
+
+	while (api[idx] != ixgbe_mbox_api_unknown) {
+		err = ixgbevf_negotiate_api_version(hw, api[idx]);
+		if (!err)
+			break;
+		idx++;
+	}
+
+	spin_unlock(&adapter->mbx_lock);
+}
+
 static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -1315,7 +1358,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 	int i, j = 0;
 	int num_rx_rings = adapter->num_rx_queues;
 	u32 txdctl, rxdctl;
-	u32 msg[2];
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		j = adapter->tx_ring[i].reg_idx;
@@ -1356,10 +1398,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 			hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0);
 	}
 
-	msg[0] = IXGBE_VF_SET_LPE;
-	msg[1] = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
-	hw->mbx.ops.write_posted(hw, msg, 2);
-
 	spin_unlock(&adapter->mbx_lock);
 
 	clear_bit(__IXGBEVF_DOWN, &adapter->state);
@@ -1371,6 +1409,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 	ixgbevf_save_reset_stats(adapter);
 	ixgbevf_init_last_counter_stats(adapter);
 
+	hw->mac.get_link_status = 1;
 	mod_timer(&adapter->watchdog_timer, jiffies);
 }
 
@@ -1378,6 +1417,8 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 
+	ixgbevf_negotiate_api(adapter);
+
 	ixgbevf_configure(adapter);
 
 	ixgbevf_up_complete(adapter);
@@ -1419,7 +1460,7 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter,
 			rx_buffer_info->skb = NULL;
 			do {
 				struct sk_buff *this = skb;
-				skb = skb->prev;
+				skb = IXGBE_CB(skb)->prev;
 				dev_kfree_skb(this);
 			} while (skb);
 		}
@@ -1547,8 +1588,6 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter)
 
 void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
-
 	WARN_ON(in_interrupt());
 
 	while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state))
@@ -1561,10 +1600,8 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter)
 	 * watchdog task will continue to schedule reset tasks until
 	 * the PF is up and running.
 	 */
-	if (!hw->mac.ops.reset_hw(hw)) {
-		ixgbevf_down(adapter);
-		ixgbevf_up(adapter);
-	}
+	ixgbevf_down(adapter);
+	ixgbevf_up(adapter);
 
 	clear_bit(__IXGBEVF_RESETTING, &adapter->state);
 }
@@ -1867,6 +1904,22 @@ err_set_interrupt:
 }
 
 /**
+ * ixgbevf_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @adapter: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
+{
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+
+	ixgbevf_free_q_vectors(adapter);
+	ixgbevf_reset_interrupt_capability(adapter);
+}
+
+/**
  * ixgbevf_sw_init - Initialize general software structures
  * (struct ixgbevf_adapter)
  * @adapter: board private structure to initialize
@@ -2351,6 +2404,8 @@ static int ixgbevf_open(struct net_device *netdev)
 		}
 	}
 
+	ixgbevf_negotiate_api(adapter);
+
 	/* allocate transmit descriptors */
 	err = ixgbevf_setup_all_tx_resources(adapter);
 	if (err)
@@ -2860,10 +2915,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
-	struct ixgbe_hw *hw = &adapter->hw;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 	int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE;
-	u32 msg[2];
 
 	if (adapter->hw.mac.type == ixgbe_mac_X540_vf)
 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
@@ -2877,35 +2930,91 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 	/* must set new MTU before calling down or up */
 	netdev->mtu = new_mtu;
 
-	if (!netif_running(netdev)) {
-		msg[0] = IXGBE_VF_SET_LPE;
-		msg[1] = max_frame;
-		hw->mbx.ops.write_posted(hw, msg, 2);
-	}
-
 	if (netif_running(netdev))
 		ixgbevf_reinit_locked(adapter);
 
 	return 0;
 }
 
-static void ixgbevf_shutdown(struct pci_dev *pdev)
+static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+#ifdef CONFIG_PM
+	int retval = 0;
+#endif
 
 	netif_device_detach(netdev);
 
 	if (netif_running(netdev)) {
+		rtnl_lock();
 		ixgbevf_down(adapter);
 		ixgbevf_free_irq(adapter);
 		ixgbevf_free_all_tx_resources(adapter);
 		ixgbevf_free_all_rx_resources(adapter);
+		rtnl_unlock();
 	}
 
-	pci_save_state(pdev);
+	ixgbevf_clear_interrupt_scheme(adapter);
 
+#ifdef CONFIG_PM
+	retval = pci_save_state(pdev);
+	if (retval)
+		return retval;
+
+#endif
 	pci_disable_device(pdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int ixgbevf_resume(struct pci_dev *pdev)
+{
+	struct ixgbevf_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev = adapter->netdev;
+	u32 err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	/*
+	 * pci_restore_state clears dev->state_saved so call
+	 * pci_save_state to restore it.
+	 */
+	pci_save_state(pdev);
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	rtnl_lock();
+	err = ixgbevf_init_interrupt_scheme(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+		return err;
+	}
+
+	ixgbevf_reset(adapter);
+
+	if (netif_running(netdev)) {
+		err = ixgbevf_open(netdev);
+		if (err)
+			return err;
+	}
+
+	netif_device_attach(netdev);
+
+	return err;
+}
+
+#endif /* CONFIG_PM */
+static void ixgbevf_shutdown(struct pci_dev *pdev)
+{
+	ixgbevf_suspend(pdev, PMSG_SUSPEND);
 }
 
 static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
@@ -2946,7 +3055,7 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
 	return stats;
 }
 
-static const struct net_device_ops ixgbe_netdev_ops = {
+static const struct net_device_ops ixgbevf_netdev_ops = {
 	.ndo_open		= ixgbevf_open,
 	.ndo_stop		= ixgbevf_close,
 	.ndo_start_xmit		= ixgbevf_xmit_frame,
@@ -2962,7 +3071,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 
 static void ixgbevf_assign_netdev_ops(struct net_device *dev)
 {
-	dev->netdev_ops = &ixgbe_netdev_ops;
+	dev->netdev_ops = &ixgbevf_netdev_ops;
 	ixgbevf_set_ethtool_ops(dev);
 	dev->watchdog_timeo = 5 * HZ;
 }
@@ -3131,6 +3240,7 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev,
 	return 0;
 
 err_register:
+	ixgbevf_clear_interrupt_scheme(adapter);
 err_sw_init:
 	ixgbevf_reset_interrupt_capability(adapter);
 	iounmap(hw->hw_addr);
@@ -3168,6 +3278,7 @@ static void __devexit ixgbevf_remove(struct pci_dev *pdev)
 	if (netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(netdev);
 
+	ixgbevf_clear_interrupt_scheme(adapter);
 	ixgbevf_reset_interrupt_capability(adapter);
 
 	iounmap(adapter->hw.hw_addr);
@@ -3267,6 +3378,11 @@ static struct pci_driver ixgbevf_driver = {
 	.id_table = ixgbevf_pci_tbl,
 	.probe    = ixgbevf_probe,
 	.remove   = __devexit_p(ixgbevf_remove),
+#ifdef CONFIG_PM
+	/* Power Management Hooks */
+	.suspend  = ixgbevf_suspend,
+	.resume   = ixgbevf_resume,
+#endif
 	.shutdown = ixgbevf_shutdown,
 	.err_handler = &ixgbevf_err_handler
 };
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index 9c955900fe64..d5028ddf4b31 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -86,14 +86,17 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw)
 static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val = IXGBE_ERR_MBX;
+	s32 ret_val = -IXGBE_ERR_MBX;
+
+	if (!mbx->ops.read)
+		goto out;
 
 	ret_val = ixgbevf_poll_for_msg(hw);
 
 	/* if ack received read message, otherwise we timed out */
 	if (!ret_val)
 		ret_val = mbx->ops.read(hw, msg, size);
-
+out:
 	return ret_val;
 }
 
@@ -109,7 +112,11 @@ static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val;
+	s32 ret_val = -IXGBE_ERR_MBX;
+
+	/* exit if either we can't write or there isn't a defined timeout */
+	if (!mbx->ops.write || !mbx->timeout)
+		goto out;
 
 	/* send msg */
 	ret_val = mbx->ops.write(hw, msg, size);
@@ -117,7 +124,7 @@ static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 	/* if msg sent wait until we receive an ack */
 	if (!ret_val)
 		ret_val = ixgbevf_poll_for_ack(hw);
-
+out:
 	return ret_val;
 }
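
Both posted helpers share one shape: start the operation, then busy-poll the mailbox status under a countdown derived from mbx->timeout, which is also why a zero timeout is now treated as "mailbox disabled" and short-circuits the write. A hedged sketch of the polling half; the field names (timeout, udelay) follow this driver's mbx_info convention, and the check callback is assumed to return zero once the condition holds.

/* Sketch of the countdown poll behind ixgbevf_poll_for_msg/ack().
 * check() is assumed to return 0 once a message/ack is present.
 */
static s32 poll_mbx_condition(struct ixgbe_hw *hw,
			      s32 (*check)(struct ixgbe_hw *))
{
	struct ixgbe_mbx_info *mbx = &hw->mbx;
	int countdown = mbx->timeout;

	while (countdown && check(hw)) {
		countdown--;
		udelay(mbx->udelay);
	}

	return countdown ? 0 : IXGBE_ERR_MBX;	/* gave up: timed out */
}
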
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index cf9131c5c115..946ce86f337f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -76,12 +76,29 @@
 /* bits 23:16 are used for extra info for certain messages */
 #define IXGBE_VT_MSGINFO_MASK     (0xFF << IXGBE_VT_MSGINFO_SHIFT)
 
+/* definitions to support mailbox API version negotiation */
+
+/*
+ * each element denotes a version of the API; existing numbers may not
+ * change; any additions must go at the end
+ */
+enum ixgbe_pfvf_api_rev {
+	ixgbe_mbox_api_10,	/* API version 1.0, linux/freebsd VF driver */
+	ixgbe_mbox_api_20,	/* API version 2.0, solaris Phase1 VF driver */
+	/* This value should always be last */
+	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
+};
+
+/* mailbox API, legacy requests */
 #define IXGBE_VF_RESET            0x01 /* VF requests reset */
 #define IXGBE_VF_SET_MAC_ADDR     0x02 /* VF requests PF to set MAC addr */
 #define IXGBE_VF_SET_MULTICAST    0x03 /* VF requests PF to set MC addr */
 #define IXGBE_VF_SET_VLAN         0x04 /* VF requests PF to set VLAN */
-#define IXGBE_VF_SET_LPE          0x05 /* VF requests PF to set VMOLR.LPE */
-#define IXGBE_VF_SET_MACVLAN      0x06 /* VF requests PF for unicast filter */
+
+/* mailbox API, version 1.0 VF requests */
+#define IXGBE_VF_SET_LPE	0x05 /* VF requests PF to set VMOLR.LPE */
+#define IXGBE_VF_SET_MACVLAN	0x06 /* VF requests PF for unicast filter */
+#define IXGBE_VF_API_NEGOTIATE	0x08 /* negotiate API version */
 
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN 4
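
The new negotiate request reuses the PF's existing reply convention: the PF echoes the command word back with status flags OR'd into the high bits (ACK 0x80000000, NACK 0x40000000, CTS 0x20000000, per the IXGBE_VT_MSGTYPE_* defines elsewhere in this header). A small decoding sketch matching the check that vf.c performs further down:

/* Reply word layout: [status flags | msg info | command].  The CTS bit
 * can ride along with either answer, so mask it off before comparing.
 */
static bool vf_request_acked(u32 reply, u32 request)
{
	reply &= ~IXGBE_VT_MSGTYPE_CTS;

	return reply == (request | IXGBE_VT_MSGTYPE_ACK);
}
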
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index ec89b86f7ca4..0c7447e6fcc8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -79,6 +79,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 	/* Call adapter stop to disable tx/rx and clear interrupts */
 	hw->mac.ops.stop_adapter(hw);
 
+	/* reset the api version */
+	hw->api_version = ixgbe_mbox_api_10;
+
 	IXGBE_WRITE_REG(hw, IXGBE_VFCTRL, IXGBE_CTRL_RST);
 	IXGBE_WRITE_FLUSH(hw);
 
@@ -97,7 +100,7 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 	msgbuf[0] = IXGBE_VF_RESET;
 	mbx->ops.write_posted(hw, msgbuf, 1);
 
-	msleep(10);
+	mdelay(10);
 
 	/* set our "perm_addr" based on info provided by PF */
 	/* also set up the mc_filter_type which is piggy backed
@@ -346,16 +349,32 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
 			       bool vlan_on)
 {
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
 	u32 msgbuf[2];
+	s32 err;
 
 	msgbuf[0] = IXGBE_VF_SET_VLAN;
 	msgbuf[1] = vlan;
 	/* Setting the 8 bit field MSG INFO to TRUE indicates "add" */
 	msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
 
-	ixgbevf_write_msg_read_ack(hw, msgbuf, 2);
+	err = mbx->ops.write_posted(hw, msgbuf, 2);
+	if (err)
+		goto mbx_err;
 
-	return 0;
+	err = mbx->ops.read_posted(hw, msgbuf, 2);
+	if (err)
+		goto mbx_err;
+
+	/* remove extra bits from the message */
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+	msgbuf[0] &= ~(0xFF << IXGBE_VT_MSGINFO_SHIFT);
+
+	if (msgbuf[0] != (IXGBE_VF_SET_VLAN | IXGBE_VT_MSGTYPE_ACK))
+		err = IXGBE_ERR_INVALID_ARGUMENT;
+
+mbx_err:
+	return err;
 }
 
 /**
@@ -389,20 +408,23 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw,
 				     bool *link_up,
 				     bool autoneg_wait_to_complete)
 {
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	struct ixgbe_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
 	u32 links_reg;
+	u32 in_msg = 0;
 
-	if (!(hw->mbx.ops.check_for_rst(hw))) {
-		*link_up = false;
-		*speed = 0;
-		return -1;
-	}
+	/* If we were hit with a reset or the mailbox timed out, drop the link */
+	if (!mbx->ops.check_for_rst(hw) || !mbx->timeout)
+		mac->get_link_status = true;
 
-	links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	if (!mac->get_link_status)
+		goto out;
 
-	if (links_reg & IXGBE_LINKS_UP)
-		*link_up = true;
-	else
-		*link_up = false;
+	/* if link is down there is no point checking whether the PF is up */
+	links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	if (!(links_reg & IXGBE_LINKS_UP))
+		goto out;
 
 	switch (links_reg & IXGBE_LINKS_SPEED_82599) {
 	case IXGBE_LINKS_SPEED_10G_82599:
@@ -416,7 +438,79 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw,
 		break;
 	}
 
-	return 0;
+	/* if the read failed it could just be a mailbox collision; it is
+	 * best to wait until we are called again rather than report an error */
+	if (mbx->ops.read(hw, &in_msg, 1))
+		goto out;
+
+	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
+		/* msg is not CTS; if it is also a NACK we have lost CTS status */
+		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
+			ret_val = -1;
+		goto out;
+	}
+
+	/* the pf is talking, if we timed out in the past we reinit */
+	if (!mbx->timeout) {
+		ret_val = -1;
+		goto out;
+	}
+
+	/* if we passed all the tests above then the link is up and we no
+	 * longer need to check for link */
+	mac->get_link_status = false;
+
+out:
+	*link_up = !mac->get_link_status;
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_rlpml_set_vf - Set the maximum receive packet length
+ *  @hw: pointer to the HW structure
+ *  @max_size: value to assign to max frame size
+ **/
+void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
+{
+	u32 msgbuf[2];
+
+	msgbuf[0] = IXGBE_VF_SET_LPE;
+	msgbuf[1] = max_size;
+	ixgbevf_write_msg_read_ack(hw, msgbuf, 2);
+}
+
+/**
+ *  ixgbevf_negotiate_api_version - Negotiate supported API version
+ *  @hw: pointer to the HW structure
+ *  @api: integer containing requested API version
+ **/
+int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api)
+{
+	int err;
+	u32 msg[3];
+
+	/* Negotiate the mailbox API version */
+	msg[0] = IXGBE_VF_API_NEGOTIATE;
+	msg[1] = api;
+	msg[2] = 0;
+	err = hw->mbx.ops.write_posted(hw, msg, 3);
+
+	if (!err)
+		err = hw->mbx.ops.read_posted(hw, msg, 3);
+
+	if (!err) {
+		msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+		/* Store value and return 0 on success */
+		if (msg[0] == (IXGBE_VF_API_NEGOTIATE | IXGBE_VT_MSGTYPE_ACK)) {
+			hw->api_version = api;
+			return 0;
+		}
+
+		err = IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	return err;
 }
 
 static const struct ixgbe_mac_operations ixgbevf_mac_ops = {
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index 25c951daee5d..47f11a584d8c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -137,6 +137,8 @@ struct ixgbe_hw {
 
 	u8  revision_id;
 	bool adapter_stopped;
+
+	int api_version;
 };
 
 struct ixgbevf_hw_stats {
@@ -170,5 +172,7 @@ struct ixgbevf_info {
 	const struct ixgbe_mac_operations *mac_ops;
 };
 
+void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
+int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api);
 #endif /* __IXGBE_VF_H__ */
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 10bba09c44ea..c10e3a6de09f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -712,10 +712,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (bounce)
 		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
 
-	/* Run destructor before passing skb to HW */
-	if (likely(!skb_shared(skb)))
-		skb_orphan(skb);
-
 	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
 		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
 		op_own |= htonl((bf_index & 0xffff) << 8);
diff --git a/drivers/net/ethernet/mipsnet.c b/drivers/net/ethernet/mipsnet.c
deleted file mode 100644
index db5285befe2a..000000000000
--- a/drivers/net/ethernet/mipsnet.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <asm/mips-boards/simint.h>
-
-#define MIPSNET_VERSION "2007-11-17"
-
-/*
- * Net status/control block as seen by sw in the core.
- */
-struct mipsnet_regs {
-	/*
-	 * Device info for probing, reads as MIPSNET%d where %d is some
-	 * form of version.
-	 */
-	u64 devId;		/*0x00 */
-
-	/*
-	 * read only busy flag.
-	 * Set and cleared by the Net Device to indicate that an rx or a tx
-	 * is in progress.
-	 */
-	u32 busy;		/*0x08 */
-
-	/*
-	 * Set by the Net Device.
-	 * The device will set it once data has been received.
-	 * The value is the number of bytes that should be read from
-	 * rxDataBuffer.  The value will decrease till 0 until all the data
-	 * from rxDataBuffer has been read.
-	 */
-	u32 rxDataCount;	/*0x0c */
-#define MIPSNET_MAX_RXTX_DATACOUNT (1 << 16)
-
-	/*
-	 * Settable from the MIPS core, cleared by the Net Device.
-	 * The core should set the number of bytes it wants to send,
-	 * then it should write those bytes of data to txDataBuffer.
-	 * The device will clear txDataCount has been processed (not
-	 * necessarily sent).
-	 */
-	u32 txDataCount;	/*0x10 */
-
-	/*
-	 * Interrupt control
-	 *
-	 * Used to clear the interrupted generated by this dev.
-	 * Write a 1 to clear the interrupt. (except bit31).
-	 *
-	 * Bit0 is set if it was a tx-done interrupt.
-	 * Bit1 is set when new rx-data is available.
-	 *    Until this bit is cleared there will be no other RXs.
-	 *
-	 * Bit31 is used for testing, it clears after a read.
-	 *    Writing 1 to this bit will cause an interrupt to be generated.
-	 *    To clear the test interrupt, write 0 to this register.
-	 */
-	u32 interruptControl;	/*0x14 */
-#define MIPSNET_INTCTL_TXDONE     (1u << 0)
-#define MIPSNET_INTCTL_RXDONE     (1u << 1)
-#define MIPSNET_INTCTL_TESTBIT    (1u << 31)
-
-	/*
-	 * Readonly core-specific interrupt info for the device to signal
-	 * the core. The meaning of the contents of this field might change.
-	 */
-	/* XXX: the whole memIntf interrupt scheme is messy: the device
-	 * should have no control what so ever of what VPE/register set is
-	 * being used.
-	 * The MemIntf should only expose interrupt lines, and something in
-	 * the config should be responsible for the line<->core/vpe bindings.
-	 */
-	u32 interruptInfo;	/*0x18 */
-
-	/*
-	 * This is where the received data is read out.
-	 * There is more data to read until rxDataReady is 0.
-	 * Only 1 byte at this regs offset is used.
-	 */
-	u32 rxDataBuffer;	/*0x1c */
-
-	/*
-	 * This is where the data to transmit is written.
-	 * Data should be written for the amount specified in the
-	 * txDataCount register.
-	 * Only 1 byte at this regs offset is used.
-	 */
-	u32 txDataBuffer;	/*0x20 */
-};
-
-#define regaddr(dev, field) \
-  (dev->base_addr + offsetof(struct mipsnet_regs, field))
-
-static char mipsnet_string[] = "mipsnet";
-
-/*
- * Copy data from the MIPSNET rx data port
- */
-static int ioiocpy_frommipsnet(struct net_device *dev, unsigned char *kdata,
-			int len)
-{
-	for (; len > 0; len--, kdata++)
-		*kdata = inb(regaddr(dev, rxDataBuffer));
-
-	return inl(regaddr(dev, rxDataCount));
-}
-
-static inline void mipsnet_put_todevice(struct net_device *dev,
-	struct sk_buff *skb)
-{
-	int count_to_go = skb->len;
-	char *buf_ptr = skb->data;
-
-	outl(skb->len, regaddr(dev, txDataCount));
-
-	for (; count_to_go; buf_ptr++, count_to_go--)
-		outb(*buf_ptr, regaddr(dev, txDataBuffer));
-
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
-
-	dev_kfree_skb(skb);
-}
-
-static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	/*
-	 * Only one packet at a time. Once TXDONE interrupt is serviced, the
-	 * queue will be restarted.
-	 */
-	netif_stop_queue(dev);
-	mipsnet_put_todevice(dev, skb);
-
-	return NETDEV_TX_OK;
-}
-
-static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t len)
-{
-	struct sk_buff *skb;
-
-	if (!len)
-		return len;
-
-	skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
-	if (!skb) {
-		dev->stats.rx_dropped++;
-		return -ENOMEM;
-	}
-
-	skb_reserve(skb, NET_IP_ALIGN);
-	if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len))
-		return -EFAULT;
-
-	skb->protocol = eth_type_trans(skb, dev);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	netif_rx(skb);
-
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += len;
-
-	return len;
-}
-
-static irqreturn_t mipsnet_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	u32 int_flags;
-	irqreturn_t ret = IRQ_NONE;
-
-	if (irq != dev->irq)
-		goto out_badirq;
-
-	/* TESTBIT is cleared on read. */
-	int_flags = inl(regaddr(dev, interruptControl));
-	if (int_flags & MIPSNET_INTCTL_TESTBIT) {
-		/* TESTBIT takes effect after a write with 0. */
-		outl(0, regaddr(dev, interruptControl));
-		ret = IRQ_HANDLED;
-	} else if (int_flags & MIPSNET_INTCTL_TXDONE) {
-		/* Only one packet at a time, we are done. */
-		dev->stats.tx_packets++;
-		netif_wake_queue(dev);
-		outl(MIPSNET_INTCTL_TXDONE,
-		     regaddr(dev, interruptControl));
-		ret = IRQ_HANDLED;
-	} else if (int_flags & MIPSNET_INTCTL_RXDONE) {
-		mipsnet_get_fromdev(dev, inl(regaddr(dev, rxDataCount)));
-		outl(MIPSNET_INTCTL_RXDONE, regaddr(dev, interruptControl));
-		ret = IRQ_HANDLED;
-	}
-	return ret;
-
-out_badirq:
-	printk(KERN_INFO "%s: %s(): irq %d for unknown device\n",
-	       dev->name, __func__, irq);
-	return ret;
-}
-
-static int mipsnet_open(struct net_device *dev)
-{
-	int err;
-
-	err = request_irq(dev->irq, mipsnet_interrupt,
-			  IRQF_SHARED, dev->name, (void *) dev);
-	if (err) {
-		release_region(dev->base_addr, sizeof(struct mipsnet_regs));
-		return err;
-	}
-
-	netif_start_queue(dev);
-
-	/* test interrupt handler */
-	outl(MIPSNET_INTCTL_TESTBIT, regaddr(dev, interruptControl));
-
-	return 0;
-}
-
-static int mipsnet_close(struct net_device *dev)
-{
-	netif_stop_queue(dev);
-	free_irq(dev->irq, dev);
-	return 0;
-}
-
-static void mipsnet_set_mclist(struct net_device *dev)
-{
-}
-
-static const struct net_device_ops mipsnet_netdev_ops = {
-	.ndo_open		= mipsnet_open,
-	.ndo_stop		= mipsnet_close,
-	.ndo_start_xmit		= mipsnet_xmit,
-	.ndo_set_rx_mode	= mipsnet_set_mclist,
-	.ndo_change_mtu		= eth_change_mtu,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= eth_mac_addr,
-};
-
-static int __devinit mipsnet_probe(struct platform_device *dev)
-{
-	struct net_device *netdev;
-	int err;
-
-	netdev = alloc_etherdev(0);
-	if (!netdev) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	platform_set_drvdata(dev, netdev);
-
-	netdev->netdev_ops = &mipsnet_netdev_ops;
-
-	/*
-	 * TODO: probe for these or load them from PARAM
-	 */
-	netdev->base_addr = 0x4200;
-	netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 +
-		      inl(regaddr(netdev, interruptInfo));
-
-	/* Get the io region now, get irq on open() */
-	if (!request_region(netdev->base_addr, sizeof(struct mipsnet_regs),
-			    "mipsnet")) {
-		err = -EBUSY;
-		goto out_free_netdev;
-	}
-
-	/*
-	 * Lacking any better mechanism to allocate a MAC address we use a
-	 * random one ...
-	 */
-	eth_hw_addr_random(netdev);
-
-	err = register_netdev(netdev);
-	if (err) {
-		printk(KERN_ERR "MIPSNet: failed to register netdev.\n");
-		goto out_free_region;
-	}
-
-	return 0;
-
-out_free_region:
-	release_region(netdev->base_addr, sizeof(struct mipsnet_regs));
-
-out_free_netdev:
-	free_netdev(netdev);
-
-out:
-	return err;
-}
-
-static int __devexit mipsnet_device_remove(struct platform_device *device)
-{
-	struct net_device *dev = platform_get_drvdata(device);
-
-	unregister_netdev(dev);
-	release_region(dev->base_addr, sizeof(struct mipsnet_regs));
-	free_netdev(dev);
-	platform_set_drvdata(device, NULL);
-
-	return 0;
-}
-
-static struct platform_driver mipsnet_driver = {
-	.driver = {
-		.name		= mipsnet_string,
-		.owner		= THIS_MODULE,
-	},
-	.probe		= mipsnet_probe,
-	.remove		= __devexit_p(mipsnet_device_remove),
-};
-
-static int __init mipsnet_init_module(void)
-{
-	int err;
-
-	printk(KERN_INFO "MIPSNet Ethernet driver. Version: %s. "
-	       "(c)2005 MIPS Technologies, Inc.\n", MIPSNET_VERSION);
-
-	err = platform_driver_register(&mipsnet_driver);
-	if (err)
-		printk(KERN_ERR "Driver registration failed\n");
-
-	return err;
-}
-
-static void __exit mipsnet_exit_module(void)
-{
-	platform_driver_unregister(&mipsnet_driver);
-}
-
-module_init(mipsnet_init_module);
-module_exit(mipsnet_exit_module);
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index f45def01a98e..876beceaf2d7 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3409,7 +3409,7 @@ set_speed:
 
 	pause_flags = 0;
 	/* setup pause frame */
-	if (np->duplex != 0) {
+	if (netif_running(dev) && (np->duplex != 0)) {
 		if (np->autoneg && np->pause_flags & NV_PAUSEFRAME_AUTONEG) {
 			adv_pause = adv & (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
 			lpa_pause = lpa & (LPA_PAUSE_CAP | LPA_PAUSE_ASYM);
@@ -4435,7 +4435,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void
 
 	regs->version = FORCEDETH_REGS_VER;
 	spin_lock_irq(&np->lock);
-	for (i = 0; i <= np->register_size/sizeof(u32); i++)
+	for (i = 0; i < np->register_size/sizeof(u32); i++)
 		rbuf[i] = readl(base + i*sizeof(u32));
 	spin_unlock_irq(&np->lock);
 }
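
The loop-bound change in nv_get_regs() above is a textbook off-by-one fix: with `<=`, the loop ran register_size/sizeof(u32) + 1 times, reading one register past the mapped window and writing one u32 past the end of the ethtool buffer. In miniature:

#define REGS_SIZE 1024			/* illustrative register window size */

	u32 rbuf[REGS_SIZE / sizeof(u32)];	/* exactly size/4 slots */
	unsigned int i;

	for (i = 0; i <= REGS_SIZE / sizeof(u32); i++)	/* BAD: size/4 + 1 reads */
		rbuf[i] = readl(base + i * sizeof(u32));	/* rbuf[size/4] overflows */

	for (i = 0; i < REGS_SIZE / sizeof(u32); i++)	/* GOOD: exactly size/4 */
		rbuf[i] = readl(base + i * sizeof(u32));
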
@@ -5455,6 +5455,7 @@ static int nv_close(struct net_device *dev)
 
 	netif_stop_queue(dev);
 	spin_lock_irq(&np->lock);
+	nv_update_pause(dev, 0); /* otherwise stop_tx bricks NIC */
 	nv_stop_rxtx(dev);
 	nv_txrx_reset(dev);
 
@@ -5904,11 +5905,19 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		goto out_error;
 	}
 
+	netif_carrier_off(dev);
+
+	/* Some NICs freeze when TX pause is enabled while NIC is
+	 * down, and this stays across warm reboots. The sequence
+	 * below should be enough to recover from that state.
+	 */
+	nv_update_pause(dev, 0);
+	nv_start_tx(dev);
+	nv_stop_tx(dev);
+
 	if (id->driver_data & DEV_HAS_VLAN)
 		nv_vlan_mode(dev, dev->features);
 
-	netif_carrier_off(dev);
-
 	dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
 		 dev->name, np->phy_oui, np->phyaddr, dev->dev_addr);
 
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index a7cc56007b33..e7ff886e8047 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -77,7 +77,7 @@
 static const int multicast_filter_limit = 32;
 
 #define MAX_READ_REQUEST_SHIFT	12
-#define TX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
+#define TX_DMA_BURST	7	/* Maximum PCI burst, '7' is unlimited */
 #define SafeMtu		0x1c20	/* ... actually life sucks beyond ~7k */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
@@ -287,6 +287,8 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8167), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8168), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8169), 0, 0, RTL_CFG_0 },
+	{ PCI_VENDOR_ID_DLINK,			0x4300,
+		PCI_VENDOR_ID_DLINK, 0x4b10,		 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4300), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4302), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AT,		0xc107), 0, 0, RTL_CFG_0 },
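
The new D-Link entry is spelled out positionally because PCI_DEVICE() wildcards the subsystem IDs (it expands to vendor, device, PCI_ANY_ID, PCI_ANY_ID), so telling this 0x4300 board apart from the generic one requires the full initializer. A sketch of the field order and why placement matters (IDs written numerically for clarity; 0x1186 is PCI_VENDOR_ID_DLINK):

static const struct pci_device_id example_tbl[] = {
	/* field order (include/linux/mod_devicetable.h):
	 *   vendor, device, subvendor, subdevice, class, class_mask, data */
	{ 0x1186, 0x4300, 0x1186, 0x4b10, 0, 0, RTL_CFG_1 },
	/* PCI_DEVICE() leaves subvendor/subdevice as PCI_ANY_ID: */
	{ PCI_DEVICE(0x1186, 0x4300), 0, 0, RTL_CFG_0 },
	{ }	/* terminator */
};
/* The PCI core returns the first matching entry, so the specific
 * subsystem match must precede the catch-all, exactly as in the
 * driver's table above.
 */
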
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index fb3cbc27063c..25906c1d1b15 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -34,3 +34,10 @@ config SFC_SRIOV
 	  This enables support for the SFC9000 I/O Virtualization
 	  features, allowing accelerated network performance in
 	  virtualized environments.
+config SFC_PTP
+	bool "Solarflare SFC9000-family PTP support"
+	depends on SFC && PTP_1588_CLOCK && !(SFC=y && PTP_1588_CLOCK=m)
+	default y
+	---help---
+	  This enables support for the Precision Time Protocol (PTP)
+	  on SFC9000-family NICs.
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index ea1f8db57318..e11f2ecf69d9 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -5,5 +5,6 @@ sfc-y			+= efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
 			   mcdi.o mcdi_phy.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)	+= mtd.o
 sfc-$(CONFIG_SFC_SRIOV)	+= siena_sriov.o
+sfc-$(CONFIG_SFC_PTP)	+= ptp.o
 
 obj-$(CONFIG_SFC)	+= sfc.o
diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h
index b26a954c27fc..5400a33f254f 100644
--- a/drivers/net/ethernet/sfc/bitfield.h
+++ b/drivers/net/ethernet/sfc/bitfield.h
@@ -120,10 +120,10 @@ typedef union efx_oword {
  * [0,high-low), with garbage in bits [high-low+1,...).
  */
 #define EFX_EXTRACT_NATIVE(native_element, min, max, low, high)		\
-	(((low > max) || (high < min)) ? 0 :				\
-	 ((low > min) ?							\
-	  ((native_element) >> (low - min)) :				\
-	  ((native_element) << (min - low))))
+	((low) > (max) || (high) < (min) ? 0 :				\
+	 (low) > (min) ?						\
+	 (native_element) >> ((low) - (min)) :				\
+	 (native_element) << ((min) - (low)))
 
 /*
  * Extract bit field portion [low,high) from the 64-bit little-endian
@@ -142,27 +142,27 @@ typedef union efx_oword {
 #define EFX_EXTRACT_OWORD64(oword, low, high)				\
 	((EFX_EXTRACT64((oword).u64[0], 0, 63, low, high) |		\
 	  EFX_EXTRACT64((oword).u64[1], 64, 127, low, high)) &		\
-	 EFX_MASK64(high + 1 - low))
+	 EFX_MASK64((high) + 1 - (low)))
 
 #define EFX_EXTRACT_QWORD64(qword, low, high)				\
 	(EFX_EXTRACT64((qword).u64[0], 0, 63, low, high) &		\
-	 EFX_MASK64(high + 1 - low))
+	 EFX_MASK64((high) + 1 - (low)))
 
 #define EFX_EXTRACT_OWORD32(oword, low, high)				\
 	((EFX_EXTRACT32((oword).u32[0], 0, 31, low, high) |		\
 	  EFX_EXTRACT32((oword).u32[1], 32, 63, low, high) |		\
 	  EFX_EXTRACT32((oword).u32[2], 64, 95, low, high) |		\
 	  EFX_EXTRACT32((oword).u32[3], 96, 127, low, high)) &		\
-	 EFX_MASK32(high + 1 - low))
+	 EFX_MASK32((high) + 1 - (low)))
 
 #define EFX_EXTRACT_QWORD32(qword, low, high)				\
 	((EFX_EXTRACT32((qword).u32[0], 0, 31, low, high) |		\
 	  EFX_EXTRACT32((qword).u32[1], 32, 63, low, high)) &		\
-	 EFX_MASK32(high + 1 - low))
+	 EFX_MASK32((high) + 1 - (low)))
 
 #define EFX_EXTRACT_DWORD(dword, low, high)			\
 	(EFX_EXTRACT32((dword).u32[0], 0, 31, low, high) &	\
-	 EFX_MASK32(high + 1 - low))
+	 EFX_MASK32((high) + 1 - (low)))
 
 #define EFX_OWORD_FIELD64(oword, field)				\
 	EFX_EXTRACT_OWORD64(oword, EFX_LOW_BIT(field),		\
@@ -442,10 +442,10 @@ typedef union efx_oword {
 	cpu_to_le32(EFX_INSERT_NATIVE(min, max, low, high, value))
 
 #define EFX_INPLACE_MASK64(min, max, low, high)				\
-	EFX_INSERT64(min, max, low, high, EFX_MASK64(high + 1 - low))
+	EFX_INSERT64(min, max, low, high, EFX_MASK64((high) + 1 - (low)))
 
 #define EFX_INPLACE_MASK32(min, max, low, high)				\
-	EFX_INSERT32(min, max, low, high, EFX_MASK32(high + 1 - low))
+	EFX_INSERT32(min, max, low, high, EFX_MASK32((high) + 1 - (low)))
 
 #define EFX_SET_OWORD64(oword, low, high, value) do {			\
 	(oword).u64[0] = (((oword).u64[0]				\
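
This whole hunk is macro hygiene rather than a behaviour change: `high + 1 - low` is expanded textually, so an argument that itself contains additive operators rebinds the arithmetic. A minimal, compilable reproduction of the bug class:

#include <assert.h>

#define WIDTH_BAD(low, high)	(high + 1 - low)	/* args unparenthesized */
#define WIDTH_GOOD(low, high)	((high) + 1 - (low))

int main(void)
{
	/* pass "8 - 4" (i.e. 4) as low and 15 as high: width should be 12 */
	assert(WIDTH_BAD(8 - 4, 15) == 4);	/* expands to 15 + 1 - 8 - 4 */
	assert(WIDTH_GOOD(8 - 4, 15) == 12);	/* expands to (15) + 1 - (8 - 4) */
	return 0;
}
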
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 65a8d49106a4..96bd980e828d 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx);
 
 #define EFX_ASSERT_RESET_SERIALISED(efx)		\
 	do {						\
-		if ((efx->state == STATE_RUNNING) ||	\
+		if ((efx->state == STATE_READY) ||	\
 		    (efx->state == STATE_DISABLED))	\
 			ASSERT_RTNL();			\
 	} while (0)
 
+static int efx_check_disabled(struct efx_nic *efx)
+{
+	if (efx->state == STATE_DISABLED) {
+		netif_err(efx, drv, efx->net_dev,
+			  "device is disabled due to earlier errors\n");
+		return -EIO;
+	}
+	return 0;
+}
+
 /**************************************************************************
  *
  * Event queue processing
@@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_buffer_order = get_order(efx->rx_buffer_len +
 					 sizeof(struct efx_rx_page_state));
 
+	/* We must keep at least one descriptor in a TX ring empty.
+	 * We could avoid this when the queue size does not exactly
+	 * match the hardware ring size, but it's not that important.
+	 * Therefore we stop the queue when one more skb might fill
+	 * the ring completely.  We wake it when half way back to
+	 * empty.
+	 */
+	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
+	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
 	/* Initialise the channels */
 	efx_for_each_channel(channel, efx) {
 		efx_for_each_channel_tx_queue(tx_queue, channel)
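
The thresholds installed above give the TX path classic stop/wake hysteresis: stop while the worst-case skb (efx_tx_max_skb_descs() descriptors) still fits, and wake only once the ring has drained to half the stop level so the queue does not flap at the boundary. A generic sketch of the two halves; struct tx_ring, ring_fill_level() and the core_txq field are hypothetical stand-ins for the driver's real types.

/* Transmit path: stop before one more worst-case skb could overrun. */
static void tx_maybe_stop(struct tx_ring *ring)
{
	if (ring_fill_level(ring) >= ring->stop_thresh)	/* txq_stop_thresh */
		netif_tx_stop_queue(ring->core_txq);
}

/* Completion path: wake only after draining well past the stop mark. */
static void tx_maybe_wake(struct tx_ring *ring)
{
	if (netif_tx_queue_stopped(ring->core_txq) &&
	    ring_fill_level(ring) <= ring->wake_thresh)	/* txq_wake_thresh */
		netif_tx_wake_queue(ring->core_txq);
}
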
@@ -714,6 +734,7 @@ static void efx_remove_channel(struct efx_channel *channel)
 	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 		efx_remove_tx_queue(tx_queue);
 	efx_remove_eventq(channel);
+	channel->type->post_remove(channel);
 }
 
 static void efx_remove_channels(struct efx_nic *efx)
@@ -730,7 +751,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 	u32 old_rxq_entries, old_txq_entries;
 	unsigned i, next_buffer_table = 0;
-	int rc = 0;
+	int rc;
+
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 
 	/* Not all channels should be reallocated. We must avoid
 	 * reallocating their buffer table entries.
@@ -828,6 +853,7 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 
 static const struct efx_channel_type efx_default_channel_type = {
 	.pre_probe		= efx_channel_dummy_op_int,
+	.post_remove		= efx_channel_dummy_op_void,
 	.get_name		= efx_get_channel_name,
 	.copy			= efx_copy_channel,
 	.keep_eventq		= false,
@@ -838,6 +864,10 @@ int efx_channel_dummy_op_int(struct efx_channel *channel)
 	return 0;
 }
 
+void efx_channel_dummy_op_void(struct efx_channel *channel)
+{
+}
+
 /**************************************************************************
  *
  * Port handling
@@ -1365,6 +1395,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq)
 {
 	struct efx_channel *channel;
 
+	BUG_ON(efx->state == STATE_DISABLED);
+
 	if (efx->legacy_irq)
 		efx->legacy_irq_enabled = true;
 	efx_nic_enable_interrupts(efx);
@@ -1382,6 +1414,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq)
 {
 	struct efx_channel *channel;
 
+	if (efx->state == STATE_DISABLED)
+		return;
+
 	efx_mcdi_mode_poll(efx);
 
 	efx_nic_disable_interrupts(efx);
@@ -1422,10 +1457,16 @@ static void efx_set_channels(struct efx_nic *efx)
 	efx->tx_channel_offset =
 		separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0;
 
-	/* We need to adjust the TX queue numbers if we have separate
+	/* We need to mark which channels really have RX and TX
+	 * queues, and adjust the TX queue numbers if we have separate
 	 * RX-only and TX-only channels.
 	 */
 	efx_for_each_channel(channel, efx) {
+		if (channel->channel < efx->n_rx_channels)
+			channel->rx_queue.core_index = channel->channel;
+		else
+			channel->rx_queue.core_index = -1;
+
 		efx_for_each_channel_tx_queue(tx_queue, channel)
 			tx_queue->queue -= (efx->tx_channel_offset *
 					    EFX_TXQ_TYPES);
@@ -1533,22 +1574,21 @@ static int efx_probe_all(struct efx_nic *efx)
 	return rc;
 }
 
-/* Called after previous invocation(s) of efx_stop_all, restarts the port,
- * kernel transmit queues and NAPI processing, and ensures that the port is
- * scheduled to be reconfigured. This function is safe to call multiple
- * times when the NIC is in any state.
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured.  Interrupts must already be enabled.  This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
  */
 static void efx_start_all(struct efx_nic *efx)
 {
 	EFX_ASSERT_RESET_SERIALISED(efx);
+	BUG_ON(efx->state == STATE_DISABLED);
 
 	/* Check that it is appropriate to restart the interface. All
 	 * of these flags are safe to read under just the rtnl lock */
-	if (efx->port_enabled)
-		return;
-	if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT))
-		return;
-	if (!netif_running(efx->net_dev))
+	if (efx->port_enabled || !netif_running(efx->net_dev))
 		return;
 
 	efx_start_port(efx);
@@ -1582,11 +1622,11 @@ static void efx_flush_all(struct efx_nic *efx)
 	cancel_work_sync(&efx->mac_work);
 }
 
-/* Quiesce hardware and software without bringing the link down.
- * Safe to call multiple times, when the nic and interface is in any
- * state. The caller is guaranteed to subsequently be in a position
- * to modify any hardware and software state they see fit without
- * taking locks. */
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down.  Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled.  Requires the RTNL lock.
+ */
 static void efx_stop_all(struct efx_nic *efx)
 {
 	EFX_ASSERT_RESET_SERIALISED(efx);
@@ -1739,7 +1779,8 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct mii_ioctl_data *data = if_mii(ifr);
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
+	if (cmd == SIOCSHWTSTAMP)
+		return efx_ptp_ioctl(efx, ifr, cmd);
 
 	/* Convert phy_id from older PRTAD/DEVAD format */
 	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
@@ -1820,13 +1861,14 @@ static void efx_netpoll(struct net_device *net_dev)
 static int efx_net_open(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	EFX_ASSERT_RESET_SERIALISED(efx);
+	int rc;
 
 	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
 		  raw_smp_processor_id());
 
-	if (efx->state == STATE_DISABLED)
-		return -EIO;
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 	if (efx->phy_mode & PHY_MODE_SPECIAL)
 		return -EBUSY;
 	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
@@ -1852,10 +1894,8 @@ static int efx_net_stop(struct net_device *net_dev)
 	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
 		  raw_smp_processor_id());
 
-	if (efx->state != STATE_DISABLED) {
-		/* Stop the device and flush all the channels */
-		efx_stop_all(efx);
-	}
+	/* Stop the device and flush all the channels */
+	efx_stop_all(efx);
 
 	return 0;
 }
@@ -1915,9 +1955,11 @@ static void efx_watchdog(struct net_device *net_dev)
 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
+	int rc;
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
-
+	rc = efx_check_disabled(efx);
+	if (rc)
+		return rc;
 	if (new_mtu > EFX_MAX_MTU)
 		return -EINVAL;
 
@@ -1926,8 +1968,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
 	mutex_lock(&efx->mac_lock);
-	/* Reconfigure the MAC before enabling the dma queues so that
-	 * the RX buffers don't overflow */
 	net_dev->mtu = new_mtu;
 	efx->type->reconfigure_mac(efx);
 	mutex_unlock(&efx->mac_lock);
@@ -1942,8 +1982,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 	struct sockaddr *addr = data;
 	char *new_addr = addr->sa_data;
 
-	EFX_ASSERT_RESET_SERIALISED(efx);
-
 	if (!is_valid_ether_addr(new_addr)) {
 		netif_err(efx, drv, efx->net_dev,
 			  "invalid ethernet MAC address requested: %pM\n",
@@ -2079,11 +2117,27 @@ static int efx_register_netdev(struct efx_nic *efx)
 
 	rtnl_lock();
 
+	/* Enable resets to be scheduled and check whether any were
+	 * already requested.  If so, the NIC is probably hosed so we
+	 * abort.
+	 */
+	efx->state = STATE_READY;
+	smp_mb(); /* ensure we change state before checking reset_pending */
+	if (efx->reset_pending) {
+		netif_err(efx, probe, efx->net_dev,
+			  "aborting probe due to scheduled reset\n");
+		rc = -EIO;
+		goto fail_locked;
+	}
+
 	rc = dev_alloc_name(net_dev, net_dev->name);
 	if (rc < 0)
 		goto fail_locked;
 	efx_update_name(efx);
 
+	/* Always start with carrier off; PHY events will detect the link */
+	netif_carrier_off(net_dev);
+
 	rc = register_netdevice(net_dev);
 	if (rc)
 		goto fail_locked;
@@ -2094,9 +2148,6 @@ static int efx_register_netdev(struct efx_nic *efx)
 			efx_init_tx_queue_core_txq(tx_queue);
 	}
 
-	/* Always start with carrier off; PHY events will detect the link */
-	netif_carrier_off(net_dev);
-
 	rtnl_unlock();
 
 	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
@@ -2108,14 +2159,14 @@ static int efx_register_netdev(struct efx_nic *efx)
 
 	return 0;
 
+fail_registered:
+	rtnl_lock();
+	unregister_netdevice(net_dev);
 fail_locked:
+	efx->state = STATE_UNINIT;
 	rtnl_unlock();
 	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
 	return rc;
-
-fail_registered:
-	unregister_netdev(net_dev);
-	return rc;
 }
 
 static void efx_unregister_netdev(struct efx_nic *efx)
@@ -2138,7 +2189,11 @@ static void efx_unregister_netdev(struct efx_nic *efx)
 
 	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
 	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
-	unregister_netdev(efx->net_dev);
+
+	rtnl_lock();
+	unregister_netdevice(efx->net_dev);
+	efx->state = STATE_UNINIT;
+	rtnl_unlock();
 }
 
 /**************************************************************************
@@ -2154,9 +2209,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
 	EFX_ASSERT_RESET_SERIALISED(efx);
 
 	efx_stop_all(efx);
-	mutex_lock(&efx->mac_lock);
-
 	efx_stop_interrupts(efx, false);
+
+	mutex_lock(&efx->mac_lock);
 	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
 		efx->phy_op->fini(efx);
 	efx->type->fini(efx);
@@ -2276,16 +2331,15 @@ static void efx_reset_work(struct work_struct *data)
 	if (!pending)
 		return;
 
-	/* If we're not RUNNING then don't reset. Leave the reset_pending
-	 * flags set so that efx_pci_probe_main will be retried */
-	if (efx->state != STATE_RUNNING) {
-		netif_info(efx, drv, efx->net_dev,
-			   "scheduled reset quenched. NIC not RUNNING\n");
-		return;
-	}
-
 	rtnl_lock();
-	(void)efx_reset(efx, fls(pending) - 1);
+
+	/* We checked the state in efx_schedule_reset() but it may
+	 * have changed by now.  Now that we have the RTNL lock,
+	 * it cannot change again.
+	 */
+	if (efx->state == STATE_READY)
+		(void)efx_reset(efx, fls(pending) - 1);
+
 	rtnl_unlock();
 }
 
@@ -2311,6 +2365,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
 	}
 
 	set_bit(method, &efx->reset_pending);
+	smp_mb(); /* ensure we change reset_pending before checking state */
+
+	/* If we're not READY then just leave the flags set as the cue
+	 * to abort probing or reschedule the reset later.
+	 */
+	if (ACCESS_ONCE(efx->state) != STATE_READY)
+		return;
 
 	/* efx_process_channel() will no longer read events once a
 	 * reset is scheduled. So switch back to poll'd MCDI completions. */
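
This barrier pairs with the one added in efx_register_netdev() above; together they form the standard store-then-load pattern that keeps a reset request from falling between the cracks while the device is becoming READY. Schematically (a sketch of the ordering argument, not new driver code):

/*  CPU A: efx_register_netdev()      CPU B: efx_schedule_reset()
 *
 *  efx->state = STATE_READY;         set_bit(method, &efx->reset_pending);
 *  smp_mb();                         smp_mb();
 *  if (efx->reset_pending)           if (ACCESS_ONCE(efx->state) != STATE_READY)
 *          abort the probe;                  return;  (probe will see the bit)
 *
 * With both barriers, at least one CPU must observe the other's store:
 * either A sees the pending bit and aborts, or B sees STATE_READY and
 * queues the reset work.  Neither outcome loses the reset.
 */
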
@@ -2376,13 +2437,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = {
 /* This zeroes out and then fills in the invariants in a struct
  * efx_nic (including all sub-structures).
  */
-static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
+static int efx_init_struct(struct efx_nic *efx,
 			   struct pci_dev *pci_dev, struct net_device *net_dev)
 {
 	int i;
 
 	/* Initialise common structures */
-	memset(efx, 0, sizeof(*efx));
 	spin_lock_init(&efx->biu_lock);
 #ifdef CONFIG_SFC_MTD
 	INIT_LIST_HEAD(&efx->mtd_list);
@@ -2392,7 +2452,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
 	INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
 	efx->pci_dev = pci_dev;
 	efx->msg_enable = debug;
-	efx->state = STATE_INIT;
+	efx->state = STATE_UNINIT;
 	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
 
 	efx->net_dev = net_dev;
@@ -2409,8 +2469,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
 			goto fail;
 	}
 
-	efx->type = type;
-
 	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);
 
 	/* Higher numbered interrupt modes are less capable! */
@@ -2455,6 +2513,12 @@ static void efx_fini_struct(struct efx_nic *efx)
  */
 static void efx_pci_remove_main(struct efx_nic *efx)
 {
+	/* Flush reset_work. It can no longer be scheduled since we
+	 * are not READY.
+	 */
+	BUG_ON(efx->state == STATE_READY);
+	cancel_work_sync(&efx->reset_work);
+
 #ifdef CONFIG_RFS_ACCEL
 	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
 	efx->net_dev->rx_cpu_rmap = NULL;
@@ -2480,24 +2544,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 
 	/* Mark the NIC as fini, then stop the interface */
 	rtnl_lock();
-	efx->state = STATE_FINI;
 	dev_close(efx->net_dev);
-
-	/* Allow any queued efx_resets() to complete */
+	efx_stop_interrupts(efx, false);
 	rtnl_unlock();
 
-	efx_stop_interrupts(efx, false);
 	efx_sriov_fini(efx);
 	efx_unregister_netdev(efx);
 
 	efx_mtd_remove(efx);
 
-	/* Wait for any scheduled resets to complete. No more will be
-	 * scheduled from this point because efx_stop_all() has been
-	 * called, we are no longer registered with driverlink, and
-	 * the net_device's have been removed. */
-	cancel_work_sync(&efx->reset_work);
-
 	efx_pci_remove_main(efx);
 
 	efx_fini_io(efx);
@@ -2617,7 +2672,6 @@ static int efx_pci_probe_main(struct efx_nic *efx)
 static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				   const struct pci_device_id *entry)
 {
-	const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data;
 	struct net_device *net_dev;
 	struct efx_nic *efx;
 	int rc;
@@ -2627,10 +2681,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				     EFX_MAX_RX_QUEUES);
 	if (!net_dev)
 		return -ENOMEM;
-	net_dev->features |= (type->offload_features | NETIF_F_SG |
+	efx = netdev_priv(net_dev);
+	efx->type = (const struct efx_nic_type *) entry->driver_data;
+	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
 			      NETIF_F_HIGHDMA | NETIF_F_TSO |
 			      NETIF_F_RXCSUM);
-	if (type->offload_features & NETIF_F_V6_CSUM)
+	if (efx->type->offload_features & NETIF_F_V6_CSUM)
 		net_dev->features |= NETIF_F_TSO6;
 	/* Mask for features that also apply to VLAN devices */
 	net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
@@ -2638,10 +2694,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 				   NETIF_F_RXCSUM);
 	/* All offloads can be toggled */
 	net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA;
-	efx = netdev_priv(net_dev);
 	pci_set_drvdata(pci_dev, efx);
 	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
-	rc = efx_init_struct(efx, type, pci_dev, net_dev);
+	rc = efx_init_struct(efx, pci_dev, net_dev);
 	if (rc)
 		goto fail1;
 
@@ -2656,28 +2711,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 		goto fail2;
 
 	rc = efx_pci_probe_main(efx);
-
-	/* Serialise against efx_reset(). No more resets will be
-	 * scheduled since efx_stop_all() has been called, and we have
-	 * not and never have been registered.
-	 */
-	cancel_work_sync(&efx->reset_work);
-
 	if (rc)
 		goto fail3;
 
-	/* If there was a scheduled reset during probe, the NIC is
-	 * probably hosed anyway.
-	 */
-	if (efx->reset_pending) {
-		rc = -EIO;
-		goto fail4;
-	}
-
-	/* Switch to the running state before we expose the device to the OS,
-	 * so that dev_open()|efx_start_all() will actually start the device */
-	efx->state = STATE_RUNNING;
-
 	rc = efx_register_netdev(efx);
 	if (rc)
 		goto fail4;
@@ -2717,12 +2753,18 @@ static int efx_pm_freeze(struct device *dev)
 {
 	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
 
-	efx->state = STATE_FINI;
+	rtnl_lock();
 
-	netif_device_detach(efx->net_dev);
+	if (efx->state != STATE_DISABLED) {
+		efx->state = STATE_UNINIT;
 
-	efx_stop_all(efx);
-	efx_stop_interrupts(efx, false);
+		netif_device_detach(efx->net_dev);
+
+		efx_stop_all(efx);
+		efx_stop_interrupts(efx, false);
+	}
+
+	rtnl_unlock();
 
 	return 0;
 }
@@ -2731,21 +2773,25 @@ static int efx_pm_thaw(struct device *dev)
 {
 	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
 
-	efx->state = STATE_INIT;
+	rtnl_lock();
 
-	efx_start_interrupts(efx, false);
+	if (efx->state != STATE_DISABLED) {
+		efx_start_interrupts(efx, false);
 
-	mutex_lock(&efx->mac_lock);
-	efx->phy_op->reconfigure(efx);
-	mutex_unlock(&efx->mac_lock);
+		mutex_lock(&efx->mac_lock);
+		efx->phy_op->reconfigure(efx);
+		mutex_unlock(&efx->mac_lock);
 
-	efx_start_all(efx);
+		efx_start_all(efx);
 
-	netif_device_attach(efx->net_dev);
+		netif_device_attach(efx->net_dev);
 
-	efx->state = STATE_RUNNING;
+		efx->state = STATE_READY;
 
-	efx->type->resume_wol(efx);
+		efx->type->resume_wol(efx);
+	}
+
+	rtnl_unlock();
 
 	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
 	queue_work(reset_workqueue, &efx->reset_work);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 70755c97251a..f11170bc48bf 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -102,6 +102,7 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
 
 /* Channels */
 extern int efx_channel_dummy_op_int(struct efx_channel *channel);
+extern void efx_channel_dummy_op_void(struct efx_channel *channel);
 extern void efx_process_channel_now(struct efx_channel *channel);
 extern int
 efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 5faedd855b77..90f078eff8e6 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -337,7 +337,8 @@ static int efx_fill_loopback_test(struct efx_nic *efx,
 				  unsigned int test_index,
 				  struct ethtool_string *strings, u64 *data)
 {
-	struct efx_channel *channel = efx_get_channel(efx, 0);
+	struct efx_channel *channel =
+		efx_get_channel(efx, efx->tx_channel_offset);
 	struct efx_tx_queue *tx_queue;
 
 	efx_for_each_channel_tx_queue(tx_queue, channel) {
@@ -529,9 +530,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
 	if (!efx_tests)
 		goto fail;
 
-
-	ASSERT_RTNL();
-	if (efx->state != STATE_RUNNING) {
+	if (efx->state != STATE_READY) {
 		rc = -EIO;
 		goto fail1;
 	}
@@ -962,9 +961,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	int rc;
 
 	/* Check that user wants us to choose the location */
-	if (rule->location != RX_CLS_LOC_ANY &&
-	    rule->location != RX_CLS_LOC_FIRST &&
-	    rule->location != RX_CLS_LOC_LAST)
+	if (rule->location != RX_CLS_LOC_ANY)
 		return -EINVAL;
 
 	/* Range-check ring_cookie */
@@ -978,9 +975,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	     rule->m_ext.data[1]))
 		return -EINVAL;
 
-	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
-			   (rule->location == RX_CLS_LOC_FIRST) ?
-			   EFX_FILTER_FLAG_RX_OVERRIDE_IP : 0,
+	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0,
 			   (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
 			   0xfff : rule->ring_cookie);
 
@@ -1176,6 +1171,7 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
 	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
 	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
+	.get_ts_info		= efx_ptp_get_ts_info,
 	.get_module_info	= efx_ethtool_get_module_info,
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
 };
diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c
index 8687a6c3db0d..ec1e99d0dcad 100644
--- a/drivers/net/ethernet/sfc/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon_boards.c
@@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 		new_mode = PHY_MODE_SPECIAL;
 	if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) {
 		err = 0;
-	} else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) {
+	} else if (efx->state != STATE_READY || netif_running(efx->net_dev)) {
 		err = -EBUSY;
 	} else {
 		/* Reset the PHY, reconfigure the MAC and enable/disable
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index c3fd61f0a95c..8af42cd1feda 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -162,20 +162,12 @@ static void efx_filter_push_rx_config(struct efx_nic *efx)
 			!!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags &
 			   EFX_FILTER_FLAG_RX_RSS));
 		EFX_SET_OWORD_FIELD(
-			filter_ctl, FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE,
-			!!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags &
-			   EFX_FILTER_FLAG_RX_OVERRIDE_IP));
-		EFX_SET_OWORD_FIELD(
 			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID,
 			table->spec[EFX_FILTER_INDEX_MC_DEF].dmaq_id);
 		EFX_SET_OWORD_FIELD(
 			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
 			!!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
 			   EFX_FILTER_FLAG_RX_RSS));
-		EFX_SET_OWORD_FIELD(
-			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE,
-			!!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
-			   EFX_FILTER_FLAG_RX_OVERRIDE_IP));
 	}
 
 	efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
@@ -480,14 +472,12 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec)
 
 	case EFX_FILTER_TABLE_RX_MAC: {
 		bool is_wild = spec->type == EFX_FILTER_MAC_WILD;
-		EFX_POPULATE_OWORD_8(
+		EFX_POPULATE_OWORD_7(
 			*filter,
 			FRF_CZ_RMFT_RSS_EN,
 			!!(spec->flags & EFX_FILTER_FLAG_RX_RSS),
 			FRF_CZ_RMFT_SCATTER_EN,
 			!!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER),
-			FRF_CZ_RMFT_IP_OVERRIDE,
-			!!(spec->flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP),
 			FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id,
 			FRF_CZ_RMFT_WILDCARD_MATCH, is_wild,
 			FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2],
@@ -567,49 +557,62 @@ static int efx_filter_search(struct efx_filter_table *table,
 }
 
 /*
- * Construct/deconstruct external filter IDs.  These must be ordered
- * by matching priority, for RX NFC semantics.
+ * Construct/deconstruct external filter IDs.  At least the RX filter
+ * IDs must be ordered by matching priority, for RX NFC semantics.
  *
- * Each RX MAC filter entry has a flag for whether it can override an
- * RX IP filter that also matches.  So we assign locations for MAC
- * filters with overriding behaviour, then for IP filters, then for
- * MAC filters without overriding behaviour.
+ * Deconstruction needs to be robust against invalid IDs so that
+ * efx_filter_remove_id_safe() and efx_filter_get_filter_safe() can
+ * accept user-provided IDs.
  */
 
-#define EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP	0
-#define EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP	1
-#define EFX_FILTER_MATCH_PRI_NORMAL_BASE	2
+#define EFX_FILTER_MATCH_PRI_COUNT	5
+
+static const u8 efx_filter_type_match_pri[EFX_FILTER_TYPE_COUNT] = {
+	[EFX_FILTER_TCP_FULL]	= 0,
+	[EFX_FILTER_UDP_FULL]	= 0,
+	[EFX_FILTER_TCP_WILD]	= 1,
+	[EFX_FILTER_UDP_WILD]	= 1,
+	[EFX_FILTER_MAC_FULL]	= 2,
+	[EFX_FILTER_MAC_WILD]	= 3,
+	[EFX_FILTER_UC_DEF]	= 4,
+	[EFX_FILTER_MC_DEF]	= 4,
+};
+
+static const enum efx_filter_table_id efx_filter_range_table[] = {
+	EFX_FILTER_TABLE_RX_IP,		/* RX match pri 0 */
+	EFX_FILTER_TABLE_RX_IP,
+	EFX_FILTER_TABLE_RX_MAC,
+	EFX_FILTER_TABLE_RX_MAC,
+	EFX_FILTER_TABLE_RX_DEF,	/* RX match pri 4 */
+	EFX_FILTER_TABLE_COUNT,		/* TX match pri 0; invalid */
+	EFX_FILTER_TABLE_COUNT,		/* invalid */
+	EFX_FILTER_TABLE_TX_MAC,
+	EFX_FILTER_TABLE_TX_MAC,	/* TX match pri 3 */
+};
 
 #define EFX_FILTER_INDEX_WIDTH	13
 #define EFX_FILTER_INDEX_MASK	((1 << EFX_FILTER_INDEX_WIDTH) - 1)
 
-static inline u32 efx_filter_make_id(enum efx_filter_table_id table_id,
-				     unsigned int index, u8 flags)
+static inline u32
+efx_filter_make_id(const struct efx_filter_spec *spec, unsigned int index)
 {
-	unsigned int match_pri = EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id;
+	unsigned int range;
 
-	if (flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP) {
-		if (table_id == EFX_FILTER_TABLE_RX_MAC)
-			match_pri = EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP;
-		else if (table_id == EFX_FILTER_TABLE_RX_DEF)
-			match_pri = EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP;
-	}
+	range = efx_filter_type_match_pri[spec->type];
+	if (!(spec->flags & EFX_FILTER_FLAG_RX))
+		range += EFX_FILTER_MATCH_PRI_COUNT;
 
-	return match_pri << EFX_FILTER_INDEX_WIDTH | index;
+	return range << EFX_FILTER_INDEX_WIDTH | index;
 }
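+
+/* Worked example (an illustration, not part of the patch): an RX
+ * EFX_FILTER_TCP_WILD filter at table index 5 has match priority 1, so
+ * efx_filter_make_id() returns (1 << 13) | 5 = 0x2005.  The helpers
+ * below then recover EFX_FILTER_TABLE_RX_IP via efx_filter_id_table_id()
+ * and index 5 via efx_filter_id_index().
+ */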
 
 static inline enum efx_filter_table_id efx_filter_id_table_id(u32 id)
 {
-	unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH;
+	unsigned int range = id >> EFX_FILTER_INDEX_WIDTH;
 
-	switch (match_pri) {
-	case EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP:
-		return EFX_FILTER_TABLE_RX_MAC;
-	case EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP:
-		return EFX_FILTER_TABLE_RX_DEF;
-	default:
-		return match_pri - EFX_FILTER_MATCH_PRI_NORMAL_BASE;
-	}
+	if (range < ARRAY_SIZE(efx_filter_range_table))
+		return efx_filter_range_table[range];
+	else
+		return EFX_FILTER_TABLE_COUNT; /* invalid */
 }
 
 static inline unsigned int efx_filter_id_index(u32 id)
@@ -619,12 +622,9 @@ static inline unsigned int efx_filter_id_index(u32 id)
 
 static inline u8 efx_filter_id_flags(u32 id)
 {
-	unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH;
+	unsigned int range = id >> EFX_FILTER_INDEX_WIDTH;
 
-	if (match_pri < EFX_FILTER_MATCH_PRI_NORMAL_BASE)
-		return EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_OVERRIDE_IP;
-	else if (match_pri <=
-		 EFX_FILTER_MATCH_PRI_NORMAL_BASE + EFX_FILTER_TABLE_RX_DEF)
+	if (range < EFX_FILTER_MATCH_PRI_COUNT)
 		return EFX_FILTER_FLAG_RX;
 	else
 		return EFX_FILTER_FLAG_TX;
@@ -633,14 +633,15 @@ static inline u8 efx_filter_id_flags(u32 id)
 u32 efx_filter_get_rx_id_limit(struct efx_nic *efx)
 {
 	struct efx_filter_state *state = efx->filter_state;
-	unsigned int table_id = EFX_FILTER_TABLE_RX_DEF;
+	unsigned int range = EFX_FILTER_MATCH_PRI_COUNT - 1;
+	enum efx_filter_table_id table_id;
 
 	do {
+		table_id = efx_filter_range_table[range];
 		if (state->table[table_id].size != 0)
-			return ((EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id)
-				<< EFX_FILTER_INDEX_WIDTH) +
+			return range << EFX_FILTER_INDEX_WIDTH |
 				state->table[table_id].size;
-	} while (table_id--);
+	} while (range--);
 
 	return 0;
 }
@@ -718,7 +719,7 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec,
 	netif_vdbg(efx, hw, efx->net_dev,
 		   "%s: filter type %d index %d rxq %u set",
 		   __func__, spec->type, filter_idx, spec->dmaq_id);
-	rc = efx_filter_make_id(table->id, filter_idx, spec->flags);
+	rc = efx_filter_make_id(spec, filter_idx);
 
 out:
 	spin_unlock_bh(&state->lock);
@@ -781,8 +782,7 @@ int efx_filter_remove_id_safe(struct efx_nic *efx,
 	spin_lock_bh(&state->lock);
 
 	if (test_bit(filter_idx, table->used_bitmap) &&
-	    spec->priority == priority &&
-	    !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) {
+	    spec->priority == priority) {
 		efx_filter_table_clear_entry(efx, table, filter_idx);
 		if (table->used == 0)
 			efx_filter_table_reset_search_depth(table);
@@ -833,8 +833,7 @@ int efx_filter_get_filter_safe(struct efx_nic *efx,
 	spin_lock_bh(&state->lock);
 
 	if (test_bit(filter_idx, table->used_bitmap) &&
-	    spec->priority == priority &&
-	    !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) {
+	    spec->priority == priority) {
 		*spec_buf = *spec;
 		rc = 0;
 	} else {
@@ -927,8 +926,7 @@ s32 efx_filter_get_rx_ids(struct efx_nic *efx,
 					goto out;
 				}
 				buf[count++] = efx_filter_make_id(
-					table_id, filter_idx,
-					table->spec[filter_idx].flags);
+					&table->spec[filter_idx], filter_idx);
 			}
 		}
 	}
diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
index 3c77802aed6c..5cb54723b824 100644
--- a/drivers/net/ethernet/sfc/filter.h
+++ b/drivers/net/ethernet/sfc/filter.h
@@ -61,16 +61,12 @@ enum efx_filter_priority {
  *	according to the indirection table.
  * @EFX_FILTER_FLAG_RX_SCATTER: Enable DMA scatter on the receiving
  *	queue.
- * @EFX_FILTER_FLAG_RX_OVERRIDE_IP: Enables a MAC filter to override
- *	any IP filter that matches the same packet.  By default, IP
- *	filters take precedence.
  * @EFX_FILTER_FLAG_RX: Filter is for RX
  * @EFX_FILTER_FLAG_TX: Filter is for TX
  */
 enum efx_filter_flags {
 	EFX_FILTER_FLAG_RX_RSS = 0x01,
 	EFX_FILTER_FLAG_RX_SCATTER = 0x02,
-	EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04,
 	EFX_FILTER_FLAG_RX = 0x08,
 	EFX_FILTER_FLAG_TX = 0x10,
 };
@@ -88,8 +84,7 @@ enum efx_filter_flags {
  *
  * The @priority field is used by software to determine whether a new
  * filter may replace an old one.  The hardware priority of a filter
- * depends on the filter type and %EFX_FILTER_FLAG_RX_OVERRIDE_IP
- * flag.
+ * depends on the filter type.
  */
 struct efx_filter_spec {
 	u8	type:4;
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index fc5e7bbcbc9e..aea43cbd0520 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -320,14 +320,20 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno,
 		efx_mcdi_complete(mcdi);
 }
 
-/* Issue the given command by writing the data into the shared memory PDU,
- * ring the doorbell and wait for completion. Copyout the result. */
 int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd,
 		 const u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen,
 		 size_t *outlen_actual)
 {
+	efx_mcdi_rpc_start(efx, cmd, inbuf, inlen);
+	return efx_mcdi_rpc_finish(efx, cmd, inlen,
+				   outbuf, outlen, outlen_actual);
+}
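+
+/* Illustration (a sketch, not code from this patch): splitting the RPC
+ * lets a caller overlap its own work with MC execution.  Buffer sizing
+ * and error handling are elided, and do_work_while_mc_runs() is a
+ * placeholder:
+ *
+ *	size_t outlen;
+ *	int rc;
+ *
+ *	efx_mcdi_rpc_start(efx, MC_CMD_PTP, inbuf, inlen);
+ *	do_work_while_mc_runs();
+ *	rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP, inlen,
+ *				 outbuf, sizeof(outbuf), &outlen);
+ *
+ * efx_ptp_synchronize() in ptp.c below uses exactly this pattern to
+ * write host times while the MC samples hardware time.
+ */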
+
+void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, const u8 *inbuf,
+			size_t inlen)
+{
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-	int rc;
+
 	BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0);
 
 	efx_mcdi_acquire(mcdi);
@@ -338,6 +344,15 @@ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd,
 	spin_unlock_bh(&mcdi->iface_lock);
 
 	efx_mcdi_copyin(efx, cmd, inbuf, inlen);
+}
+
+int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
+			u8 *outbuf, size_t outlen, size_t *outlen_actual)
+{
+	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+	int rc;
+
+	BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0);
 
 	if (mcdi->mode == MCDI_MODE_POLL)
 		rc = efx_mcdi_poll(efx);
@@ -563,6 +578,11 @@ void efx_mcdi_process_event(struct efx_channel *channel,
 	case MCDI_EVENT_CODE_FLR:
 		efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
 		break;
+	case MCDI_EVENT_CODE_PTP_RX:
+	case MCDI_EVENT_CODE_PTP_FAULT:
+	case MCDI_EVENT_CODE_PTP_PPS:
+		efx_ptp_event(efx, event);
+		break;
 
 	default:
 		netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
@@ -641,9 +661,8 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address,
 			   u16 *fw_subtype_list, u32 *capabilities)
 {
 	uint8_t outbuf[MC_CMD_GET_BOARD_CFG_OUT_LENMIN];
-	size_t outlen;
+	size_t outlen, offset, i;
 	int port_num = efx_port_num(efx);
-	int offset;
 	int rc;
 
 	BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_IN_LEN != 0);
@@ -663,11 +682,18 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address,
 		: MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0_OFST;
 	if (mac_address)
 		memcpy(mac_address, outbuf + offset, ETH_ALEN);
-	if (fw_subtype_list)
-		memcpy(fw_subtype_list,
-		       outbuf + MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST,
-		       MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM *
-		       sizeof(fw_subtype_list[0]));
+	if (fw_subtype_list) {
+		/* Byte-swap and truncate or zero-pad as necessary */
+		offset = MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST;
+		for (i = 0;
+		     i < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM;
+		     i++) {
+			fw_subtype_list[i] =
+				(offset + 2 <= outlen) ?
+				le16_to_cpup((__le16 *)(outbuf + offset)) : 0;
+			offset += 2;
+		}
+	}
 	if (capabilities) {
 		if (port_num)
 			*capabilities = MCDI_DWORD(outbuf,
@@ -1169,6 +1195,9 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx)
 	__le32 *qid;
 	int rc, count;
 
+	BUILD_BUG_ON(EFX_MAX_CHANNELS >
+		     MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
+
 	qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
 	if (qid == NULL)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 0bdf3e331832..3ba2e5b5a9cc 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -71,6 +71,12 @@ extern int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const u8 *inbuf,
 			size_t inlen, u8 *outbuf, size_t outlen,
 			size_t *outlen_actual);
 
+extern void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
+			       const u8 *inbuf, size_t inlen);
+extern int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
+			       u8 *outbuf, size_t outlen,
+			       size_t *outlen_actual);
+
 extern int efx_mcdi_poll_reboot(struct efx_nic *efx);
 extern void efx_mcdi_mode_poll(struct efx_nic *efx);
 extern void efx_mcdi_mode_event(struct efx_nic *efx);
@@ -107,11 +113,13 @@ extern void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
 #define MCDI_EVENT_FIELD(_ev, _field)			\
 	EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field)
 #define MCDI_ARRAY_FIELD(_buf, _field1, _type, _index, _field2)		\
-	EFX_DWORD_FIELD(						\
+	EFX_EXTRACT_DWORD(						\
 		*((efx_dword_t *)					\
 		  (MCDI_ARRAY_PTR(_buf, _field1, _type, _index) +	\
 		   (MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _OFST & ~3))), \
-		MC_CMD_ ## _type ## _TYPEDEF_ ## _field2)
+		MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f, \
+		(MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f) + \
+		MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _WIDTH - 1)
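+
+/* Note, with a hypothetical example: the _LBN values used here may exceed
+ * 31 (they count from the start of the typedef, not of the containing
+ * dword).  For a field with _OFST 6, _LBN 48 and _WIDTH 8, the dword
+ * pointer above lands at offset 4 (6 & ~3) and the extracted range is
+ * bits 16..23 (48 & 0x1f = 16), which EFX_DWORD_FIELD() could not
+ * express.
+ */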
 
 extern void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len);
 extern int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index db4beed97669..9d426d0457bd 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -289,6 +289,7 @@
 #define          MCDI_EVENT_CODE_TX_FLUSH  0xc /* enum */
 #define          MCDI_EVENT_CODE_PTP_RX  0xd /* enum */
 #define          MCDI_EVENT_CODE_PTP_FAULT  0xe /* enum */
+#define          MCDI_EVENT_CODE_PTP_PPS  0xf /* enum */
 #define       MCDI_EVENT_CMDDONE_DATA_OFST 0
 #define       MCDI_EVENT_CMDDONE_DATA_LBN 0
 #define       MCDI_EVENT_CMDDONE_DATA_WIDTH 32
@@ -491,12 +492,12 @@
 
 /* MC_CMD_GET_FPGAREG_OUT msgresponse */
 #define    MC_CMD_GET_FPGAREG_OUT_LENMIN 1
-#define    MC_CMD_GET_FPGAREG_OUT_LENMAX 255
+#define    MC_CMD_GET_FPGAREG_OUT_LENMAX 252
 #define    MC_CMD_GET_FPGAREG_OUT_LEN(num) (0+1*(num))
 #define       MC_CMD_GET_FPGAREG_OUT_BUFFER_OFST 0
 #define       MC_CMD_GET_FPGAREG_OUT_BUFFER_LEN 1
 #define       MC_CMD_GET_FPGAREG_OUT_BUFFER_MINNUM 1
-#define       MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 255
+#define       MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 252
 
 
 /***********************************/
@@ -507,13 +508,13 @@
 
 /* MC_CMD_PUT_FPGAREG_IN msgrequest */
 #define    MC_CMD_PUT_FPGAREG_IN_LENMIN 5
-#define    MC_CMD_PUT_FPGAREG_IN_LENMAX 255
+#define    MC_CMD_PUT_FPGAREG_IN_LENMAX 252
 #define    MC_CMD_PUT_FPGAREG_IN_LEN(num) (4+1*(num))
 #define       MC_CMD_PUT_FPGAREG_IN_ADDR_OFST 0
 #define       MC_CMD_PUT_FPGAREG_IN_BUFFER_OFST 4
 #define       MC_CMD_PUT_FPGAREG_IN_BUFFER_LEN 1
 #define       MC_CMD_PUT_FPGAREG_IN_BUFFER_MINNUM 1
-#define       MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 251
+#define       MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 248
 
 /* MC_CMD_PUT_FPGAREG_OUT msgresponse */
 #define    MC_CMD_PUT_FPGAREG_OUT_LEN 0
@@ -560,7 +561,7 @@
 
 /* MC_CMD_PTP_IN_TRANSMIT msgrequest */
 #define    MC_CMD_PTP_IN_TRANSMIT_LENMIN 13
-#define    MC_CMD_PTP_IN_TRANSMIT_LENMAX 255
+#define    MC_CMD_PTP_IN_TRANSMIT_LENMAX 252
 #define    MC_CMD_PTP_IN_TRANSMIT_LEN(num) (12+1*(num))
 /*            MC_CMD_PTP_IN_CMD_OFST 0 */
 /*            MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
@@ -568,7 +569,7 @@
 #define       MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST 12
 #define       MC_CMD_PTP_IN_TRANSMIT_PACKET_LEN 1
 #define       MC_CMD_PTP_IN_TRANSMIT_PACKET_MINNUM 1
-#define       MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 243
+#define       MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 240
 
 /* MC_CMD_PTP_IN_READ_NIC_TIME msgrequest */
 #define    MC_CMD_PTP_IN_READ_NIC_TIME_LEN 8
@@ -1145,7 +1146,7 @@
 
 /* MC_CMD_PUTS_IN msgrequest */
 #define    MC_CMD_PUTS_IN_LENMIN 13
-#define    MC_CMD_PUTS_IN_LENMAX 255
+#define    MC_CMD_PUTS_IN_LENMAX 252
 #define    MC_CMD_PUTS_IN_LEN(num) (12+1*(num))
 #define       MC_CMD_PUTS_IN_DEST_OFST 0
 #define        MC_CMD_PUTS_IN_UART_LBN 0
@@ -1157,7 +1158,7 @@
 #define       MC_CMD_PUTS_IN_STRING_OFST 12
 #define       MC_CMD_PUTS_IN_STRING_LEN 1
 #define       MC_CMD_PUTS_IN_STRING_MINNUM 1
-#define       MC_CMD_PUTS_IN_STRING_MAXNUM 243
+#define       MC_CMD_PUTS_IN_STRING_MAXNUM 240
 
 /* MC_CMD_PUTS_OUT msgresponse */
 #define    MC_CMD_PUTS_OUT_LEN 0
@@ -1947,12 +1948,12 @@
 
 /* MC_CMD_NVRAM_READ_OUT msgresponse */
 #define    MC_CMD_NVRAM_READ_OUT_LENMIN 1
-#define    MC_CMD_NVRAM_READ_OUT_LENMAX 255
+#define    MC_CMD_NVRAM_READ_OUT_LENMAX 252
 #define    MC_CMD_NVRAM_READ_OUT_LEN(num) (0+1*(num))
 #define       MC_CMD_NVRAM_READ_OUT_READ_BUFFER_OFST 0
 #define       MC_CMD_NVRAM_READ_OUT_READ_BUFFER_LEN 1
 #define       MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MINNUM 1
-#define       MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 255
+#define       MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 252
 
 
 /***********************************/
@@ -1963,7 +1964,7 @@
 
 /* MC_CMD_NVRAM_WRITE_IN msgrequest */
 #define    MC_CMD_NVRAM_WRITE_IN_LENMIN 13
-#define    MC_CMD_NVRAM_WRITE_IN_LENMAX 255
+#define    MC_CMD_NVRAM_WRITE_IN_LENMAX 252
 #define    MC_CMD_NVRAM_WRITE_IN_LEN(num) (12+1*(num))
 #define       MC_CMD_NVRAM_WRITE_IN_TYPE_OFST 0
 /*            Enum values, see field(s): */
@@ -1973,7 +1974,7 @@
 #define       MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_OFST 12
 #define       MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN 1
 #define       MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MINNUM 1
-#define       MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 243
+#define       MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 240
 
 /* MC_CMD_NVRAM_WRITE_OUT msgresponse */
 #define    MC_CMD_NVRAM_WRITE_OUT_LEN 0
@@ -2305,13 +2306,13 @@
 
 /* MC_CMD_GET_PHY_MEDIA_INFO_OUT msgresponse */
 #define    MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMIN 5
-#define    MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 255
+#define    MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 252
 #define    MC_CMD_GET_PHY_MEDIA_INFO_OUT_LEN(num) (4+1*(num))
 #define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATALEN_OFST 0
 #define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST 4
 #define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_LEN 1
 #define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MINNUM 1
-#define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 251
+#define       MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 248
 
 
 /***********************************/
diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c
index 758148379b0e..08f825b71ac8 100644
--- a/drivers/net/ethernet/sfc/mtd.c
+++ b/drivers/net/ethernet/sfc/mtd.c
@@ -585,6 +585,7 @@ static const struct siena_nvram_type_info siena_nvram_types[] = {
 	[MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1]	= { 1, "sfc_exp_rom_cfg" },
 	[MC_CMD_NVRAM_TYPE_PHY_PORT0]		= { 0, "sfc_phy_fw" },
 	[MC_CMD_NVRAM_TYPE_PHY_PORT1]		= { 1, "sfc_phy_fw" },
+	[MC_CMD_NVRAM_TYPE_FPGA]		= { 0, "sfc_fpga" },
 };
 
 static int siena_mtd_probe_partition(struct efx_nic *efx,
@@ -598,7 +599,8 @@ static int siena_mtd_probe_partition(struct efx_nic *efx,
 	bool protected;
 	int rc;
 
-	if (type >= ARRAY_SIZE(siena_nvram_types))
+	if (type >= ARRAY_SIZE(siena_nvram_types) ||
+	    siena_nvram_types[type].name == NULL)
 		return -ENODEV;
 
 	info = &siena_nvram_types[type];
@@ -627,7 +629,8 @@ static int siena_mtd_get_fw_subtypes(struct efx_nic *efx,
 				     struct efx_mtd *efx_mtd)
 {
 	struct efx_mtd_partition *part;
-	uint16_t fw_subtype_list[MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM];
+	uint16_t fw_subtype_list[
+		MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM];
 	int rc;
 
 	rc = efx_mcdi_get_board_cfg(efx, NULL, fw_subtype_list, NULL);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index cd9c0a989692..c1a010cda89b 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -37,7 +37,7 @@
  *
  **************************************************************************/
 
-#define EFX_DRIVER_VERSION	"3.1"
+#define EFX_DRIVER_VERSION	"3.2"
 
 #ifdef DEBUG
 #define EFX_BUG_ON_PARANOID(x) BUG_ON(x)
@@ -56,7 +56,8 @@
 #define EFX_MAX_CHANNELS 32U
 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
 #define EFX_EXTRA_CHANNEL_IOV	0
-#define EFX_MAX_EXTRA_CHANNELS	1U
+#define EFX_EXTRA_CHANNEL_PTP	1
+#define EFX_MAX_EXTRA_CHANNELS	2U
 
 /* Checksum generation is a per-queue option in hardware, so each
  * queue visible to the networking core is backed by two hardware TX
@@ -68,6 +69,9 @@
 #define EFX_TXQ_TYPES		4
 #define EFX_MAX_TX_QUEUES	(EFX_TXQ_TYPES * EFX_MAX_CHANNELS)
 
+/* Forward declare Precision Time Protocol (PTP) support structure. */
+struct efx_ptp_data;
+
 struct efx_self_tests;
 
 /**
@@ -91,29 +95,31 @@ struct efx_special_buffer {
 };
 
 /**
- * struct efx_tx_buffer - An Efx TX buffer
- * @skb: The associated socket buffer.
- *	Set only on the final fragment of a packet; %NULL for all other
- *	fragments.  When this fragment completes, then we can free this
- *	skb.
- * @tsoh: The associated TSO header structure, or %NULL if this
- *	buffer is not a TSO header.
+ * struct efx_tx_buffer - buffer state for a TX descriptor
+ * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
+ *	freed when descriptor completes
+ * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be
+ *	freed when descriptor completes.
  * @dma_addr: DMA address of the fragment.
+ * @flags: Flags for allocation and DMA mapping type
  * @len: Length of this fragment.
  *	This field is zero when the queue slot is empty.
- * @continuation: True if this fragment is not the end of a packet.
- * @unmap_single: True if dma_unmap_single should be used.
  * @unmap_len: Length of this fragment to unmap
  */
 struct efx_tx_buffer {
-	const struct sk_buff *skb;
-	struct efx_tso_header *tsoh;
+	union {
+		const struct sk_buff *skb;
+		void *heap_buf;
+	};
 	dma_addr_t dma_addr;
+	unsigned short flags;
 	unsigned short len;
-	bool continuation;
-	bool unmap_single;
 	unsigned short unmap_len;
 };
+#define EFX_TX_BUF_CONT		1	/* not last descriptor of packet */
+#define EFX_TX_BUF_SKB		2	/* buffer is last part of skb */
+#define EFX_TX_BUF_HEAP		4	/* buffer was allocated with kmalloc() */
+#define EFX_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
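+
+/* Sketch (assumed shape, not code from this patch) of how a completion
+ * path would consult @flags to release the right resources:
+ *
+ *	if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
+ *		dma_unmap_single(dev, buffer->dma_addr, buffer->unmap_len,
+ *				 DMA_TO_DEVICE);
+ *	else if (buffer->unmap_len)
+ *		dma_unmap_page(dev, buffer->dma_addr, buffer->unmap_len,
+ *			       DMA_TO_DEVICE);
+ *	if (buffer->flags & EFX_TX_BUF_SKB)
+ *		dev_kfree_skb_any((struct sk_buff *)buffer->skb);
+ *	else if (buffer->flags & EFX_TX_BUF_HEAP)
+ *		kfree(buffer->heap_buf);
+ */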
 
 /**
  * struct efx_tx_queue - An Efx TX queue
@@ -133,6 +139,7 @@ struct efx_tx_buffer {
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
+ * @tsoh_page: Array of pages of TSO header buffers
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
  * @initialised: Has hardware queue been initialised?
@@ -156,9 +163,6 @@ struct efx_tx_buffer {
  *	variable indicates that the queue is full.  This is to
  *	avoid cache-line ping-pong between the xmit path and the
  *	completion path.
- * @tso_headers_free: A list of TSO headers allocated for this TX queue
- *	that are not in use, and so available for new TSO sends. The list
- *	is protected by the TX queue lock.
  * @tso_bursts: Number of times TSO xmit invoked by kernel
  * @tso_long_headers: Number of packets with headers too long for standard
  *	blocks
@@ -175,6 +179,7 @@ struct efx_tx_queue {
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
+	struct efx_buffer *tsoh_page;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
 	bool initialised;
@@ -187,7 +192,6 @@ struct efx_tx_queue {
 	unsigned int insert_count ____cacheline_aligned_in_smp;
 	unsigned int write_count;
 	unsigned int old_read_count;
-	struct efx_tso_header *tso_headers_free;
 	unsigned int tso_bursts;
 	unsigned int tso_long_headers;
 	unsigned int tso_packets;
@@ -242,6 +246,8 @@ struct efx_rx_page_state {
 /**
  * struct efx_rx_queue - An Efx RX queue
  * @efx: The associated Efx NIC
+ * @core_index:  Index of network core RX queue.  Will be >= 0 iff this
+ *	is associated with a real RX queue.
  * @buffer: The software buffer ring
  * @rxd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
@@ -263,6 +269,7 @@ struct efx_rx_page_state {
  */
 struct efx_rx_queue {
 	struct efx_nic *efx;
+	int core_index;
 	struct efx_rx_buffer *buffer;
 	struct efx_special_buffer rxd;
 	unsigned int ptr_mask;
@@ -390,14 +397,17 @@ struct efx_channel {
  * @get_name: Generate the channel's name (used for its IRQ handler)
  * @copy: Copy the channel state prior to reallocation.  May be %NULL if
  *	reallocation is not supported.
+ * @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
  * @keep_eventq: Flag for whether event queue should be kept initialised
  *	while the device is stopped
  */
 struct efx_channel_type {
 	void (*handle_no_channel)(struct efx_nic *);
 	int (*pre_probe)(struct efx_channel *);
+	void (*post_remove)(struct efx_channel *);
 	void (*get_name)(struct efx_channel *, char *buf, size_t len);
 	struct efx_channel *(*copy)(const struct efx_channel *);
+	void (*receive_skb)(struct efx_channel *, struct sk_buff *);
 	bool keep_eventq;
 };
 
@@ -430,11 +440,9 @@ enum efx_int_mode {
 #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI)
 
 enum nic_state {
-	STATE_INIT = 0,
-	STATE_RUNNING = 1,
-	STATE_FINI = 2,
-	STATE_DISABLED = 3,
-	STATE_MAX,
+	STATE_UNINIT = 0,	/* device being probed/removed or is frozen */
+	STATE_READY = 1,	/* hardware ready and netdev registered */
+	STATE_DISABLED = 2,	/* device disabled due to hardware errors */
 };
 
 /*
@@ -654,7 +662,7 @@ struct vfdi_status;
  * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
  * @irq_rx_moderation: IRQ moderation time for RX event queues
  * @msg_enable: Log message enable flags
- * @state: Device state flag. Serialised by the rtnl_lock.
+ * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
  * @reset_pending: Bitmask for pending resets
  * @tx_queue: TX DMA queues
  * @rx_queue: RX DMA queues
@@ -664,6 +672,8 @@ struct vfdi_status;
  *	should be allocated for this NIC
  * @rxq_entries: Size of receive queues requested by user.
  * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
  * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
  * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
  * @sram_lim_qw: Qword address limit of SRAM
@@ -730,6 +740,7 @@ struct vfdi_status;
  *	%local_addr_list. Protected by %local_lock.
  * @local_lock: Mutex protecting %local_addr_list and %local_page_list.
  * @peer_work: Work item to broadcast peer addresses to VMs.
+ * @ptp_data: PTP state data
  * @monitor_work: Hardware monitor workitem
  * @biu_lock: BIU (bus interface unit) lock
  * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
@@ -774,6 +785,9 @@ struct efx_nic {
 
 	unsigned rxq_entries;
 	unsigned txq_entries;
+	unsigned int txq_stop_thresh;
+	unsigned int txq_wake_thresh;
+
 	unsigned tx_dc_base;
 	unsigned rx_dc_base;
 	unsigned sram_lim_qw;
@@ -854,6 +868,10 @@ struct efx_nic {
 	struct work_struct peer_work;
 #endif
 
+#ifdef CONFIG_SFC_PTP
+	struct efx_ptp_data *ptp_data;
+#endif
+
 	/* The following fields may be written more often */
 
 	struct delayed_work monitor_work ____cacheline_aligned_in_smp;
@@ -1044,7 +1062,7 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue)
 
 static inline bool efx_channel_has_rx_queue(struct efx_channel *channel)
 {
-	return channel->channel < channel->efx->n_rx_channels;
+	return channel->rx_queue.core_index >= 0;
 }
 
 static inline struct efx_rx_queue *
@@ -1116,5 +1134,13 @@ static inline void clear_bit_le(unsigned nr, unsigned char *addr)
 #define EFX_MAX_FRAME_LEN(mtu) \
 	((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16)
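+/* Worked example (illustration, not from the patch): for mtu = 1500 this
+ * yields (((1500 + 14 + 4 + 4 + 7) & ~7) + 16) = (1529 & ~7) + 16 = 1544.
+ */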
 
+static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP;
+}
+static inline void efx_xmit_hwtstamp_pending(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
 
 #endif /* EFX_NET_DRIVER_H */
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 326d799762d6..cdff40b65729 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
 /**************************************************************************
  *
  * Generic buffer handling
- * These buffers are used for interrupt status and MAC stats
+ * These buffers are used for interrupt status, MAC stats, etc.
  *
  **************************************************************************/
 
@@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
 		++tx_queue->write_count;
 
 		/* Create TX descriptor ring entry */
+		BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
 		EFX_POPULATE_QWORD_4(*txd,
-				     FSF_AZ_TX_KER_CONT, buffer->continuation,
+				     FSF_AZ_TX_KER_CONT,
+				     buffer->flags & EFX_TX_BUF_CONT,
 				     FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
 				     FSF_AZ_TX_KER_BUF_REGION, 0,
 				     FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index bab5cd9f5740..438cef11f727 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -11,6 +11,7 @@
 #ifndef EFX_NIC_H
 #define EFX_NIC_H
 
+#include <linux/net_tstamp.h>
 #include <linux/i2c-algo-bit.h>
 #include "net_driver.h"
 #include "efx.h"
@@ -250,6 +251,41 @@ extern int efx_sriov_get_vf_config(struct net_device *dev, int vf,
 extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf,
 				     bool spoofchk);
 
+struct ethtool_ts_info;
+#ifdef CONFIG_SFC_PTP
+extern void efx_ptp_probe(struct efx_nic *efx);
+extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd);
+extern int efx_ptp_get_ts_info(struct net_device *net_dev,
+			       struct ethtool_ts_info *ts_info);
+extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
+extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
+extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);
+#else
+static inline void efx_ptp_probe(struct efx_nic *efx) {}
+static inline int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd)
+{
+	return -EOPNOTSUPP;
+}
+static inline int efx_ptp_get_ts_info(struct net_device *net_dev,
+				      struct ethtool_ts_info *ts_info)
+{
+	ts_info->so_timestamping = (SOF_TIMESTAMPING_SOFTWARE |
+				    SOF_TIMESTAMPING_RX_SOFTWARE);
+	ts_info->phc_index = -1;
+
+	return 0;
+}
+static inline bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+	return false;
+}
+static inline int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+	return NETDEV_TX_OK;
+}
+static inline void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) {}
+#endif
+
 extern const struct efx_nic_type falcon_a1_nic_type;
 extern const struct efx_nic_type falcon_b0_nic_type;
 extern const struct efx_nic_type siena_a0_nic_type;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
new file mode 100644
index 000000000000..5b3dd028ce85
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -0,0 +1,1484 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/* Theory of operation:
+ *
+ * PTP support is assisted by firmware running on the MC, which provides
+ * the hardware timestamping capabilities.  Both transmitted and received
+ * PTP event packets are queued onto internal queues for subsequent processing;
+ * this is because the MC operations are relatively long and would block
+ * NAPI/interrupt operation.
+ *
+ * Receive event processing:
+ *	The event contains the packet's UUID and sequence number, together
+ *	with the hardware timestamp.  The PTP receive packet queue is searched
+ *	for this UUID/sequence number and, if found, put on a pending queue.
+ *	Packets not matching are delivered without timestamps (MCDI events will
+ *	always arrive after the actual packet).
+ *	It is important for the operation of the PTP protocol that the
+ *	ordering of packets received on the event and general ports is
+ *	maintained.
+ *
+ * Work queue processing:
+ *	If work waiting, synchronise host/hardware time
+ *
+ *	Transmit: send packet through MC, which returns the transmission time
+ *	that is converted to an appropriate timestamp.
+ *
+ *	Receive: the packet's reception time is converted to an appropriate
+ *	timestamp.
+ */
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/net_tstamp.h>
+#include <linux/pps_kernel.h>
+#include <linux/ptp_clock_kernel.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "io.h"
+#include "regs.h"
+#include "nic.h"
+
+/* Maximum number of MCDI event fragments expected to make up a PTP event */
+#define	MAX_EVENT_FRAGS			3
+
+/* Maximum delay, ms, to begin synchronisation */
+#define	MAX_SYNCHRONISE_WAIT_MS		2
+
+/* How long, at most, to spend synchronising */
+#define	SYNCHRONISE_PERIOD_NS		250000
+
+/* How often to update the shared memory time */
+#define	SYNCHRONISATION_GRANULARITY_NS	200
+
+/* Minimum permitted length of a (corrected) synchronisation time */
+#define	MIN_SYNCHRONISATION_NS		120
+
+/* Maximum permitted length of a (corrected) synchronisation time */
+#define	MAX_SYNCHRONISATION_NS		1000
+
+/* How many (MC) receive events can be queued */
+#define	MAX_RECEIVE_EVENTS		8
+
+/* Length of (modified) moving average. */
+#define	AVERAGE_LENGTH			16
+
+/* How long an unmatched event or packet can be held */
+#define PKT_EVENT_LIFETIME_MS		10
+
+/* Offsets into PTP packet for identification.  These offsets are from the
+ * start of the IP header, not the MAC header.  Note that neither PTP V1 nor
+ * PTP V2 permits the use of IPv4 options.
+ */
+#define PTP_DPORT_OFFSET	22
+
+#define PTP_V1_VERSION_LENGTH	2
+#define PTP_V1_VERSION_OFFSET	28
+
+#define PTP_V1_UUID_LENGTH	6
+#define PTP_V1_UUID_OFFSET	50
+
+#define PTP_V1_SEQUENCE_LENGTH	2
+#define PTP_V1_SEQUENCE_OFFSET	58
+
+/* The minimum length of a PTP V1 packet for offsets, etc. to be valid:
+ * includes IP header.
+ */
+#define	PTP_V1_MIN_LENGTH	64
+
+#define PTP_V2_VERSION_LENGTH	1
+#define PTP_V2_VERSION_OFFSET	29
+
+/* Although a PTP V2 UUID comprises a ClockIdentity (8 bytes) and a
+ * PortNumber (2 bytes), the MC only captures the last six bytes of the
+ * clock identity.  These values reflect that, not the ones used in the
+ * standard.  The standard permits
+ * mapping of V1 UUIDs to V2 UUIDs with these same values.
+ */
+#define PTP_V2_MC_UUID_LENGTH	6
+#define PTP_V2_MC_UUID_OFFSET	50
+
+#define PTP_V2_SEQUENCE_LENGTH	2
+#define PTP_V2_SEQUENCE_OFFSET	58
+
+/* The minimum length of a PTP V2 packet for offsets, etc. to be valid:
+ * includes IP header.
+ */
+#define	PTP_V2_MIN_LENGTH	63
+
+#define	PTP_MIN_LENGTH		63
+
+#define PTP_ADDRESS		0xe0000181	/* 224.0.1.129 */
+#define PTP_EVENT_PORT		319
+#define PTP_GENERAL_PORT	320
+
+/* Annoyingly the formats of the version numbers differ between
+ * versions 1 and 2, so it isn't possible to simply look for 1 or 2.
+ */
+#define	PTP_VERSION_V1		1
+
+#define	PTP_VERSION_V2		2
+#define	PTP_VERSION_V2_MASK	0x0f
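+
+/* Sketch of the intended checks (the matching code appears later in this
+ * file): for V1, the 16-bit field at PTP_V1_VERSION_OFFSET must equal
+ * PTP_VERSION_V1; for V2, the byte at PTP_V2_VERSION_OFFSET, masked with
+ * PTP_VERSION_V2_MASK, must equal PTP_VERSION_V2.
+ */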
+
+enum ptp_packet_state {
+	PTP_PACKET_STATE_UNMATCHED = 0,
+	PTP_PACKET_STATE_MATCHED,
+	PTP_PACKET_STATE_TIMED_OUT,
+	PTP_PACKET_STATE_MATCH_UNWANTED
+};
+
+/* The NIC is synchronised with a single word of time only, comprising
+ * partial seconds and full nanoseconds: 10^9 ~ 2^30, so 2 bits remain
+ * for seconds.
+ */
+#define	MC_NANOSECOND_BITS	30
+#define	MC_NANOSECOND_MASK	((1 << MC_NANOSECOND_BITS) - 1)
+#define	MC_SECOND_MASK		((1 << (32 - MC_NANOSECOND_BITS)) - 1)
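+
+/* Worked example: a host time of 5 s + 123 ns packs into a single 32-bit
+ * word as (5 << 30) | 123; only the low two bits of the seconds survive
+ * (5 & MC_SECOND_MASK = 1), which suffices because a synchronisation
+ * never spans more than a second.
+ */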
+
+/* Maximum parts-per-billion adjustment that is acceptable */
+#define MAX_PPB			1000000
+
+/* Number of bits required to hold the above */
+#define	MAX_PPB_BITS		20
+
+/* Number of extra bits allowed when calculating fractional ns.
+ * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should
+ * be less than 63.
+ */
+#define	PPB_EXTRA_BITS		2
+
+/* Precalculate scale word to avoid long long division at runtime */
+#define	PPB_SCALE_WORD	((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\
+			MAX_PPB_BITS)) / 1000000000LL)
+
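+/* A sketch of the arithmetic: with the scale word precomputed, a
+ * parts-per-billion value can be converted to the MC's fixed-point format
+ * by a multiply and shift, roughly
+ *	(ppb * PPB_SCALE_WORD) >> (PPB_EXTRA_BITS + MAX_PPB_BITS),
+ * avoiding a 64-bit division on every frequency adjustment (see
+ * efx_phc_adjfreq(), declared above and defined later in this file).
+ */
+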
+#define PTP_SYNC_ATTEMPTS	4
+
+/**
+ * struct efx_ptp_match - Matching structure, stored in sk_buff's cb area.
+ * @words: UUID and (partial) sequence number
+ * @expiry: Time after which the packet should be delivered irrespective of
+ *            event arrival.
+ * @state: The state of the packet - whether it is ready for processing, or
+ *         whether a match is of no interest.
+ */
+struct efx_ptp_match {
+	u32 words[DIV_ROUND_UP(PTP_V1_UUID_LENGTH, 4)];
+	unsigned long expiry;
+	enum ptp_packet_state state;
+};
+
+/**
+ * struct efx_ptp_event_rx - A PTP receive event (from MC)
+ * @seq0: First part of (PTP) UUID
+ * @seq1: Second part of (PTP) UUID and sequence number
+ * @hwtimestamp: Event timestamp
+ */
+struct efx_ptp_event_rx {
+	struct list_head link;
+	u32 seq0;
+	u32 seq1;
+	ktime_t hwtimestamp;
+	unsigned long expiry;
+};
+
+/**
+ * struct efx_ptp_timeset - Synchronisation between host and MC
+ * @host_start: Host time immediately before hardware timestamp taken
+ * @seconds: Hardware timestamp, seconds
+ * @nanoseconds: Hardware timestamp, nanoseconds
+ * @host_end: Host time immediately after hardware timestamp taken
+ * @waitns: Number of nanoseconds between hardware timestamp being read and
+ *          host end time being seen
+ * @window: Difference of host_end and host_start
+ */
+struct efx_ptp_timeset {
+	u32 host_start;
+	u32 seconds;
+	u32 nanoseconds;
+	u32 host_end;
+	u32 waitns;
+	u32 window;	/* Derived: end - start, allowing for wrap */
+};
+
+/**
+ * struct efx_ptp_data - Precision Time Protocol (PTP) state
+ * @channel: The PTP channel
+ * @rxq: Receive queue (awaiting timestamps)
+ * @txq: Transmit queue
+ * @evt_list: List of MC receive events awaiting packets
+ * @evt_free_list: List of free events
+ * @evt_lock: Lock for manipulating evt_list and evt_free_list
+ * @rx_evts: Instantiated events (on evt_list and evt_free_list)
+ * @workwq: Work queue for processing pending PTP operations
+ * @work: Work task
+ * @reset_required: A serious error has occurred and the PTP task needs to be
+ *                  reset (disable, enable).
+ * @rxfilter_event: Receive filter for the PTP event port (when operating)
+ * @rxfilter_general: Receive filter for the PTP general port (when operating)
+ * @rxfilter_installed: Whether the above receive filters are currently
+ *                      installed
+ * @config: Current timestamp configuration
+ * @enabled: PTP operation enabled
+ * @mode: Mode in which PTP operating (PTP version)
+ * @evt_frags: Partly assembled PTP events
+ * @evt_frag_idx: Current fragment number
+ * @evt_code: Last event code
+ * @start: Address at which MC indicates ready for synchronisation
+ * @host_time_pps: Host time at last PPS
+ * @last_sync_ns: Last number of nanoseconds between readings when synchronising
+ * @base_sync_ns: Number of nanoseconds for last synchronisation.
+ * @base_sync_valid: Whether @base_sync_ns is valid.
+ * @current_adjfreq: Current ppb adjustment.
+ * @phc_clock: Pointer to registered phc device
+ * @phc_clock_info: Registration structure for phc device
+ * @pps_work: pps work task for handling pps events
+ * @pps_workwq: pps work queue
+ * @nic_ts_enabled: Flag indicating if NIC generated TS events are handled
+ * @txbuf: Buffer for use when transmitting (PTP) packets to MC (avoids
+ *         allocations in main data path).
+ * @timeset: Last set of synchronisation statistics.
+ */
+struct efx_ptp_data {
+	struct efx_channel *channel;
+	struct sk_buff_head rxq;
+	struct sk_buff_head txq;
+	struct list_head evt_list;
+	struct list_head evt_free_list;
+	spinlock_t evt_lock;
+	struct efx_ptp_event_rx rx_evts[MAX_RECEIVE_EVENTS];
+	struct workqueue_struct *workwq;
+	struct work_struct work;
+	bool reset_required;
+	u32 rxfilter_event;
+	u32 rxfilter_general;
+	bool rxfilter_installed;
+	struct hwtstamp_config config;
+	bool enabled;
+	unsigned int mode;
+	efx_qword_t evt_frags[MAX_EVENT_FRAGS];
+	int evt_frag_idx;
+	int evt_code;
+	struct efx_buffer start;
+	struct pps_event_time host_time_pps;
+	unsigned last_sync_ns;
+	unsigned base_sync_ns;
+	bool base_sync_valid;
+	s64 current_adjfreq;
+	struct ptp_clock *phc_clock;
+	struct ptp_clock_info phc_clock_info;
+	struct work_struct pps_work;
+	struct workqueue_struct *pps_workwq;
+	bool nic_ts_enabled;
+	u8 txbuf[ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN(
+			       MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM), 4)];
+	struct efx_ptp_timeset
+	timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM];
+};
+
+static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta);
+static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta);
+static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts);
+static int efx_phc_settime(struct ptp_clock_info *ptp,
+			   const struct timespec *e_ts);
+static int efx_phc_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *request, int on);
+
+/* Enable MCDI PTP support. */
+static int efx_ptp_enable(struct efx_nic *efx)
+{
+	u8 inbuf[MC_CMD_PTP_IN_ENABLE_LEN];
+
+	MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_QUEUE,
+		       efx->ptp_data->channel->channel);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_MODE, efx->ptp_data->mode);
+
+	return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+			    NULL, 0, NULL);
+}
+
+/* Disable MCDI PTP support.
+ *
+ * Note that this function should never rely on the presence of ptp_data -
+ * it may be called before that exists.
+ */
+static int efx_ptp_disable(struct efx_nic *efx)
+{
+	u8 inbuf[MC_CMD_PTP_IN_DISABLE_LEN];
+
+	MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE);
+	return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+			    NULL, 0, NULL);
+}
+
+static void efx_ptp_deliver_rx_queue(struct sk_buff_head *q)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(q))) {
+		local_bh_disable();
+		netif_receive_skb(skb);
+		local_bh_enable();
+	}
+}
+
+static void efx_ptp_handle_no_channel(struct efx_nic *efx)
+{
+	netif_err(efx, drv, efx->net_dev,
+		  "ERROR: PTP requires MSI-X and 1 additional interrupt "
+		  "vector. PTP disabled\n");
+}
+
+/* Repeatedly send the host time to the MC which will capture the hardware
+ * time.
+ */
+static void efx_ptp_send_times(struct efx_nic *efx,
+			       struct pps_event_time *last_time)
+{
+	struct pps_event_time now;
+	struct timespec limit;
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	struct timespec start;
+	int *mc_running = ptp->start.addr;
+
+	pps_get_ts(&now);
+	start = now.ts_real;
+	limit = now.ts_real;
+	timespec_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
+
+	/* Write host time for specified period or until MC is done */
+	while ((timespec_compare(&now.ts_real, &limit) < 0) &&
+	       ACCESS_ONCE(*mc_running)) {
+		struct timespec update_time;
+		unsigned int host_time;
+
+		/* Don't update continuously to avoid saturating the PCIe bus */
+		update_time = now.ts_real;
+		timespec_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS);
+		do {
+			pps_get_ts(&now);
+		} while ((timespec_compare(&now.ts_real, &update_time) < 0) &&
+			 ACCESS_ONCE(*mc_running));
+
+		/* Synchronise NIC with single word of time only */
+		host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS |
+			     now.ts_real.tv_nsec);
+		/* Update host time in NIC memory */
+		_efx_writed(efx, cpu_to_le32(host_time),
+			    FR_CZ_MC_TREG_SMEM + MC_SMEM_P0_PTP_TIME_OFST);
+	}
+	*last_time = now;
+}
+
+/* Read a timeset from the MC's results and partially process it. */
+static void efx_ptp_read_timeset(u8 *data, struct efx_ptp_timeset *timeset)
+{
+	unsigned start_ns, end_ns;
+
+	timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART);
+	timeset->seconds = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_SECONDS);
+	timeset->nanoseconds = MCDI_DWORD(data,
+					 PTP_OUT_SYNCHRONIZE_NANOSECONDS);
+	timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND);
+	timeset->waitns = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS);
+
+	/* Ignore seconds */
+	start_ns = timeset->host_start & MC_NANOSECOND_MASK;
+	end_ns = timeset->host_end & MC_NANOSECOND_MASK;
+	/* Allow for rollover */
+	if (end_ns < start_ns)
+		end_ns += NSEC_PER_SEC;
+	/* Determine duration of operation */
+	timeset->window = end_ns - start_ns;
+}
+
+/* Process times received from MC.
+ *
+ * Extract times from the returned results and establish the minimum value
+ * seen.  The minimum value represents the "best" possible time; readings
+ * much greater than this are rejected - the machine was, perhaps, too busy.
+ * A number of readings are taken so that, hopefully, at least one good
+ * synchronisation will be seen in the results.
+ */
+static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf,
+				 size_t response_length,
+				 const struct pps_event_time *last_time)
+{
+	unsigned number_readings = (response_length /
+			       MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN);
+	unsigned i;
+	unsigned min;
+	unsigned min_set = 0;
+	unsigned total;
+	unsigned ngood = 0;
+	unsigned last_good = 0;
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	bool min_valid = false;
+	u32 last_sec;
+	u32 start_sec;
+	struct timespec delta;
+
+	if (number_readings == 0)
+		return -EAGAIN;
+
+	/* Find minimum value in this set of results, discarding clearly
+	 * erroneous results.
+	 */
+	for (i = 0; i < number_readings; i++) {
+		efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]);
+		synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN;
+		if (ptp->timeset[i].window > SYNCHRONISATION_GRANULARITY_NS) {
+			if (min_valid) {
+				if (ptp->timeset[i].window < min_set)
+					min_set = ptp->timeset[i].window;
+			} else {
+				min_valid = true;
+				min_set = ptp->timeset[i].window;
+			}
+		}
+	}
+
+	if (min_valid) {
+		if (ptp->base_sync_valid && (min_set > ptp->base_sync_ns))
+			min = ptp->base_sync_ns;
+		else
+			min = min_set;
+	} else {
+		min = SYNCHRONISATION_GRANULARITY_NS;
+	}
+
+	/* Discard excessively long synchronise durations.  The MC times
+	 * when it finishes reading the host time so the corrected window
+	 * time should be fairly constant for a given platform.
+	 */
+	total = 0;
+	for (i = 0; i < number_readings; i++)
+		if (ptp->timeset[i].window > ptp->timeset[i].waitns) {
+			unsigned win;
+
+			win = ptp->timeset[i].window - ptp->timeset[i].waitns;
+			if (win >= MIN_SYNCHRONISATION_NS &&
+			    win < MAX_SYNCHRONISATION_NS) {
+				total += ptp->timeset[i].window;
+				ngood++;
+				last_good = i;
+			}
+		}
+
+	if (ngood == 0) {
+		netif_warn(efx, drv, efx->net_dev,
+			   "PTP no suitable synchronisations %dns %dns\n",
+			   ptp->base_sync_ns, min_set);
+		return -EAGAIN;
+	}
+
+	/* Average minimum this synchronisation */
+	ptp->last_sync_ns = DIV_ROUND_UP(total, ngood);
+	if (!ptp->base_sync_valid || (ptp->last_sync_ns < ptp->base_sync_ns)) {
+		ptp->base_sync_valid = true;
+		ptp->base_sync_ns = ptp->last_sync_ns;
+	}
+
+	/* Calculate delay from actual PPS to last_time */
+	delta.tv_nsec =
+		ptp->timeset[last_good].nanoseconds +
+		last_time->ts_real.tv_nsec -
+		(ptp->timeset[last_good].host_start & MC_NANOSECOND_MASK);
+
+	/* It is possible that the seconds rolled over between taking
+	 * the start reading and the last value written by the host.  The
+	 * timescales are such that a gap of more than one second is never
+	 * expected.
+	 */
+	start_sec = ptp->timeset[last_good].host_start >> MC_NANOSECOND_BITS;
+	last_sec = last_time->ts_real.tv_sec & MC_SECOND_MASK;
+	if (start_sec != last_sec) {
+		if (((start_sec + 1) & MC_SECOND_MASK) != last_sec) {
+			netif_warn(efx, hw, efx->net_dev,
+				   "PTP bad synchronisation seconds\n");
+			return -EAGAIN;
+		} else {
+			delta.tv_sec = 1;
+		}
+	} else {
+		delta.tv_sec = 0;
+	}
+
+	ptp->host_time_pps = *last_time;
+	pps_sub_ts(&ptp->host_time_pps, delta);
+
+	return 0;
+}
+
+/* Synchronize times between the host and the MC */
+static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	u8 synch_buf[MC_CMD_PTP_OUT_SYNCHRONIZE_LENMAX];
+	size_t response_length;
+	int rc;
+	unsigned long timeout;
+	struct pps_event_time last_time = {};
+	unsigned int loops = 0;
+	int *start = ptp->start.addr;
+
+	MCDI_SET_DWORD(synch_buf, PTP_IN_OP, MC_CMD_PTP_OP_SYNCHRONIZE);
+	MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_NUMTIMESETS,
+		       num_readings);
+	MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_LO,
+		       (u32)ptp->start.dma_addr);
+	MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_HI,
+		       (u32)((u64)ptp->start.dma_addr >> 32));
+
+	/* Clear flag that signals MC ready */
+	ACCESS_ONCE(*start) = 0;
+	efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
+			   MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
+
+	/* Wait for start from MCDI (or timeout) */
+	timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
+	while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) {
+		udelay(20);	/* Usually start MCDI execution quickly */
+		loops++;
+	}
+
+	if (ACCESS_ONCE(*start))
+		efx_ptp_send_times(efx, &last_time);
+
+	/* Collect results */
+	rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP,
+				 MC_CMD_PTP_IN_SYNCHRONIZE_LEN,
+				 synch_buf, sizeof(synch_buf),
+				 &response_length);
+	if (rc == 0)
+		rc = efx_ptp_process_times(efx, synch_buf, response_length,
+					   &last_time);
+
+	return rc;
+}
+
+/* Transmit a PTP packet, via the MCDI interface, to the wire. */
+static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb)
+{
+	u8 *txbuf = efx->ptp_data->txbuf;
+	struct skb_shared_hwtstamps timestamps;
+	int rc = -EIO;
+	/* MCDI driver requires word aligned lengths */
+	size_t len = ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN(skb->len), 4);
+	u8 txtime[MC_CMD_PTP_OUT_TRANSMIT_LEN];
+
+	MCDI_SET_DWORD(txbuf, PTP_IN_OP, MC_CMD_PTP_OP_TRANSMIT);
+	MCDI_SET_DWORD(txbuf, PTP_IN_TRANSMIT_LENGTH, skb->len);
+	if (skb_shinfo(skb)->nr_frags != 0) {
+		rc = skb_linearize(skb);
+		if (rc != 0)
+			goto fail;
+	}
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		rc = skb_checksum_help(skb);
+		if (rc != 0)
+			goto fail;
+	}
+	skb_copy_from_linear_data(skb,
+				  &txbuf[MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST],
+				  len);
+	rc = efx_mcdi_rpc(efx, MC_CMD_PTP, txbuf, len, txtime,
+			  sizeof(txtime), &len);
+	if (rc != 0)
+		goto fail;
+
+	memset(&timestamps, 0, sizeof(timestamps));
+	timestamps.hwtstamp = ktime_set(
+		MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_SECONDS),
+		MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_NANOSECONDS));
+
+	skb_tstamp_tx(skb, &timestamps);
+
+	rc = 0;
+
+fail:
+	dev_kfree_skb(skb);
+
+	return rc;
+}
+
+static void efx_ptp_drop_time_expired_events(struct efx_nic *efx)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	struct list_head *cursor;
+	struct list_head *next;
+
+	/* Drop time-expired events */
+	spin_lock_bh(&ptp->evt_lock);
+	if (!list_empty(&ptp->evt_list)) {
+		list_for_each_safe(cursor, next, &ptp->evt_list) {
+			struct efx_ptp_event_rx *evt;
+
+			evt = list_entry(cursor, struct efx_ptp_event_rx,
+					 link);
+			if (time_after(jiffies, evt->expiry)) {
+				list_del(&evt->link);
+				list_add(&evt->link, &ptp->evt_free_list);
+				netif_warn(efx, hw, efx->net_dev,
+					   "PTP rx event dropped\n");
+			}
+		}
+	}
+	spin_unlock_bh(&ptp->evt_lock);
+}
+
+static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx,
+					      struct sk_buff *skb)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	bool evts_waiting;
+	struct list_head *cursor;
+	struct list_head *next;
+	struct efx_ptp_match *match;
+	enum ptp_packet_state rc = PTP_PACKET_STATE_UNMATCHED;
+
+	spin_lock_bh(&ptp->evt_lock);
+	evts_waiting = !list_empty(&ptp->evt_list);
+	spin_unlock_bh(&ptp->evt_lock);
+
+	if (!evts_waiting)
+		return PTP_PACKET_STATE_UNMATCHED;
+
+	match = (struct efx_ptp_match *)skb->cb;
+	/* Look for a matching timestamp in the event queue */
+	spin_lock_bh(&ptp->evt_lock);
+	list_for_each_safe(cursor, next, &ptp->evt_list) {
+		struct efx_ptp_event_rx *evt;
+
+		evt = list_entry(cursor, struct efx_ptp_event_rx, link);
+		if ((evt->seq0 == match->words[0]) &&
+		    (evt->seq1 == match->words[1])) {
+			struct skb_shared_hwtstamps *timestamps;
+
+			/* Match - add in hardware timestamp */
+			timestamps = skb_hwtstamps(skb);
+			timestamps->hwtstamp = evt->hwtimestamp;
+
+			match->state = PTP_PACKET_STATE_MATCHED;
+			rc = PTP_PACKET_STATE_MATCHED;
+			list_del(&evt->link);
+			list_add(&evt->link, &ptp->evt_free_list);
+			break;
+		}
+	}
+	spin_unlock_bh(&ptp->evt_lock);
+
+	return rc;
+}
+
+/* Process any queued receive events and corresponding packets
+ *
+ * q is returned with all the packets that are ready for delivery.
+ * true is returned if at least one of those packets requires
+ * synchronisation.
+ */
+static bool efx_ptp_process_events(struct efx_nic *efx, struct sk_buff_head *q)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	bool rc = false;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&ptp->rxq))) {
+		struct efx_ptp_match *match;
+
+		match = (struct efx_ptp_match *)skb->cb;
+		if (match->state == PTP_PACKET_STATE_MATCH_UNWANTED) {
+			__skb_queue_tail(q, skb);
+		} else if (efx_ptp_match_rx(efx, skb) ==
+			   PTP_PACKET_STATE_MATCHED) {
+			rc = true;
+			__skb_queue_tail(q, skb);
+		} else if (time_after(jiffies, match->expiry)) {
+			match->state = PTP_PACKET_STATE_TIMED_OUT;
+			netif_warn(efx, rx_err, efx->net_dev,
+				   "PTP packet - no timestamp seen\n");
+			__skb_queue_tail(q, skb);
+		} else {
+			/* Replace unprocessed entry and stop */
+			skb_queue_head(&ptp->rxq, skb);
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/* Complete processing of a received packet */
+static inline void efx_ptp_process_rx(struct efx_nic *efx, struct sk_buff *skb)
+{
+	local_bh_disable();
+	netif_receive_skb(skb);
+	local_bh_enable();
+}
+
+static int efx_ptp_start(struct efx_nic *efx)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	struct efx_filter_spec rxfilter;
+	int rc;
+
+	ptp->reset_required = false;
+
+	/* Must filter on both event and general ports to ensure
+	 * that there is no packet re-ordering.
+	 */
+	efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0,
+			   efx_rx_queue_index(
+				   efx_channel_get_rx_queue(ptp->channel)));
+	rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP,
+				       htonl(PTP_ADDRESS),
+				       htons(PTP_EVENT_PORT));
+	if (rc != 0)
+		return rc;
+
+	rc = efx_filter_insert_filter(efx, &rxfilter, true);
+	if (rc < 0)
+		return rc;
+	ptp->rxfilter_event = rc;
+
+	efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0,
+			   efx_rx_queue_index(
+				   efx_channel_get_rx_queue(ptp->channel)));
+	rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP,
+				       htonl(PTP_ADDRESS),
+				       htons(PTP_GENERAL_PORT));
+	if (rc != 0)
+		goto fail;
+
+	rc = efx_filter_insert_filter(efx, &rxfilter, true);
+	if (rc < 0)
+		goto fail;
+	ptp->rxfilter_general = rc;
+
+	rc = efx_ptp_enable(efx);
+	if (rc != 0)
+		goto fail2;
+
+	ptp->evt_frag_idx = 0;
+	ptp->current_adjfreq = 0;
+	ptp->rxfilter_installed = true;
+
+	return 0;
+
+fail2:
+	efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+				  ptp->rxfilter_general);
+fail:
+	efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+				  ptp->rxfilter_event);
+
+	return rc;
+}
+
+static int efx_ptp_stop(struct efx_nic *efx)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	int rc = efx_ptp_disable(efx);
+	struct list_head *cursor;
+	struct list_head *next;
+
+	if (ptp->rxfilter_installed) {
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  ptp->rxfilter_general);
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  ptp->rxfilter_event);
+		ptp->rxfilter_installed = false;
+	}
+
+	/* Make sure RX packets are really delivered */
+	efx_ptp_deliver_rx_queue(&efx->ptp_data->rxq);
+	skb_queue_purge(&efx->ptp_data->txq);
+
+	/* Drop any pending receive events */
+	spin_lock_bh(&efx->ptp_data->evt_lock);
+	list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) {
+		list_move(cursor, &efx->ptp_data->evt_free_list);
+	}
+	spin_unlock_bh(&efx->ptp_data->evt_lock);
+
+	return rc;
+}
+
+static void efx_ptp_pps_worker(struct work_struct *work)
+{
+	struct efx_ptp_data *ptp =
+		container_of(work, struct efx_ptp_data, pps_work);
+	struct efx_nic *efx = ptp->channel->efx;
+	struct ptp_clock_event ptp_evt;
+
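+	/* Re-synchronise host and NIC time so that host_time_pps is
+	 * fresh before reporting the PPS event; give up if this fails.
+	 */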
+	if (efx_ptp_synchronize(efx, PTP_SYNC_ATTEMPTS))
+		return;
+
+	ptp_evt.type = PTP_CLOCK_PPSUSR;
+	ptp_evt.pps_times = ptp->host_time_pps;
+	ptp_clock_event(ptp->phc_clock, &ptp_evt);
+}
+
+/* Process any pending transmissions and timestamp any received packets.
+ */
+static void efx_ptp_worker(struct work_struct *work)
+{
+	struct efx_ptp_data *ptp_data =
+		container_of(work, struct efx_ptp_data, work);
+	struct efx_nic *efx = ptp_data->channel->efx;
+	struct sk_buff *skb;
+	struct sk_buff_head tempq;
+
+	if (ptp_data->reset_required) {
+		efx_ptp_stop(efx);
+		efx_ptp_start(efx);
+		return;
+	}
+
+	efx_ptp_drop_time_expired_events(efx);
+
+	__skb_queue_head_init(&tempq);
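+	/* Event processing must run even when there is nothing to
+	 * transmit, so evaluate it first; any pending transmit skbs
+	 * are then handed to the MC one at a time.
+	 */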
+	if (efx_ptp_process_events(efx, &tempq) ||
+	    !skb_queue_empty(&ptp_data->txq)) {
+		while ((skb = skb_dequeue(&ptp_data->txq)))
+			efx_ptp_xmit_skb(efx, skb);
+	}
+
+	while ((skb = __skb_dequeue(&tempq)))
+		efx_ptp_process_rx(efx, skb);
+}
+
+/* Initialise PTP channel and state.
+ *
+ * Setting core_index to zero causes the queue to be initialised and doesn't
+ * overlap with 'rxq0' because ptp.c doesn't use skb_record_rx_queue.
+ */
+static int efx_ptp_probe_channel(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_ptp_data *ptp;
+	int rc = 0;
+	unsigned int pos;
+
+	channel->irq_moderation = 0;
+	channel->rx_queue.core_index = 0;
+
+	ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
+	efx->ptp_data = ptp;
+	if (!efx->ptp_data)
+		return -ENOMEM;
+
+	rc = efx_nic_alloc_buffer(efx, &ptp->start, sizeof(int));
+	if (rc != 0)
+		goto fail1;
+
+	ptp->channel = channel;
+	skb_queue_head_init(&ptp->rxq);
+	skb_queue_head_init(&ptp->txq);
+	ptp->workwq = create_singlethread_workqueue("sfc_ptp");
+	if (!ptp->workwq) {
+		rc = -ENOMEM;
+		goto fail2;
+	}
+
+	INIT_WORK(&ptp->work, efx_ptp_worker);
+	ptp->config.flags = 0;
+	ptp->config.tx_type = HWTSTAMP_TX_OFF;
+	ptp->config.rx_filter = HWTSTAMP_FILTER_NONE;
+	INIT_LIST_HEAD(&ptp->evt_list);
+	INIT_LIST_HEAD(&ptp->evt_free_list);
+	spin_lock_init(&ptp->evt_lock);
+	for (pos = 0; pos < MAX_RECEIVE_EVENTS; pos++)
+		list_add(&ptp->rx_evts[pos].link, &ptp->evt_free_list);
+
+	ptp->phc_clock_info.owner = THIS_MODULE;
+	snprintf(ptp->phc_clock_info.name,
+		 sizeof(ptp->phc_clock_info.name),
+		 "%pm", efx->net_dev->perm_addr);
+	ptp->phc_clock_info.max_adj = MAX_PPB;
+	ptp->phc_clock_info.n_alarm = 0;
+	ptp->phc_clock_info.n_ext_ts = 0;
+	ptp->phc_clock_info.n_per_out = 0;
+	ptp->phc_clock_info.pps = 1;
+	ptp->phc_clock_info.adjfreq = efx_phc_adjfreq;
+	ptp->phc_clock_info.adjtime = efx_phc_adjtime;
+	ptp->phc_clock_info.gettime = efx_phc_gettime;
+	ptp->phc_clock_info.settime = efx_phc_settime;
+	ptp->phc_clock_info.enable = efx_phc_enable;
+
+	ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info,
+					    &efx->pci_dev->dev);
+	if (IS_ERR(ptp->phc_clock)) {
+		rc = PTR_ERR(ptp->phc_clock);
+		goto fail3;
+	}
+
+	INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
+	ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
+	if (!ptp->pps_workwq) {
+		rc = -ENOMEM;
+		goto fail4;
+	}
+	ptp->nic_ts_enabled = false;
+
+	return 0;
+fail4:
+	ptp_clock_unregister(efx->ptp_data->phc_clock);
+
+fail3:
+	destroy_workqueue(efx->ptp_data->workwq);
+
+fail2:
+	efx_nic_free_buffer(efx, &ptp->start);
+
+fail1:
+	kfree(efx->ptp_data);
+	efx->ptp_data = NULL;
+
+	return rc;
+}
+
+static void efx_ptp_remove_channel(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+
+	if (!efx->ptp_data)
+		return;
+
+	(void)efx_ptp_disable(channel->efx);
+
+	cancel_work_sync(&efx->ptp_data->work);
+	cancel_work_sync(&efx->ptp_data->pps_work);
+
+	skb_queue_purge(&efx->ptp_data->rxq);
+	skb_queue_purge(&efx->ptp_data->txq);
+
+	ptp_clock_unregister(efx->ptp_data->phc_clock);
+
+	destroy_workqueue(efx->ptp_data->workwq);
+	destroy_workqueue(efx->ptp_data->pps_workwq);
+
+	efx_nic_free_buffer(efx, &efx->ptp_data->start);
+	kfree(efx->ptp_data);
+}
+
+static void efx_ptp_get_channel_name(struct efx_channel *channel,
+				     char *buf, size_t len)
+{
+	snprintf(buf, len, "%s-ptp", channel->efx->name);
+}
+
+/* Determine whether this packet should be processed by the PTP module
+ * or transmitted conventionally.
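+ *
+ * Event packets must also fit within a single MCDI PTP transmit
+ * request (MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM bytes), since they
+ * are transmitted by the MC itself.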
+ */
+bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+	return efx->ptp_data &&
+		efx->ptp_data->enabled &&
+		skb->len >= PTP_MIN_LENGTH &&
+		skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM &&
+		likely(skb->protocol == htons(ETH_P_IP)) &&
+		ip_hdr(skb)->protocol == IPPROTO_UDP &&
+		udp_hdr(skb)->dest == htons(PTP_EVENT_PORT);
+}
+
+/* Receive a PTP packet.  Packets are queued until the receive
+ * timestamp arrives from the MC - this will normally be after the
+ * packet itself because of the processing time in the MC.
+ */
+static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb;
+	u8 *data;
+	unsigned int version;
+
+	match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
+
+	/* Correct version? */
+	if (ptp->mode == MC_CMD_PTP_MODE_V1) {
+		if (skb->len < PTP_V1_MIN_LENGTH) {
+			netif_receive_skb(skb);
+			return;
+		}
+		version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]);
+		if (version != PTP_VERSION_V1) {
+			netif_receive_skb(skb);
+			return;
+		}
+	} else {
+		if (skb->len < PTP_V2_MIN_LENGTH) {
+			netif_receive_skb(skb);
+			return;
+		}
+		version = skb->data[PTP_V2_VERSION_OFFSET];
+
+		BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2);
+		BUILD_BUG_ON(PTP_V1_UUID_OFFSET != PTP_V2_MC_UUID_OFFSET);
+		BUILD_BUG_ON(PTP_V1_UUID_LENGTH != PTP_V2_MC_UUID_LENGTH);
+		BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET);
+		BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH);
+
+		if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) {
+			netif_receive_skb(skb);
+			return;
+		}
+	}
+
+	/* Does this packet require timestamping? */
+	if (ntohs(*(__be16 *)&skb->data[PTP_DPORT_OFFSET]) == PTP_EVENT_PORT) {
+		struct skb_shared_hwtstamps *timestamps;
+
+		match->state = PTP_PACKET_STATE_UNMATCHED;
+
+		/* Clear all timestamps held: filled in later */
+		timestamps = skb_hwtstamps(skb);
+		memset(timestamps, 0, sizeof(*timestamps));
+
+		/* Extract UUID/Sequence information */
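+		/* words[0] carries the first four UUID bytes; words[1]
+		 * packs the remaining two with the low sequence byte.
+		 */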
+		data = skb->data + PTP_V1_UUID_OFFSET;
+		match->words[0] = (data[0]         |
+				   (data[1] << 8)  |
+				   (data[2] << 16) |
+				   (data[3] << 24));
+		match->words[1] = (data[4]         |
+				   (data[5] << 8)  |
+				   (skb->data[PTP_V1_SEQUENCE_OFFSET +
+					      PTP_V1_SEQUENCE_LENGTH - 1] <<
+				    16));
+	} else {
+		match->state = PTP_PACKET_STATE_MATCH_UNWANTED;
+	}
+
+	skb_queue_tail(&ptp->rxq, skb);
+	queue_work(ptp->workwq, &ptp->work);
+}
+
+/* Transmit a PTP packet.  This has to be transmitted by the MC
+ * itself, through an MCDI call.  MCDI calls aren't permitted
+ * in the transmit path so defer the actual transmission to a suitable worker.
+ */
+int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+
+	skb_queue_tail(&ptp->txq, skb);
+
+	if ((udp_hdr(skb)->dest == htons(PTP_EVENT_PORT)) &&
+	    (skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM))
+		efx_xmit_hwtstamp_pending(skb);
+	queue_work(ptp->workwq, &ptp->work);
+
+	return NETDEV_TX_OK;
+}
+
+static int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted,
+			       unsigned int new_mode)
+{
+	if ((enable_wanted != efx->ptp_data->enabled) ||
+	    (enable_wanted && (efx->ptp_data->mode != new_mode))) {
+		int rc;
+
+		if (enable_wanted) {
+			/* Change of mode requires disable */
+			if (efx->ptp_data->enabled &&
+			    (efx->ptp_data->mode != new_mode)) {
+				efx->ptp_data->enabled = false;
+				rc = efx_ptp_stop(efx);
+				if (rc != 0)
+					return rc;
+			}
+
+			/* Set new operating mode and establish
+			 * baseline synchronisation, which must
+			 * succeed.
+			 */
+			efx->ptp_data->mode = new_mode;
+			rc = efx_ptp_start(efx);
+			if (rc == 0) {
+				rc = efx_ptp_synchronize(efx,
+							 PTP_SYNC_ATTEMPTS * 2);
+				if (rc != 0)
+					efx_ptp_stop(efx);
+			}
+		} else {
+			rc = efx_ptp_stop(efx);
+		}
+
+		if (rc != 0)
+			return rc;
+
+		efx->ptp_data->enabled = enable_wanted;
+	}
+
+	return 0;
+}
+
+static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init)
+{
+	bool enable_wanted = false;
+	unsigned int new_mode;
+	int rc;
+
+	if (init->flags)
+		return -EINVAL;
+
+	if ((init->tx_type != HWTSTAMP_TX_OFF) &&
+	    (init->tx_type != HWTSTAMP_TX_ON))
+		return -ERANGE;
+
+	new_mode = efx->ptp_data->mode;
+	/* Determine whether any PTP HW operations are required */
+	switch (init->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		init->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+		new_mode = MC_CMD_PTP_MODE_V1;
+		enable_wanted = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		/* Although these three filters are accepted, only IPv4
+		 * packets will be timestamped.
+		 */
+		init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+		new_mode = MC_CMD_PTP_MODE_V2;
+		enable_wanted = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		/* Non-IP and IPv6 timestamping not supported */
+		return -ERANGE;
+	default:
+		return -ERANGE;
+	}
+
+	if (init->tx_type != HWTSTAMP_TX_OFF)
+		enable_wanted = true;
+
+	rc = efx_ptp_change_mode(efx, enable_wanted, new_mode);
+	if (rc != 0)
+		return rc;
+
+	efx->ptp_data->config = *init;
+
+	return 0;
+}
+
+int
+efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+	struct efx_ptp_data *ptp = efx->ptp_data;
+
+	if (!ptp)
+		return -EOPNOTSUPP;
+
+	ts_info->so_timestamping = (SOF_TIMESTAMPING_TX_HARDWARE |
+				    SOF_TIMESTAMPING_RX_HARDWARE |
+				    SOF_TIMESTAMPING_RAW_HARDWARE);
+	ts_info->phc_index = ptp_clock_index(ptp->phc_clock);
+	ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON;
+	ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE |
+			       1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT |
+			       1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC |
+			       1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ |
+			       1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT |
+			       1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC |
+			       1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
+	return 0;
+}
+
+int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd)
+{
+	struct hwtstamp_config config;
+	int rc;
+
+	/* Not a PTP enabled port */
+	if (!efx->ptp_data)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	rc = efx_ptp_ts_init(efx, &config);
+	if (rc != 0)
+		return rc;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config))
+		? -EFAULT : 0;
+}
+
+static void ptp_event_failure(struct efx_nic *efx, int expected_frag_len)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+
+	netif_err(efx, hw, efx->net_dev,
+		  "PTP unexpected event length: got %d expected %d\n",
+		  ptp->evt_frag_idx, expected_frag_len);
+	ptp->reset_required = true;
+	queue_work(ptp->workwq, &ptp->work);
+}
+
+/* Process a completed receive event.  Put it on the event queue and
+ * start the worker thread.  This is required because events and their
+ * corresponding packets may arrive in either order.
+ */
+static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+	struct efx_ptp_event_rx *evt = NULL;
+
+	if (ptp->evt_frag_idx != 3) {
+		ptp_event_failure(efx, 3);
+		return;
+	}
+
+	spin_lock_bh(&ptp->evt_lock);
+	if (!list_empty(&ptp->evt_free_list)) {
+		evt = list_first_entry(&ptp->evt_free_list,
+				       struct efx_ptp_event_rx, link);
+		list_del(&evt->link);
+
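+		/* seq0/seq1 are built to match the words[0]/words[1]
+		 * packing in efx_ptp_rx(), allowing RX events to be
+		 * matched against queued packets by direct comparison.
+		 */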
+		evt->seq0 = EFX_QWORD_FIELD(ptp->evt_frags[2], MCDI_EVENT_DATA);
+		evt->seq1 = (EFX_QWORD_FIELD(ptp->evt_frags[2],
+					     MCDI_EVENT_SRC)        |
+			     (EFX_QWORD_FIELD(ptp->evt_frags[1],
+					      MCDI_EVENT_SRC) << 8) |
+			     (EFX_QWORD_FIELD(ptp->evt_frags[0],
+					      MCDI_EVENT_SRC) << 16));
+		evt->hwtimestamp = ktime_set(
+			EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA),
+			EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA));
+		evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
+		list_add_tail(&evt->link, &ptp->evt_list);
+
+		queue_work(ptp->workwq, &ptp->work);
+	} else {
+		netif_err(efx, rx_err, efx->net_dev, "No free PTP event\n");
+	}
+	spin_unlock_bh(&ptp->evt_lock);
+}
+
+static void ptp_event_fault(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+	int code = EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA);
+
+	if (ptp->evt_frag_idx != 1) {
+		ptp_event_failure(efx, 1);
+		return;
+	}
+
+	netif_err(efx, hw, efx->net_dev, "PTP error %d\n", code);
+}
+
+static void ptp_event_pps(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+	if (ptp->nic_ts_enabled)
+		queue_work(ptp->pps_workwq, &ptp->pps_work);
+}
+
+void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev)
+{
+	struct efx_ptp_data *ptp = efx->ptp_data;
+	int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE);
+
+	if (!ptp->enabled)
+		return;
+
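+	/* MC events that span multiple fragments all carry the same
+	 * event code; the final fragment has the CONT flag clear.
+	 */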
+	if (ptp->evt_frag_idx == 0) {
+		ptp->evt_code = code;
+	} else if (ptp->evt_code != code) {
+		netif_err(efx, hw, efx->net_dev,
+			  "PTP out of sequence event %d\n", code);
+		/* Restart reassembly, treating this fragment as the
+		 * first of a new event.
+		 */
+		ptp->evt_frag_idx = 0;
+		ptp->evt_code = code;
+	}
+
+	ptp->evt_frags[ptp->evt_frag_idx++] = *ev;
+	if (!MCDI_EVENT_FIELD(*ev, CONT)) {
+		/* Process resulting event */
+		switch (code) {
+		case MCDI_EVENT_CODE_PTP_RX:
+			ptp_event_rx(efx, ptp);
+			break;
+		case MCDI_EVENT_CODE_PTP_FAULT:
+			ptp_event_fault(efx, ptp);
+			break;
+		case MCDI_EVENT_CODE_PTP_PPS:
+			ptp_event_pps(efx, ptp);
+			break;
+		default:
+			netif_err(efx, hw, efx->net_dev,
+				  "PTP unknown event %d\n", code);
+			break;
+		}
+		ptp->evt_frag_idx = 0;
+	} else if (MAX_EVENT_FRAGS == ptp->evt_frag_idx) {
+		netif_err(efx, hw, efx->net_dev,
+			  "PTP too many event fragments\n");
+		ptp->evt_frag_idx = 0;
+	}
+}
+
+static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+	struct efx_ptp_data *ptp_data = container_of(ptp,
+						     struct efx_ptp_data,
+						     phc_clock_info);
+	struct efx_nic *efx = ptp_data->channel->efx;
+	u8 inadj[MC_CMD_PTP_IN_ADJUST_LEN];
+	s64 adjustment_ns;
+	int rc;
+
+	if (delta > MAX_PPB)
+		delta = MAX_PPB;
+	else if (delta < -MAX_PPB)
+		delta = -MAX_PPB;
+
+	/* Convert ppb to fixed point ns. */
+	adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >>
+			 (PPB_EXTRA_BITS + MAX_PPB_BITS));
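+
+	/* The MC takes the frequency adjustment as a 64-bit fixed-point
+	 * value split across the FREQ_LO/FREQ_HI dwords below; the
+	 * seconds and nanoseconds fields are zero for a pure frequency
+	 * change.
+	 */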
+
+	MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
+	MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_LO, (u32)adjustment_ns);
+	MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_HI,
+		       (u32)(adjustment_ns >> 32));
+	MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_SECONDS, 0);
+	MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_NANOSECONDS, 0);
+	rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inadj, sizeof(inadj),
+			  NULL, 0, NULL);
+	if (rc != 0)
+		return rc;
+
+	ptp_data->current_adjfreq = delta;
+	return 0;
+}
+
+static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct efx_ptp_data *ptp_data = container_of(ptp,
+						     struct efx_ptp_data,
+						     phc_clock_info);
+	struct efx_nic *efx = ptp_data->channel->efx;
+	struct timespec delta_ts = ns_to_timespec(delta);
+	u8 inbuf[MC_CMD_PTP_IN_ADJUST_LEN];
+
+	MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_LO, 0);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_HI, 0);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_SECONDS, (u32)delta_ts.tv_sec);
+	MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_NANOSECONDS, (u32)delta_ts.tv_nsec);
+	return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+			    NULL, 0, NULL);
+}
+
+static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	struct efx_ptp_data *ptp_data = container_of(ptp,
+						     struct efx_ptp_data,
+						     phc_clock_info);
+	struct efx_nic *efx = ptp_data->channel->efx;
+	u8 inbuf[MC_CMD_PTP_IN_READ_NIC_TIME_LEN];
+	u8 outbuf[MC_CMD_PTP_OUT_READ_NIC_TIME_LEN];
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_READ_NIC_TIME);
+
+	rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+			  outbuf, sizeof(outbuf), NULL);
+	if (rc != 0)
+		return rc;
+
+	ts->tv_sec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_SECONDS);
+	ts->tv_nsec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_NANOSECONDS);
+	return 0;
+}
+
+static int efx_phc_settime(struct ptp_clock_info *ptp,
+			   const struct timespec *e_ts)
+{
+	/* Get the current NIC time via efx_phc_gettime, subtract it
+	 * from the desired time to get the offset, then call
+	 * efx_phc_adjtime with that offset.
+	 */
+	int rc;
+	struct timespec time_now;
+	struct timespec delta;
+
+	rc = efx_phc_gettime(ptp, &time_now);
+	if (rc != 0)
+		return rc;
+
+	delta = timespec_sub(*e_ts, time_now);
+
+	rc = efx_phc_adjtime(ptp, timespec_to_ns(&delta));
+	if (rc != 0)
+		return rc;
+
+	return 0;
+}
+
+static int efx_phc_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *request,
+			  int enable)
+{
+	struct efx_ptp_data *ptp_data = container_of(ptp,
+						     struct efx_ptp_data,
+						     phc_clock_info);
+
+	if (request->type != PTP_CLK_REQ_PPS)
+		return -EOPNOTSUPP;
+
+	ptp_data->nic_ts_enabled = !!enable;
+	return 0;
+}
+
+static const struct efx_channel_type efx_ptp_channel_type = {
+	.handle_no_channel	= efx_ptp_handle_no_channel,
+	.pre_probe		= efx_ptp_probe_channel,
+	.post_remove		= efx_ptp_remove_channel,
+	.get_name		= efx_ptp_get_channel_name,
+	/* no copy operation; there is no need to reallocate this channel */
+	.receive_skb		= efx_ptp_rx,
+	.keep_eventq		= false,
+};
+
+void efx_ptp_probe(struct efx_nic *efx)
+{
+	/* Check whether PTP is implemented on this NIC.  The DISABLE
+	 * operation will succeed if and only if it is implemented.
+	 */
+	if (efx_ptp_disable(efx) == 0)
+		efx->extra_channel_type[EFX_EXTRA_CHANNEL_PTP] =
+			&efx_ptp_channel_type;
+}
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 719319b89d7a..9e0ad1b75c33 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -479,7 +479,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel,
 		skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
 				  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
 
-		skb_record_rx_queue(skb, channel->channel);
+		skb_record_rx_queue(skb, channel->rx_queue.core_index);
 
 		gro_result = napi_gro_frags(napi);
 	} else {
@@ -571,8 +571,14 @@ static void efx_rx_deliver(struct efx_channel *channel,
 	/* Set the SKB flags */
 	skb_checksum_none_assert(skb);
 
+	/* Record the rx_queue */
+	skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
 	/* Pass the packet up */
-	netif_receive_skb(skb);
+	if (channel->type->receive_skb)
+		channel->type->receive_skb(channel, skb);
+	else
+		netif_receive_skb(skb);
 
 	/* Update allocation strategy method */
 	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
@@ -608,13 +614,14 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
 		 * at the ethernet header */
 		skb->protocol = eth_type_trans(skb, efx->net_dev);
 
-		skb_record_rx_queue(skb, channel->channel);
+		skb_record_rx_queue(skb, channel->rx_queue.core_index);
 	}
 
 	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
 		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
-	if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)))
+	if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) &&
+	    !channel->type->receive_skb)
 		efx_rx_packet_gro(channel, rx_buf, eh);
 	else
 		efx_rx_deliver(channel, rx_buf);
@@ -624,6 +631,11 @@ void efx_rx_strategy(struct efx_channel *channel)
 {
 	enum efx_rx_alloc_method method = rx_alloc_method;
 
+	if (channel->type->receive_skb) {
+		channel->rx_alloc_push_pages = false;
+		return;
+	}
+
 	/* Only makes sense to use page based allocation if GRO is enabled */
 	if (!(channel->efx->net_dev->features & NETIF_F_GRO)) {
 		method = RX_ALLOC_METHOD_SKB;
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index 96068d15b601..ce72ae4f399f 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -614,7 +614,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests,
 {
 	enum efx_loopback_mode mode;
 	struct efx_loopback_state *state;
-	struct efx_channel *channel = efx_get_channel(efx, 0);
+	struct efx_channel *channel =
+		efx_get_channel(efx, efx->tx_channel_offset);
 	struct efx_tx_queue *tx_queue;
 	int rc = 0;
 
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 6bafd216e55e..84b41bf08a38 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -335,6 +335,7 @@ static int siena_probe_nic(struct efx_nic *efx)
 		goto fail5;
 
 	efx_sriov_probe(efx);
+	efx_ptp_probe(efx);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index 9cb3b84ecae9..d49b53dc2a50 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -21,6 +21,9 @@
 /* Number of longs required to track all the VIs in a VF */
 #define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
 
+/* Maximum number of RX queues supported */
+#define VF_MAX_RX_QUEUES 63
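+/* This must not exceed MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM, so
+ * that all of a VF's RX queues can be flushed in a single MCDI request;
+ * see the BUILD_BUG_ON in efx_vfdi_fini_all_queues().
+ */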
+
 /**
  * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
  * @VF_TX_FILTER_OFF: Disabled
@@ -578,6 +581,7 @@ static int efx_vfdi_init_rxq(struct efx_vf *vf)
 	efx_oword_t reg;
 
 	if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
+	    vf_rxq >= VF_MAX_RX_QUEUES ||
 	    bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
 		if (net_ratelimit())
 			netif_err(efx, hw, efx->net_dev,
@@ -683,6 +687,9 @@ static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
 	__le32 *rxqs;
 	int rc;
 
+	BUILD_BUG_ON(VF_MAX_RX_QUEUES >
+		     MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
+
 	rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL);
 	if (rxqs == NULL)
 		return VFDI_RC_ENOMEM;
@@ -1028,6 +1035,7 @@ efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 static const struct efx_channel_type efx_sriov_channel_type = {
 	.handle_no_channel	= efx_sriov_handle_no_channel,
 	.pre_probe		= efx_sriov_probe_channel,
+	.post_remove		= efx_channel_dummy_op_void,
 	.get_name		= efx_sriov_get_channel_name,
 	/* no copy operation; channel must not be reallocated */
 	.keep_eventq		= true,
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 18713436b443..5e090e54298e 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -22,14 +22,6 @@
 #include "nic.h"
 #include "workarounds.h"
 
-/*
- * TX descriptor ring full threshold
- *
- * The tx_queue descriptor ring fill-level must fall below this value
- * before we restart the netif queue
- */
-#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
-
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 			       struct efx_tx_buffer *buffer,
 			       unsigned int *pkts_compl,
@@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
 		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
 					 buffer->unmap_len);
-		if (buffer->unmap_single)
+		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
 					 DMA_TO_DEVICE);
 		else
 			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
 				       DMA_TO_DEVICE);
 		buffer->unmap_len = 0;
-		buffer->unmap_single = false;
 	}
 
-	if (buffer->skb) {
+	if (buffer->flags & EFX_TX_BUF_SKB) {
 		(*pkts_compl)++;
 		(*bytes_compl) += buffer->skb->len;
 		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
-		buffer->skb = NULL;
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
 			   "TX queue %d transmission id %x complete\n",
 			   tx_queue->queue, tx_queue->read_count);
+	} else if (buffer->flags & EFX_TX_BUF_HEAP) {
+		kfree(buffer->heap_buf);
 	}
-}
 
-/**
- * struct efx_tso_header - a DMA mapped buffer for packet headers
- * @next: Linked list of free ones.
- *	The list is protected by the TX queue lock.
- * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
- * @dma_addr: The DMA address of the header below.
- *
- * This controls the memory used for a TSO header.  Use TSOH_DATA()
- * to find the packet header data.  Use TSOH_SIZE() to calculate the
- * total size required for a given packet header length.  TSO headers
- * in the free list are exactly %TSOH_STD_SIZE bytes in size.
- */
-struct efx_tso_header {
-	union {
-		struct efx_tso_header *next;
-		size_t unmap_len;
-	};
-	dma_addr_t dma_addr;
-};
+	buffer->len = 0;
+	buffer->flags = 0;
+}
 
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			       struct sk_buff *skb);
-static void efx_fini_tso(struct efx_tx_queue *tx_queue);
-static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh);
-
-static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
-			  struct efx_tx_buffer *buffer)
-{
-	if (buffer->tsoh) {
-		if (likely(!buffer->tsoh->unmap_len)) {
-			buffer->tsoh->next = tx_queue->tso_headers_free;
-			tx_queue->tso_headers_free = buffer->tsoh;
-		} else {
-			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
-		}
-		buffer->tsoh = NULL;
-	}
-}
-
 
 static inline unsigned
 efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
@@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 	return max_descs;
 }
 
+/* Get partner of a TX queue, seen as part of the same net core queue */
+static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
+{
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
+		return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
+	else
+		return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
+}
+
+static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
+{
+	/* We need to consider both queues that the net core sees as one */
+	struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
+	struct efx_nic *efx = txq1->efx;
+	unsigned int fill_level;
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	if (likely(fill_level < efx->txq_stop_thresh))
+		return;
+
+	/* We used the stale old_read_count above, which gives us a
+	 * pessimistic estimate of the fill level (which may even
+	 * validly be >= efx->txq_entries).  Now try again using
+	 * read_count (more likely to be a cache miss).
+	 *
+	 * If we read read_count and then conditionally stop the
+	 * queue, it is possible for the completion path to race with
+	 * us and complete all outstanding descriptors in the middle,
+	 * after which there will be no more completions to wake it.
+	 * Therefore we stop the queue first, then read read_count
+	 * (with a memory barrier to ensure the ordering), then
+	 * restart the queue if the fill level turns out to be low
+	 * enough.
+	 */
+	netif_tx_stop_queue(txq1->core_txq);
+	smp_mb();
+	txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+	txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+	if (likely(fill_level < efx->txq_stop_thresh)) {
+		smp_mb();
+		if (likely(!efx->loopback_selftest))
+			netif_tx_start_queue(txq1->core_txq);
+	}
+}
+
 /*
  * Add a socket buffer to a TX queue
  *
@@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
  * This function is split out from efx_hard_start_xmit to allow the
  * loopback test to direct packets via specific TX queues.
  *
- * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
+ * Returns NETDEV_TX_OK.
  * You must hold netif_tx_lock() to call this function.
  */
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
@@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	struct device *dma_dev = &efx->pci_dev->dev;
 	struct efx_tx_buffer *buffer;
 	skb_frag_t *fragment;
-	unsigned int len, unmap_len = 0, fill_level, insert_ptr;
+	unsigned int len, unmap_len = 0, insert_ptr;
 	dma_addr_t dma_addr, unmap_addr = 0;
 	unsigned int dma_len;
-	bool unmap_single;
-	int q_space, i = 0;
-	netdev_tx_t rc = NETDEV_TX_OK;
+	unsigned short dma_flags;
+	int i = 0;
 
 	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
 
@@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 			return NETDEV_TX_OK;
 	}
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	/* Map for DMA.  Use dma_map_single rather than dma_map_page
 	 * since this is more efficient on machines with sparse
 	 * memory.
 	 */
-	unmap_single = true;
+	dma_flags = EFX_TX_BUF_MAP_SINGLE;
 	dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
 
 	/* Process all fragments */
@@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 
 		/* Add to TX queue, splitting across DMA boundaries */
 		do {
-			if (unlikely(q_space-- <= 0)) {
-				/* It might be that completions have
-				 * happened since the xmit path last
-				 * checked.  Update the xmit path's
-				 * copy of read_count.
-				 */
-				netif_tx_stop_queue(tx_queue->core_txq);
-				/* This memory barrier protects the
-				 * change of queue state from the access
-				 * of read_count. */
-				smp_mb();
-				tx_queue->old_read_count =
-					ACCESS_ONCE(tx_queue->read_count);
-				fill_level = (tx_queue->insert_count
-					      - tx_queue->old_read_count);
-				q_space = efx->txq_entries - 1 - fill_level;
-				if (unlikely(q_space-- <= 0)) {
-					rc = NETDEV_TX_BUSY;
-					goto unwind;
-				}
-				smp_mb();
-				if (likely(!efx->loopback_selftest))
-					netif_tx_start_queue(
-						tx_queue->core_txq);
-			}
-
 			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 			buffer = &tx_queue->buffer[insert_ptr];
-			efx_tsoh_free(tx_queue, buffer);
-			EFX_BUG_ON_PARANOID(buffer->tsoh);
-			EFX_BUG_ON_PARANOID(buffer->skb);
+			EFX_BUG_ON_PARANOID(buffer->flags);
 			EFX_BUG_ON_PARANOID(buffer->len);
-			EFX_BUG_ON_PARANOID(!buffer->continuation);
 			EFX_BUG_ON_PARANOID(buffer->unmap_len);
 
 			dma_len = efx_max_tx_len(efx, dma_addr);
@@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 			/* Fill out per descriptor fields */
 			buffer->len = dma_len;
 			buffer->dma_addr = dma_addr;
+			buffer->flags = EFX_TX_BUF_CONT;
 			len -= dma_len;
 			dma_addr += dma_len;
 			++tx_queue->insert_count;
 		} while (len);
 
 		/* Transfer ownership of the unmapping to the final buffer */
-		buffer->unmap_single = unmap_single;
+		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
 		buffer->unmap_len = unmap_len;
 		unmap_len = 0;
 
@@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		len = skb_frag_size(fragment);
 		i++;
 		/* Map for DMA */
-		unmap_single = false;
+		dma_flags = 0;
 		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
 					    DMA_TO_DEVICE);
 	}
 
 	/* Transfer ownership of the skb to the final buffer */
 	buffer->skb = skb;
-	buffer->continuation = false;
+	buffer->flags = EFX_TX_BUF_SKB | dma_flags;
 
 	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
 
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	return NETDEV_TX_OK;
 
  dma_err:
@@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	/* Mark the packet as transmitted, and free the SKB ourselves */
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		unsigned int pkts_compl = 0, bytes_compl = 0;
@@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->len = 0;
 	}
 
 	/* Free the fragment we were mid-way through pushing */
 	if (unmap_len) {
-		if (unmap_single)
+		if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(dma_dev, unmap_addr, unmap_len,
 					 DMA_TO_DEVICE);
 		else
@@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				       DMA_TO_DEVICE);
 	}
 
-	return rc;
+	return NETDEV_TX_OK;
 }
 
 /* Remove packets from the TX queue
@@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 		}
 
 		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
-		buffer->continuation = true;
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -366,6 +339,12 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 
 	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
 
+	/* PTP "event" packet */
+	if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
+	    unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
+		return efx_ptp_tx(efx, skb);
+	}
+
 	index = skb_get_queue_mapping(skb);
 	type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
 	if (index >= efx->n_tx_channels) {
@@ -450,6 +429,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
+	struct efx_tx_queue *txq2;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
@@ -457,15 +437,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
 	netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
 
-	/* See if we need to restart the netif queue.  This barrier
-	 * separates the update of read_count from the test of the
-	 * queue state. */
+	/* See if we need to restart the netif queue.  This memory
+	 * barrier ensures that we write read_count (inside
+	 * efx_dequeue_buffers()) before reading the queue status.
+	 */
 	smp_mb();
 	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
 	    likely(efx->port_enabled) &&
 	    likely(netif_device_present(efx->net_dev))) {
-		fill_level = tx_queue->insert_count - tx_queue->read_count;
-		if (fill_level < EFX_TXQ_THRESHOLD(efx))
+		txq2 = efx_tx_queue_partner(tx_queue);
+		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+				 txq2->insert_count - txq2->read_count);
+		if (fill_level <= efx->txq_wake_thresh)
 			netif_tx_wake_queue(tx_queue->core_txq);
 	}
 
@@ -480,11 +463,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	}
 }
 
+/* Size of page-based TSO header buffers.  Larger blocks must be
+ * allocated from the heap.
+ */
+#define TSOH_STD_SIZE	128
+#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
+
+/* At most half the descriptors in the queue at any time will refer to
+ * a TSO header buffer, since they must always be followed by a
+ * payload descriptor referring to an skb.
+ */
+static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+{
+	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+}
+
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned int entries;
-	int i, rc;
+	int rc;
 
 	/* Create the smallest power-of-two aligned ring */
 	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
@@ -500,17 +498,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 				   GFP_KERNEL);
 	if (!tx_queue->buffer)
 		return -ENOMEM;
-	for (i = 0; i <= tx_queue->ptr_mask; ++i)
-		tx_queue->buffer[i].continuation = true;
+
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
+		tx_queue->tsoh_page =
+			kcalloc(efx_tsoh_page_count(tx_queue),
+				sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
+		if (!tx_queue->tsoh_page) {
+			rc = -ENOMEM;
+			goto fail1;
+		}
+	}
 
 	/* Allocate hardware ring */
 	rc = efx_nic_probe_tx(tx_queue);
 	if (rc)
-		goto fail;
+		goto fail2;
 
 	return 0;
 
- fail:
+fail2:
+	kfree(tx_queue->tsoh_page);
+	tx_queue->tsoh_page = NULL;
+fail1:
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 	return rc;
@@ -546,8 +555,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 		unsigned int pkts_compl = 0, bytes_compl = 0;
 		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
 		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-		buffer->continuation = true;
-		buffer->len = 0;
 
 		++tx_queue->read_count;
 	}
@@ -568,13 +575,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 	efx_nic_fini_tx(tx_queue);
 
 	efx_release_tx_buffers(tx_queue);
-
-	/* Free up TSO header cache */
-	efx_fini_tso(tx_queue);
 }
 
 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 {
+	int i;
+
 	if (!tx_queue->buffer)
 		return;
 
@@ -582,6 +588,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 		  "destroying TX queue %d\n", tx_queue->queue);
 	efx_nic_remove_tx(tx_queue);
 
+	if (tx_queue->tsoh_page) {
+		for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
+			efx_nic_free_buffer(tx_queue->efx,
+					    &tx_queue->tsoh_page[i]);
+		kfree(tx_queue->tsoh_page);
+		tx_queue->tsoh_page = NULL;
+	}
+
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 }
@@ -604,22 +618,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 #define TSOH_OFFSET	NET_IP_ALIGN
 #endif
 
-#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)
-
-/* Total size of struct efx_tso_header, buffer and padding */
-#define TSOH_SIZE(hdr_len)					\
-	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
-
-/* Size of blocks on free list.  Larger blocks must be allocated from
- * the heap.
- */
-#define TSOH_STD_SIZE		128
-
 #define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
-#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
-#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
-#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
-#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)
 
 /**
  * struct tso_state - TSO state for an SKB
@@ -631,10 +630,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
  * @in_len: Remaining length in current SKB fragment
  * @unmap_len: Length of SKB fragment
  * @unmap_addr: DMA address of SKB fragment
- * @unmap_single: DMA single vs page mapping flag
+ * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
  * @protocol: Network protocol (after any VLAN header)
+ * @ip_off: Offset of IP header
+ * @tcp_off: Offset of TCP header
  * @header_len: Number of bytes of header
- * @full_packet_size: Number of bytes to put in each outgoing segment
+ * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
  *
  * The state used during segmentation.  It is put into this data structure
  * just to make it easy to pass into inline functions.
@@ -651,11 +652,13 @@ struct tso_state {
 	unsigned in_len;
 	unsigned unmap_len;
 	dma_addr_t unmap_addr;
-	bool unmap_single;
+	unsigned short dma_flags;
 
 	__be16 protocol;
+	unsigned int ip_off;
+	unsigned int tcp_off;
 	unsigned header_len;
-	int full_packet_size;
+	unsigned int ip_base_len;
 };
 
 
@@ -687,91 +690,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb)
 	return protocol;
 }
 
-
-/*
- * Allocate a page worth of efx_tso_header structures, and string them
- * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
- */
-static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
+static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
+			       struct efx_tx_buffer *buffer, unsigned int len)
 {
-	struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
-	struct efx_tso_header *tsoh;
-	dma_addr_t dma_addr;
-	u8 *base_kva, *kva;
+	u8 *result;
 
-	base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC);
-	if (base_kva == NULL) {
-		netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev,
-			  "Unable to allocate page for TSO headers\n");
-		return -ENOMEM;
-	}
-
-	/* dma_alloc_coherent() allocates pages. */
-	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
-
-	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
-		tsoh = (struct efx_tso_header *)kva;
-		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
-		tsoh->next = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh;
-	}
-
-	return 0;
-}
-
-
-/* Free up a TSO header, and all others in the same page. */
-static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
-				struct efx_tso_header *tsoh,
-				struct device *dma_dev)
-{
-	struct efx_tso_header **p;
-	unsigned long base_kva;
-	dma_addr_t base_dma;
-
-	base_kva = (unsigned long)tsoh & PAGE_MASK;
-	base_dma = tsoh->dma_addr & PAGE_MASK;
-
-	p = &tx_queue->tso_headers_free;
-	while (*p != NULL) {
-		if (((unsigned long)*p & PAGE_MASK) == base_kva)
-			*p = (*p)->next;
-		else
-			p = &(*p)->next;
-	}
+	EFX_BUG_ON_PARANOID(buffer->len);
+	EFX_BUG_ON_PARANOID(buffer->flags);
+	EFX_BUG_ON_PARANOID(buffer->unmap_len);
 
-	dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma);
-}
+	if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
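+		/* Halving the insert pointer gives each packet a
+		 * distinct header slot: a header descriptor is always
+		 * followed by at least one payload descriptor, so a
+		 * slot cannot be reused while its packet is in flight.
+		 */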
+		unsigned index =
+			(tx_queue->insert_count & tx_queue->ptr_mask) / 2;
+		struct efx_buffer *page_buf =
+			&tx_queue->tsoh_page[index / TSOH_PER_PAGE];
+		unsigned offset =
+			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;
+
+		if (unlikely(!page_buf->addr) &&
+		    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE))
+			return NULL;
+
+		result = (u8 *)page_buf->addr + offset;
+		buffer->dma_addr = page_buf->dma_addr + offset;
+		buffer->flags = EFX_TX_BUF_CONT;
+	} else {
+		tx_queue->tso_long_headers++;
 
-static struct efx_tso_header *
-efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
-{
-	struct efx_tso_header *tsoh;
-
-	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
-	if (unlikely(!tsoh))
-		return NULL;
-
-	tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
-					TSOH_BUFFER(tsoh), header_len,
-					DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
-				       tsoh->dma_addr))) {
-		kfree(tsoh);
-		return NULL;
+		buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
+		if (unlikely(!buffer->heap_buf))
+			return NULL;
+		result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
+		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
 	}
 
-	tsoh->unmap_len = header_len;
-	return tsoh;
-}
+	buffer->len = len;
 
-static void
-efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
-{
-	dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-			 tsoh->dma_addr, tsoh->unmap_len,
-			 DMA_TO_DEVICE);
-	kfree(tsoh);
+	return result;
 }
 
 /**
@@ -781,47 +736,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
  * @len:		Length of fragment
  * @final_buffer:	The final buffer inserted into the queue
  *
- * Push descriptors onto the TX queue.  Return 0 on success or 1 if
- * @tx_queue full.
+ * Push descriptors onto the TX queue.
  */
-static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
-			       dma_addr_t dma_addr, unsigned len,
-			       struct efx_tx_buffer **final_buffer)
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+				dma_addr_t dma_addr, unsigned len,
+				struct efx_tx_buffer **final_buffer)
 {
 	struct efx_tx_buffer *buffer;
 	struct efx_nic *efx = tx_queue->efx;
-	unsigned dma_len, fill_level, insert_ptr;
-	int q_space;
+	unsigned dma_len, insert_ptr;
 
 	EFX_BUG_ON_PARANOID(len <= 0);
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	/* -1 as there is no way to represent all descriptors used */
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	while (1) {
-		if (unlikely(q_space-- <= 0)) {
-			/* It might be that completions have happened
-			 * since the xmit path last checked.  Update
-			 * the xmit path's copy of read_count.
-			 */
-			netif_tx_stop_queue(tx_queue->core_txq);
-			/* This memory barrier protects the change of
-			 * queue state from the access of read_count. */
-			smp_mb();
-			tx_queue->old_read_count =
-				ACCESS_ONCE(tx_queue->read_count);
-			fill_level = (tx_queue->insert_count
-				      - tx_queue->old_read_count);
-			q_space = efx->txq_entries - 1 - fill_level;
-			if (unlikely(q_space-- <= 0)) {
-				*final_buffer = NULL;
-				return 1;
-			}
-			smp_mb();
-			netif_tx_start_queue(tx_queue->core_txq);
-		}
-
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		++tx_queue->insert_count;
@@ -830,12 +757,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 				    tx_queue->read_count >=
 				    efx->txq_entries);
 
-		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->len);
 		EFX_BUG_ON_PARANOID(buffer->unmap_len);
-		EFX_BUG_ON_PARANOID(buffer->skb);
-		EFX_BUG_ON_PARANOID(!buffer->continuation);
-		EFX_BUG_ON_PARANOID(buffer->tsoh);
+		EFX_BUG_ON_PARANOID(buffer->flags);
 
 		buffer->dma_addr = dma_addr;
 
@@ -845,7 +769,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 		if (dma_len >= len)
 			break;
 
-		buffer->len = dma_len; /* Don't set the other members */
+		buffer->len = dma_len;
+		buffer->flags = EFX_TX_BUF_CONT;
 		dma_addr += dma_len;
 		len -= dma_len;
 	}
@@ -853,7 +778,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 	EFX_BUG_ON_PARANOID(!len);
 	buffer->len = len;
 	*final_buffer = buffer;
-	return 0;
 }
 
 
@@ -864,54 +788,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
  * a single fragment, and we know it doesn't cross a page boundary.  It
  * also allows us to not worry about end-of-packet etc.
  */
-static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
-			       struct efx_tso_header *tsoh, unsigned len)
+static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
+			      struct efx_tx_buffer *buffer, u8 *header)
 {
-	struct efx_tx_buffer *buffer;
-
-	buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
-	efx_tsoh_free(tx_queue, buffer);
-	EFX_BUG_ON_PARANOID(buffer->len);
-	EFX_BUG_ON_PARANOID(buffer->unmap_len);
-	EFX_BUG_ON_PARANOID(buffer->skb);
-	EFX_BUG_ON_PARANOID(!buffer->continuation);
-	EFX_BUG_ON_PARANOID(buffer->tsoh);
-	buffer->len = len;
-	buffer->dma_addr = tsoh->dma_addr;
-	buffer->tsoh = tsoh;
+	if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
+		buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
+						  header, buffer->len,
+						  DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
+					       buffer->dma_addr))) {
+			kfree(buffer->heap_buf);
+			buffer->len = 0;
+			buffer->flags = 0;
+			return -ENOMEM;
+		}
+		buffer->unmap_len = buffer->len;
+		buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
+	}
 
 	++tx_queue->insert_count;
+	return 0;
 }
 
 
-/* Remove descriptors put into a tx_queue. */
+/* Remove buffers put into a tx_queue; none of the buffers may have an
+ * skb attached.
+ */
 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 {
 	struct efx_tx_buffer *buffer;
-	dma_addr_t unmap_addr;
 
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
 		buffer = &tx_queue->buffer[tx_queue->insert_count &
 					   tx_queue->ptr_mask];
-		efx_tsoh_free(tx_queue, buffer);
-		EFX_BUG_ON_PARANOID(buffer->skb);
-		if (buffer->unmap_len) {
-			unmap_addr = (buffer->dma_addr + buffer->len -
-				      buffer->unmap_len);
-			if (buffer->unmap_single)
-				dma_unmap_single(&tx_queue->efx->pci_dev->dev,
-						 unmap_addr, buffer->unmap_len,
-						 DMA_TO_DEVICE);
-			else
-				dma_unmap_page(&tx_queue->efx->pci_dev->dev,
-					       unmap_addr, buffer->unmap_len,
-					       DMA_TO_DEVICE);
-			buffer->unmap_len = 0;
-		}
-		buffer->len = 0;
-		buffer->continuation = true;
+		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
 	}
 }
 
@@ -919,17 +831,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 /* Parse the SKB header and initialise state. */
 static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 {
-	/* All ethernet/IP/TCP headers combined size is TCP header size
-	 * plus offset of TCP header relative to start of packet.
-	 */
-	st->header_len = ((tcp_hdr(skb)->doff << 2u)
-			  + PTR_DIFF(tcp_hdr(skb), skb->data));
-	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
-
-	if (st->protocol == htons(ETH_P_IP))
+	st->ip_off = skb_network_header(skb) - skb->data;
+	st->tcp_off = skb_transport_header(skb) - skb->data;
+	st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
+	if (st->protocol == htons(ETH_P_IP)) {
+		st->ip_base_len = st->header_len - st->ip_off;
 		st->ipv4_id = ntohs(ip_hdr(skb)->id);
-	else
+	} else {
+		st->ip_base_len = st->header_len - st->tcp_off;
 		st->ipv4_id = 0;
+	}
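+	/* ip_base_len is added to the TCP payload size of each segment
+	 * in tso_start_new_packet() to form the IPv4 tot_len or IPv6
+	 * payload_len for that segment.
+	 */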
 	st->seqnum = ntohl(tcp_hdr(skb)->seq);
 
 	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
@@ -938,7 +849,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 
 	st->out_len = skb->len - st->header_len;
 	st->unmap_len = 0;
-	st->unmap_single = false;
+	st->dma_flags = 0;
 }
 
 static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
@@ -947,7 +858,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
 	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
 					  skb_frag_size(frag), DMA_TO_DEVICE);
 	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-		st->unmap_single = false;
+		st->dma_flags = 0;
 		st->unmap_len = skb_frag_size(frag);
 		st->in_len = skb_frag_size(frag);
 		st->dma_addr = st->unmap_addr;
@@ -965,7 +876,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
 	st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl,
 					len, DMA_TO_DEVICE);
 	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-		st->unmap_single = true;
+		st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
 		st->unmap_len = len;
 		st->in_len = len;
 		st->dma_addr = st->unmap_addr;
@@ -982,20 +893,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
  * @st:			TSO state
  *
  * Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
- * space in @tx_queue.
+ * of fragment or end-of-packet.
  */
-static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
-					 const struct sk_buff *skb,
-					 struct tso_state *st)
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+					  const struct sk_buff *skb,
+					  struct tso_state *st)
 {
 	struct efx_tx_buffer *buffer;
-	int n, end_of_packet, rc;
+	int n;
 
 	if (st->in_len == 0)
-		return 0;
+		return;
 	if (st->packet_space == 0)
-		return 0;
+		return;
 
 	EFX_BUG_ON_PARANOID(st->in_len <= 0);
 	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
@@ -1006,25 +916,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
 	st->out_len -= n;
 	st->in_len -= n;
 
-	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
-	if (likely(rc == 0)) {
-		if (st->out_len == 0)
-			/* Transfer ownership of the skb */
-			buffer->skb = skb;
+	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
 
-		end_of_packet = st->out_len == 0 || st->packet_space == 0;
-		buffer->continuation = !end_of_packet;
+	if (st->out_len == 0) {
+		/* Transfer ownership of the skb */
+		buffer->skb = skb;
+		buffer->flags = EFX_TX_BUF_SKB;
+	} else if (st->packet_space != 0) {
+		buffer->flags = EFX_TX_BUF_CONT;
+	}
 
-		if (st->in_len == 0) {
-			/* Transfer ownership of the DMA mapping */
-			buffer->unmap_len = st->unmap_len;
-			buffer->unmap_single = st->unmap_single;
-			st->unmap_len = 0;
-		}
+	if (st->in_len == 0) {
+		/* Transfer ownership of the DMA mapping */
+		buffer->unmap_len = st->unmap_len;
+		buffer->flags |= st->dma_flags;
+		st->unmap_len = 0;
 	}
 
 	st->dma_addr += n;
-	return rc;
 }
 
 
@@ -1035,36 +944,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
  * @st:			TSO state
  *
  * Generate a new header and prepare for the new packet.  Return 0 on
- * success, or -1 if failed to alloc header.
+ * success, or -%ENOMEM if we failed to allocate the header.
  */
 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 				const struct sk_buff *skb,
 				struct tso_state *st)
 {
-	struct efx_tso_header *tsoh;
+	struct efx_tx_buffer *buffer =
+		&tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
 	struct tcphdr *tsoh_th;
 	unsigned ip_length;
 	u8 *header;
+	int rc;
 
-	/* Allocate a DMA-mapped header buffer. */
-	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
-		if (tx_queue->tso_headers_free == NULL) {
-			if (efx_tsoh_block_alloc(tx_queue))
-				return -1;
-		}
-		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
-		tsoh = tx_queue->tso_headers_free;
-		tx_queue->tso_headers_free = tsoh->next;
-		tsoh->unmap_len = 0;
-	} else {
-		tx_queue->tso_long_headers++;
-		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
-		if (unlikely(!tsoh))
-			return -1;
-	}
+	/* Allocate and insert a DMA-mapped header buffer. */
+	header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
+	if (!header)
+		return -ENOMEM;
 
-	header = TSOH_BUFFER(tsoh);
-	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
+	tsoh_th = (struct tcphdr *)(header + st->tcp_off);
 
 	/* Copy and update the headers. */
 	memcpy(header, skb->data, st->header_len);
@@ -1073,19 +971,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 	st->seqnum += skb_shinfo(skb)->gso_size;
 	if (st->out_len > skb_shinfo(skb)->gso_size) {
 		/* This packet will not finish the TSO burst. */
-		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
+		st->packet_space = skb_shinfo(skb)->gso_size;
 		tsoh_th->fin = 0;
 		tsoh_th->psh = 0;
 	} else {
 		/* This packet will be the last in the TSO burst. */
-		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
+		st->packet_space = st->out_len;
 		tsoh_th->fin = tcp_hdr(skb)->fin;
 		tsoh_th->psh = tcp_hdr(skb)->psh;
 	}
+	ip_length = st->ip_base_len + st->packet_space;
 
 	if (st->protocol == htons(ETH_P_IP)) {
-		struct iphdr *tsoh_iph =
-			(struct iphdr *)(header + SKB_IPV4_OFF(skb));
+		struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off);
 
 		tsoh_iph->tot_len = htons(ip_length);
 
@@ -1094,16 +992,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 		st->ipv4_id++;
 	} else {
 		struct ipv6hdr *tsoh_iph =
-			(struct ipv6hdr *)(header + SKB_IPV6_OFF(skb));
+			(struct ipv6hdr *)(header + st->ip_off);
 
-		tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
+		tsoh_iph->payload_len = htons(ip_length);
 	}
 
-	st->packet_space = skb_shinfo(skb)->gso_size;
-	++tx_queue->tso_packets;
+	rc = efx_tso_put_header(tx_queue, buffer, header);
+	if (unlikely(rc))
+		return rc;
 
-	/* Form a descriptor for this header. */
-	efx_tso_put_header(tx_queue, tsoh, st->header_len);
+	++tx_queue->tso_packets;
 
 	return 0;
 }
@@ -1118,13 +1016,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
  *
  * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
  * @skb was not enqueued.  In all cases @skb is consumed.  Return
- * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
+ * %NETDEV_TX_OK.
  */
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			       struct sk_buff *skb)
 {
 	struct efx_nic *efx = tx_queue->efx;
-	int frag_i, rc, rc2 = NETDEV_TX_OK;
+	int frag_i, rc;
 	struct tso_state state;
 
 	/* Find the packet protocol and sanity-check it */
@@ -1156,11 +1054,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 		goto mem_err;
 
 	while (1) {
-		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
-		if (unlikely(rc)) {
-			rc2 = NETDEV_TX_BUSY;
-			goto unwind;
-		}
+		tso_fill_packet_with_fragment(tx_queue, skb, &state);
 
 		/* Move onto the next fragment? */
 		if (state.in_len == 0) {
@@ -1184,6 +1078,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	tx_queue->tso_bursts++;
 	return NETDEV_TX_OK;
 
@@ -1192,10 +1088,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 		  "Out of memory for TSO headers, or DMA mapping error\n");
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Free the DMA mapping we were in the process of writing out */
 	if (state.unmap_len) {
-		if (state.unmap_single)
+		if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
 			dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
 					 state.unmap_len, DMA_TO_DEVICE);
 		else
@@ -1204,25 +1099,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	}
 
 	efx_enqueue_unwind(tx_queue);
-	return rc2;
-}
-
-
-/*
- * Free up all TSO datastructures associated with tx_queue. This
- * routine should be called only once the tx_queue is both empty and
- * will no longer be used.
- */
-static void efx_fini_tso(struct efx_tx_queue *tx_queue)
-{
-	unsigned i;
-
-	if (tx_queue->buffer) {
-		for (i = 0; i <= tx_queue->ptr_mask; ++i)
-			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
-	}
-
-	while (tx_queue->tso_headers_free != NULL)
-		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
-				    &tx_queue->efx->pci_dev->dev);
+	return NETDEV_TX_OK;
 }
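
The sfc hunks above delete the NETDEV_TX_BUSY/unwind return path: on a header-allocation or DMA-mapping failure, efx_enqueue_skb_tso() now frees the skb itself and still returns NETDEV_TX_OK, matching the updated kernel-doc ("In all cases @skb is consumed"), while flow control moves to the separate efx_tx_maybe_stop_queue() call. A minimal sketch of that contract, with hypothetical my_* names standing in for the driver internals:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct my_txq {
	unsigned int insert_count;	/* illustrative queue state */
};

static int my_map_and_post(struct my_txq *txq, struct sk_buff *skb)
{
	return 0;	/* pretend DMA mapping and descriptor writes succeed */
}

static void my_unwind(struct my_txq *txq)
{
	/* pop any descriptors already written for this skb */
}

static netdev_tx_t my_xmit(struct my_txq *txq, struct sk_buff *skb)
{
	if (unlikely(my_map_and_post(txq, skb) < 0)) {
		/* Consume the skb on error: returning NETDEV_TX_BUSY
		 * would make the core requeue a packet whose fragments
		 * may already be partially mapped. */
		dev_kfree_skb_any(skb);
		my_unwind(txq);
	}
	return NETDEV_TX_OK;	/* the skb is consumed in all cases */
}
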
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ade108232048..0376a5e6b2bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -177,7 +177,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 	new_bus->write = &stmmac_mdio_write;
 	new_bus->reset = &stmmac_mdio_reset;
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		new_bus->name, mdio_bus_data->bus_id);
+		new_bus->name, priv->plat->bus_id);
 	new_bus->priv = ndev;
 	new_bus->irq = irqlist;
 	new_bus->phy_mask = mdio_bus_data->phy_mask;
@@ -213,12 +213,10 @@ int stmmac_mdio_register(struct net_device *ndev)
 			 * and no PHY number was provided to the MAC,
 			 * use the one probed here.
 			 */
-			if ((priv->plat->bus_id == mdio_bus_data->bus_id) &&
-			    (priv->plat->phy_addr == -1))
+			if (priv->plat->phy_addr == -1)
 				priv->plat->phy_addr = addr;
 
-			act = (priv->plat->bus_id == mdio_bus_data->bus_id) &&
-				(priv->plat->phy_addr == addr);
+			act = (priv->plat->phy_addr == addr);
 			switch (phydev->irq) {
 			case PHY_POLL:
 				irq_str = "POLL";
@@ -258,6 +256,9 @@ int stmmac_mdio_unregister(struct net_device *ndev)
 {
 	struct stmmac_priv *priv = netdev_priv(ndev);
 
+	if (!priv->mii)
+		return 0;
+
 	mdiobus_unregister(priv->mii);
 	priv->mii->priv = NULL;
 	mdiobus_free(priv->mii);
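
stmmac_mdio_register() can be skipped altogether (for instance when no MDIO bus data is supplied), so priv->mii may legitimately still be NULL at teardown; the added guard turns unregistration into a safe no-op in that case. A hedged sketch of the same pattern, with my_priv purely illustrative:

#include <linux/phy.h>

struct my_priv {
	struct mii_bus *mii;	/* NULL if the bus was never registered */
};

static int my_mdio_unregister(struct my_priv *priv)
{
	if (!priv->mii)
		return 0;	/* nothing to undo */

	mdiobus_unregister(priv->mii);
	priv->mii->priv = NULL;
	mdiobus_free(priv->mii);
	priv->mii = NULL;	/* guard against double teardown */
	return 0;
}
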
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 13afb8edfadc..1f069b0f6af5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -40,7 +40,6 @@ static void stmmac_default_data(void)
 	plat_dat.has_gmac = 1;
 	plat_dat.force_sf_dma_mode = 1;
 
-	mdio_data.bus_id = 1;
 	mdio_data.phy_reset = NULL;
 	mdio_data.phy_mask = 0;
 	plat_dat.mdio_bus_data = &mdio_data;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index b93245c11995..ed112b55ae7f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -78,6 +78,7 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 {
 	int ret = 0;
 	struct resource *res;
+	struct device *dev = &pdev->dev;
 	void __iomem *addr = NULL;
 	struct stmmac_priv *priv = NULL;
 	struct plat_stmmacenet_data *plat_dat = NULL;
@@ -87,18 +88,10 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 	if (!res)
 		return -ENODEV;
 
-	if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
-		pr_err("%s: ERROR: memory allocation failed"
-		       "cannot get the I/O addr 0x%x\n",
-		       __func__, (unsigned int)res->start);
-		return -EBUSY;
-	}
-
-	addr = ioremap(res->start, resource_size(res));
+	addr = devm_request_and_ioremap(dev, res);
 	if (!addr) {
 		pr_err("%s: ERROR: memory mapping failed", __func__);
-		ret = -ENOMEM;
-		goto out_release_region;
+		return -ENOMEM;
 	}
 
 	if (pdev->dev.of_node) {
@@ -107,14 +100,13 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 					GFP_KERNEL);
 		if (!plat_dat) {
 			pr_err("%s: ERROR: no memory", __func__);
-			ret = -ENOMEM;
-			goto out_unmap;
+			return -ENOMEM;
 		}
 
 		ret = stmmac_probe_config_dt(pdev, plat_dat, &mac);
 		if (ret) {
 			pr_err("%s: main dt probe failed", __func__);
-			goto out_unmap;
+			return ret;
 		}
 	} else {
 		plat_dat = pdev->dev.platform_data;
@@ -124,13 +116,13 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 	if (plat_dat->init) {
 		ret = plat_dat->init(pdev);
 		if (unlikely(ret))
-			goto out_unmap;
+			return ret;
 	}
 
 	priv = stmmac_dvr_probe(&(pdev->dev), plat_dat, addr);
 	if (!priv) {
 		pr_err("%s: main driver probe failed", __func__);
-		goto out_unmap;
+		return -ENODEV;
 	}
 
 	/* Get MAC address if available (DT) */
@@ -142,8 +134,7 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 	if (priv->dev->irq == -ENXIO) {
 		pr_err("%s: ERROR: MAC IRQ configuration "
 		       "information not found\n", __func__);
-		ret = -ENXIO;
-		goto out_unmap;
+		return -ENXIO;
 	}
 
 	/*
@@ -165,15 +156,6 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev)
 	pr_debug("STMMAC platform driver registration completed");
 
 	return 0;
-
-out_unmap:
-	iounmap(addr);
-	platform_set_drvdata(pdev, NULL);
-
-out_release_region:
-	release_mem_region(res->start, resource_size(res));
-
-	return ret;
 }
 
 /**
@@ -186,7 +168,6 @@ static int stmmac_pltfr_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
-	struct resource *res;
 	int ret = stmmac_dvr_remove(ndev);
 
 	if (priv->plat->exit)
@@ -194,10 +175,6 @@ static int stmmac_pltfr_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	iounmap((void __force __iomem *)priv->ioaddr);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
-
 	return ret;
 }
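
The stmmac_pltfr_probe() conversion above is the standard managed-resource pattern: devm_request_and_ioremap() both claims the memory region and maps it, and the devres layer releases them on probe failure or unbind, which is why every out_unmap/out_release_region label disappears and remove() loses its iounmap()/release_mem_region() calls. A minimal sketch of the resulting probe shape (devm_request_and_ioremap() is the helper of this kernel generation; later kernels supersede it with devm_ioremap_resource()):

#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/errno.h>

static int my_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/* Claims the region and maps it in one call; both are undone
	 * automatically, so error paths simply return. */
	base = devm_request_and_ioremap(&pdev->dev, res);
	if (!base)
		return -ENOMEM;

	/* ... device setup using base ... */
	return 0;
}
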
 
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 967fe8cb476e..c9c977bf02ac 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -212,7 +212,6 @@ static void bigmac_clean_rings(struct bigmac *bp)
 static void bigmac_init_rings(struct bigmac *bp, int from_irq)
 {
 	struct bmac_init_block *bb = bp->bmac_block;
-	struct net_device *dev = bp->dev;
 	int i;
 	gfp_t gfp_flags = GFP_KERNEL;
 
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 1b173a6145d6..b26cbda5efa9 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -32,7 +32,7 @@ config TI_DAVINCI_EMAC
 
 config TI_DAVINCI_MDIO
 	tristate "TI DaVinci MDIO Support"
-	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 )
+	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX )
 	select PHYLIB
 	---help---
 	  This driver supports TI's DaVinci MDIO module.
@@ -42,7 +42,7 @@ config TI_DAVINCI_MDIO
 
 config TI_DAVINCI_CPDMA
 	tristate "TI DaVinci CPDMA Support"
-	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 )
+	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX )
 	---help---
 	  This driver supports TI's DaVinci CPDMA dma engine.
 
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1e5d85b06e71..df55e2403746 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -28,6 +28,9 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_device.h>
 
 #include <linux/platform_data/cpsw.h>
 
@@ -383,6 +386,11 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
 			mac_control |= BIT(7);	/* GIGABITEN	*/
 		if (phy->duplex)
 			mac_control |= BIT(0);	/* FULLDUPLEXEN	*/
+
+		/* set the speed_in input when RMII mode is used at 100Mbps */
+		if (phy->speed == 100)
+			mac_control |= BIT(15);
+
 		*link = true;
 	} else {
 		mac_control = 0;
@@ -709,6 +717,158 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv)
 	slave->sliver	= regs + data->sliver_reg_ofs;
 }
 
+static int cpsw_probe_dt(struct cpsw_platform_data *data,
+			 struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *slave_node;
+	int i = 0, ret;
+	u32 prop;
+
+	if (!node)
+		return -EINVAL;
+
+	if (of_property_read_u32(node, "slaves", &prop)) {
+		pr_err("Missing slaves property in the DT.\n");
+		return -EINVAL;
+	}
+	data->slaves = prop;
+
+	data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) *
+				   data->slaves, GFP_KERNEL);
+	if (!data->slave_data) {
+		pr_err("Could not allocate slave memory.\n");
+		return -ENOMEM;
+	}
+
+	data->no_bd_ram = of_property_read_bool(node, "no_bd_ram");
+
+	if (of_property_read_u32(node, "cpdma_channels", &prop)) {
+		pr_err("Missing cpdma_channels property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->channels = prop;
+
+	if (of_property_read_u32(node, "host_port_no", &prop)) {
+		pr_err("Missing host_port_no property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->host_port_num = prop;
+
+	if (of_property_read_u32(node, "cpdma_reg_ofs", &prop)) {
+		pr_err("Missing cpdma_reg_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpdma_reg_ofs = prop;
+
+	if (of_property_read_u32(node, "cpdma_sram_ofs", &prop)) {
+		pr_err("Missing cpdma_sram_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->cpdma_sram_ofs = prop;
+
+	if (of_property_read_u32(node, "ale_reg_ofs", &prop)) {
+		pr_err("Missing ale_reg_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->ale_reg_ofs = prop;
+
+	if (of_property_read_u32(node, "ale_entries", &prop)) {
+		pr_err("Missing ale_entries property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->ale_entries = prop;
+
+	if (of_property_read_u32(node, "host_port_reg_ofs", &prop)) {
+		pr_err("Missing host_port_reg_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->host_port_reg_ofs = prop;
+
+	if (of_property_read_u32(node, "hw_stats_reg_ofs", &prop)) {
+		pr_err("Missing hw_stats_reg_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->hw_stats_reg_ofs = prop;
+
+	if (of_property_read_u32(node, "bd_ram_ofs", &prop)) {
+		pr_err("Missing bd_ram_ofs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->bd_ram_ofs = prop;
+
+	if (of_property_read_u32(node, "bd_ram_size", &prop)) {
+		pr_err("Missing bd_ram_size property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->bd_ram_size = prop;
+
+	if (of_property_read_u32(node, "rx_descs", &prop)) {
+		pr_err("Missing rx_descs property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->rx_descs = prop;
+
+	if (of_property_read_u32(node, "mac_control", &prop)) {
+		pr_err("Missing mac_control property in the DT.\n");
+		ret = -EINVAL;
+		goto error_ret;
+	}
+	data->mac_control = prop;
+
+	for_each_child_of_node(node, slave_node) {
+		struct cpsw_slave_data *slave_data = data->slave_data + i;
+		const char *phy_id = NULL;
+		const void *mac_addr = NULL;
+
+		if (of_property_read_string(slave_node, "phy_id", &phy_id)) {
+			pr_err("Missing slave[%d] phy_id property\n", i);
+			ret = -EINVAL;
+			goto error_ret;
+		}
+		slave_data->phy_id = phy_id;
+
+		if (of_property_read_u32(slave_node, "slave_reg_ofs", &prop)) {
+			pr_err("Missing slave[%d] slave_reg_ofs property\n", i);
+			ret = -EINVAL;
+			goto error_ret;
+		}
+		slave_data->slave_reg_ofs = prop;
+
+		if (of_property_read_u32(slave_node, "sliver_reg_ofs",
+					 &prop)) {
+			pr_err("Missing slave[%d] sliver_reg_ofs property\n",
+				i);
+			ret = -EINVAL;
+			goto error_ret;
+		}
+		slave_data->sliver_reg_ofs = prop;
+
+		mac_addr = of_get_mac_address(slave_node);
+		if (mac_addr)
+			memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
+
+		i++;
+	}
+
+	return 0;
+
+error_ret:
+	kfree(data->slave_data);
+	return ret;
+}
+
 static int __devinit cpsw_probe(struct platform_device *pdev)
 {
 	struct cpsw_platform_data	*data = pdev->dev.platform_data;
@@ -720,11 +880,6 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 	struct resource			*res;
 	int ret = 0, i, k = 0;
 
-	if (!data) {
-		pr_err("platform data missing\n");
-		return -ENODEV;
-	}
-
 	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
 	if (!ndev) {
 		pr_err("error allocating net_device\n");
@@ -734,13 +889,19 @@ static int __devinit cpsw_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ndev);
 	priv = netdev_priv(ndev);
 	spin_lock_init(&priv->lock);
-	priv->data = *data;
 	priv->pdev = pdev;
 	priv->ndev = ndev;
 	priv->dev  = &ndev->dev;
 	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
 	priv->rx_packet_max = max(rx_packet_max, 128);
 
+	if (cpsw_probe_dt(&priv->data, pdev)) {
+		pr_err("cpsw: platform data missing\n");
+		ret = -ENODEV;
+		goto clean_ndev_ret;
+	}
+	data = &priv->data;
+
 	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
 		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
 		pr_info("Detected MACID = %pM", priv->mac_addr);
@@ -996,11 +1157,17 @@ static const struct dev_pm_ops cpsw_pm_ops = {
 	.resume		= cpsw_resume,
 };
 
+static const struct of_device_id cpsw_of_mtable[] = {
+	{ .compatible = "ti,cpsw", },
+	{ /* sentinel */ },
+};
+
 static struct platform_driver cpsw_driver = {
 	.driver = {
 		.name	 = "cpsw",
 		.owner	 = THIS_MODULE,
 		.pm	 = &cpsw_pm_ops,
+		.of_match_table = of_match_ptr(cpsw_of_mtable),
 	},
 	.probe = cpsw_probe,
 	.remove = __devexit_p(cpsw_remove),
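
cpsw_probe_dt() above repeats the same read-or-fail block for each mandatory u32 property. The of_property_read_u32() convention (0 on success, a negative error when the property is absent or malformed) lends itself to a small helper; a hedged sketch, where read_req_u32() is purely illustrative and not part of the cpsw patch:

#include <linux/of.h>
#include <linux/printk.h>

/* Illustrative helper: fetch a mandatory u32 property or complain. */
static int read_req_u32(struct device_node *np, const char *name, u32 *out)
{
	int ret = of_property_read_u32(np, name, out);

	if (ret)
		pr_err("Missing %s property in the DT.\n", name);
	return ret;
}

Each repeated block then collapses to a one-liner such as: if (read_req_u32(node, "cpdma_channels", &prop)) goto error_ret;
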
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index a9ca4a03d31b..51a96dbee9ac 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -36,6 +36,8 @@
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
 #include <linux/davinci_emac.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 /*
  * This timeout definition is a worst-case ultra defensive measure against
@@ -289,6 +291,25 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id,
 	return 0;
 }
 
+static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
+			 struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	u32 prop;
+
+	if (!node)
+		return -EINVAL;
+
+	if (of_property_read_u32(node, "bus_freq", &prop)) {
+		pr_err("Missing bus_freq property in the DT.\n");
+		return -EINVAL;
+	}
+	data->bus_freq = prop;
+
+	return 0;
+}
+
+
 static int __devinit davinci_mdio_probe(struct platform_device *pdev)
 {
 	struct mdio_platform_data *pdata = pdev->dev.platform_data;
@@ -304,8 +325,6 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	data->pdata = pdata ? (*pdata) : default_pdata;
-
 	data->bus = mdiobus_alloc();
 	if (!data->bus) {
 		dev_err(dev, "failed to alloc mii bus\n");
@@ -313,14 +332,22 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev)
 		goto bail_out;
 	}
 
+	if (dev->of_node) {
+		if (davinci_mdio_probe_dt(&data->pdata, pdev))
+			data->pdata = default_pdata;
+		snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+	} else {
+		data->pdata = pdata ? (*pdata) : default_pdata;
+		snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x",
+			 pdev->name, pdev->id);
+	}
+
 	data->bus->name		= dev_name(dev);
 	data->bus->read		= davinci_mdio_read,
 	data->bus->write	= davinci_mdio_write,
 	data->bus->reset	= davinci_mdio_reset,
 	data->bus->parent	= dev;
 	data->bus->priv		= data;
-	snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		pdev->name, pdev->id);
 
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
@@ -456,11 +483,17 @@ static const struct dev_pm_ops davinci_mdio_pm_ops = {
 	.resume		= davinci_mdio_resume,
 };
 
+static const struct of_device_id davinci_mdio_of_mtable[] = {
+	{ .compatible = "ti,davinci_mdio", },
+	{ /* sentinel */ },
+};
+
 static struct platform_driver davinci_mdio_driver = {
 	.driver = {
 		.name	 = "davinci_mdio",
 		.owner	 = THIS_MODULE,
 		.pm	 = &davinci_mdio_pm_ops,
+		.of_match_table = of_match_ptr(davinci_mdio_of_mtable),
 	},
 	.probe = davinci_mdio_probe,
 	.remove = __devexit_p(davinci_mdio_remove),
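
Both cpsw and davinci_mdio gain the same two pieces of OF wiring: a sentinel-terminated of_device_id table and an .of_match_table entry wrapped in of_match_ptr(), which evaluates to NULL on !CONFIG_OF kernels. A generic sketch of the pattern ("vendor,my-device" is a placeholder; modular drivers normally also export the table with MODULE_DEVICE_TABLE() so userspace can autoload them, a step the hunks above do not take yet):

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static const struct of_device_id my_of_match[] = {
	{ .compatible = "vendor,my-device", },
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, my_of_match);

static struct platform_driver my_driver = {
	.driver = {
		.name		= "my-device",
		.owner		= THIS_MODULE,
		/* NULL here when CONFIG_OF is not set */
		.of_match_table	= of_match_ptr(my_of_match),
	},
};
module_platform_driver(my_driver);
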
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 277c93e9ff4d..8fa947a2d929 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1359,7 +1359,6 @@ static int tsi108_open(struct net_device *dev)
 		}
 
 		data->rxskbs[i] = skb;
-		data->rxskbs[i] = skb;
 		data->rxring[i].buf0 = virt_to_phys(data->rxskbs[i]->data);
 		data->rxring[i].misc = TSI108_RX_OWN | TSI108_RX_INT;
 	}
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index a5826a3111a6..2c08bf6e7bf3 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -637,8 +637,7 @@ static int __devinit w5100_hw_probe(struct platform_device *pdev)
 	if (data && is_valid_ether_addr(data->mac_addr)) {
 		memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN);
 	} else {
-		eth_random_addr(ndev->dev_addr);
-		ndev->addr_assign_type |= NET_ADDR_RANDOM;
+		eth_hw_addr_random(ndev);
 	}
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index bdd8891c215a..88943d90c765 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -557,8 +557,7 @@ static int __devinit w5300_hw_probe(struct platform_device *pdev)
 	if (data && is_valid_ether_addr(data->mac_addr)) {
 		memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN);
 	} else {
-		eth_random_addr(ndev->dev_addr);
-		ndev->addr_assign_type |= NET_ADDR_RANDOM;
+		eth_hw_addr_random(ndev);
 	}
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
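
The identical w5100 and w5300 hunks fold the open-coded pair into eth_hw_addr_random(), which <linux/etherdevice.h> defines to do exactly those two steps: generate a random valid unicast address and mark it NET_ADDR_RANDOM so userspace can tell it was not hardware-provided. A sketch of the fallback idiom the two drivers now share (my_set_mac() is illustrative):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/string.h>

static void my_set_mac(struct net_device *ndev, const u8 *maybe_mac)
{
	if (maybe_mac && is_valid_ether_addr(maybe_mac))
		memcpy(ndev->dev_addr, maybe_mac, ETH_ALEN);
	else
		/* random valid address plus addr_assign_type marking */
		eth_hw_addr_random(ndev);
}
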