summary refs log tree commit diff
diff options
context:
space:
mode:
author: Doug Ledford <dledford@redhat.com> 2016-05-26 12:50:05 -0400
committer: Doug Ledford <dledford@redhat.com> 2016-05-26 12:50:05 -0400
commit: 8779e7658df2496f27660586e3a20a86c75fb526 (patch)
tree: 831a1ebc8e399ee231fe23cf1eecc3fc4a70948a
parent: e6f61130ed7a124138c4f7b1bd35e24e8113cb83 (diff)
parent: f158486527ebfb4c1fe4dcb69b12479090d66b72 (diff)
download: linux-8779e7658df2496f27660586e3a20a86c75fb526.tar.gz
Merge branch 'hfi1-2' into k.o/for-4.7
-rw-r--r--MAINTAINERS13
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig (renamed from drivers/staging/rdma/hfi1/Kconfig)0
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile (renamed from drivers/staging/rdma/hfi1/Makefile)2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c (renamed from drivers/staging/rdma/hfi1/affinity.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h (renamed from drivers/staging/rdma/hfi1/affinity.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h (renamed from drivers/staging/rdma/hfi1/aspm.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c (renamed from drivers/staging/rdma/hfi1/chip.c)41
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h (renamed from drivers/staging/rdma/hfi1/chip.h)6
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h (renamed from drivers/staging/rdma/hfi1/chip_registers.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/common.h (renamed from drivers/staging/rdma/hfi1/common.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c (renamed from drivers/staging/rdma/hfi1/debugfs.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h (renamed from drivers/staging/rdma/hfi1/debugfs.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/device.c (renamed from drivers/staging/rdma/hfi1/device.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/device.h (renamed from drivers/staging/rdma/hfi1/device.h)3
-rw-r--r--drivers/infiniband/hw/hfi1/dma.c (renamed from drivers/staging/rdma/hfi1/dma.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c (renamed from drivers/staging/rdma/hfi1/driver.c)2
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c (renamed from drivers/staging/rdma/hfi1/efivar.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.h (renamed from drivers/staging/rdma/hfi1/efivar.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c102
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h (renamed from drivers/staging/rdma/hfi1/eprom.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c (renamed from drivers/staging/rdma/hfi1/file_ops.c)549
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c (renamed from drivers/staging/rdma/hfi1/firmware.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h (renamed from drivers/staging/rdma/hfi1/hfi.h)7
-rw-r--r--drivers/infiniband/hw/hfi1/init.c (renamed from drivers/staging/rdma/hfi1/init.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c (renamed from drivers/staging/rdma/hfi1/intr.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h (renamed from drivers/staging/rdma/hfi1/iowait.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c (renamed from drivers/staging/rdma/hfi1/mad.c)99
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h (renamed from drivers/staging/rdma/hfi1/mad.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c (renamed from drivers/staging/rdma/hfi1/mmu_rb.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h (renamed from drivers/staging/rdma/hfi1/mmu_rb.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/opa_compat.h (renamed from drivers/staging/rdma/hfi1/opa_compat.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c (renamed from drivers/staging/rdma/hfi1/pcie.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c (renamed from drivers/staging/rdma/hfi1/pio.c)3
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h (renamed from drivers/staging/rdma/hfi1/pio.h)8
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c (renamed from drivers/staging/rdma/hfi1/pio_copy.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c (renamed from drivers/staging/rdma/hfi1/platform.c)27
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c (renamed from drivers/staging/rdma/hfi1/qp.c)9
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h (renamed from drivers/staging/rdma/hfi1/qp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c (renamed from drivers/staging/rdma/hfi1/qsfp.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h (renamed from drivers/staging/rdma/hfi1/qsfp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c (renamed from drivers/staging/rdma/hfi1/rc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c (renamed from drivers/staging/rdma/hfi1/ruc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c (renamed from drivers/staging/rdma/hfi1/sdma.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h (renamed from drivers/staging/rdma/hfi1/sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma_txreq.h (renamed from drivers/staging/rdma/hfi1/sdma_txreq.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c (renamed from drivers/staging/rdma/hfi1/sysfs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c (renamed from drivers/staging/rdma/hfi1/trace.c)8
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h (renamed from drivers/staging/rdma/hfi1/trace.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.c (renamed from drivers/staging/rdma/hfi1/twsi.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.h (renamed from drivers/staging/rdma/hfi1/twsi.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c (renamed from drivers/staging/rdma/hfi1/uc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c (renamed from drivers/staging/rdma/hfi1/ud.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c (renamed from drivers/staging/rdma/hfi1/user_pages.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c (renamed from drivers/staging/rdma/hfi1/user_sdma.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h (renamed from drivers/staging/rdma/hfi1/user_sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c (renamed from drivers/staging/rdma/hfi1/verbs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h (renamed from drivers/staging/rdma/hfi1/verbs.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c (renamed from drivers/staging/rdma/hfi1/verbs_txreq.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h (renamed from drivers/staging/rdma/hfi1/verbs_txreq.h)0
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h1
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c1
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c30
-rw-r--r--drivers/staging/rdma/Kconfig2
-rw-r--r--drivers/staging/rdma/Makefile1
-rw-r--r--drivers/staging/rdma/hfi1/TODO6
-rw-r--r--drivers/staging/rdma/hfi1/diag.c1925
-rw-r--r--drivers/staging/rdma/hfi1/eprom.c471
-rw-r--r--include/rdma/ib_pack.h5
-rw-r--r--include/rdma/rdma_vt.h13
-rw-r--r--include/rdma/rdmavt_qp.h5
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h80
78 files changed, 551 insertions, 2994 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c8025946eaae..98234560cfdc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5086,6 +5086,13 @@ F:	drivers/block/cciss*
 F:	include/linux/cciss_ioctl.h
 F:	include/uapi/linux/cciss_ioctl.h
 
+HFI1 DRIVER
+M:	Mike Marciniszyn <mike.marciniszyn@intel.com>
+M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/hw/hfi1
+
 HFS FILESYSTEM
 L:	linux-fsdevel@vger.kernel.org
 S:	Orphan
@@ -10661,12 +10668,6 @@ M:	Arnaud Patard <arnaud.patard@rtp-net.org>
 S:	Odd Fixes
 F:	drivers/staging/xgifb/
 
-HFI1 DRIVER
-M:	Mike Marciniszyn <infinipath@intel.com>
-L:	linux-rdma@vger.kernel.org
-S:	Supported
-F:	drivers/staging/rdma/hfi1
-
 STARFIRE/DURALAN NETWORK DRIVER
 M:	Ion Badulescu <ionut@badula.org>
 S:	Odd Fixes
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6425c0e5d18a..2137adfbd8c3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig"
 
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 
+source "drivers/infiniband/hw/hfi1/Kconfig"
+
 endif # INFINIBAND
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index c7ad0a4c8b15..c0c7cf8af3f4 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)		+= ocrdma/
 obj-$(CONFIG_INFINIBAND_USNIC)		+= usnic/
+obj-$(CONFIG_INFINIBAND_HFI1)		+= hfi1/
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index a925fb0db706..a925fb0db706 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 8dc59382ee96..9b5382c94b0c 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -7,7 +7,7 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
-hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
 	eprom.o file_ops.o firmware.o \
 	init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
 	qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 6e7050ab9e16..6e7050ab9e16 100644
--- a/drivers/staging/rdma/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 20f52fe74091..20f52fe74091 100644
--- a/drivers/staging/rdma/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..0d58fe3b49b5 100644
--- a/drivers/staging/rdma/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index dcae8e723f98..3b876da745a1 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
 static void dc_start(struct hfi1_devdata *);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
+static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
 
 /*
  * Error interrupt table entry.  This is used as input to the interrupt
@@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 	}
 
 	/* this access is valid only when the link is up */
-	if ((ppd->host_link_state & HLS_UP) == 0) {
+	if (ppd->host_link_state & HLS_DOWN) {
 		dd_dev_info(dd, "%s: link state %s not up\n",
 			    __func__, link_state_name(ppd->host_link_state));
 		ret = -EBUSY;
@@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work)
 	}
 
 	reset_neighbor_info(ppd);
+	if (ppd->mgmt_allowed)
+		remove_full_mgmt_pkey(ppd);
 
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
 }
 
+static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+{
+	ppd->pkeys[2] = 0;
+	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+}
+
 /*
  * Convert the given link width to the OPA link width bitmask.
  */
@@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
 retry:
 	mutex_lock(&ppd->hls_lock);
 	/* only apply if the link is up */
-	if (!(ppd->host_link_state & HLS_UP)) {
+	if (ppd->host_link_state & HLS_DOWN) {
 		/* still going up..wait and retry */
 		if (ppd->host_link_state & HLS_GOING_UP) {
 			if (++tries < 1000) {
@@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
 
 	/* Reset the QSFP */
 	mask = (u64)QSFP_HFI0_RESET_N;
-	qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
-	qsfp_mask |= mask;
-	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
 
 	qsfp_mask = read_csr(dd,
 			     dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
@@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 		dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
 			    __func__);
 
+	/*
+	 * The remaining alarms/warnings don't matter if the link is down.
+	 */
+	if (ppd->host_link_state & HLS_DOWN)
+		return 0;
+
 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
 	    (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
 		dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
@@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work)
 		return;
 
 	/*
-	 * Turn DC back on after cables has been
-	 * re-inserted. Up until now, the DC has been in
-	 * reset to save power.
+	 * Turn DC back on after cable has been re-inserted. Up until
+	 * now, the DC has been in reset to save power.
 	 */
 	dc_start(dd);
 
@@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
 			return ret;
 	}
 
-	/* tune the SERDES to a ballpark setting for
+	get_port_type(ppd);
+	if (ppd->port_type == PORT_TYPE_QSFP) {
+		set_qsfp_int_n(ppd, 0);
+		wait_for_qsfp_init(ppd);
+		set_qsfp_int_n(ppd, 1);
+	}
+
+	/*
+	 * Tune the SerDes to a ballpark setting for
 	 * optimal signal and bit error rate
 	 * Needs to be done before starting the link
 	 */
@@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
  */
 u32 driver_logical_state(struct hfi1_pportdata *ppd)
 {
-	if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
+	if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
 		return IB_PORT_DOWN;
 
 	switch (ppd->host_link_state & HLS_UP) {
@@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
 		   (reason), (ret))
 
 /*
- * Initialize the Avago Thermal sensor.
+ * Initialize the thermal sensor.
  *
  * After initialization, enable polling of thermal sensor through
  * SBus interface. In order for this to work, the SBus Master
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 1948706fff1a..66a327978739 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -398,6 +398,12 @@
 /* Lane ID for general configuration registers */
 #define GENERAL_CONFIG 4
 
+/* LINK_TUNING_PARAMETERS fields */
+#define TUNING_METHOD_SHIFT 24
+
+/* LINK_OPTIMIZATION_SETTINGS fields */
+#define ENABLE_EXT_DEV_CONFIG_SHIFT 24
+
 /* LOAD_DATA 8051 command shifts and fields */
 #define LOAD_DATA_FIELD_ID_SHIFT 40
 #define LOAD_DATA_FIELD_ID_MASK 0xfull
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 8744de6667c2..8744de6667c2 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index e9b6bb322025..fcc9c217a97a 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -178,7 +178,8 @@
 		     HFI1_CAP_PKEY_CHECK |		\
 		     HFI1_CAP_NO_INTEGRITY)
 
-#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR)
+#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
+			     HFI1_USER_SWMINOR)
 
 #ifndef HFI1_KERN_TYPE
 #define HFI1_KERN_TYPE 0
@@ -349,6 +350,8 @@ struct hfi1_message_header {
 #define HFI1_BECN_MASK 1
 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
 
+#define HFI1_PSM_IOC_BASE_SEQ 0x0
+
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {
 	return __le64_to_cpu(*((__le64 *)rbuf));
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9cc288..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index c05c39da83b1..bf64b5a7bfd7 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -60,7 +60,8 @@ static dev_t hfi1_dev;
 int hfi1_cdev_init(int minor, const char *name,
 		   const struct file_operations *fops,
 		   struct cdev *cdev, struct device **devp,
-		   bool user_accessible)
+		   bool user_accessible,
+		   struct kobject *parent)
 {
 	const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor);
 	struct device *device = NULL;
@@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name,
 
 	cdev_init(cdev, fops);
 	cdev->owner = THIS_MODULE;
+	cdev->kobj.parent = parent;
 	kobject_set_name(&cdev->kobj, name);
 
 	ret = cdev_add(cdev, dev, 1);
@@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name,
 	else
 		device = device_create(class, NULL, dev, NULL, "%s", name);
 
-	if (!IS_ERR(device))
-		goto done;
-	ret = PTR_ERR(device);
-	device = NULL;
-	pr_err("Could not create device for minor %d, %s (err %d)\n",
-	       minor, name, -ret);
-	cdev_del(cdev);
+	if (IS_ERR(device)) {
+		ret = PTR_ERR(device);
+		device = NULL;
+		pr_err("Could not create device for minor %d, %s (err %d)\n",
+			minor, name, -ret);
+		cdev_del(cdev);
+	}
 done:
 	*devp = device;
 	return ret;
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index 5bb3e83cf2da..c3ec19cb0ac9 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -50,7 +50,8 @@
 int hfi1_cdev_init(int minor, const char *name,
 		   const struct file_operations *fops,
 		   struct cdev *cdev, struct device **devp,
-		   bool user_accessible);
+		   bool user_accessible,
+		   struct kobject *parent);
 void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp);
 const char *class_name(void);
 int __init dev_init(void);
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
index 7e8dab892848..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/infiniband/hw/hfi1/dma.c
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 700c6fa3a633..c75b0ae688f8 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
 	ppd->lmc = lmc;
 	hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
 
-	dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid);
+	dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
 
 	return 0;
 }
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
new file mode 100644
index 000000000000..36b77943cbfd
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/delay.h>
+#include "hfi.h"
+#include "common.h"
+#include "eprom.h"
+
+#define CMD_SHIFT 24
+#define CMD_RELEASE_POWERDOWN_NOID  ((0xabull << CMD_SHIFT))
+
+/* controller interface speeds */
+#define EP_SPEED_FULL 0x2	/* full speed */
+
+/*
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
+ */
+#define EPROM_TIMEOUT 80000 /* ms */
+/*
+ * Initialize the EPROM handler.
+ */
+int eprom_init(struct hfi1_devdata *dd)
+{
+	int ret = 0;
+
+	/* only the discrete chip has an EPROM */
+	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
+		return 0;
+
+	/*
+	 * It is OK if both HFIs reset the EPROM as long as they don't
+	 * do it at the same time.
+	 */
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd,
+			   "%s: unable to acquire EPROM resource, no EPROM support\n",
+			   __func__);
+		goto done_asic;
+	}
+
+	/* reset EPROM to be sure it is in a good state */
+
+	/* set reset */
+	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+	/* clear reset, set speed */
+	write_csr(dd, ASIC_EEP_CTL_STAT,
+		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+
+	/* wake the device with command "release powerdown NoID" */
+	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
+
+	dd->eprom_available = true;
+	release_chip_resource(dd, CR_EPROM);
+done_asic:
+	return ret;
+}
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index c1c5bf82addb..7a5b0e676cc7 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -72,8 +72,6 @@
  */
 static int hfi1_file_open(struct inode *, struct file *);
 static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_file_write(struct file *, const char __user *,
-			       size_t, loff_t *);
 static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
 static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
 static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
@@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32);
 static int get_base_info(struct file *, void __user *, __u32);
 static int setup_ctxt(struct file *);
 static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *,
-			    int, unsigned);
+static int get_user_context(struct file *, struct hfi1_user_info *, int);
 static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
 static int allocate_ctxt(struct file *, struct hfi1_devdata *,
 			 struct hfi1_user_info *);
@@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
 static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
+			    unsigned long arg);
 
 static const struct file_operations hfi1_file_ops = {
 	.owner = THIS_MODULE,
-	.write = hfi1_file_write,
 	.write_iter = hfi1_write_iter,
 	.open = hfi1_file_open,
 	.release = hfi1_file_close,
+	.unlocked_ioctl = hfi1_file_ioctl,
 	.poll = hfi1_poll,
 	.mmap = hfi1_file_mmap,
 	.llseek = noop_llseek,
@@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token)
 
 static int hfi1_file_open(struct inode *inode, struct file *fp)
 {
+	struct hfi1_devdata *dd = container_of(inode->i_cdev,
+					       struct hfi1_devdata,
+					       user_cdev);
+
+	/* Just take a ref now. Not all opens result in a context assign */
+	kobject_get(&dd->kobj);
+
 	/* The real work is performed later in assign_ctxt() */
 	fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
 	if (fp->private_data) /* no cpu affinity by default */
@@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 	return fp->private_data ? 0 : -ENOMEM;
 }
 
-static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
-			       size_t count, loff_t *offset)
+static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
+			    unsigned long arg)
 {
-	const struct hfi1_cmd __user *ucmd;
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_cmd cmd;
 	struct hfi1_user_info uinfo;
 	struct hfi1_tid_info tinfo;
+	int ret = 0;
 	unsigned long addr;
-	ssize_t consumed = 0, copy = 0, ret = 0;
-	void *dest = NULL;
-	__u64 user_val = 0;
-	int uctxt_required = 1;
-	int must_be_root = 0;
-
-	/* FIXME: This interface cannot continue out of staging */
-	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
-		return -EACCES;
-
-	if (count < sizeof(cmd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ucmd = (const struct hfi1_cmd __user *)data;
-	if (copy_from_user(&cmd, ucmd, sizeof(cmd))) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	consumed = sizeof(cmd);
-
-	switch (cmd.type) {
-	case HFI1_CMD_ASSIGN_CTXT:
-		uctxt_required = 0;	/* assigned user context not required */
-		copy = sizeof(uinfo);
-		dest = &uinfo;
-		break;
-	case HFI1_CMD_SDMA_STATUS_UPD:
-	case HFI1_CMD_CREDIT_UPD:
-		copy = 0;
-		break;
-	case HFI1_CMD_TID_UPDATE:
-	case HFI1_CMD_TID_FREE:
-	case HFI1_CMD_TID_INVAL_READ:
-		copy = sizeof(tinfo);
-		dest = &tinfo;
-		break;
-	case HFI1_CMD_USER_INFO:
-	case HFI1_CMD_RECV_CTRL:
-	case HFI1_CMD_POLL_TYPE:
-	case HFI1_CMD_ACK_EVENT:
-	case HFI1_CMD_CTXT_INFO:
-	case HFI1_CMD_SET_PKEY:
-	case HFI1_CMD_CTXT_RESET:
-		copy = 0;
-		user_val = cmd.addr;
-		break;
-	case HFI1_CMD_EP_INFO:
-	case HFI1_CMD_EP_ERASE_CHIP:
-	case HFI1_CMD_EP_ERASE_RANGE:
-	case HFI1_CMD_EP_READ_RANGE:
-	case HFI1_CMD_EP_WRITE_RANGE:
-		uctxt_required = 0;	/* assigned user context not required */
-		must_be_root = 1;	/* validate user */
-		copy = 0;
-		break;
-	default:
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* If the command comes with user data, copy it. */
-	if (copy) {
-		if (copy_from_user(dest, (void __user *)cmd.addr, copy)) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		consumed += copy;
-	}
-
-	/*
-	 * Make sure there is a uctxt when needed.
-	 */
-	if (uctxt_required && !uctxt) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	int uval = 0;
+	unsigned long ul_uval = 0;
+	u16 uval16 = 0;
+
+	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
+	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
+	    cmd != HFI1_IOCTL_GET_VERS &&
+	    !uctxt)
+		return -EINVAL;
 
-	/* only root can do these operations */
-	if (must_be_root && !capable(CAP_SYS_ADMIN)) {
-		ret = -EPERM;
-		goto bail;
-	}
+	switch (cmd) {
+	case HFI1_IOCTL_ASSIGN_CTXT:
+		if (copy_from_user(&uinfo,
+				   (struct hfi1_user_info __user *)arg,
+				   sizeof(uinfo)))
+			return -EFAULT;
 
-	switch (cmd.type) {
-	case HFI1_CMD_ASSIGN_CTXT:
 		ret = assign_ctxt(fp, &uinfo);
 		if (ret < 0)
-			goto bail;
-		ret = setup_ctxt(fp);
+			return ret;
+		ret = setup_ctxt(fp);
 		if (ret)
-			goto bail;
+			return ret;
 		ret = user_init(fp);
 		break;
-	case HFI1_CMD_CTXT_INFO:
-		ret = get_ctxt_info(fp, (void __user *)(unsigned long)
-				    user_val, cmd.len);
-		break;
-	case HFI1_CMD_USER_INFO:
-		ret = get_base_info(fp, (void __user *)(unsigned long)
-				    user_val, cmd.len);
+	case HFI1_IOCTL_CTXT_INFO:
+		ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
+				    sizeof(struct hfi1_ctxt_info));
 		break;
-	case HFI1_CMD_SDMA_STATUS_UPD:
+	case HFI1_IOCTL_USER_INFO:
+		ret = get_base_info(fp, (void __user *)(unsigned long)arg,
+				    sizeof(struct hfi1_base_info));
 		break;
-	case HFI1_CMD_CREDIT_UPD:
+	case HFI1_IOCTL_CREDIT_UPD:
 		if (uctxt && uctxt->sc)
 			sc_return_credits(uctxt->sc);
 		break;
-	case HFI1_CMD_TID_UPDATE:
+
+	case HFI1_IOCTL_TID_UPDATE:
+		if (copy_from_user(&tinfo,
+				   (struct hfi1_tid_info __user *)arg,
+				   sizeof(tinfo)))
+			return -EFAULT;
+
 		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
 		if (!ret) {
 			/*
@@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			 * These fields are adjacent in the structure so
 			 * we can copy them at the same time.
 			 */
-			addr = (unsigned long)cmd.addr +
-				offsetof(struct hfi1_tid_info, tidcnt);
+			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
 			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
 					 sizeof(tinfo.tidcnt) +
 					 sizeof(tinfo.length)))
 				ret = -EFAULT;
 		}
 		break;
-	case HFI1_CMD_TID_INVAL_READ:
-		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+
+	case HFI1_IOCTL_TID_FREE:
+		if (copy_from_user(&tinfo,
+				   (struct hfi1_tid_info __user *)arg,
+				   sizeof(tinfo)))
+			return -EFAULT;
+
+		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
 		if (ret)
 			break;
-		addr = (unsigned long)cmd.addr +
-			offsetof(struct hfi1_tid_info, tidcnt);
+		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
 		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
 				 sizeof(tinfo.tidcnt)))
 			ret = -EFAULT;
 		break;
-	case HFI1_CMD_TID_FREE:
-		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+
+	case HFI1_IOCTL_TID_INVAL_READ:
+		if (copy_from_user(&tinfo,
+				   (struct hfi1_tid_info __user *)arg,
+				   sizeof(tinfo)))
+			return -EFAULT;
+
+		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
 		if (ret)
 			break;
-		addr = (unsigned long)cmd.addr +
-			offsetof(struct hfi1_tid_info, tidcnt);
+		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
 		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
 				 sizeof(tinfo.tidcnt)))
 			ret = -EFAULT;
 		break;
-	case HFI1_CMD_RECV_CTRL:
-		ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
+
+	case HFI1_IOCTL_RECV_CTRL:
+		ret = get_user(uval, (int __user *)arg);
+		if (ret != 0)
+			return -EFAULT;
+		ret = manage_rcvq(uctxt, fd->subctxt, uval);
 		break;
-	case HFI1_CMD_POLL_TYPE:
-		uctxt->poll_type = (typeof(uctxt->poll_type))user_val;
+
+	case HFI1_IOCTL_POLL_TYPE:
+		ret = get_user(uval, (int __user *)arg);
+		if (ret != 0)
+			return -EFAULT;
+		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
 		break;
-	case HFI1_CMD_ACK_EVENT:
-		ret = user_event_ack(uctxt, fd->subctxt, user_val);
+
+	case HFI1_IOCTL_ACK_EVENT:
+		ret = get_user(ul_uval, (unsigned long __user *)arg);
+		if (ret != 0)
+			return -EFAULT;
+		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
 		break;
-	case HFI1_CMD_SET_PKEY:
+
+	case HFI1_IOCTL_SET_PKEY:
+		ret = get_user(uval16, (u16 __user *)arg);
+		if (ret != 0)
+			return -EFAULT;
 		if (HFI1_CAP_IS_USET(PKEY_CHECK))
-			ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val);
+			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
 		else
-			ret = -EPERM;
+			return -EPERM;
 		break;
-	case HFI1_CMD_CTXT_RESET: {
+
+	case HFI1_IOCTL_CTXT_RESET: {
 		struct send_context *sc;
 		struct hfi1_devdata *dd;
 
-		if (!uctxt || !uctxt->dd || !uctxt->sc) {
-			ret = -EINVAL;
-			break;
-		}
+		if (!uctxt || !uctxt->dd || !uctxt->sc)
+			return -EINVAL;
+
 		/*
 		 * There is no protection here. User level has to
 		 * guarantee that no one will be writing to the send
@@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 		wait_event_interruptible_timeout(
 			sc->halt_wait, (sc->flags & SCF_HALTED),
 			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
-		if (!(sc->flags & SCF_HALTED)) {
-			ret = -ENOLCK;
-			break;
-		}
+		if (!(sc->flags & SCF_HALTED))
+			return -ENOLCK;
+
 		/*
 		 * If the send context was halted due to a Freeze,
 		 * wait until the device has been "unfrozen" before
@@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 				dd->event_queue,
 				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
 				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
-			if (dd->flags & HFI1_FROZEN) {
-				ret = -ENOLCK;
-				break;
-			}
-			if (dd->flags & HFI1_FORCED_FREEZE) {
+			if (dd->flags & HFI1_FROZEN)
+				return -ENOLCK;
+
+			if (dd->flags & HFI1_FORCED_FREEZE)
 				/*
 				 * Don't allow context reset if we are into
 				 * forced freeze
 				 */
-				ret = -ENODEV;
-				break;
-			}
+				return -ENODEV;
+
 			sc_disable(sc);
 			ret = sc_enable(sc);
 			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
@@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			sc_return_credits(sc);
 		break;
 	}
-	case HFI1_CMD_EP_INFO:
-	case HFI1_CMD_EP_ERASE_CHIP:
-	case HFI1_CMD_EP_ERASE_RANGE:
-	case HFI1_CMD_EP_READ_RANGE:
-	case HFI1_CMD_EP_WRITE_RANGE:
-		ret = handle_eprom_command(fp, &cmd);
+
+	case HFI1_IOCTL_GET_VERS:
+		uval = HFI1_USER_SWVERSION;
+		if (put_user(uval, (int __user *)arg))
+			return -EFAULT;
 		break;
+
+	default:
+		return -EINVAL;
 	}
 
-	if (ret >= 0)
-		ret = consumed;
-bail:
 	return ret;
 }
 
@@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 {
 	struct hfi1_filedata *fdata = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
-	struct hfi1_devdata *dd;
+	struct hfi1_devdata *dd = container_of(inode->i_cdev,
+					       struct hfi1_devdata,
+					       user_cdev);
 	unsigned long flags, *ev;
 
 	fp->private_data = NULL;
@@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 		goto done;
 
 	hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
-	dd = uctxt->dd;
 	mutex_lock(&hfi1_mutex);
 
 	flush_wc();
@@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	mutex_unlock(&hfi1_mutex);
 	hfi1_free_ctxtdata(dd, uctxt);
 done:
+	kobject_put(&dd->kobj);
 	kfree(fdata);
 	return 0;
 }
@@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr)
 static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 {
 	int i_minor, ret = 0;
-	unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS;
+	unsigned int swmajor, swminor;
 
 	swmajor = uinfo->userversion >> 16;
 	if (swmajor != HFI1_USER_SWMAJOR) {
@@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 
 	swminor = uinfo->userversion & 0xffff;
 
-	if (uinfo->hfi1_alg < HFI1_ALG_COUNT)
-		alg = uinfo->hfi1_alg;
-
 	mutex_lock(&hfi1_mutex);
 	/* First, lets check if we need to setup a shared context? */
 	if (uinfo->subctxt_cnt) {
@@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 	 */
 	if (!ret) {
 		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
-		ret = get_user_context(fp, uinfo, i_minor - 1, alg);
+		ret = get_user_context(fp, uinfo, i_minor);
 	}
 done_unlock:
 	mutex_unlock(&hfi1_mutex);
@@ -876,71 +834,26 @@ done:
 	return ret;
 }
 
-/* return true if the device available for general use */
-static int usable_device(struct hfi1_devdata *dd)
-{
-	struct hfi1_pportdata *ppd = dd->pport;
-
-	return driver_lstate(ppd) == IB_PORT_ACTIVE;
-}
-
 static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
-			    int devno, unsigned alg)
+			    int devno)
 {
 	struct hfi1_devdata *dd = NULL;
-	int ret = 0, devmax, npresent, nup, dev;
+	int devmax, npresent, nup;
 
 	devmax = hfi1_count_units(&npresent, &nup);
-	if (!npresent) {
-		ret = -ENXIO;
-		goto done;
-	}
-	if (!nup) {
-		ret = -ENETDOWN;
-		goto done;
-	}
-	if (devno >= 0) {
-		dd = hfi1_lookup(devno);
-		if (!dd)
-			ret = -ENODEV;
-		else if (!dd->freectxts)
-			ret = -EBUSY;
-	} else {
-		struct hfi1_devdata *pdd;
-
-		if (alg == HFI1_ALG_ACROSS) {
-			unsigned free = 0U;
-
-			for (dev = 0; dev < devmax; dev++) {
-				pdd = hfi1_lookup(dev);
-				if (!pdd)
-					continue;
-				if (!usable_device(pdd))
-					continue;
-				if (pdd->freectxts &&
-				    pdd->freectxts > free) {
-					dd = pdd;
-					free = pdd->freectxts;
-				}
-			}
-		} else {
-			for (dev = 0; dev < devmax; dev++) {
-				pdd = hfi1_lookup(dev);
-				if (!pdd)
-					continue;
-				if (!usable_device(pdd))
-					continue;
-				if (pdd->freectxts) {
-					dd = pdd;
-					break;
-				}
-			}
-		}
-		if (!dd)
-			ret = -EBUSY;
-	}
-done:
-	return ret ? ret : allocate_ctxt(fp, dd, uinfo);
+	if (!npresent)
+		return -ENXIO;
+
+	if (!nup)
+		return -ENETDOWN;
+
+	dd = hfi1_lookup(devno);
+	if (!dd)
+		return -ENODEV;
+	else if (!dd->freectxts)
+		return -EBUSY;
+
+	return allocate_ctxt(fp, dd, uinfo);
 }
 
 static int find_shared_ctxt(struct file *fp,
@@ -1546,170 +1459,10 @@ done:
 	return ret;
 }
 
-static int ui_open(struct inode *inode, struct file *filp)
-{
-	struct hfi1_devdata *dd;
-
-	dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev);
-	filp->private_data = dd; /* for other methods */
-	return 0;
-}
-
-static int ui_release(struct inode *inode, struct file *filp)
-{
-	/* nothing to do */
-	return 0;
-}
-
-static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
-{
-	struct hfi1_devdata *dd = filp->private_data;
-
-	return fixed_size_llseek(filp, offset, whence,
-		(dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
-}
-
-/* NOTE: assumes unsigned long is 8 bytes */
-static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
-		       loff_t *f_pos)
-{
-	struct hfi1_devdata *dd = filp->private_data;
-	void __iomem *base = dd->kregbase;
-	unsigned long total, csr_off,
-		barlen = (dd->kregend - dd->kregbase);
-	u64 data;
-
-	/* only read 8 byte quantities */
-	if ((count % 8) != 0)
-		return -EINVAL;
-	/* offset must be 8-byte aligned */
-	if ((*f_pos % 8) != 0)
-		return -EINVAL;
-	/* destination buffer must be 8-byte aligned */
-	if ((unsigned long)buf % 8 != 0)
-		return -EINVAL;
-	/* must be in range */
-	if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE))
-		return -EINVAL;
-	/* only set the base if we are not starting past the BAR */
-	if (*f_pos < barlen)
-		base += *f_pos;
-	csr_off = *f_pos;
-	for (total = 0; total < count; total += 8, csr_off += 8) {
-		/* accessing LCB CSRs requires more checks */
-		if (is_lcb_offset(csr_off)) {
-			if (read_lcb_csr(dd, csr_off, (u64 *)&data))
-				break; /* failed */
-		}
-		/*
-		 * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a
-		 * false parity error.  Avoid the whole issue by not reading
-		 * them.  These registers are defined as having a read value
-		 * of 0.
-		 */
-		else if (csr_off == ASIC_GPIO_CLEAR ||
-			 csr_off == ASIC_GPIO_FORCE ||
-			 csr_off == ASIC_QSFP1_CLEAR ||
-			 csr_off == ASIC_QSFP1_FORCE ||
-			 csr_off == ASIC_QSFP2_CLEAR ||
-			 csr_off == ASIC_QSFP2_FORCE)
-			data = 0;
-		else if (csr_off >= barlen) {
-			/*
-			 * read_8051_data can read more than just 8 bytes at
-			 * a time. However, folding this into the loop and
-			 * handling the reads in 8 byte increments allows us
-			 * to smoothly transition from chip memory to 8051
-			 * memory.
-			 */
-			if (read_8051_data(dd,
-					   (u32)(csr_off - barlen),
-					   sizeof(data), &data))
-				break; /* failed */
-		} else
-			data = readq(base + total);
-		if (put_user(data, (unsigned long __user *)(buf + total)))
-			break;
-	}
-	*f_pos += total;
-	return total;
-}
-
-/* NOTE: assumes unsigned long is 8 bytes */
-static ssize_t ui_write(struct file *filp, const char __user *buf,
-			size_t count, loff_t *f_pos)
-{
-	struct hfi1_devdata *dd = filp->private_data;
-	void __iomem *base;
-	unsigned long total, data, csr_off;
-	int in_lcb;
-
-	/* only write 8 byte quantities */
-	if ((count % 8) != 0)
-		return -EINVAL;
-	/* offset must be 8-byte aligned */
-	if ((*f_pos % 8) != 0)
-		return -EINVAL;
-	/* source buffer must be 8-byte aligned */
-	if ((unsigned long)buf % 8 != 0)
-		return -EINVAL;
-	/* must be in range */
-	if (*f_pos + count > dd->kregend - dd->kregbase)
-		return -EINVAL;
-
-	base = (void __iomem *)dd->kregbase + *f_pos;
-	csr_off = *f_pos;
-	in_lcb = 0;
-	for (total = 0; total < count; total += 8, csr_off += 8) {
-		if (get_user(data, (unsigned long __user *)(buf + total)))
-			break;
-		/* accessing LCB CSRs requires a special procedure */
-		if (is_lcb_offset(csr_off)) {
-			if (!in_lcb) {
-				int ret = acquire_lcb_access(dd, 1);
-
-				if (ret)
-					break;
-				in_lcb = 1;
-			}
-		} else {
-			if (in_lcb) {
-				release_lcb_access(dd, 1);
-				in_lcb = 0;
-			}
-		}
-		writeq(data, base + total);
-	}
-	if (in_lcb)
-		release_lcb_access(dd, 1);
-	*f_pos += total;
-	return total;
-}
-
-static const struct file_operations ui_file_ops = {
-	.owner = THIS_MODULE,
-	.llseek = ui_lseek,
-	.read = ui_read,
-	.write = ui_write,
-	.open = ui_open,
-	.release = ui_release,
-};
-
-#define UI_OFFSET 192	/* device minor offset for UI devices */
-static int create_ui = 1;
-
-static struct cdev wildcard_cdev;
-static struct device *wildcard_device;
-
-static atomic_t user_count = ATOMIC_INIT(0);
-
 static void user_remove(struct hfi1_devdata *dd)
 {
-	if (atomic_dec_return(&user_count) == 0)
-		hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device);
 
 	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
-	hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device);
 }
 
 static int user_add(struct hfi1_devdata *dd)
@@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd)
 	char name[10];
 	int ret;
 
-	if (atomic_inc_return(&user_count) == 1) {
-		ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops,
-				     &wildcard_cdev, &wildcard_device,
-				     true);
-		if (ret)
-			goto done;
-	}
-
 	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
-	ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops,
+	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
 			     &dd->user_cdev, &dd->user_device,
-			     true);
+			     true, &dd->kobj);
 	if (ret)
-		goto done;
+		user_remove(dd);
 
-	if (create_ui) {
-		snprintf(name, sizeof(name),
-			 "%s_ui%d", class_name(), dd->unit);
-		ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops,
-				     &dd->ui_cdev, &dd->ui_device,
-				     false);
-		if (ret)
-			goto done;
-	}
-
-	return 0;
-done:
-	user_remove(dd);
 	return ret;
 }
 
@@ -1753,13 +1485,7 @@ done:
  */
 int hfi1_device_create(struct hfi1_devdata *dd)
 {
-	int r, ret;
-
-	r = user_add(dd);
-	ret = hfi1_diag_add(dd);
-	if (r && !ret)
-		ret = r;
-	return ret;
+	return user_add(dd);
 }
 
 /*
@@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd)
 void hfi1_device_remove(struct hfi1_devdata *dd)
 {
 	user_remove(dd);
-	hfi1_diag_remove(dd);
 }
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index ed680fda611d..ed680fda611d 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7b78d56de7f5..4417a0fd3ef9 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -453,6 +453,7 @@ struct rvt_sge_state;
 #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
 
 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
+#define HLS_DOWN ~(HLS_UP)
 
 /* use this MTU size if none other is given */
 #define HFI1_DEFAULT_ACTIVE_MTU 10240
@@ -1168,6 +1169,7 @@ struct hfi1_devdata {
 	atomic_t aspm_disabled_cnt;
 
 	struct hfi1_affinity *affinity;
+	struct kobject kobj;
 };
 
 /* 8051 firmware version helper */
@@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 		get_unit_name((dd)->unit), ##__VA_ARGS__)
 
 #define hfi1_dev_porterr(dd, port, fmt, ...) \
-	dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
-			get_unit_name((dd)->unit), (dd)->unit, (port), \
-			##__VA_ARGS__)
+	dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
+			get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
 
 /*
  * this is used for formatting hw error messages...
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 502b7cf4647d..5cc492e5776d 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 		lastfail = hfi1_create_rcvhdrq(dd, rcd);
 		if (!lastfail)
 			lastfail = hfi1_setup_eagerbufs(rcd);
-		if (lastfail)
+		if (lastfail) {
 			dd_dev_err(dd,
 				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+			ret = lastfail;
+		}
 	}
-	if (lastfail)
-		ret = lastfail;
 
 	/* Allocate enough memory for user event notification. */
 	len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
@@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd)
 	dd->asic_data = NULL;
 }
 
-void hfi1_free_devdata(struct hfi1_devdata *dd)
+static void __hfi1_free_devdata(struct kobject *kobj)
 {
+	struct hfi1_devdata *dd =
+		container_of(kobj, struct hfi1_devdata, kobj);
 	unsigned long flags;
 
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
@@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
 	rvt_dealloc_device(&dd->verbs_dev.rdi);
 }
 
+static struct kobj_type hfi1_devdata_type = {
+	.release = __hfi1_free_devdata,
+};
+
+void hfi1_free_devdata(struct hfi1_devdata *dd)
+{
+	kobject_put(&dd->kobj);
+}
+
 /*
  * Allocate our primary per-unit data structure.  Must be done via verbs
  * allocator, because the verbs cleanup process both does cleanup and
@@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 			&pdev->dev,
 			"Could not alloc cpulist info, cpu affinity might be wrong\n");
 	}
+	kobject_init(&dd->kobj, &hfi1_devdata_type);
 	return dd;
 
 bail:
@@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 
 		spin_lock(&ppd->cc_state_lock);
 		cc_state = get_cc_state(ppd);
-		rcu_assign_pointer(ppd->cc_state, NULL);
+		RCU_INIT_POINTER(ppd->cc_state, NULL);
 		spin_unlock(&ppd->cc_state_lock);
 
 		if (cc_state)
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef38d389..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index ed58cf21e790..219029576ba0 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 		if (key == okey)
 			continue;
 		/*
+		 * Don't update pkeys[2] if the neighbor is a switch and has
+		 * not granted MgmtAllowed to this HFI port.
+		 */
+		if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
+			continue;
+		/*
 		 * The SM gives us the complete PKey table. We have
 		 * to ensure that we put the PKeys in the matching
 		 * slots.
@@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
 	return reply((struct ib_mad_hdr *)smp);
 }
 
+/*
+ * Publish a new cc_state built from the congestion settings saved in the
+ * ppd.  Does nothing if allocation fails or get_cc_state() returns NULL.
+ */
+static void apply_cc_state(struct hfi1_pportdata *ppd)
+{
+	struct cc_state *old_cc_state, *new_cc_state;
+
+	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
+	if (!new_cc_state)
+		return;
+
+	/*
+	 * Hold the lock for updating *and* to prevent ppd information
+	 * from changing during the update.
+	 */
+	spin_lock(&ppd->cc_state_lock);
+
+	old_cc_state = get_cc_state(ppd);
+	if (!old_cc_state) {
+		/* never active, or shutting down */
+		spin_unlock(&ppd->cc_state_lock);
+		kfree(new_cc_state);
+		return;
+	}
+
+	*new_cc_state = *old_cc_state;
+
+	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
+	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
+	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
+
+	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
+	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
+	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
+	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
+
+	rcu_assign_pointer(ppd->cc_state, new_cc_state);
+
+	spin_unlock(&ppd->cc_state_lock);
+
+	call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+}
+
 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
 				       struct ib_device *ibdev, u8 port,
 				       u32 *resp_len)
@@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
 	struct opa_congestion_setting_entry_shadow *entries;
 	int i;
 
+	/*
+	 * Save details from packet into the ppd.  Hold the cc_state_lock so
+	 * our information is consistent with anyone trying to apply the state.
+	 */
+	spin_lock(&ppd->cc_state_lock);
 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
 
 	entries = ppd->congestion_entries;
@@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
 			p->entries[i].trigger_threshold;
 		entries[i].ccti_min = p->entries[i].ccti_min;
 	}
+	spin_unlock(&ppd->cc_state_lock);
+
+	/* now apply the information */
+	apply_cc_state(ppd);
 
 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
 					   resp_len);
@@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 	int i, j;
 	u32 sentry, eentry;
 	u16 ccti_limit;
-	struct cc_state *old_cc_state, *new_cc_state;
 
 	/* sanity check n_blocks, start_block */
 	if (n_blocks == 0 ||
@@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
-	if (!new_cc_state)
-		goto getit;
-
+	/*
+	 * Save details from packet into the ppd.  Hold the cc_state_lock so
+	 * our information is consistent with anyone trying to apply the state.
+	 */
 	spin_lock(&ppd->cc_state_lock);
-
-	old_cc_state = get_cc_state(ppd);
-
-	if (!old_cc_state) {
-		spin_unlock(&ppd->cc_state_lock);
-		kfree(new_cc_state);
-		return reply((struct ib_mad_hdr *)smp);
-	}
-
-	*new_cc_state = *old_cc_state;
-
-	new_cc_state->cct.ccti_limit = ccti_limit;
-
-	entries = ppd->ccti_entries;
 	ppd->total_cct_entry = ccti_limit + 1;
-
+	entries = ppd->ccti_entries;
 	for (j = 0, i = sentry; i < eentry; j++, i++)
 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
-
-	memcpy(new_cc_state->cct.entries, entries,
-	       eentry * sizeof(struct ib_cc_table_entry));
-
-	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
-	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
-	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
-	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
-
-	rcu_assign_pointer(ppd->cc_state, new_cc_state);
-
 	spin_unlock(&ppd->cc_state_lock);
 
-	call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+	/* now apply the information */
+	apply_cc_state(ppd);
 
-getit:
 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
 }
 
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 2b0e91d3093d..b7a80aa1ae30 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -45,6 +45,7 @@
  *
  */
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/mmu_notifier.h>
 #include <linux/interval_tree_generic.h>
 
@@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
 int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
 {
 	struct mmu_rb_handler *handlr;
-	unsigned long flags;
 
 	if (!ops->invalidate)
 		return -EINVAL;
@@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
 	INIT_HLIST_NODE(&handlr->mn.hlist);
 	spin_lock_init(&handlr->lock);
 	handlr->mn.ops = &mn_opts;
-	spin_lock_irqsave(&mmu_rb_lock, flags);
-	list_add_tail(&handlr->list, &mmu_rb_handlers);
-	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	spin_lock(&mmu_rb_lock);
+	list_add_tail_rcu(&handlr->list, &mmu_rb_handlers);
+	spin_unlock(&mmu_rb_lock);
 
 	return mmu_notifier_register(&handlr->mn, current->mm);
 }
@@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
 	if (current->mm)
 		mmu_notifier_unregister(&handler->mn, current->mm);
 
-	spin_lock_irqsave(&mmu_rb_lock, flags);
-	list_del(&handler->list);
-	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	spin_lock(&mmu_rb_lock);
+	list_del_rcu(&handler->list);
+	spin_unlock(&mmu_rb_lock);
+	synchronize_rcu();
 
 	spin_lock_irqsave(&handler->lock, flags);
 	if (!RB_EMPTY_ROOT(root)) {
@@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
 static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
 {
 	struct mmu_rb_handler *handler;
-	unsigned long flags;
 
-	spin_lock_irqsave(&mmu_rb_lock, flags);
-	list_for_each_entry(handler, &mmu_rb_handlers, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) {
 		if (handler->root == root)
 			goto unlock;
 	}
 	handler = NULL;
 unlock:
-	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	rcu_read_unlock();
 	return handler;
 }
 
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 7a57b9c49d27..7a57b9c49d27 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0bac21e6a658..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index c67b9ad3fcf4..d5edb1afbb8f 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
 	struct pio_vl_map *oldmap, *newmap;
 
 	if (!vl_scontexts) {
-		/* send context 0 reserved for VL15 */
-		for (i = 1; i < dd->num_send_contexts; i++)
+		for (i = 0; i < dd->num_send_contexts; i++)
 			if (dd->send_contexts[i].type == SC_KERNEL)
 				num_kernel_send_contexts++;
 		/* truncate divide */
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 53a08edb7f64..464cbd27b975 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -49,10 +49,10 @@
 
 /* send context types */
 #define SC_KERNEL 0
-#define SC_ACK    1
-#define SC_USER   2
-#define SC_VL15   3
-#define SC_MAX    4
+#define SC_VL15   1
+#define SC_ACK    2
+#define SC_USER   3	/* must be the last one: it may take all left */
+#define SC_MAX    4	/* count of send context types */
 
 /* invalid send context index */
 #define INVALID_SCI 0xff
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 8fe8a205b5bb..03df9322f862 100644
--- a/drivers/staging/rdma/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd)
 	 */
 }
 
+void get_port_type(struct hfi1_pportdata *ppd)
+{
+	int ret;
+
+	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+					PORT_TABLE_PORT_TYPE, &ppd->port_type,
+					4);
+	if (ret)
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+}
+
 int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
 {
 	u8 tx_ctrl_byte = on ? 0x0 : 0xF;
@@ -529,7 +540,8 @@ static void apply_tunings(
 	/* Enable external device config if channel is limiting active */
 	read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
 			 GENERAL_CONFIG, &config_data);
-	config_data |= limiting_active;
+	config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT);
+	config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT);
 	ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
 			       GENERAL_CONFIG, config_data);
 	if (ret != HCMD_SUCCESS)
@@ -542,7 +554,8 @@ static void apply_tunings(
 	/* Pass tuning method to 8051 */
 	read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
 			 &config_data);
-	config_data |= tuning_method;
+	config_data &= ~(0xff << TUNING_METHOD_SHIFT);
+	config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT);
 	ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
 			       config_data);
 	if (ret != HCMD_SUCCESS)
@@ -564,8 +577,8 @@ static void apply_tunings(
 		ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
 				       GENERAL_CONFIG, &config_data);
 		/* Clear, then set the external device config field */
-		config_data &= ~(0xFF << 24);
-		config_data |= (external_device_config << 24);
+		config_data &= ~(u32)0xFF;
+		config_data |= external_device_config;
 		ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
 				       GENERAL_CONFIG, config_data);
 		if (ret != HCMD_SUCCESS)
@@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd)
 		return;
 	}
 
-	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
-					PORT_TABLE_PORT_TYPE, &ppd->port_type,
-					4);
-	if (ret)
-		ppd->port_type = PORT_TYPE_UNKNOWN;
-
 	switch (ppd->port_type) {
 	case PORT_TYPE_DISCONNECTED:
 		ppd->offline_disabled_reason =
diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index 19620cf546d5..e2c21613c326 100644
--- a/drivers/staging/rdma/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -298,6 +298,7 @@ enum link_tuning_encoding {
 /* platform.c */
 void get_platform_config(struct hfi1_devdata *dd);
 void free_platform_config(struct hfi1_devdata *dd);
+void get_port_type(struct hfi1_pportdata *ppd);
 int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
 void tune_serdes(struct hfi1_pportdata *ppd);
 
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 91eb42316df9..1a942ffba4cb 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -49,7 +49,6 @@
 #include <linux/vmalloc.h>
 #include <linux/hash.h>
 #include <linux/module.h>
-#include <linux/random.h>
 #include <linux/seq_file.h>
 #include <rdma/rdma_vt.h>
 #include <rdma/rdmavt_qp.h>
@@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu)
  * This function is what we would push to the core layer if we wanted to be a
  * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
  * to blindly pass the MTU enum value from the PathRecord to us.
- *
- * The actual flag used to determine "8k MTU" will change and is currently
- * unknown.
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
@@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason)
 static void iowait_sdma_drained(struct iowait *wait)
 {
 	struct rvt_qp *qp = iowait_to_qp(wait);
+	unsigned long flags;
 
 	/*
 	 * This happens when the send engine notes
@@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait)
 	 * do the flush work until that QP's
 	 * sdma work has finished.
 	 */
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	if (qp->s_flags & RVT_S_WAIT_DMA) {
 		qp->s_flags &= ~RVT_S_WAIT_DMA;
 		hfi1_schedule_send(qp);
 	}
-	spin_unlock(&qp->s_lock);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /**
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index e7bc8d6cf681..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..2441669f0817 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index dadc66c442b9..dadc66c442b9 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 792f15eb8efe..792f15eb8efe 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a659aec3c3c6..a659aec3c3c6 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index abb8ebc1fcac..f9befc05b349 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = {
 	[sdma_state_s99_running]                = "s99_Running",
 };
 
+#ifdef CONFIG_SDMA_VERBOSITY
 static const char * const sdma_event_names[] = {
 	[sdma_event_e00_go_hw_down]   = "e00_GoHwDown",
 	[sdma_event_e10_go_hw_start]  = "e10_GoHwStart",
@@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = {
 	[sdma_event_e85_link_down]    = "e85_LinkDown",
 	[sdma_event_e90_sw_halted]    = "e90_SwHalted",
 };
+#endif
 
 static const struct sdma_set_state_action sdma_action_table[] = {
 	[sdma_state_s00_hw_down] = {
@@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
 	sdma_txclean(sde->dd, tx);
 	if (complete)
 		(*complete)(tx, res);
-	if (iowait_sdma_dec(wait) && wait)
+	if (wait && iowait_sdma_dec(wait))
 		iowait_drain_wakeup(wait);
 }
 
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index bf7d777d756e..bf7d777d756e 100644
--- a/drivers/staging/rdma/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 8cd6df8634ad..91fc2aed6aed 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 	}
 
 	dd_dev_info(dd,
-		    "IB%u: Congestion Control Agent enabled for port %d\n",
-		    dd->unit, port_num);
+		    "Congestion Control Agent enabled for port %d\n",
+		    port_num);
 
 	return 0;
 
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 8b62fefcf903..79b2952c0dfb 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
 #define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
 #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
 #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
+#define IETH_PRN "ieth rkey 0x%.8x"
 #define ATOMICACKETH_PRN "origdata %lld"
 #define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
 
@@ -166,6 +167,12 @@ const char *parse_everbs_hdrs(
 				 be32_to_cpu(eh->ud.deth[0]),
 				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
 		break;
+	/* ieth */
+	case OP(RC, SEND_LAST_WITH_INVALIDATE):
+	case OP(RC, SEND_ONLY_WITH_INVALIDATE):
+		trace_seq_printf(p, IETH_PRN,
+				 be32_to_cpu(eh->ieth));
+		break;
 	}
 	trace_seq_putc(p, 0);
 	return ret;
@@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE);
 __hfi1_trace_fn(RCVCTRL);
 __hfi1_trace_fn(TID);
 __hfi1_trace_fn(MMU);
+__hfi1_trace_fn(IOCTL);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 963dc948c38a..28c1d0832886 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -74,8 +74,8 @@ __print_symbolic(etype,                         \
 
 TRACE_EVENT(hfi1_rcvhdr,
 	    TP_PROTO(struct hfi1_devdata *dd,
-		     u64 eflags,
 		     u32 ctxt,
+		     u64 eflags,
 		     u32 etype,
 		     u32 hlen,
 		     u32 tlen,
@@ -392,6 +392,8 @@ __print_symbolic(opcode,                                   \
 	ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE),             \
 	ib_opcode_name(RC_COMPARE_SWAP),                   \
 	ib_opcode_name(RC_FETCH_ADD),                      \
+	ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE),      \
+	ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE),      \
 	ib_opcode_name(UC_SEND_FIRST),                     \
 	ib_opcode_name(UC_SEND_MIDDLE),                    \
 	ib_opcode_name(UC_SEND_LAST),                      \
@@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE);
 __hfi1_trace_def(RCVCTRL);
 __hfi1_trace_def(TID);
 __hfi1_trace_def(MMU);
+__hfi1_trace_def(IOCTL);
 
 #define hfi1_cdbg(which, fmt, ...) \
 	__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c
index e82e52a63d35..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/infiniband/hw/hfi1/twsi.c
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h
index 5b8a5b5e7eae..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/infiniband/hw/hfi1/twsi.h
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index df773d433297..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1e503ad0bebb..1e503ad0bebb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 1b640a35b3fe..1b640a35b3fe 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f55f1..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0014c9c0e967..29f4795f866c 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8;
 
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
+struct sdma_mmu_node;
+
 struct user_sdma_iovec {
 	struct list_head list;
 	struct iovec iov;
@@ -178,6 +180,7 @@ struct user_sdma_iovec {
 	 * which we last left off.
 	 */
 	u64 offset;
+	struct sdma_mmu_node *node;
 };
 
 #define SDMA_CACHE_NODE_EVICT BIT(0)
@@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	struct sdma_req_info info;
 	struct user_sdma_request *req;
 	u8 opcode, sc, vl;
+	int req_queued = 0;
 
 	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
 		hfi1_cdbg(
@@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
 	atomic_inc(&pq->n_reqs);
+	req_queued = 1;
 	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount);
 	if (unlikely(ret < 0 && ret != -EBUSY)) {
@@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	return 0;
 free_req:
 	user_sdma_free_request(req, true);
-	pq_update(pq);
+	if (req_queued)
+		pq_update(pq);
 	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
 	return ret;
 }
@@ -1153,6 +1159,7 @@ retry:
 	}
 	iovec->pages = node->pages;
 	iovec->npages = npages;
+	iovec->node = node;
 
 	ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
 	if (ret) {
@@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 	}
 	if (req->data_iovs) {
 		struct sdma_mmu_node *node;
-		struct mmu_rb_node *mnode;
 		int i;
 
 		for (i = 0; i < req->data_iovs; i++) {
-			mnode = hfi1_mmu_rb_search(
-				&req->pq->sdma_rb_root,
-				(unsigned long)req->iovs[i].iov.iov_base,
-				req->iovs[i].iov.iov_len);
-			if (!mnode || IS_ERR(mnode))
+			node = req->iovs[i].node;
+			if (!node)
 				continue;
 
-			node = container_of(mnode, struct sdma_mmu_node, rb);
 			if (unpin)
 				hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
 						   &node->rb);
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index b9240e351161..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9cdc85fa366f..849c4b9399d4 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -52,7 +52,6 @@
 #include <linux/utsname.h>
 #include <linux/rculist.h>
 #include <linux/mm.h>
-#include <linux/random.h>
 #include <linux/vmalloc.h>
 
 #include "hfi.h"
@@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = {
 	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4,
 	[IB_OPCODE_RC_COMPARE_SWAP]                   = 12 + 8 + 28,
 	[IB_OPCODE_RC_FETCH_ADD]                      = 12 + 8 + 28,
+	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE]      = 12 + 8 + 4,
+	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE]      = 12 + 8 + 4,
 	/* UC */
 	[IB_OPCODE_UC_SEND_FIRST]                     = 12 + 8,
 	[IB_OPCODE_UC_SEND_MIDDLE]                    = 12 + 8,
@@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp,
 
 			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
 			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
-			dev->n_piowait++;
 			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
 			list_add_tail(&priv->s_iowait.list, &sc->piowait);
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3ee223983b20..488356775627 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -152,6 +152,7 @@ union ib_ehdrs {
 	} at;
 	__be32 imm_data;
 	__be32 aeth;
+	__be32 ieth;
 	struct ib_atomic_eth atomic_eth;
 }  __packed;
 
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..bc95c4112c61 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..1cf69b2fe4a5 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 82d7c4bf5970..ce4034071f9c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1308,21 +1308,6 @@ static const struct  qib_hwerror_msgs qib_7322p_error_msgs[] = {
 	SYM_LSB(IntMask, fldname##17IntMask)), \
 	.msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
 
-static const struct  qib_hwerror_msgs qib_7322_intr_msgs[] = {
-	INTR_AUTO_P(SDmaInt),
-	INTR_AUTO_P(SDmaProgressInt),
-	INTR_AUTO_P(SDmaIdleInt),
-	INTR_AUTO_P(SDmaCleanupDone),
-	INTR_AUTO_C(RcvUrg),
-	INTR_AUTO_P(ErrInt),
-	INTR_AUTO(ErrInt),      /* non-port-specific errs */
-	INTR_AUTO(AssertGPIOInt),
-	INTR_AUTO_P(SendDoneInt),
-	INTR_AUTO(SendBufAvailInt),
-	INTR_AUTO_C(RcvAvail),
-	{ .mask = 0, .sz = 0 }
-};
-
 #define TXSYMPTOM_AUTO_P(fldname) \
 	{ .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
 	.msg = #fldname, .sz = sizeof(#fldname) }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6888f03c6d61..4f878151f81f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -159,6 +159,7 @@ struct qib_other_headers {
 		} at;
 		__be32 imm_data;
 		__be32 aeth;
+		__be32 ieth;
 		struct ib_atomic_eth atomic_eth;
 	} u;
 } __packed;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index b1ffc8b4a6c0..6ca6fa80dd6e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 		return PTR_ERR(task);
 	}
 
+	set_user_nice(task, MIN_NICE);
 	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
 	kthread_bind(task, cpu);
 	wake_up_process(task);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 0ff765bfd619..0f4d4500f45e 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
 			    int count)
 {
 	int m, i = 0;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
 
 	mr->mapsz = 0;
 	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
 	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
+		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
+					  dev->dparms.node);
 		if (!mr->map[i]) {
 			rvt_deinit_mregion(mr);
 			return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 0f12c211c385..5fa4d4d81ee0 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
 static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 {
 	unsigned n;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 
 	if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 		rvt_put_ss(&qp->s_rdma_read_sge);
@@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 	if (qp->ibqp.qp_type != IB_QPT_RC)
 		return;
 
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+	for (n = 0; n < rvt_max_atomic(rdi); n++) {
 		struct rvt_ack_entry *e = &qp->s_ack_queue[n];
 
 		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
@@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	if (qp->s_ack_queue)
+		memset(
+			qp->s_ack_queue,
+			0,
+			rvt_max_atomic(rdi) *
+				sizeof(*qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
@@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		if (gfp == GFP_NOIO)
 			swq = __vmalloc(
 				(init_attr->cap.max_send_wr + 1) * sz,
-				gfp, PAGE_KERNEL);
+				gfp | __GFP_ZERO, PAGE_KERNEL);
 		else
-			swq = vmalloc_node(
+			swq = vzalloc_node(
 				(init_attr->cap.max_send_wr + 1) * sz,
 				rdi->dparms.node);
 		if (!swq)
@@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 			goto bail_swq;
 
 		RCU_INIT_POINTER(qp->next, NULL);
+		if (init_attr->qp_type == IB_QPT_RC) {
+			qp->s_ack_queue =
+				kzalloc_node(
+					sizeof(*qp->s_ack_queue) *
+					 rvt_max_atomic(rdi),
+					gfp,
+					rdi->dparms.node);
+			if (!qp->s_ack_queue)
+				goto bail_qp;
+		}
 
 		/*
 		 * Driver needs to set up it's private QP structure and do any
@@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 				qp->r_rq.wq = __vmalloc(
 						sizeof(struct rvt_rwq) +
 						qp->r_rq.size * sz,
-						gfp, PAGE_KERNEL);
+						gfp | __GFP_ZERO, PAGE_KERNEL);
 			else
-				qp->r_rq.wq = vmalloc_node(
+				qp->r_rq.wq = vzalloc_node(
 						sizeof(struct rvt_rwq) +
 						qp->r_rq.size * sz,
 						rdi->dparms.node);
@@ -857,6 +873,7 @@ bail_driver_priv:
 	rdi->driver_f.qp_priv_free(rdi, qp);
 
 bail_qp:
+	kfree(qp->s_ack_queue);
 	kfree(qp);
 
 bail_swq:
@@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
 		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
+	kfree(qp->s_ack_queue);
 	kfree(qp);
 	return 0;
 }
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index f1f3ecadf0fb..2c5b0188ebbf 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -22,6 +22,4 @@ menuconfig STAGING_RDMA
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/hfi1/Kconfig"
-
 endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index 8c7fc1de48a7..b5e94f169101 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -1,2 +1 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_HFI1)	+= hfi1/
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
deleted file mode 100644
index 4c6f1d7d2eaf..000000000000
--- a/drivers/staging/rdma/hfi1/TODO
+++ /dev/null
@@ -1,6 +0,0 @@
-July, 2015
-
-- Remove unneeded file entries in sysfs
-- Remove software processing of IB protocol and place in library for use
-  by qib, ipath (if still present), hfi1, and eventually soft-roce
-- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
deleted file mode 100644
index bb2409ad891a..000000000000
--- a/drivers/staging/rdma/hfi1/diag.c
+++ /dev/null
@@ -1,1925 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the hfi1_diag device, normally minor number 129.  Diagnostic use
- * of the chip may render the chip or board unusable until the driver
- * is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/vmalloc.h>
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/module.h>
-#include <rdma/ib_smi.h>
-#include "hfi.h"
-#include "device.h"
-#include "common.h"
-#include "verbs_txreq.h"
-#include "trace.h"
-
-#undef pr_fmt
-#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-#define snoop_dbg(fmt, ...) \
-	hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__)
-
-/* Snoop option mask */
-#define SNOOP_DROP_SEND		BIT(0)
-#define SNOOP_USE_METADATA	BIT(1)
-#define SNOOP_SET_VL0TOVL15     BIT(2)
-
-static u8 snoop_flags;
-
-/*
- * Extract packet length from LRH header.
- * This is in Dwords so multiply by 4 to get size in bytes
- */
-#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
-
-enum hfi1_filter_status {
-	HFI1_FILTER_HIT,
-	HFI1_FILTER_ERR,
-	HFI1_FILTER_MISS
-};
-
-/* snoop processing functions */
-rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = {
-	[RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler,
-	[RHF_RCV_TYPE_EAGER]    = snoop_recv_handler,
-	[RHF_RCV_TYPE_IB]       = snoop_recv_handler,
-	[RHF_RCV_TYPE_ERROR]    = snoop_recv_handler,
-	[RHF_RCV_TYPE_BYPASS]   = snoop_recv_handler,
-	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
-	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
-	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid
-};
-
-/* Snoop packet structure */
-struct snoop_packet {
-	struct list_head list;
-	u32 total_len;
-	u8 data[];
-};
-
-/* Do not make these an enum or it will blow up the capture_md */
-#define PKT_DIR_EGRESS 0x0
-#define PKT_DIR_INGRESS 0x1
-
-/* Packet capture metadata returned to the user with the packet. */
-struct capture_md {
-	u8 port;
-	u8 dir;
-	u8 reserved[6];
-	union {
-		u64 pbc;
-		u64 rhf;
-	} u;
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev diagpkt_cdev;
-static struct device *diagpkt_device;
-
-static ssize_t diagpkt_write(struct file *fp, const char __user *data,
-			     size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-	.owner = THIS_MODULE,
-	.write = diagpkt_write,
-	.llseek = noop_llseek,
-};
-
-/*
- * This is used for communication with user space for snoop extended IOCTLs
- */
-struct hfi1_link_info {
-	__be64 node_guid;
-	u8 port_mode;
-	u8 port_state;
-	u16 link_speed_active;
-	u16 link_width_active;
-	u16 vl15_init;
-	u8 port_number;
-	/*
-	 * Add padding to make this a full IB SMP payload. Note: changing the
-	 * size of this structure will make the IOCTLs created with _IOWR
-	 * change.
-	 * Be sure to run tests on all IOCTLs when making changes to this
-	 * structure.
-	 */
-	u8 res[47];
-};
-
-/*
- * This starts our ioctl sequence numbers *way* off from the ones
- * defined in ib_core.
- */
-#define SNOOP_CAPTURE_VERSION 0x1
-
-#define IB_IOCTL_MAGIC          0x1b /* See Documentation/ioctl-number.txt */
-#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
-#define HFI1_SNOOP_IOC_BASE_SEQ 0x80
-
-#define HFI1_SNOOP_IOCGETLINKSTATE \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ)
-#define HFI1_SNOOP_IOCSETLINKSTATE \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1)
-#define HFI1_SNOOP_IOCCLEARQUEUE \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2)
-#define HFI1_SNOOP_IOCCLEARFILTER \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3)
-#define HFI1_SNOOP_IOCSETFILTER \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4)
-#define HFI1_SNOOP_IOCGETVERSION \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5)
-#define HFI1_SNOOP_IOCSET_OPTS \
-	_IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6)
-
-/*
- * These offsets +6/+7 could change, but these are already known and used
- * IOCTL numbers so don't change them without a good reason.
- */
-#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \
-	_IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \
-		struct hfi1_link_info)
-#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \
-	_IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \
-		struct hfi1_link_info)
-
-static int hfi1_snoop_open(struct inode *in, struct file *fp);
-static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
-			       size_t pkt_len, loff_t *off);
-static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off);
-static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
-static unsigned int hfi1_snoop_poll(struct file *fp,
-				    struct poll_table_struct *wait);
-static int hfi1_snoop_release(struct inode *in, struct file *fp);
-
-struct hfi1_packet_filter_command {
-	int opcode;
-	int length;
-	void *value_ptr;
-};
-
-/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */
-#define HFI1_SNOOP_INGRESS 0x1
-#define HFI1_SNOOP_EGRESS  0x2
-
-enum hfi1_packet_filter_opcodes {
-	FILTER_BY_LID,
-	FILTER_BY_DLID,
-	FILTER_BY_MAD_MGMT_CLASS,
-	FILTER_BY_QP_NUMBER,
-	FILTER_BY_PKT_TYPE,
-	FILTER_BY_SERVICE_LEVEL,
-	FILTER_BY_PKEY,
-	FILTER_BY_DIRECTION,
-};
-
-static const struct file_operations snoop_file_ops = {
-	.owner = THIS_MODULE,
-	.open = hfi1_snoop_open,
-	.read = hfi1_snoop_read,
-	.unlocked_ioctl = hfi1_ioctl,
-	.poll = hfi1_snoop_poll,
-	.write = hfi1_snoop_write,
-	.release = hfi1_snoop_release
-};
-
-struct hfi1_filter_array {
-	int (*filter)(void *, void *, void *);
-};
-
-static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-				      void *value);
-static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
-				     void *value);
-static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
-					void *value);
-static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
-
-static const struct hfi1_filter_array hfi1_filters[] = {
-	{ hfi1_filter_lid },
-	{ hfi1_filter_dlid },
-	{ hfi1_filter_mad_mgmt_class },
-	{ hfi1_filter_qp_number },
-	{ hfi1_filter_ibpacket_type },
-	{ hfi1_filter_ib_service_level },
-	{ hfi1_filter_ib_pkey },
-	{ hfi1_filter_direction },
-};
-
-#define HFI1_MAX_FILTERS	ARRAY_SIZE(hfi1_filters)
-#define HFI1_DIAG_MINOR_BASE	129
-
-static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name);
-
-int hfi1_diag_add(struct hfi1_devdata *dd)
-{
-	char name[16];
-	int ret = 0;
-
-	snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(),
-		 dd->unit);
-	/*
-	 * Do this for each device as opposed to the normal diagpkt
-	 * interface which is one per host
-	 */
-	ret = hfi1_snoop_add(dd, name);
-	if (ret)
-		dd_dev_err(dd, "Unable to init snoop/capture device");
-
-	snprintf(name, sizeof(name), "%s_diagpkt", class_name());
-	if (atomic_inc_return(&diagpkt_count) == 1) {
-		ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name,
-				     &diagpkt_file_ops, &diagpkt_cdev,
-				     &diagpkt_device, false);
-	}
-
-	return ret;
-}
-
-/* this must be called w/ dd->snoop_in_lock held */
-static void drain_snoop_list(struct list_head *queue)
-{
-	struct list_head *pos, *q;
-	struct snoop_packet *packet;
-
-	list_for_each_safe(pos, q, queue) {
-		packet = list_entry(pos, struct snoop_packet, list);
-		list_del(pos);
-		kfree(packet);
-	}
-}
-
-static void hfi1_snoop_remove(struct hfi1_devdata *dd)
-{
-	unsigned long flags = 0;
-
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-	drain_snoop_list(&dd->hfi1_snoop.queue);
-	hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev);
-	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-}
-
-void hfi1_diag_remove(struct hfi1_devdata *dd)
-{
-	hfi1_snoop_remove(dd);
-	if (atomic_dec_and_test(&diagpkt_count))
-		hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
-	hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
-}
-
-/*
- * Allocated structure shared between the credit return mechanism and
- * diagpkt_send().
- */
-struct diagpkt_wait {
-	struct completion credits_returned;
-	int code;
-	atomic_t count;
-};
-
-/*
- * When each side is finished with the structure, they call this.
- * The last user frees the structure.
- */
-static void put_diagpkt_wait(struct diagpkt_wait *wait)
-{
-	if (atomic_dec_and_test(&wait->count))
-		kfree(wait);
-}
-
-/*
- * Callback from the credit return code.  Set the complete, which
- * will let diapkt_send() continue.
- */
-static void diagpkt_complete(void *arg, int code)
-{
-	struct diagpkt_wait *wait = (struct diagpkt_wait *)arg;
-
-	wait->code = code;
-	complete(&wait->credits_returned);
-	put_diagpkt_wait(wait);	/* finished with the structure */
-}
-
-/**
- * diagpkt_send - send a packet
- * @dp: diag packet descriptor
- */
-static ssize_t diagpkt_send(struct diag_pkt *dp)
-{
-	struct hfi1_devdata *dd;
-	struct send_context *sc;
-	struct pio_buf *pbuf;
-	u32 *tmpbuf = NULL;
-	ssize_t ret = 0;
-	u32 pkt_len, total_len;
-	pio_release_cb credit_cb = NULL;
-	void *credit_arg = NULL;
-	struct diagpkt_wait *wait = NULL;
-	int trycount = 0;
-
-	dd = hfi1_lookup(dp->unit);
-	if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) {
-		ret = -ENODEV;
-		goto bail;
-	}
-	if (!(dd->flags & HFI1_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (dp->version != _DIAG_PKT_VERS) {
-		dd_dev_err(dd, "Invalid version %u for diagpkt_write\n",
-			   dp->version);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* send count must be an exact number of dwords */
-	if (dp->len & 3) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* there is only port 1 */
-	if (dp->port != 1) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* need a valid context */
-	if (dp->sw_index >= dd->num_send_contexts) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* can only use kernel contexts */
-	if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
-	    dd->send_contexts[dp->sw_index].type != SC_VL15) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* must be allocated */
-	sc = dd->send_contexts[dp->sw_index].sc;
-	if (!sc) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* must be enabled */
-	if (!(sc->flags & SCF_ENABLED)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* allocate a buffer and copy the data in */
-	tmpbuf = vmalloc(dp->len);
-	if (!tmpbuf) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	if (copy_from_user(tmpbuf,
-			   (const void __user *)(unsigned long)dp->data,
-			   dp->len)) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	/*
-	 * pkt_len is how much data we have to write, includes header and data.
-	 * total_len is length of the packet in Dwords plus the PBC should not
-	 * include the CRC.
-	 */
-	pkt_len = dp->len >> 2;
-	total_len = pkt_len + 2; /* PBC + packet */
-
-	/* if 0, fill in a default */
-	if (dp->pbc == 0) {
-		struct hfi1_pportdata *ppd = dd->pport;
-
-		hfi1_cdbg(PKT, "Generating PBC");
-		dp->pbc = create_pbc(ppd, 0, 0, 0, total_len);
-	} else {
-		hfi1_cdbg(PKT, "Using passed in PBC");
-	}
-
-	hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc);
-
-	/*
-	 * The caller wants to wait until the packet is sent and to
-	 * check for errors.  The best we can do is wait until
-	 * the buffer credits are returned and check if any packet
-	 * error has occurred.  If there are any late errors, this
-	 * could miss it.  If there are other senders who generate
-	 * an error, this may find it.  However, in general, it
-	 * should catch most.
-	 */
-	if (dp->flags & F_DIAGPKT_WAIT) {
-		/* always force a credit return */
-		dp->pbc |= PBC_CREDIT_RETURN;
-		/* turn on credit return interrupts */
-		sc_add_credit_return_intr(sc);
-		wait = kmalloc(sizeof(*wait), GFP_KERNEL);
-		if (!wait) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-		init_completion(&wait->credits_returned);
-		atomic_set(&wait->count, 2);
-		wait->code = PRC_OK;
-
-		credit_cb = diagpkt_complete;
-		credit_arg = wait;
-	}
-
-retry:
-	pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg);
-	if (!pbuf) {
-		if (trycount == 0) {
-			/* force a credit return and try again */
-			sc_return_credits(sc);
-			trycount = 1;
-			goto retry;
-		}
-		/*
-		 * No send buffer means no credit callback.  Undo
-		 * the wait set-up that was done above.  We free wait
-		 * because the callback will never be called.
-		 */
-		if (dp->flags & F_DIAGPKT_WAIT) {
-			sc_del_credit_return_intr(sc);
-			kfree(wait);
-			wait = NULL;
-		}
-		ret = -ENOSPC;
-		goto bail;
-	}
-
-	pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len);
-	/* no flush needed as the HW knows the packet size */
-
-	ret = sizeof(*dp);
-
-	if (dp->flags & F_DIAGPKT_WAIT) {
-		/* wait for credit return */
-		ret = wait_for_completion_interruptible(
-						&wait->credits_returned);
-		/*
-		 * If the wait returns an error, the wait was interrupted,
-		 * e.g. with a ^C in the user program.  The callback is
-		 * still pending.  This is OK as the wait structure is
-		 * kmalloc'ed and the structure will free itself when
-		 * all users are done with it.
-		 *
-		 * A context disable occurs on a send context restart, so
-		 * include that in the list of errors below to check for.
-		 * NOTE: PRC_FILL_ERR is at best informational and cannot
-		 * be depended on.
-		 */
-		if (!ret && (((wait->code & PRC_STATUS_ERR) ||
-			      (wait->code & PRC_FILL_ERR) ||
-			      (wait->code & PRC_SC_DISABLE))))
-			ret = -EIO;
-
-		put_diagpkt_wait(wait);	/* finished with the structure */
-		sc_del_credit_return_intr(sc);
-	}
-
-bail:
-	vfree(tmpbuf);
-	return ret;
-}
-
-static ssize_t diagpkt_write(struct file *fp, const char __user *data,
-			     size_t count, loff_t *off)
-{
-	struct hfi1_devdata *dd;
-	struct send_context *sc;
-	u8 vl;
-
-	struct diag_pkt dp;
-
-	if (count != sizeof(dp))
-		return -EINVAL;
-
-	if (copy_from_user(&dp, data, sizeof(dp)))
-		return -EFAULT;
-
-	/*
-	* The Send Context is derived from the PbcVL value
-	* if PBC is populated
-	*/
-	if (dp.pbc) {
-		dd = hfi1_lookup(dp.unit);
-		if (!dd)
-			return -ENODEV;
-		vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
-		sc = dd->vld[vl].sc;
-		if (sc) {
-			dp.sw_index = sc->sw_index;
-			hfi1_cdbg(
-			       PKT,
-			       "Packet sent over VL %d via Send Context %u(%u)",
-			       vl, sc->sw_index, sc->hw_context);
-		}
-	}
-
-	return diagpkt_send(&dp);
-}
-
-static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name)
-{
-	int ret = 0;
-
-	dd->hfi1_snoop.mode_flag = 0;
-	spin_lock_init(&dd->hfi1_snoop.snoop_lock);
-	INIT_LIST_HEAD(&dd->hfi1_snoop.queue);
-	init_waitqueue_head(&dd->hfi1_snoop.waitq);
-
-	ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name,
-			     &snoop_file_ops,
-			     &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev,
-			     false);
-
-	if (ret) {
-		dd_dev_err(dd, "Couldn't create %s device: %d", name, ret);
-		hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev,
-				  &dd->hfi1_snoop.class_dev);
-	}
-
-	return ret;
-}
-
-static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in)
-{
-	int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE;
-	struct hfi1_devdata *dd;
-
-	dd = hfi1_lookup(unit);
-	return dd;
-}
-
-/* clear or restore send context integrity checks */
-static void adjust_integrity_checks(struct hfi1_devdata *dd)
-{
-	struct send_context *sc;
-	unsigned long sc_flags;
-	int i;
-
-	spin_lock_irqsave(&dd->sc_lock, sc_flags);
-	for (i = 0; i < dd->num_send_contexts; i++) {
-		int enable;
-
-		sc = dd->send_contexts[i].sc;
-
-		if (!sc)
-			continue;	/* not allocated */
-
-		enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
-			 dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE;
-
-		set_pio_integrity(sc);
-
-		if (enable) /* take HFI_CAP_* flags into account */
-			hfi1_init_ctxt(sc);
-	}
-	spin_unlock_irqrestore(&dd->sc_lock, sc_flags);
-}
-
-static int hfi1_snoop_open(struct inode *in, struct file *fp)
-{
-	int ret;
-	int mode_flag = 0;
-	unsigned long flags = 0;
-	struct hfi1_devdata *dd;
-	struct list_head *queue;
-
-	mutex_lock(&hfi1_mutex);
-
-	dd = hfi1_dd_from_sc_inode(in);
-	if (!dd) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	/*
-	 * File mode determines snoop or capture. Some existing user
-	 * applications expect the capture device to be able to be opened RDWR
-	 * because they expect a dedicated capture device. For this reason we
-	 * support a module param to force capture mode even if the file open
-	 * mode matches snoop.
-	 */
-	if ((fp->f_flags & O_ACCMODE) == O_RDONLY) {
-		snoop_dbg("Capture Enabled");
-		mode_flag = HFI1_PORT_CAPTURE_MODE;
-	} else if ((fp->f_flags & O_ACCMODE) == O_RDWR) {
-		snoop_dbg("Snoop Enabled");
-		mode_flag = HFI1_PORT_SNOOP_MODE;
-	} else {
-		snoop_dbg("Invalid");
-		ret =  -EINVAL;
-		goto bail;
-	}
-	queue = &dd->hfi1_snoop.queue;
-
-	/*
-	 * We are not supporting snoop and capture at the same time.
-	 */
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-	if (dd->hfi1_snoop.mode_flag) {
-		ret = -EBUSY;
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-		goto bail;
-	}
-
-	dd->hfi1_snoop.mode_flag = mode_flag;
-	drain_snoop_list(queue);
-
-	dd->hfi1_snoop.filter_callback = NULL;
-	dd->hfi1_snoop.filter_value = NULL;
-
-	/*
-	 * Send side packet integrity checks are not helpful when snooping so
-	 * disable and re-enable when we stop snooping.
-	 */
-	if (mode_flag == HFI1_PORT_SNOOP_MODE) {
-		/* clear after snoop mode is on */
-		adjust_integrity_checks(dd); /* clear */
-
-		/*
-		 * We also do not want to be doing the DLID LMC check for
-		 * ingressed packets.
-		 */
-		dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1);
-		write_csr(dd, DCC_CFG_PORT_CONFIG1,
-			  (dd->hfi1_snoop.dcc_cfg >> 32) << 32);
-	}
-
-	/*
-	 * As soon as we set these function pointers the recv and send handlers
-	 * are active. This is a race condition so we must make sure to drain
-	 * the queue and init filter values above. Technically we should add
-	 * locking here but all that will happen is on recv a packet will get
-	 * allocated and get stuck on the snoop_lock before getting added to the
-	 * queue. Same goes for send.
-	 */
-	dd->rhf_rcv_function_map = snoop_rhf_rcv_functions;
-	dd->process_pio_send = snoop_send_pio_handler;
-	dd->process_dma_send = snoop_send_pio_handler;
-	dd->pio_inline_send = snoop_inline_pio_send;
-
-	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-	ret = 0;
-
-bail:
-	mutex_unlock(&hfi1_mutex);
-
-	return ret;
-}
-
-static int hfi1_snoop_release(struct inode *in, struct file *fp)
-{
-	unsigned long flags = 0;
-	struct hfi1_devdata *dd;
-	int mode_flag;
-
-	dd = hfi1_dd_from_sc_inode(in);
-	if (!dd)
-		return -ENODEV;
-
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
-	/* clear the snoop mode before re-adjusting send context CSRs */
-	mode_flag = dd->hfi1_snoop.mode_flag;
-	dd->hfi1_snoop.mode_flag = 0;
-
-	/*
-	 * Drain the queue and clear the filters we are done with it. Don't
-	 * forget to restore the packet integrity checks
-	 */
-	drain_snoop_list(&dd->hfi1_snoop.queue);
-	if (mode_flag == HFI1_PORT_SNOOP_MODE) {
-		/* restore after snoop mode is clear */
-		adjust_integrity_checks(dd); /* restore */
-
-		/*
-		 * Also should probably reset the DCC_CONFIG1 register for DLID
-		 * checking on incoming packets again. Use the value saved when
-		 * opening the snoop device.
-		 */
-		write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg);
-	}
-
-	dd->hfi1_snoop.filter_callback = NULL;
-	kfree(dd->hfi1_snoop.filter_value);
-	dd->hfi1_snoop.filter_value = NULL;
-
-	/*
-	 * User is done snooping and capturing, return control to the normal
-	 * handler. Re-enable SDMA handling.
-	 */
-	dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
-	dd->process_pio_send = hfi1_verbs_send_pio;
-	dd->process_dma_send = hfi1_verbs_send_dma;
-	dd->pio_inline_send = pio_copy;
-
-	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-
-	snoop_dbg("snoop/capture device released");
-
-	return 0;
-}
-
-static unsigned int hfi1_snoop_poll(struct file *fp,
-				    struct poll_table_struct *wait)
-{
-	int ret = 0;
-	unsigned long flags = 0;
-
-	struct hfi1_devdata *dd;
-
-	dd = hfi1_dd_from_sc_inode(fp->f_inode);
-	if (!dd)
-		return -ENODEV;
-
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
-	poll_wait(fp, &dd->hfi1_snoop.waitq, wait);
-	if (!list_empty(&dd->hfi1_snoop.queue))
-		ret |= POLLIN | POLLRDNORM;
-
-	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-	return ret;
-}
-
-static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off)
-{
-	struct diag_pkt dpkt;
-	struct hfi1_devdata *dd;
-	size_t ret;
-	u8 byte_two, sl, sc5, sc4, vl, byte_one;
-	struct send_context *sc;
-	u32 len;
-	u64 pbc;
-	struct hfi1_ibport *ibp;
-	struct hfi1_pportdata *ppd;
-
-	dd = hfi1_dd_from_sc_inode(fp->f_inode);
-	if (!dd)
-		return -ENODEV;
-
-	ppd = dd->pport;
-	snoop_dbg("received %lu bytes from user", count);
-
-	memset(&dpkt, 0, sizeof(struct diag_pkt));
-	dpkt.version = _DIAG_PKT_VERS;
-	dpkt.unit = dd->unit;
-	dpkt.port = 1;
-
-	if (likely(!(snoop_flags & SNOOP_USE_METADATA))) {
-		/*
-		* We need to generate the PBC and not let diagpkt_send do it,
-		* to do this we need the VL and the length in dwords.
-		* The VL can be determined by using the SL and looking up the
-		* SC. Then the SC can be converted into VL. The exception to
-		* this is those packets which are from an SMI queue pair.
-		* Since we can't detect anything about the QP here we have to
-		* rely on the SC. If its 0xF then we assume its SMI and
-		* do not look at the SL.
-		*/
-		if (copy_from_user(&byte_one, data, 1))
-			return -EINVAL;
-
-		if (copy_from_user(&byte_two, data + 1, 1))
-			return -EINVAL;
-
-		sc4 = (byte_one >> 4) & 0xf;
-		if (sc4 == 0xF) {
-			snoop_dbg("Detected VL15 packet ignoring SL in packet");
-			vl = sc4;
-		} else {
-			sl = (byte_two >> 4) & 0xf;
-			ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
-			sc5 = ibp->sl_to_sc[sl];
-			vl = sc_to_vlt(dd, sc5);
-			if (vl != sc4) {
-				snoop_dbg("VL %d does not match SC %d of packet",
-					  vl, sc4);
-				return -EINVAL;
-			}
-		}
-
-		sc = dd->vld[vl].sc; /* Look up the context based on VL */
-		if (sc) {
-			dpkt.sw_index = sc->sw_index;
-			snoop_dbg("Sending on context %u(%u)", sc->sw_index,
-				  sc->hw_context);
-		} else {
-			snoop_dbg("Could not find context for vl %d", vl);
-			return -EINVAL;
-		}
-
-		len = (count >> 2) + 2; /* Add in PBC */
-		pbc = create_pbc(ppd, 0, 0, vl, len);
-	} else {
-		if (copy_from_user(&pbc, data, sizeof(pbc)))
-			return -EINVAL;
-		vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
-		sc = dd->vld[vl].sc; /* Look up the context based on VL */
-		if (sc) {
-			dpkt.sw_index = sc->sw_index;
-		} else {
-			snoop_dbg("Could not find context for vl %d", vl);
-			return -EINVAL;
-		}
-		data += sizeof(pbc);
-		count -= sizeof(pbc);
-	}
-	dpkt.len = count;
-	dpkt.data = (unsigned long)data;
-
-	snoop_dbg("PBC: vl=0x%llx Length=0x%llx",
-		  (pbc >> 12) & 0xf,
-		  (pbc & 0xfff));
-
-	dpkt.pbc = pbc;
-	ret = diagpkt_send(&dpkt);
-	/*
-	 * diagpkt_send only returns number of bytes in the diagpkt so patch
-	 * that up here before returning.
-	 */
-	if (ret == sizeof(dpkt))
-		return count;
-
-	return ret;
-}
-
-static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
-			       size_t pkt_len, loff_t *off)
-{
-	ssize_t ret = 0;
-	unsigned long flags = 0;
-	struct snoop_packet *packet = NULL;
-	struct hfi1_devdata *dd;
-
-	dd = hfi1_dd_from_sc_inode(fp->f_inode);
-	if (!dd)
-		return -ENODEV;
-
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
-	while (list_empty(&dd->hfi1_snoop.queue)) {
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-
-		if (fp->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		if (wait_event_interruptible(
-				dd->hfi1_snoop.waitq,
-				!list_empty(&dd->hfi1_snoop.queue)))
-			return -EINTR;
-
-		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-	}
-
-	if (!list_empty(&dd->hfi1_snoop.queue)) {
-		packet = list_entry(dd->hfi1_snoop.queue.next,
-				    struct snoop_packet, list);
-		list_del(&packet->list);
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-		if (pkt_len >= packet->total_len) {
-			if (copy_to_user(data, packet->data,
-					 packet->total_len))
-				ret = -EFAULT;
-			else
-				ret = packet->total_len;
-		} else {
-			ret = -EINVAL;
-		}
-
-		kfree(packet);
-	} else {
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-	}
-
-	return ret;
-}
-
-/**
- * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
- * @ppd : ptr to hfi1 port data
- * @value : options from user space
- *
- * Assumes the rest of the CM credit registers are zero from a
- * previous global or credit reset.
- * Leave shared count at zero for both global and all vls.
- * In snoop mode ideally we don't use shared credits
- * Reserve 8.5k for VL15
- * If total credits less than 8.5kbytes return error.
- * Divide the rest of the credits across VL0 to VL7 and if
- * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
- * return with an error.
- * The credit registers will be reset to zero on link negotiation or link up
- * so this function should be activated from user space only if the port has
- * gone past link negotiation and link up.
- *
- * Return -- 0 if successful else error condition
- *
- */
-static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
-					   int value)
-{
-#define  OPA_MIN_PER_VL_CREDITS  34  /* 2048 + 128 bytes */
-	struct buffer_control t;
-	int i;
-	struct hfi1_devdata *dd = ppd->dd;
-	u16  total_credits = (value >> 16) & 0xffff;
-	u16  vl15_credits = dd->vl15_init / 2;
-	u16  per_vl_credits;
-	__be16 be_per_vl_credits;
-
-	if (!(ppd->host_link_state & HLS_UP))
-		goto err_exit;
-	if (total_credits  <  vl15_credits)
-		goto err_exit;
-
-	per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
-
-	if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
-		goto err_exit;
-
-	memset(&t, 0, sizeof(t));
-	be_per_vl_credits = cpu_to_be16(per_vl_credits);
-
-	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		t.vl[i].dedicated = be_per_vl_credits;
-
-	t.vl[15].dedicated  = cpu_to_be16(vl15_credits);
-	return set_buffer_control(ppd, &t);
-
-err_exit:
-	snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
-		  ppd->host_link_state, total_credits, vl15_credits);
-
-	return -EINVAL;
-}
-
-static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-{
-	struct hfi1_devdata *dd;
-	void *filter_value = NULL;
-	long ret = 0;
-	int value = 0;
-	u8 phys_state = 0;
-	u8 link_state = 0;
-	u16 dev_state = 0;
-	unsigned long flags = 0;
-	unsigned long *argp = NULL;
-	struct hfi1_packet_filter_command filter_cmd = {0};
-	int mode_flag = 0;
-	struct hfi1_pportdata *ppd = NULL;
-	unsigned int index;
-	struct hfi1_link_info link_info;
-	int read_cmd, write_cmd, read_ok, write_ok;
-
-	dd = hfi1_dd_from_sc_inode(fp->f_inode);
-	if (!dd)
-		return -ENODEV;
-
-	mode_flag = dd->hfi1_snoop.mode_flag;
-	read_cmd = _IOC_DIR(cmd) & _IOC_READ;
-	write_cmd = _IOC_DIR(cmd) & _IOC_WRITE;
-	write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd));
-	read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd));
-
-	if ((read_cmd && !write_ok) || (write_cmd && !read_ok))
-		return -EFAULT;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if ((mode_flag & HFI1_PORT_CAPTURE_MODE) &&
-	    (cmd != HFI1_SNOOP_IOCCLEARQUEUE) &&
-	    (cmd != HFI1_SNOOP_IOCCLEARFILTER) &&
-	    (cmd != HFI1_SNOOP_IOCSETFILTER))
-		/* Capture devices are allowed only 3 operations
-		 * 1.Clear capture queue
-		 * 2.Clear capture filter
-		 * 3.Set capture filter
-		 * Other are invalid.
-		 */
-		return -EINVAL;
-
-	switch (cmd) {
-	case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA:
-		memset(&link_info, 0, sizeof(link_info));
-
-		if (copy_from_user(&link_info,
-				   (struct hfi1_link_info __user *)arg,
-				   sizeof(link_info)))
-			return -EFAULT;
-
-		value = link_info.port_state;
-		index = link_info.port_number;
-		if (index > dd->num_pports - 1)
-			return -EINVAL;
-
-		ppd = &dd->pport[index];
-		if (!ppd)
-			return -EINVAL;
-
-		/* What we want to transition to */
-		phys_state = (value >> 4) & 0xF;
-		link_state = value & 0xF;
-		snoop_dbg("Setting link state 0x%x", value);
-
-		switch (link_state) {
-		case IB_PORT_NOP:
-			if (phys_state == 0)
-				break;
-				/* fall through */
-		case IB_PORT_DOWN:
-			switch (phys_state) {
-			case 0:
-				dev_state = HLS_DN_DOWNDEF;
-				break;
-			case 2:
-				dev_state = HLS_DN_POLL;
-				break;
-			case 3:
-				dev_state = HLS_DN_DISABLE;
-				break;
-			default:
-				return -EINVAL;
-			}
-			ret = set_link_state(ppd, dev_state);
-			break;
-		case IB_PORT_ARMED:
-			ret = set_link_state(ppd, HLS_UP_ARMED);
-			if (!ret)
-				send_idle_sma(dd, SMA_IDLE_ARM);
-			break;
-		case IB_PORT_ACTIVE:
-			ret = set_link_state(ppd, HLS_UP_ACTIVE);
-			if (!ret)
-				send_idle_sma(dd, SMA_IDLE_ACTIVE);
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (ret)
-			break;
-		/* fall through */
-	case HFI1_SNOOP_IOCGETLINKSTATE:
-	case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA:
-		if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) {
-			memset(&link_info, 0, sizeof(link_info));
-			if (copy_from_user(&link_info,
-					   (struct hfi1_link_info __user *)arg,
-					   sizeof(link_info)))
-				return -EFAULT;
-			index = link_info.port_number;
-		} else {
-			ret = __get_user(index, (int __user *)arg);
-			if (ret !=  0)
-				break;
-		}
-
-		if (index > dd->num_pports - 1)
-			return -EINVAL;
-
-		ppd = &dd->pport[index];
-		if (!ppd)
-			return -EINVAL;
-
-		value = hfi1_ibphys_portstate(ppd);
-		value <<= 4;
-		value |= driver_lstate(ppd);
-
-		snoop_dbg("Link port | Link State: %d", value);
-
-		if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) ||
-		    (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) {
-			link_info.port_state = value;
-			link_info.node_guid = cpu_to_be64(ppd->guid);
-			link_info.link_speed_active =
-						ppd->link_speed_active;
-			link_info.link_width_active =
-						ppd->link_width_active;
-			if (copy_to_user((struct hfi1_link_info __user *)arg,
-					 &link_info, sizeof(link_info)))
-				return -EFAULT;
-		} else {
-			ret = __put_user(value, (int __user *)arg);
-		}
-		break;
-
-	case HFI1_SNOOP_IOCCLEARQUEUE:
-		snoop_dbg("Clearing snoop queue");
-		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-		drain_snoop_list(&dd->hfi1_snoop.queue);
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-		break;
-
-	case HFI1_SNOOP_IOCCLEARFILTER:
-		snoop_dbg("Clearing filter");
-		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-		if (dd->hfi1_snoop.filter_callback) {
-			/* Drain packets first */
-			drain_snoop_list(&dd->hfi1_snoop.queue);
-			dd->hfi1_snoop.filter_callback = NULL;
-		}
-		kfree(dd->hfi1_snoop.filter_value);
-		dd->hfi1_snoop.filter_value = NULL;
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-		break;
-
-	case HFI1_SNOOP_IOCSETFILTER:
-		snoop_dbg("Setting filter");
-		/* just copy command structure */
-		argp = (unsigned long *)arg;
-		if (copy_from_user(&filter_cmd, (void __user *)argp,
-				   sizeof(filter_cmd)))
-			return -EFAULT;
-
-		if (filter_cmd.opcode >= HFI1_MAX_FILTERS) {
-			pr_alert("Invalid opcode in request\n");
-			return -EINVAL;
-		}
-
-		snoop_dbg("Opcode %d Len %d Ptr %p",
-			  filter_cmd.opcode, filter_cmd.length,
-			  filter_cmd.value_ptr);
-
-		filter_value = kcalloc(filter_cmd.length, sizeof(u8),
-				       GFP_KERNEL);
-		if (!filter_value)
-			return -ENOMEM;
-
-		/* copy remaining data from userspace */
-		if (copy_from_user((u8 *)filter_value,
-				   (void __user *)filter_cmd.value_ptr,
-				   filter_cmd.length)) {
-			kfree(filter_value);
-			return -EFAULT;
-		}
-		/* Drain packets first */
-		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-		drain_snoop_list(&dd->hfi1_snoop.queue);
-		dd->hfi1_snoop.filter_callback =
-			hfi1_filters[filter_cmd.opcode].filter;
-		/* just in case we see back to back sets */
-		kfree(dd->hfi1_snoop.filter_value);
-		dd->hfi1_snoop.filter_value = filter_value;
-		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-		break;
-	case HFI1_SNOOP_IOCGETVERSION:
-		value = SNOOP_CAPTURE_VERSION;
-		snoop_dbg("Getting version: %d", value);
-		ret = __put_user(value, (int __user *)arg);
-		break;
-	case HFI1_SNOOP_IOCSET_OPTS:
-		snoop_flags = 0;
-		ret = __get_user(value, (int __user *)arg);
-		if (ret != 0)
-			break;
-
-		snoop_dbg("Setting snoop option %d", value);
-		if (value & SNOOP_DROP_SEND)
-			snoop_flags |= SNOOP_DROP_SEND;
-		if (value & SNOOP_USE_METADATA)
-			snoop_flags |= SNOOP_USE_METADATA;
-		if (value & (SNOOP_SET_VL0TOVL15)) {
-			ppd = &dd->pport[0];  /* first port will do */
-			ret = hfi1_assign_snoop_link_credits(ppd, value);
-		}
-		break;
-	default:
-		return -ENOTTY;
-	}
-
-	return ret;
-}
-
-static void snoop_list_add_tail(struct snoop_packet *packet,
-				struct hfi1_devdata *dd)
-{
-	unsigned long flags = 0;
-
-	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-	if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) ||
-		   (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) {
-		list_add_tail(&packet->list, &dd->hfi1_snoop.queue);
-		snoop_dbg("Added packet to list");
-	}
-
-	/*
-	 * Technically we can could have closed the snoop device while waiting
-	 * on the above lock and it is gone now. The snoop mode_flag will
-	 * prevent us from adding the packet to the queue though.
-	 */
-
-	spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-	wake_up_interruptible(&dd->hfi1_snoop.waitq);
-}
-
-static inline int hfi1_filter_check(void *val, const char *msg)
-{
-	if (!val) {
-		snoop_dbg("Error invalid %s value for filter", msg);
-		return HFI1_FILTER_ERR;
-	}
-	return 0;
-}
-
-static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value)
-{
-	struct hfi1_ib_header *hdr;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */
-		return HFI1_FILTER_HIT; /* matched */
-
-	return HFI1_FILTER_MISS; /* Not matched */
-}
-
-static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value)
-{
-	struct hfi1_ib_header *hdr;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
-		return HFI1_FILTER_HIT;
-
-	return HFI1_FILTER_MISS;
-}
-
-/* Not valid for outgoing packets, send handler passes null for data*/
-static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-				      void *value)
-{
-	struct hfi1_ib_header *hdr;
-	struct hfi1_other_headers *ohdr = NULL;
-	struct ib_smp *smp = NULL;
-	u32 qpn = 0;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(packet_data, "packet_data");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	/* Check for GRH */
-	if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
-		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-	else
-		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-
-	qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
-	if (qpn <= 1) {
-		smp = (struct ib_smp *)packet_data;
-		if (*((u8 *)value) == smp->mgmt_class)
-			return HFI1_FILTER_HIT;
-		else
-			return HFI1_FILTER_MISS;
-	}
-	return HFI1_FILTER_ERR;
-}
-
-static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value)
-{
-	struct hfi1_ib_header *hdr;
-	struct hfi1_other_headers *ohdr = NULL;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	/* Check for GRH */
-	if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
-		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-	else
-		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-	if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
-		return HFI1_FILTER_HIT;
-
-	return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
-				     void *value)
-{
-	u32 lnh = 0;
-	u8 opcode = 0;
-	struct hfi1_ib_header *hdr;
-	struct hfi1_other_headers *ohdr = NULL;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-
-	if (lnh == HFI1_LRH_BTH)
-		ohdr = &hdr->u.oth;
-	else if (lnh == HFI1_LRH_GRH)
-		ohdr = &hdr->u.l.oth;
-	else
-		return HFI1_FILTER_ERR;
-
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-	if (*((u8 *)value) == ((opcode >> 5) & 0x7))
-		return HFI1_FILTER_HIT;
-
-	return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
-					void *value)
-{
-	struct hfi1_ib_header *hdr;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF))
-		return HFI1_FILTER_HIT;
-
-	return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
-{
-	u32 lnh = 0;
-	struct hfi1_ib_header *hdr;
-	struct hfi1_other_headers *ohdr = NULL;
-	int ret;
-
-	ret = hfi1_filter_check(ibhdr, "header");
-	if (ret)
-		return ret;
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	hdr = (struct hfi1_ib_header *)ibhdr;
-
-	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-	if (lnh == HFI1_LRH_BTH)
-		ohdr = &hdr->u.oth;
-	else if (lnh == HFI1_LRH_GRH)
-		ohdr = &hdr->u.l.oth;
-	else
-		return HFI1_FILTER_ERR;
-
-	/* P_key is 16-bit entity, however top most bit indicates
-	 * type of membership. 0 for limited and 1 for Full.
-	 * Limited members cannot accept information from other
-	 * Limited members, but communication is allowed between
-	 * every other combination of membership.
-	 * Hence we'll omit comparing top-most bit while filtering
-	 */
-
-	if ((*(u16 *)value & 0x7FFF) ==
-		((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
-		return HFI1_FILTER_HIT;
-
-	return HFI1_FILTER_MISS;
-}
-
-/*
- * If packet_data is NULL then this is coming from one of the send functions.
- * Thus we know if its an ingressed or egressed packet.
- */
-static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value)
-{
-	u8 user_dir = *(u8 *)value;
-	int ret;
-
-	ret = hfi1_filter_check(value, "user");
-	if (ret)
-		return ret;
-
-	if (packet_data) {
-		/* Incoming packet */
-		if (user_dir & HFI1_SNOOP_INGRESS)
-			return HFI1_FILTER_HIT;
-	} else {
-		/* Outgoing packet */
-		if (user_dir & HFI1_SNOOP_EGRESS)
-			return HFI1_FILTER_HIT;
-	}
-
-	return HFI1_FILTER_MISS;
-}
-
-/*
- * Allocate a snoop packet. The structure that is stored in the ring buffer, not
- * to be confused with an hfi packet type.
- */
-static struct snoop_packet *allocate_snoop_packet(u32 hdr_len,
-						  u32 data_len,
-						  u32 md_len)
-{
-	struct snoop_packet *packet;
-
-	packet = kzalloc(sizeof(*packet) + hdr_len + data_len
-			 + md_len,
-			 GFP_ATOMIC | __GFP_NOWARN);
-	if (likely(packet))
-		INIT_LIST_HEAD(&packet->list);
-
-	return packet;
-}
-
-/*
- * Instead of having snoop and capture code intermixed with the recv functions,
- * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call
- * and land in here for snoop/capture but if not enabled the call will go
- * through as before. This gives us a single point to constrain all of the snoop
- * snoop recv logic. There is nothing special that needs to happen for bypass
- * packets. This routine should not try to look into the packet. It just copied
- * it. There is no guarantee for filters when it comes to bypass packets as
- * there is no specific support. Bottom line is this routine does now even know
- * what a bypass packet is.
- */
-int snoop_recv_handler(struct hfi1_packet *packet)
-{
-	struct hfi1_pportdata *ppd = packet->rcd->ppd;
-	struct hfi1_ib_header *hdr = packet->hdr;
-	int header_size = packet->hlen;
-	void *data = packet->ebuf;
-	u32 tlen = packet->tlen;
-	struct snoop_packet *s_packet = NULL;
-	int ret;
-	int snoop_mode = 0;
-	u32 md_len = 0;
-	struct capture_md md;
-
-	snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen,
-		  data);
-
-	trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size,
-			    data);
-
-	if (!ppd->dd->hfi1_snoop.filter_callback) {
-		snoop_dbg("filter not set");
-		ret = HFI1_FILTER_HIT;
-	} else {
-		ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data,
-					ppd->dd->hfi1_snoop.filter_value);
-	}
-
-	switch (ret) {
-	case HFI1_FILTER_ERR:
-		snoop_dbg("Error in filter call");
-		break;
-	case HFI1_FILTER_MISS:
-		snoop_dbg("Filter Miss");
-		break;
-	case HFI1_FILTER_HIT:
-
-		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
-			snoop_mode = 1;
-		if ((snoop_mode == 0) ||
-		    unlikely(snoop_flags & SNOOP_USE_METADATA))
-			md_len = sizeof(struct capture_md);
-
-		s_packet = allocate_snoop_packet(header_size,
-						 tlen - header_size,
-						 md_len);
-
-		if (unlikely(!s_packet)) {
-			dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
-			break;
-		}
-
-		if (md_len > 0) {
-			memset(&md, 0, sizeof(struct capture_md));
-			md.port = 1;
-			md.dir = PKT_DIR_INGRESS;
-			md.u.rhf = packet->rhf;
-			memcpy(s_packet->data, &md, md_len);
-		}
-
-		/* We should always have a header */
-		if (hdr) {
-			memcpy(s_packet->data + md_len, hdr, header_size);
-		} else {
-			dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n");
-			kfree(s_packet);
-			break;
-		}
-
-		/*
-		 * Packets with no data are possible. If there is no data needed
-		 * to take care of the last 4 bytes which are normally included
-		 * with data buffers and are included in tlen.  Since we kzalloc
-		 * the buffer we do not need to set any values but if we decide
-		 * not to use kzalloc we should zero them.
-		 */
-		if (data)
-			memcpy(s_packet->data + header_size + md_len, data,
-			       tlen - header_size);
-
-		s_packet->total_len = tlen + md_len;
-		snoop_list_add_tail(s_packet, ppd->dd);
-
-		/*
-		 * If we are snooping the packet not capturing then throw away
-		 * after adding to the list.
-		 */
-		snoop_dbg("Capturing packet");
-		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) {
-			snoop_dbg("Throwing packet away");
-			/*
-			 * If we are dropping the packet we still may need to
-			 * handle the case where error flags are set, this is
-			 * normally done by the type specific handler but that
-			 * won't be called in this case.
-			 */
-			if (unlikely(rhf_err_flags(packet->rhf)))
-				handle_eflags(packet);
-
-			/* throw the packet on the floor */
-			return RHF_RCV_CONTINUE;
-		}
-		break;
-	default:
-		break;
-	}
-
-	/*
-	 * We do not care what type of packet came in here - just pass it off
-	 * to the normal handler.
-	 */
-	return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)]
-			(packet);
-}
-
-/*
- * Handle snooping and capturing packets when sdma is being used.
- */
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc)
-{
-	pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
-	snoop_dbg("Unsupported Operation");
-	return hfi1_verbs_send_dma(qp, ps, 0);
-}
-
-/*
- * Handle snooping and capturing packets when pio is being used. Does not handle
- * bypass packets. The only way to send a bypass packet currently is to use the
- * diagpkt interface. When that interface is enable snoop/capture is not.
- */
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc)
-{
-	u32 hdrwords = qp->s_hdrwords;
-	struct rvt_sge_state *ss = qp->s_cur_sge;
-	u32 len = qp->s_cur_size;
-	u32 dwords = (len + 3) >> 2;
-	u32 plen = hdrwords + dwords + 2; /* includes pbc */
-	struct hfi1_pportdata *ppd = ps->ppd;
-	struct snoop_packet *s_packet = NULL;
-	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
-	u32 length = 0;
-	struct rvt_sge_state temp_ss;
-	void *data = NULL;
-	void *data_start = NULL;
-	int ret;
-	int snoop_mode = 0;
-	int md_len = 0;
-	struct capture_md md;
-	u32 vl;
-	u32 hdr_len = hdrwords << 2;
-	u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
-
-	md.u.pbc = 0;
-
-	snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u",
-		  hdrwords, len, plen, dwords, tlen);
-	if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
-		snoop_mode = 1;
-	if ((snoop_mode == 0) ||
-	    unlikely(snoop_flags & SNOOP_USE_METADATA))
-		md_len = sizeof(struct capture_md);
-
-	/* not using ss->total_len as arg 2 b/c that does not count CRC */
-	s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len);
-
-	if (unlikely(!s_packet)) {
-		dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
-		goto out;
-	}
-
-	s_packet->total_len = tlen + md_len;
-
-	if (md_len > 0) {
-		memset(&md, 0, sizeof(struct capture_md));
-		md.port = 1;
-		md.dir = PKT_DIR_EGRESS;
-		if (likely(pbc == 0)) {
-			vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
-			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
-		} else {
-			md.u.pbc = 0;
-		}
-		memcpy(s_packet->data, &md, md_len);
-	} else {
-		md.u.pbc = pbc;
-	}
-
-	/* Copy header */
-	if (likely(hdr)) {
-		memcpy(s_packet->data + md_len, hdr, hdr_len);
-	} else {
-		dd_dev_err(ppd->dd,
-			   "Unable to copy header to snoop/capture packet\n");
-		kfree(s_packet);
-		goto out;
-	}
-
-	if (ss) {
-		data = s_packet->data + hdr_len + md_len;
-		data_start = data;
-
-		/*
-		 * Copy SGE State
-		 * The update_sge() function below will not modify the
-		 * individual SGEs in the array. It will make a copy each time
-		 * and operate on that. So we only need to copy this instance
-		 * and it won't impact PIO.
-		 */
-		temp_ss = *ss;
-		length = len;
-
-		snoop_dbg("Need to copy %d bytes", length);
-		while (length) {
-			void *addr = temp_ss.sge.vaddr;
-			u32 slen = temp_ss.sge.length;
-
-			if (slen > length) {
-				slen = length;
-				snoop_dbg("slen %d > len %d", slen, length);
-			}
-			snoop_dbg("copy %d to %p", slen, addr);
-			memcpy(data, addr, slen);
-			update_sge(&temp_ss, slen);
-			length -= slen;
-			data += slen;
-			snoop_dbg("data is now %p bytes left %d", data, length);
-		}
-		snoop_dbg("Completed SGE copy");
-	}
-
-	/*
-	 * Why do the filter check down here? Because the event tracing has its
-	 * own filtering and we need to have the walked the SGE list.
-	 */
-	if (!ppd->dd->hfi1_snoop.filter_callback) {
-		snoop_dbg("filter not set\n");
-		ret = HFI1_FILTER_HIT;
-	} else {
-		ret = ppd->dd->hfi1_snoop.filter_callback(
-					&ps->s_txreq->phdr.hdr,
-					NULL,
-					ppd->dd->hfi1_snoop.filter_value);
-	}
-
-	switch (ret) {
-	case HFI1_FILTER_ERR:
-		snoop_dbg("Error in filter call");
-		/* fall through */
-	case HFI1_FILTER_MISS:
-		snoop_dbg("Filter Miss");
-		kfree(s_packet);
-		break;
-	case HFI1_FILTER_HIT:
-		snoop_dbg("Capturing packet");
-		snoop_list_add_tail(s_packet, ppd->dd);
-
-		if (unlikely((snoop_flags & SNOOP_DROP_SEND) &&
-			     (ppd->dd->hfi1_snoop.mode_flag &
-			      HFI1_PORT_SNOOP_MODE))) {
-			unsigned long flags;
-
-			snoop_dbg("Dropping packet");
-			if (qp->s_wqe) {
-				spin_lock_irqsave(&qp->s_lock, flags);
-				hfi1_send_complete(
-					qp,
-					qp->s_wqe,
-					IB_WC_SUCCESS);
-				spin_unlock_irqrestore(&qp->s_lock, flags);
-			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
-				spin_lock_irqsave(&qp->s_lock, flags);
-				hfi1_rc_send_complete(qp,
-						      &ps->s_txreq->phdr.hdr);
-				spin_unlock_irqrestore(&qp->s_lock, flags);
-			}
-
-			/*
-			 * If snoop is dropping the packet we need to put the
-			 * txreq back because no one else will.
-			 */
-			hfi1_put_txreq(ps->s_txreq);
-			return 0;
-		}
-		break;
-	default:
-		kfree(s_packet);
-		break;
-	}
-out:
-	return hfi1_verbs_send_pio(qp, ps, md.u.pbc);
-}
-
-/*
- * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently
- * this can be used anywhere, but the intention is for inline ACKs for RC and
- * CCA packets. We don't restrict this usage though.
- */
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
-			   u64 pbc, const void *from, size_t count)
-{
-	int snoop_mode = 0;
-	int md_len = 0;
-	struct capture_md md;
-	struct snoop_packet *s_packet = NULL;
-
-	/*
-	 * count is in dwords so we need to convert to bytes.
-	 * We also need to account for CRC which would be tacked on by hardware.
-	 */
-	int packet_len = (count << 2) + 4;
-	int ret;
-
-	snoop_dbg("ACK OUT: len %d", packet_len);
-
-	if (!dd->hfi1_snoop.filter_callback) {
-		snoop_dbg("filter not set");
-		ret = HFI1_FILTER_HIT;
-	} else {
-		ret = dd->hfi1_snoop.filter_callback(
-				(struct hfi1_ib_header *)from,
-				NULL,
-				dd->hfi1_snoop.filter_value);
-	}
-
-	switch (ret) {
-	case HFI1_FILTER_ERR:
-		snoop_dbg("Error in filter call");
-		/* fall through */
-	case HFI1_FILTER_MISS:
-		snoop_dbg("Filter Miss");
-		break;
-	case HFI1_FILTER_HIT:
-		snoop_dbg("Capturing packet");
-		if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
-			snoop_mode = 1;
-		if ((snoop_mode == 0) ||
-		    unlikely(snoop_flags & SNOOP_USE_METADATA))
-			md_len = sizeof(struct capture_md);
-
-		s_packet = allocate_snoop_packet(packet_len, 0, md_len);
-
-		if (unlikely(!s_packet)) {
-			dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n");
-			goto inline_pio_out;
-		}
-
-		s_packet->total_len = packet_len + md_len;
-
-		/* Fill in the metadata for the packet */
-		if (md_len > 0) {
-			memset(&md, 0, sizeof(struct capture_md));
-			md.port = 1;
-			md.dir = PKT_DIR_EGRESS;
-			md.u.pbc = pbc;
-			memcpy(s_packet->data, &md, md_len);
-		}
-
-		/* Add the packet data which is a single buffer */
-		memcpy(s_packet->data + md_len, from, packet_len);
-
-		snoop_list_add_tail(s_packet, dd);
-
-		if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) {
-			snoop_dbg("Dropping packet");
-			return;
-		}
-		break;
-	default:
-		break;
-	}
-
-inline_pio_out:
-	pio_copy(dd, pbuf, pbc, from, count);
-}
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
deleted file mode 100644
index bd8771570f81..000000000000
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/delay.h>
-#include "hfi.h"
-#include "common.h"
-#include "eprom.h"
-
-/*
- * The EPROM is logically divided into three partitions:
- *	partition 0: the first 128K, visible from PCI ROM BAR
- *	partition 1: 4K config file (sector size)
- *	partition 2: the rest
- */
-#define P0_SIZE (128 * 1024)
-#define P1_SIZE   (4 * 1024)
-#define P1_START P0_SIZE
-#define P2_START (P0_SIZE + P1_SIZE)
-
-/* erase sizes supported by the controller */
-#define SIZE_4KB (4 * 1024)
-#define MASK_4KB (SIZE_4KB - 1)
-
-#define SIZE_32KB (32 * 1024)
-#define MASK_32KB (SIZE_32KB - 1)
-
-#define SIZE_64KB (64 * 1024)
-#define MASK_64KB (SIZE_64KB - 1)
-
-/* controller page size, in bytes */
-#define EP_PAGE_SIZE 256
-#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1)
-
-/* controller commands */
-#define CMD_SHIFT 24
-#define CMD_NOP			    (0)
-#define CMD_PAGE_PROGRAM(addr)	    ((0x02 << CMD_SHIFT) | addr)
-#define CMD_READ_DATA(addr)	    ((0x03 << CMD_SHIFT) | addr)
-#define CMD_READ_SR1		    ((0x05 << CMD_SHIFT))
-#define CMD_WRITE_ENABLE	    ((0x06 << CMD_SHIFT))
-#define CMD_SECTOR_ERASE_4KB(addr)  ((0x20 << CMD_SHIFT) | addr)
-#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr)
-#define CMD_CHIP_ERASE		    ((0x60 << CMD_SHIFT))
-#define CMD_READ_MANUF_DEV_ID	    ((0x90 << CMD_SHIFT))
-#define CMD_RELEASE_POWERDOWN_NOID  ((0xab << CMD_SHIFT))
-#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr)
-
-/* controller interface speeds */
-#define EP_SPEED_FULL 0x2	/* full speed */
-
-/* controller status register 1 bits */
-#define SR1_BUSY 0x1ull		/* the BUSY bit in SR1 */
-
-/* sleep length while waiting for controller */
-#define WAIT_SLEEP_US 100	/* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
-
-/* GPIO pins */
-#define EPROM_WP_N BIT_ULL(14)	/* EPROM write line */
-
-/*
- * How long to wait for the EPROM to become available, in ms.
- * The spec 32 Mb EPROM takes around 40s to erase then write.
- * Double it for safety.
- */
-#define EPROM_TIMEOUT 80000 /* ms */
-
-/*
- * Turn on external enable line that allows writing on the flash.
- */
-static void write_enable(struct hfi1_devdata *dd)
-{
-	/* raise signal */
-	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
-	/* raise enable */
-	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
-}
-
-/*
- * Turn off external enable line that allows writing on the flash.
- */
-static void write_disable(struct hfi1_devdata *dd)
-{
-	/* lower signal */
-	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
-	/* lower enable */
-	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
-}
-
-/*
- * Wait for the device to become not busy.  Must be called after all
- * write or erase operations.
- */
-static int wait_for_not_busy(struct hfi1_devdata *dd)
-{
-	unsigned long count = 0;
-	u64 reg;
-	int ret = 0;
-
-	/* starts page mode */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1);
-	while (1) {
-		udelay(WAIT_SLEEP_US);
-		usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5);
-		count++;
-		reg = read_csr(dd, ASIC_EEP_DATA);
-		if ((reg & SR1_BUSY) == 0)
-			break;
-		/* 200s is the largest time for a 128Mb device */
-		if (count > COUNT_DELAY_SEC(200)) {
-			dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n");
-			ret = -ETIMEDOUT;
-			break; /* break, not goto - must stop page mode */
-		}
-	}
-
-	/* stop page mode with a NOP */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP);
-
-	return ret;
-}
-
-/*
- * Read the device ID from the SPI controller.
- */
-static u32 read_device_id(struct hfi1_devdata *dd)
-{
-	/* read the Manufacture Device ID */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID);
-	return (u32)read_csr(dd, ASIC_EEP_DATA);
-}
-
-/*
- * Erase the whole flash.
- */
-static int erase_chip(struct hfi1_devdata *dd)
-{
-	int ret;
-
-	write_enable(dd);
-
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE);
-	ret = wait_for_not_busy(dd);
-
-	write_disable(dd);
-
-	return ret;
-}
-
-/*
- * Erase a range.
- */
-static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
-{
-	u32 end = start + len;
-	int ret = 0;
-
-	if (end < start)
-		return -EINVAL;
-
-	/* check the end points for the minimum erase */
-	if ((start & MASK_4KB) || (end & MASK_4KB)) {
-		dd_dev_err(dd,
-			   "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
-			   __func__, start, end);
-		return -EINVAL;
-	}
-
-	write_enable(dd);
-
-	while (start < end) {
-		write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
-		/* check in order of largest to smallest */
-		if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) {
-			write_csr(dd, ASIC_EEP_ADDR_CMD,
-				  CMD_SECTOR_ERASE_64KB(start));
-			start += SIZE_64KB;
-		} else if (((start & MASK_32KB) == 0) &&
-			   (start + SIZE_32KB <= end)) {
-			write_csr(dd, ASIC_EEP_ADDR_CMD,
-				  CMD_SECTOR_ERASE_32KB(start));
-			start += SIZE_32KB;
-		} else {	/* 4KB will work */
-			write_csr(dd, ASIC_EEP_ADDR_CMD,
-				  CMD_SECTOR_ERASE_4KB(start));
-			start += SIZE_4KB;
-		}
-		ret = wait_for_not_busy(dd);
-		if (ret)
-			goto done;
-	}
-
-done:
-	write_disable(dd);
-
-	return ret;
-}
-
-/*
- * Read a 256 byte (64 dword) EPROM page.
- * All callers have verified the offset is at a page boundary.
- */
-static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
-{
-	int i;
-
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
-	for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
-		result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
-}
-
-/*
- * Read length bytes starting at offset.  Copy to user address addr.
- */
-static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
-{
-	u32 offset;
-	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
-	int ret = 0;
-
-	/* reject anything not on an EPROM page boundary */
-	if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
-		return -EINVAL;
-
-	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
-		read_page(dd, start + offset, buffer);
-		if (copy_to_user((void __user *)(addr + offset),
-				 buffer, EP_PAGE_SIZE)) {
-			ret = -EFAULT;
-			goto done;
-		}
-	}
-
-done:
-	return ret;
-}
-
-/*
- * Write a 256 byte (64 dword) EPROM page.
- * All callers have verified the offset is at a page boundary.
- */
-static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
-{
-	int i;
-
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
-	write_csr(dd, ASIC_EEP_DATA, data[0]);
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
-	for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
-		write_csr(dd, ASIC_EEP_DATA, data[i]);
-	/* will close the open page */
-	return wait_for_not_busy(dd);
-}
-
-/*
- * Write length bytes starting at offset.  Read from user address addr.
- */
-static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
-{
-	u32 offset;
-	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
-	int ret = 0;
-
-	/* reject anything not on an EPROM page boundary */
-	if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
-		return -EINVAL;
-
-	write_enable(dd);
-
-	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
-		if (copy_from_user(buffer, (void __user *)(addr + offset),
-				   EP_PAGE_SIZE)) {
-			ret = -EFAULT;
-			goto done;
-		}
-		ret = write_page(dd, start + offset, buffer);
-		if (ret)
-			goto done;
-	}
-
-done:
-	write_disable(dd);
-	return ret;
-}
-
-/* convert an range composite to a length, in bytes */
-static inline u32 extract_rlen(u32 composite)
-{
-	return (composite & 0xffff) * EP_PAGE_SIZE;
-}
-
-/* convert an range composite to a start, in bytes */
-static inline u32 extract_rstart(u32 composite)
-{
-	return (composite >> 16) * EP_PAGE_SIZE;
-}
-
-/*
- * Perform the given operation on the EPROM.  Called from user space.  The
- * user credentials have already been checked.
- *
- * Return 0 on success, -ERRNO on error
- */
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
-{
-	struct hfi1_devdata *dd;
-	u32 dev_id;
-	u32 rlen;	/* range length */
-	u32 rstart;	/* range start */
-	int i_minor;
-	int ret = 0;
-
-	/*
-	 * Map the device file to device data using the relative minor.
-	 * The device file minor number is the unit number + 1.  0 is
-	 * the generic device file - reject it.
-	 */
-	i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
-	if (i_minor <= 0)
-		return -EINVAL;
-	dd = hfi1_lookup(i_minor - 1);
-	if (!dd) {
-		pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
-		return -EINVAL;
-	}
-
-	/* some devices do not have an EPROM */
-	if (!dd->eprom_available)
-		return -EOPNOTSUPP;
-
-	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
-	if (ret) {
-		dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
-			   __func__);
-		goto done_asic;
-	}
-
-	dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
-		    __func__, cmd->type, cmd->len, cmd->addr);
-
-	switch (cmd->type) {
-	case HFI1_CMD_EP_INFO:
-		if (cmd->len != sizeof(u32)) {
-			ret = -ERANGE;
-			break;
-		}
-		dev_id = read_device_id(dd);
-		/* addr points to a u32 user buffer */
-		if (copy_to_user((void __user *)cmd->addr, &dev_id,
-				 sizeof(u32)))
-			ret = -EFAULT;
-		break;
-
-	case HFI1_CMD_EP_ERASE_CHIP:
-		ret = erase_chip(dd);
-		break;
-
-	case HFI1_CMD_EP_ERASE_RANGE:
-		rlen = extract_rlen(cmd->len);
-		rstart = extract_rstart(cmd->len);
-		ret = erase_range(dd, rstart, rlen);
-		break;
-
-	case HFI1_CMD_EP_READ_RANGE:
-		rlen = extract_rlen(cmd->len);
-		rstart = extract_rstart(cmd->len);
-		ret = read_length(dd, rstart, rlen, cmd->addr);
-		break;
-
-	case HFI1_CMD_EP_WRITE_RANGE:
-		rlen = extract_rlen(cmd->len);
-		rstart = extract_rstart(cmd->len);
-		ret = write_length(dd, rstart, rlen, cmd->addr);
-		break;
-
-	default:
-		dd_dev_err(dd, "%s: unexpected command %d\n",
-			   __func__, cmd->type);
-		ret = -EINVAL;
-		break;
-	}
-
-	release_chip_resource(dd, CR_EPROM);
-done_asic:
-	return ret;
-}
-
-/*
- * Initialize the EPROM handler.
- */
-int eprom_init(struct hfi1_devdata *dd)
-{
-	int ret = 0;
-
-	/* only the discrete chip has an EPROM */
-	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
-		return 0;
-
-	/*
-	 * It is OK if both HFIs reset the EPROM as long as they don't
-	 * do it at the same time.
-	 */
-	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
-	if (ret) {
-		dd_dev_err(dd,
-			   "%s: unable to acquire EPROM resource, no EPROM support\n",
-			   __func__);
-		goto done_asic;
-	}
-
-	/* reset EPROM to be sure it is in a good state */
-
-	/* set reset */
-	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
-	/* clear reset, set speed */
-	write_csr(dd, ASIC_EEP_CTL_STAT,
-		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
-
-	/* wake the device with command "release powerdown NoID" */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
-
-	dd->eprom_available = true;
-	release_chip_resource(dd, CR_EPROM);
-done_asic:
-	return ret;
-}
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index 0f3daae44bf9..b13419ce99ff 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -103,6 +103,9 @@ enum {
 	IB_OPCODE_ATOMIC_ACKNOWLEDGE                = 0x12,
 	IB_OPCODE_COMPARE_SWAP                      = 0x13,
 	IB_OPCODE_FETCH_ADD                         = 0x14,
+	/* opcode 0x15 is reserved */
+	IB_OPCODE_SEND_LAST_WITH_INVALIDATE         = 0x16,
+	IB_OPCODE_SEND_ONLY_WITH_INVALIDATE         = 0x17,
 
 	/* real constants follow -- see comment about above IB_OPCODE()
 	   macro for more details */
@@ -129,6 +132,8 @@ enum {
 	IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
 	IB_OPCODE(RC, COMPARE_SWAP),
 	IB_OPCODE(RC, FETCH_ADD),
+	IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
+	IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
 
 	/* UC */
 	IB_OPCODE(UC, SEND_FIRST),
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index d57ceee90d26..16274e2133cd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,15 +149,15 @@ struct rvt_driver_params {
 	int qpn_res_end;
 	int nports;
 	int npkeys;
-	u8 qos_shift;
 	char cq_name[RVT_CQN_MAX];
 	int node;
-	int max_rdma_atomic;
 	int psn_mask;
 	int psn_shift;
 	int psn_modify_mask;
 	u32 core_cap_flags;
 	u32 max_mad_size;
+	u8 qos_shift;
+	u8 max_rdma_atomic;
 };
 
 /* Protection domain */
@@ -426,6 +426,15 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
 }
 
 /*
+ * Return the max atomic suitable for determining
+ * the size of the ack ring buffer in a QP.
+ */
+static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
+{
+	return rdi->dparms.max_rdma_atomic + 1;
+}
+
+/*
  * Return the indexed PKEY from the port PKEY table.
  */
 static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 0e1ff2abfe92..6d23b879416a 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -211,8 +211,6 @@ struct rvt_mmap_info {
 	unsigned size;
 };
 
-#define RVT_MAX_RDMA_ATOMIC	16
-
 /*
  * This structure holds the information that the send tasklet needs
  * to send a RDMA read response or atomic operation.
@@ -282,8 +280,7 @@ struct rvt_qp {
 	atomic_t refcount ____cacheline_aligned_in_smp;
 	wait_queue_head_t wait;
 
-	struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
+	struct rvt_ack_entry *s_ack_queue;
 	struct rvt_sge_state s_rdma_read_sge;
 
 	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index a533cecab14f..98bebf8bef55 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -66,7 +66,7 @@
  * The major version changes when data structures change in an incompatible
  * way. The driver must be the same for initialization to succeed.
  */
-#define HFI1_USER_SWMAJOR 5
+#define HFI1_USER_SWMAJOR 6
 
 /*
  * Minor version differences are always compatible
@@ -75,7 +75,12 @@
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define HFI1_USER_SWMINOR 0
+#define HFI1_USER_SWMINOR 1
+
+/*
+ * We will encode the major/minor inside a single 32bit version number.
+ */
+#define HFI1_SWMAJOR_SHIFT 16
 
 /*
  * Set of HW and driver capability/feature bits.
@@ -107,19 +112,6 @@
 #define HFI1_RCVHDR_ENTSIZE_16   (1UL << 1)
 #define HFI1_RCVDHR_ENTSIZE_32   (1UL << 2)
 
-/*
- * If the unit is specified via open, HFI choice is fixed.  If port is
- * specified, it's also fixed.  Otherwise we try to spread contexts
- * across ports and HFIs, using different algorithms.  WITHIN is
- * the old default, prior to this mechanism.
- */
-#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then
-			  * ports; this is the default */
-#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin
-			  * active ports within), then next HFI */
-#define HFI1_ALG_COUNT  2 /* number of algorithm choices */
-
-
 /* User commands. */
 #define HFI1_CMD_ASSIGN_CTXT     1	/* allocate HFI and context */
 #define HFI1_CMD_CTXT_INFO       2	/* find out what resources we got */
@@ -127,7 +119,6 @@
 #define HFI1_CMD_TID_UPDATE      4	/* update expected TID entries */
 #define HFI1_CMD_TID_FREE        5	/* free expected TID entries */
 #define HFI1_CMD_CREDIT_UPD      6	/* force an update of PIO credit */
-#define HFI1_CMD_SDMA_STATUS_UPD 7      /* force update of SDMA status ring */
 
 #define HFI1_CMD_RECV_CTRL       8	/* control receipt of packets */
 #define HFI1_CMD_POLL_TYPE       9	/* set the kind of polling we want */
@@ -135,13 +126,46 @@
 #define HFI1_CMD_SET_PKEY        11     /* set context's pkey */
 #define HFI1_CMD_CTXT_RESET      12     /* reset context's HW send context */
 #define HFI1_CMD_TID_INVAL_READ  13     /* read TID cache invalidations */
-/* separate EPROM commands from normal PSM commands */
-#define HFI1_CMD_EP_INFO         64      /* read EPROM device ID */
-#define HFI1_CMD_EP_ERASE_CHIP   65      /* erase whole EPROM */
-/* range 66-74 no longer used */
-#define HFI1_CMD_EP_ERASE_RANGE  75      /* erase EPROM range */
-#define HFI1_CMD_EP_READ_RANGE   76      /* read EPROM range */
-#define HFI1_CMD_EP_WRITE_RANGE  77      /* write EPROM range */
+#define HFI1_CMD_GET_VERS	 14	/* get the version of the user cdev */
+
+/*
+ * User IOCTLs can not go above 128 if they do then see common.h and change the
+ * base for the snoop ioctl
+ */
+#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */
+
+/*
+ * Make the ioctls occupy the last 0xf0-0xff portion of the IB range
+ */
+#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0)
+
+struct hfi1_cmd;
+#define HFI1_IOCTL_ASSIGN_CTXT \
+	_IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info)
+#define HFI1_IOCTL_CTXT_INFO \
+	_IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info)
+#define HFI1_IOCTL_USER_INFO \
+	_IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info)
+#define HFI1_IOCTL_TID_UPDATE \
+	_IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info)
+#define HFI1_IOCTL_TID_FREE \
+	_IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info)
+#define HFI1_IOCTL_CREDIT_UPD \
+	_IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD))
+#define HFI1_IOCTL_RECV_CTRL \
+	_IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int)
+#define HFI1_IOCTL_POLL_TYPE \
+	_IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int)
+#define HFI1_IOCTL_ACK_EVENT \
+	_IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long)
+#define HFI1_IOCTL_SET_PKEY \
+	_IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16)
+#define HFI1_IOCTL_CTXT_RESET \
+	_IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET))
+#define HFI1_IOCTL_TID_INVAL_READ \
+	_IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info)
+#define HFI1_IOCTL_GET_VERS \
+	_IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int)
 
 #define _HFI1_EVENT_FROZEN_BIT         0
 #define _HFI1_EVENT_LINKDOWN_BIT       1
@@ -199,9 +223,7 @@ struct hfi1_user_info {
 	 * Should be set to HFI1_USER_SWVERSION.
 	 */
 	__u32 userversion;
-	__u16 pad;
-	/* HFI selection algorithm, if unit has not selected */
-	__u16 hfi1_alg;
+	__u32 pad;
 	/*
 	 * If two or more processes wish to share a context, each process
 	 * must set the subcontext_cnt and subcontext_id to the same
@@ -243,12 +265,6 @@ struct hfi1_tid_info {
 	__u32 length;
 };
 
-struct hfi1_cmd {
-	__u32 type;        /* command type */
-	__u32 len;         /* length of struct pointed to by add */
-	__u64 addr;        /* pointer to user structure */
-};
-
 enum hfi1_sdma_comp_state {
 	FREE = 0,
 	QUEUED,