summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c47
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h10
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c42
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c425
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h105
-rw-r--r--drivers/infiniband/hw/hfi1/init.c33
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c3
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c4
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c13
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c80
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h17
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h34
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c180
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h17
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c191
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h18
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h5
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c8
-rw-r--r--drivers/infiniband/hw/mlx5/main.c22
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib.h2
-rw-r--r--include/linux/mlx5/driver.h19
-rw-r--r--include/rdma/rdmavt_qp.h1
-rw-r--r--include/uapi/linux/ethtool.h1
29 files changed, 764 insertions, 623 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 39279fd630bc..5d6b1eeaa9a0 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1055,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
-static void set_partition_keys(struct hfi1_pportdata *);
+static void set_partition_keys(struct hfi1_pportdata *ppd);
 static const char *link_state_name(u32 state);
 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
 					  u32 state);
@@ -1068,9 +1068,9 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 				  int msecs);
 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
 static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
-static void handle_temp_err(struct hfi1_devdata *);
-static void dc_shutdown(struct hfi1_devdata *);
-static void dc_start(struct hfi1_devdata *);
+static void handle_temp_err(struct hfi1_devdata *dd);
+static void dc_shutdown(struct hfi1_devdata *dd);
+static void dc_start(struct hfi1_devdata *dd);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
 static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
@@ -10233,7 +10233,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	if (pstate == PLS_OFFLINE) {
 		do_transition = 0;	/* in right state */
 		do_wait = 0;		/* ...no need to wait */
-	} else if ((pstate & 0xff) == PLS_OFFLINE) {
+	} else if ((pstate & 0xf0) == PLS_OFFLINE) {
 		do_transition = 0;	/* in an offline transient state */
 		do_wait = 1;		/* ...wait for it to settle */
 	} else {
@@ -12662,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 
-int hfi1_init_ctxt(struct send_context *sc)
+void hfi1_init_ctxt(struct send_context *sc)
 {
 	if (sc) {
 		struct hfi1_devdata *dd = sc->dd;
@@ -12679,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc)
 		write_kctxt_csr(dd, sc->hw_context,
 				SEND_CTXT_CHECK_ENABLE, reg);
 	}
-	return 0;
 }
 
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
@@ -14528,30 +14527,24 @@ done:
 	return ret;
 }
 
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
 {
-	struct hfi1_ctxtdata *rcd;
-	unsigned sctxt;
-	int ret = 0;
+	u8 hw_ctxt;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts) {
-		rcd = dd->rcd[ctxt];
-	} else {
-		ret = -EINVAL;
-		goto done;
-	}
-	if (!rcd || !rcd->sc) {
-		ret = -EINVAL;
-		goto done;
-	}
-	sctxt = rcd->sc->hw_context;
-	reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+	if (!ctxt || !ctxt->sc)
+		return -EINVAL;
+
+	if (ctxt->ctxt >= dd->num_rcv_contexts)
+		return -EINVAL;
+
+	hw_ctxt = ctxt->sc->hw_context;
+	reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
 	reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
-	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
-	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
-done:
-	return ret;
+	write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+	write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
+
+	return 0;
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b9dbf16d7703..cbe455d9ab8b 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -636,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
 	write_csr(dd, offset0 + (0x1000 * ctxt), value);
 }
 
-u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
+u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
+	       u32 dw_len);
 
 /* firmware.c */
 #define SBUS_MASTER_BROADCAST 0xfd
@@ -728,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd);
 void set_intr_state(struct hfi1_devdata *dd, u32 enable);
 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
 				 int refresh_widths);
-void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32);
+void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
+		    u32 intr_adjust, u32 npkts);
 int stop_drain_data_vls(struct hfi1_devdata *dd);
 int open_fill_data_vls(struct hfi1_devdata *dd);
 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns);
@@ -1347,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd);
 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
 struct ib_header *hfi1_get_msgheader(
 				struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_init_ctxt(struct send_context *sc);
+void hfi1_init_ctxt(struct send_context *sc);
 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
 		  u32 type, unsigned long pa, u16 order);
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
@@ -1360,7 +1362,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
 void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
 void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 527895487175..a50870e455a3 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -85,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO);
 MODULE_PARM_DESC(cu, "Credit return units");
 
 unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
-static int hfi1_caps_set(const char *, const struct kernel_param *);
-static int hfi1_caps_get(char *, const struct kernel_param *);
+static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
+static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
 static const struct kernel_param_ops cap_ops = {
 	.set = hfi1_caps_set,
 	.get = hfi1_caps_get
@@ -211,42 +211,6 @@ int hfi1_count_active_units(void)
 }
 
 /*
- * Return count of all units, optionally return in arguments
- * the number of usable (present) units, and the number of
- * ports that are up.
- */
-int hfi1_count_units(int *npresentp, int *nupp)
-{
-	int nunits = 0, npresent = 0, nup = 0;
-	struct hfi1_devdata *dd;
-	unsigned long flags;
-	int pidx;
-	struct hfi1_pportdata *ppd;
-
-	spin_lock_irqsave(&hfi1_devs_lock, flags);
-
-	list_for_each_entry(dd, &hfi1_dev_list, list) {
-		nunits++;
-		if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
-			npresent++;
-		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
-			ppd = dd->pport + pidx;
-			if (ppd->lid && ppd->linkup)
-				nup++;
-		}
-	}
-
-	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
-	if (npresentp)
-		*npresentp = npresent;
-	if (nupp)
-		*nupp = nup;
-
-	return nunits;
-}
-
-/*
  * Get address of eager buffer from it's index (allocated in chunks, not
  * contiguous).
  */
@@ -1325,7 +1289,7 @@ int hfi1_reset_device(int unit)
 	if (dd->rcd)
 		for (i = dd->first_dyn_alloc_ctxt;
 		     i < dd->num_rcv_contexts; i++) {
-			if (!dd->rcd[i] || !dd->rcd[i]->cnt)
+			if (!dd->rcd[i])
 				continue;
 			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
 			ret = -EBUSY;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 3d9bce4bfcc7..3158128d57e8 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -49,6 +49,7 @@
 #include <linux/vmalloc.h>
 #include <linux/io.h>
 #include <linux/sched/mm.h>
+#include <linux/bitmap.h>
 
 #include <rdma/ib.h>
 
@@ -70,30 +71,37 @@
 /*
  * File operation functions
  */
-static int hfi1_file_open(struct inode *, struct file *);
-static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
-static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
-static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
-
-static u64 kvirt_to_phys(void *);
-static int assign_ctxt(struct file *, struct hfi1_user_info *);
-static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
-static int user_init(struct file *);
-static int get_ctxt_info(struct file *, void __user *, __u32);
-static int get_base_info(struct file *, void __user *, __u32);
-static int setup_ctxt(struct file *);
-static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *, int);
-static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
-static int allocate_ctxt(struct file *, struct hfi1_devdata *,
-			 struct hfi1_user_info *);
-static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
-static unsigned int poll_next(struct file *, struct poll_table_struct *);
-static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
-static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
-static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_fault *);
+static int hfi1_file_open(struct inode *inode, struct file *fp);
+static int hfi1_file_close(struct inode *inode, struct file *fp);
+static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
+static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
+static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
+
+static u64 kvirt_to_phys(void *addr);
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
+static int init_subctxts(struct hfi1_ctxtdata *uctxt,
+			 const struct hfi1_user_info *uinfo);
+static int init_user_ctxt(struct hfi1_filedata *fd);
+static void user_init(struct hfi1_ctxtdata *uctxt);
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+			 __u32 len);
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+			 __u32 len);
+static int setup_base_ctxt(struct hfi1_filedata *fd);
+static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
+
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+			 const struct hfi1_user_info *uinfo);
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
+			 struct hfi1_user_info *uinfo);
+static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
+static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+			  unsigned long events);
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+		       int start_stop);
+static int vma_fault(struct vm_fault *vmf);
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 			    unsigned long arg);
 
@@ -173,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 					       struct hfi1_devdata,
 					       user_cdev);
 
+	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
+		return -EINVAL;
+
 	if (!atomic_inc_not_zero(&dd->user_refcount))
 		return -ENXIO;
 
@@ -187,6 +198,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 		fd->rec_cpu_num = -1; /* no cpu affinity by default */
 		fd->mm = current->mm;
 		mmgrab(fd->mm);
+		fd->dd = dd;
 		fp->private_data = fd;
 	} else {
 		fp->private_data = NULL;
@@ -229,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 				   sizeof(uinfo)))
 			return -EFAULT;
 
-		ret = assign_ctxt(fp, &uinfo);
-		if (ret < 0)
-			return ret;
-		ret = setup_ctxt(fp);
-		if (ret)
-			return ret;
-		ret = user_init(fp);
+		ret = assign_ctxt(fd, &uinfo);
 		break;
 	case HFI1_IOCTL_CTXT_INFO:
-		ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
+		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
 				    sizeof(struct hfi1_ctxt_info));
 		break;
 	case HFI1_IOCTL_USER_INFO:
-		ret = get_base_info(fp, (void __user *)(unsigned long)arg,
+		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
 				    sizeof(struct hfi1_base_info));
 		break;
 	case HFI1_IOCTL_CREDIT_UPD:
@@ -256,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 				   sizeof(tinfo)))
 			return -EFAULT;
 
-		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
 		if (!ret) {
 			/*
 			 * Copy the number of tidlist entries we used
@@ -278,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 				   sizeof(tinfo)))
 			return -EFAULT;
 
-		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
 		if (ret)
 			break;
 		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -293,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 				   sizeof(tinfo)))
 			return -EFAULT;
 
-		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
 		if (ret)
 			break;
 		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -430,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 		unsigned long count = 0;
 
 		ret = hfi1_user_sdma_process_request(
-			kiocb->ki_filp,	(struct iovec *)(from->iov + done),
+			fd, (struct iovec *)(from->iov + done),
 			dim, &count);
 		if (ret) {
 			reqs = ret;
@@ -756,6 +762,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	/* release the cpu */
 	hfi1_put_proc_affinity(fdata->rec_cpu_num);
 
+	/* clean up rcv side */
+	hfi1_user_exp_rcv_free(fdata);
+
 	/*
 	 * Clear any left over, unhandled events so the next process that
 	 * gets this context doesn't get confused.
@@ -764,8 +773,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
 	*ev = 0;
 
-	if (--uctxt->cnt) {
-		uctxt->active_slaves &= ~(1 << fdata->subctxt);
+	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
 		mutex_unlock(&hfi1_mutex);
 		goto done;
 	}
@@ -795,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 
 	dd->rcd[uctxt->ctxt] = NULL;
 
-	hfi1_user_exp_rcv_free(fdata);
-	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+	hfi1_user_exp_rcv_grp_free(uctxt);
+	hfi1_clear_ctxt_pkey(dd, uctxt);
 
 	uctxt->rcvwait_to = 0;
 	uctxt->piowait_to = 0;
@@ -836,127 +845,135 @@ static u64 kvirt_to_phys(void *addr)
 	return paddr;
 }
 
-static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
 {
-	int i_minor, ret = 0;
+	int ret;
 	unsigned int swmajor, swminor;
 
 	swmajor = uinfo->userversion >> 16;
-	if (swmajor != HFI1_USER_SWMAJOR) {
-		ret = -ENODEV;
-		goto done;
-	}
+	if (swmajor != HFI1_USER_SWMAJOR)
+		return -ENODEV;
 
 	swminor = uinfo->userversion & 0xffff;
 
 	mutex_lock(&hfi1_mutex);
-	/* First, lets check if we need to setup a shared context? */
+	/*
+	 * Get a sub context if necessary.
+	 * ret < 0 error, 0 no context, 1 sub-context found
+	 */
+	ret = 0;
 	if (uinfo->subctxt_cnt) {
-		struct hfi1_filedata *fd = fp->private_data;
-
-		ret = find_shared_ctxt(fp, uinfo);
-		if (ret < 0)
-			goto done_unlock;
-		if (ret) {
+		ret = find_sub_ctxt(fd, uinfo);
+		if (ret > 0)
 			fd->rec_cpu_num =
 				hfi1_get_proc_affinity(fd->uctxt->numa_id);
-		}
 	}
 
 	/*
-	 * We execute the following block if we couldn't find a
-	 * shared context or if context sharing is not required.
+	 * Allocate a base context if context sharing is not required or we
+	 * couldn't find a sub context.
 	 */
-	if (!ret) {
-		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
-		ret = get_user_context(fp, uinfo, i_minor);
-	}
-done_unlock:
+	if (!ret)
+		ret = allocate_ctxt(fd, fd->dd, uinfo);
+
 	mutex_unlock(&hfi1_mutex);
-done:
+
+	/* Depending on the context type, do the appropriate init */
+	if (ret > 0) {
+		/*
+		 * sub-context info can only be set up after the base
+		 * context has been completed.
+		 */
+		ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
+					       HFI1_CTXT_BASE_UNINIT,
+					       &fd->uctxt->event_flags));
+		if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
+			clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+			return -ENOMEM;
+		}
+		/* The only thing a sub context needs is the user_xxx stuff */
+		if (!ret)
+			ret = init_user_ctxt(fd);
+
+		if (ret)
+			clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+	} else if (!ret) {
+		ret = setup_base_ctxt(fd);
+		if (fd->uctxt->subctxt_cnt) {
+			/* If there is an error, set the failed bit. */
+			if (ret)
+				set_bit(HFI1_CTXT_BASE_FAILED,
+					&fd->uctxt->event_flags);
+			/*
+			 * Base context is done, notify anybody using a
+			 * sub-context that is waiting for this completion
+			 */
+			clear_bit(HFI1_CTXT_BASE_UNINIT,
+				  &fd->uctxt->event_flags);
+			wake_up(&fd->uctxt->wait);
+		}
+	}
+
 	return ret;
 }
 
-static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
-			    int devno)
+/*
+ * The hfi1_mutex must be held when this function is called.  It is
+ * necessary to ensure serialized access to the bitmask in_use_ctxts.
+ */
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+			 const struct hfi1_user_info *uinfo)
 {
-	struct hfi1_devdata *dd = NULL;
-	int devmax, npresent, nup;
+	int i;
+	struct hfi1_devdata *dd = fd->dd;
+	u16 subctxt;
 
-	devmax = hfi1_count_units(&npresent, &nup);
-	if (!npresent)
-		return -ENXIO;
+	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
+		struct hfi1_ctxtdata *uctxt = dd->rcd[i];
 
-	if (!nup)
-		return -ENETDOWN;
+		/* Skip ctxts which are not yet open */
+		if (!uctxt ||
+		    bitmap_empty(uctxt->in_use_ctxts,
+				 HFI1_MAX_SHARED_CTXTS))
+			continue;
 
-	dd = hfi1_lookup(devno);
-	if (!dd)
-		return -ENODEV;
-	else if (!dd->freectxts)
-		return -EBUSY;
+		/* Skip dynamically allocted kernel contexts */
+		if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+			continue;
 
-	return allocate_ctxt(fp, dd, uinfo);
-}
+		/* Skip ctxt if it doesn't match the requested one */
+		if (memcmp(uctxt->uuid, uinfo->uuid,
+			   sizeof(uctxt->uuid)) ||
+		    uctxt->jkey != generate_jkey(current_uid()) ||
+		    uctxt->subctxt_id != uinfo->subctxt_id ||
+		    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
+			continue;
 
-static int find_shared_ctxt(struct file *fp,
-			    const struct hfi1_user_info *uinfo)
-{
-	int devmax, ndev, i;
-	int ret = 0;
-	struct hfi1_filedata *fd = fp->private_data;
+		/* Verify the sharing process matches the master */
+		if (uctxt->userversion != uinfo->userversion)
+			return -EINVAL;
 
-	devmax = hfi1_count_units(NULL, NULL);
+		/* Find an unused context */
+		subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
+					      HFI1_MAX_SHARED_CTXTS);
+		if (subctxt >= uctxt->subctxt_cnt)
+			return -EBUSY;
 
-	for (ndev = 0; ndev < devmax; ndev++) {
-		struct hfi1_devdata *dd = hfi1_lookup(ndev);
+		fd->uctxt = uctxt;
+		fd->subctxt = subctxt;
+		__set_bit(fd->subctxt, uctxt->in_use_ctxts);
 
-		if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
-			continue;
-		for (i = dd->first_dyn_alloc_ctxt;
-		     i < dd->num_rcv_contexts; i++) {
-			struct hfi1_ctxtdata *uctxt = dd->rcd[i];
-
-			/* Skip ctxts which are not yet open */
-			if (!uctxt || !uctxt->cnt)
-				continue;
-
-			/* Skip dynamically allocted kernel contexts */
-			if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
-				continue;
-
-			/* Skip ctxt if it doesn't match the requested one */
-			if (memcmp(uctxt->uuid, uinfo->uuid,
-				   sizeof(uctxt->uuid)) ||
-			    uctxt->jkey != generate_jkey(current_uid()) ||
-			    uctxt->subctxt_id != uinfo->subctxt_id ||
-			    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
-				continue;
-
-			/* Verify the sharing process matches the master */
-			if (uctxt->userversion != uinfo->userversion ||
-			    uctxt->cnt >= uctxt->subctxt_cnt) {
-				ret = -EINVAL;
-				goto done;
-			}
-			fd->uctxt = uctxt;
-			fd->subctxt  = uctxt->cnt++;
-			uctxt->active_slaves |= 1 << fd->subctxt;
-			ret = 1;
-			goto done;
-		}
+		return 1;
 	}
 
-done:
-	return ret;
+	return 0;
 }
 
-static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
 			 struct hfi1_user_info *uinfo)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt;
-	unsigned ctxt;
+	unsigned int ctxt;
 	int ret, numa;
 
 	if (dd->flags & HFI1_FROZEN) {
@@ -970,6 +987,14 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 		return -EIO;
 	}
 
+	/*
+	 * This check is sort of redundant to the next EBUSY error. It would
+	 * also indicate an inconsistancy in the driver if this value was
+	 * zero, but there were still contexts available.
+	 */
+	if (!dd->freectxts)
+		return -EBUSY;
+
 	for (ctxt = dd->first_dyn_alloc_ctxt;
 	     ctxt < dd->num_rcv_contexts; ctxt++)
 		if (!dd->rcd[ctxt])
@@ -1013,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 		goto ctxdata_free;
 
 	/*
-	 * Setup shared context resources if the user-level has requested
-	 * shared contexts and this is the 'master' process.
+	 * Setup sub context resources if the user-level has requested
+	 * sub contexts.
 	 * This has to be done here so the rest of the sub-contexts find the
 	 * proper master.
 	 */
-	if (uinfo->subctxt_cnt && !fd->subctxt) {
+	if (uinfo->subctxt_cnt) {
 		ret = init_subctxts(uctxt, uinfo);
 		/*
 		 * On error, we don't need to disable and de-allocate the
@@ -1055,7 +1080,7 @@ ctxdata_free:
 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 			 const struct hfi1_user_info *uinfo)
 {
-	unsigned num_subctxts;
+	u16 num_subctxts;
 
 	num_subctxts = uinfo->subctxt_cnt;
 	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
@@ -1063,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 
 	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
 	uctxt->subctxt_id = uinfo->subctxt_id;
-	uctxt->active_slaves = 1;
 	uctxt->redirect_seq_cnt = 1;
-	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
+	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
 
 	return 0;
 }
@@ -1073,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
 {
 	int ret = 0;
-	unsigned num_subctxts = uctxt->subctxt_cnt;
+	u16 num_subctxts = uctxt->subctxt_cnt;
 
 	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
-	if (!uctxt->subctxt_uregbase) {
-		ret = -ENOMEM;
-		goto bail;
-	}
+	if (!uctxt->subctxt_uregbase)
+		return -ENOMEM;
+
 	/* We can take the size of the RcvHdr Queue from the master */
 	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
 						  num_subctxts);
@@ -1094,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
 		ret = -ENOMEM;
 		goto bail_rhdr;
 	}
-	goto bail;
+
+	return 0;
+
 bail_rhdr:
 	vfree(uctxt->subctxt_rcvhdr_base);
+	uctxt->subctxt_rcvhdr_base = NULL;
 bail_ureg:
 	vfree(uctxt->subctxt_uregbase);
 	uctxt->subctxt_uregbase = NULL;
-bail:
+
 	return ret;
 }
 
-static int user_init(struct file *fp)
+static void user_init(struct hfi1_ctxtdata *uctxt)
 {
 	unsigned int rcvctrl_ops = 0;
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-
-	/* make sure that the context has already been setup */
-	if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
-		return -EFAULT;
 
 	/* initialize poll variables... */
 	uctxt->urgent = 0;
@@ -1160,20 +1180,12 @@ static int user_init(struct file *fp)
 	else
 		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
 	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
-
-	/* Notify any waiting slaves */
-	if (uctxt->subctxt_cnt) {
-		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-		wake_up(&uctxt->wait);
-	}
-
-	return 0;
 }
 
-static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+			 __u32 len)
 {
 	struct hfi1_ctxt_info cinfo;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	int ret = 0;
 
@@ -1211,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
 	return ret;
 }
 
-static int setup_ctxt(struct file *fp)
+static int init_user_ctxt(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	int ret;
+
+	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
+	if (ret)
+		return ret;
+
+	ret = hfi1_user_exp_rcv_init(fd);
+
+	return ret;
+}
+
+static int setup_base_ctxt(struct hfi1_filedata *fd)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	int ret = 0;
 
-	/*
-	 * Context should be set up only once, including allocation and
-	 * programming of eager buffers. This is done if context sharing
-	 * is not requested or by the master process.
-	 */
-	if (!uctxt->subctxt_cnt || !fd->subctxt) {
-		ret = hfi1_init_ctxt(uctxt->sc);
-		if (ret)
-			goto done;
+	hfi1_init_ctxt(uctxt->sc);
 
-		/* Now allocate the RcvHdr queue and eager buffers. */
-		ret = hfi1_create_rcvhdrq(dd, uctxt);
-		if (ret)
-			goto done;
-		ret = hfi1_setup_eagerbufs(uctxt);
-		if (ret)
-			goto done;
-		if (uctxt->subctxt_cnt && !fd->subctxt) {
-			ret = setup_subctxt(uctxt);
-			if (ret)
-				goto done;
-		}
-	} else {
-		ret = wait_event_interruptible(uctxt->wait, !test_bit(
-					       HFI1_CTXT_MASTER_UNINIT,
-					       &uctxt->event_flags));
-		if (ret)
-			goto done;
-	}
+	/* Now allocate the RcvHdr queue and eager buffers. */
+	ret = hfi1_create_rcvhdrq(dd, uctxt);
+	if (ret)
+		return ret;
 
-	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
+	ret = hfi1_setup_eagerbufs(uctxt);
 	if (ret)
-		goto done;
-	/*
-	 * Expected receive has to be setup for all processes (including
-	 * shared contexts). However, it has to be done after the master
-	 * context has been fully configured as it depends on the
-	 * eager/expected split of the RcvArray entries.
-	 * Setting it up here ensures that the subcontexts will be waiting
-	 * (due to the above wait_event_interruptible() until the master
-	 * is setup.
-	 */
-	ret = hfi1_user_exp_rcv_init(fp);
+		goto setup_failed;
+
+	/* If sub-contexts are enabled, do the appropriate setup */
+	if (uctxt->subctxt_cnt)
+		ret = setup_subctxt(uctxt);
 	if (ret)
-		goto done;
+		goto setup_failed;
 
-	set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
-done:
+	ret = hfi1_user_exp_rcv_grp_init(fd);
+	if (ret)
+		goto setup_failed;
+
+	ret = init_user_ctxt(fd);
+	if (ret)
+		goto setup_failed;
+
+	user_init(uctxt);
+
+	return 0;
+
+setup_failed:
+	hfi1_free_ctxtdata(dd, uctxt);
 	return ret;
 }
 
-static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+			 __u32 len)
 {
 	struct hfi1_base_info binfo;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	ssize_t sz;
 	unsigned offset;
 	int ret = 0;
 
-	trace_hfi1_uctxtdata(uctxt->dd, uctxt);
+	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);
 
 	memset(&binfo, 0, sizeof(binfo));
 	binfo.hw_version = dd->revision;
@@ -1443,7 +1451,7 @@ done:
  * overflow conditions.  start_stop==1 re-enables, to be used to
  * re-init the software copy of the head register
  */
-static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
 		       int start_stop)
 {
 	struct hfi1_devdata *dd = uctxt->dd;
@@ -1478,7 +1486,7 @@ bail:
  * User process then performs actions appropriate to bit having been
  * set, if desired, and checks again in future.
  */
-static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
 			  unsigned long events)
 {
 	int i;
@@ -1499,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
 	return 0;
 }
 
-static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
-			 u16 pkey)
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
 {
 	int ret = -ENOENT, i, intable = 0;
 	struct hfi1_pportdata *ppd = uctxt->ppd;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 14063bd30c2a..da322e6668cc 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -196,12 +196,6 @@ struct hfi1_ctxtdata {
 	void *rcvhdrq;
 	/* kernel virtual address where hdrqtail is updated */
 	volatile __le64 *rcvhdrtail_kvaddr;
-	/*
-	 * Shared page for kernel to signal user processes that send buffers
-	 * need disarming.  The process should call HFI1_CMD_DISARM_BUFS
-	 * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
-	 */
-	unsigned long *user_event_mask;
 	/* when waiting for rcv or pioavail */
 	wait_queue_head_t wait;
 	/* rcvhdrq size (for freeing) */
@@ -224,13 +218,12 @@ struct hfi1_ctxtdata {
 	 * (ignoring forks, dup, etc. for now)
 	 */
 	int cnt;
+	/* Device context index */
+	unsigned ctxt;
 	/*
-	 * how much space to leave at start of eager TID entries for
-	 * protocol use, on each TID
+	 * non-zero if ctxt can be shared, and defines the maximum number of
+	 * sub-contexts for this device context.
 	 */
-	/* instead of calculating it */
-	unsigned ctxt;
-	/* non-zero if ctxt is being shared. */
 	u16 subctxt_cnt;
 	/* non-zero if ctxt is being shared. */
 	u16 subctxt_id;
@@ -288,10 +281,10 @@ struct hfi1_ctxtdata {
 	void *subctxt_rcvegrbuf;
 	/* An array of pages for the eager header queue entries * N */
 	void *subctxt_rcvhdr_base;
+	/* Bitmask of in use context(s) */
+	DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
 	/* The version of the library which opened this ctxt */
 	u32 userversion;
-	/* Bitmask of active slaves */
-	u32 active_slaves;
 	/* Type of packets or conditions we want to poll for */
 	u16 poll_type;
 	/* receive packet sequence counter */
@@ -1238,10 +1231,11 @@ struct mmu_rb_handler;
 
 /* Private data for file operations */
 struct hfi1_filedata {
+	struct hfi1_devdata *dd;
 	struct hfi1_ctxtdata *uctxt;
-	unsigned subctxt;
 	struct hfi1_user_sdma_comp_q *cq;
 	struct hfi1_user_sdma_pkt_q *pq;
+	u16 subctxt;
 	/* for cpu affinity; -1 if none */
 	int rec_cpu_num;
 	u32 tid_n_pinned;
@@ -1263,27 +1257,27 @@ struct hfi1_devdata *hfi1_lookup(int unit);
 extern u32 hfi1_cpulist_count;
 extern unsigned long *hfi1_cpulist;
 
-int hfi1_init(struct hfi1_devdata *, int);
-int hfi1_count_units(int *npresentp, int *nupp);
+int hfi1_init(struct hfi1_devdata *dd, int reinit);
 int hfi1_count_active_units(void);
 
-int hfi1_diag_add(struct hfi1_devdata *);
-void hfi1_diag_remove(struct hfi1_devdata *);
+int hfi1_diag_add(struct hfi1_devdata *dd);
+void hfi1_diag_remove(struct hfi1_devdata *dd);
 void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
 
 void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
 
-int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
+int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
 int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
-void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
-			 struct hfi1_devdata *, u8, u8);
-void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-
-int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+					   int numa);
+void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
+			 struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
+void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+
+int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
 void set_all_slowpath(struct hfi1_devdata *dd);
 void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
 void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
@@ -1580,7 +1574,7 @@ bad:
 
 u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
 int mtu_to_enum(u32 mtu, int default_if_bad);
-u16 enum_to_mtu(int);
+u16 enum_to_mtu(int mtu);
 static inline int valid_ib_mtu(unsigned int mtu)
 {
 	return mtu == 256 || mtu == 512 ||
@@ -1594,15 +1588,15 @@ static inline int valid_opa_max_mtu(unsigned int mtu)
 		(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
 }
 
-int set_mtu(struct hfi1_pportdata *);
+int set_mtu(struct hfi1_pportdata *ppd);
 
-int hfi1_set_lid(struct hfi1_pportdata *, u32, u8);
-void hfi1_disable_after_error(struct hfi1_devdata *);
-int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int);
-int hfi1_rcvbuf_validate(u32, u8, u16 *);
+int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
+void hfi1_disable_after_error(struct hfi1_devdata *dd);
+int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
 
-int fm_get_table(struct hfi1_pportdata *, int, void *);
-int fm_set_table(struct hfi1_pportdata *, int, void *);
+int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
+int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
 
 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
@@ -1724,19 +1718,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 #define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
 
 /* ctxt_flag bit offsets */
-		/* context has been setup */
-#define HFI1_CTXT_SETUP_DONE 1
+		/* base context has not finished initializing */
+#define HFI1_CTXT_BASE_UNINIT 1
+		/* base context initaliation failed */
+#define HFI1_CTXT_BASE_FAILED 2
 		/* waiting for a packet to arrive */
-#define HFI1_CTXT_WAITING_RCV   2
-		/* master has not finished initializing */
-#define HFI1_CTXT_MASTER_UNINIT 4
+#define HFI1_CTXT_WAITING_RCV 3
 		/* waiting for an urgent packet to arrive */
-#define HFI1_CTXT_WAITING_URG 5
+#define HFI1_CTXT_WAITING_URG 4
 
 /* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
-				  const struct pci_device_id *);
-void hfi1_free_devdata(struct hfi1_devdata *);
+struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
+				  const struct pci_device_id *ent);
+void hfi1_free_devdata(struct hfi1_devdata *dd);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
 /* LED beaconing functions */
@@ -1811,23 +1805,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 
 extern const char ib_hfi1_version[];
 
-int hfi1_device_create(struct hfi1_devdata *);
-void hfi1_device_remove(struct hfi1_devdata *);
+int hfi1_device_create(struct hfi1_devdata *dd);
+void hfi1_device_remove(struct hfi1_devdata *dd);
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 			   struct kobject *kobj);
-int hfi1_verbs_register_sysfs(struct hfi1_devdata *);
-void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *);
+int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
+void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
 /* Hook for sysfs read of QSFP */
 int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
-int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
-void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
+int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
+void hfi1_pcie_cleanup(struct pci_dev *pdev);
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
-int pcie_speeds(struct hfi1_devdata *);
-void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *);
-void hfi1_enable_intx(struct pci_dev *);
+int pcie_speeds(struct hfi1_devdata *dd);
+void request_msix(struct hfi1_devdata *dd, u32 *nent,
+		  struct hfi1_msix_entry *entry);
+void hfi1_enable_intx(struct pci_dev *pdev);
 void restore_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 4d6b9f82efa3..4a11d4da4c92 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -53,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <linux/bitmap.h>
 #include <rdma/rdma_vt.h>
 
 #include "hfi.h"
@@ -70,6 +71,7 @@
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
 /*
  * min buffers we want to have per context, after driver
  */
@@ -101,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25;
 module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
 MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
 
-static uint eager_buffer_size = (2 << 20); /* 2MB */
+static uint eager_buffer_size = (8 << 20); /* 8MB */
 module_param(eager_buffer_size, uint, S_IRUGO);
-MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB");
+MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
 
 static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
 module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
@@ -117,7 +119,7 @@ unsigned int user_credit_return_threshold = 33;	/* default is 33% */
 module_param(user_credit_return_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
 
-static inline u64 encode_rcv_header_entry_size(u16);
+static inline u64 encode_rcv_header_entry_size(u16 size);
 
 static struct idr hfi1_unit_table;
 u32 hfi1_cpulist_count;
@@ -175,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 			goto nomem;
 		}
 
-		ret = hfi1_init_ctxt(rcd->sc);
-		if (ret < 0) {
-			dd_dev_err(dd,
-				   "Failed to setup kernel receive context, failing\n");
-			ret = -EFAULT;
-			goto bail;
-		}
+		hfi1_init_ctxt(rcd->sc);
 	}
 
 	/*
@@ -193,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 	return 0;
 nomem:
 	ret = -ENOMEM;
-bail:
+
 	if (dd->rcd) {
 		for (i = 0; i < dd->num_rcv_contexts; ++i)
 			hfi1_free_ctxtdata(dd, dd->rcd[i]);
@@ -227,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 		INIT_LIST_HEAD(&rcd->qp_wait_list);
 		rcd->ppd = ppd;
 		rcd->dd = dd;
-		rcd->cnt = 1;
+		__set_bit(0, rcd->in_use_ctxts);
 		rcd->ctxt = ctxt;
 		dd->rcd[ctxt] = rcd;
 		rcd->numa_id = numa;
@@ -623,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd)
 				alloc_workqueue(
 				    "hfi%d_%d",
 				    WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
-				    dd->num_sdma,
+				    HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
 				    dd->unit, pidx);
 			if (!ppd->hfi1_wq)
 				goto wq_error;
@@ -968,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	kfree(rcd->egrbufs.buffers);
 
 	sc_free(rcd->sc);
-	vfree(rcd->user_event_mask);
 	vfree(rcd->subctxt_uregbase);
 	vfree(rcd->subctxt_rcvegrbuf);
 	vfree(rcd->subctxt_rcvhdr_base);
@@ -1687,8 +1682,6 @@ bail_free:
 	dd_dev_err(dd,
 		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
 		   rcd->ctxt);
-	vfree(rcd->user_event_mask);
-	rcd->user_event_mask = NULL;
 	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
 			  rcd->rcvhdrq_dma);
 	rcd->rcvhdrq = NULL;
@@ -1777,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
 				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
 					   rcd->ctxt);
+				ret = -ENOMEM;
 				goto bail_rcvegrbuf_phys;
 			}
 
@@ -1854,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			  "ctxt%u: current Eager buffer size is invalid %u\n",
 			  rcd->ctxt, rcd->egrbufs.rcvtid_size);
 		ret = -EINVAL;
-		goto bail;
+		goto bail_rcvegrbuf_phys;
 	}
 
 	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
@@ -1862,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			     rcd->egrbufs.rcvtids[idx].dma, order);
 		cond_resched();
 	}
-	goto bail;
+
+	return 0;
 
 bail_rcvegrbuf_phys:
 	for (idx = 0; idx < rcd->egrbufs.alloced &&
@@ -1876,6 +1871,6 @@ bail_rcvegrbuf_phys:
 		rcd->egrbufs.buffers[idx].dma = 0;
 		rcd->egrbufs.buffers[idx].len = 0;
 	}
-bail:
+
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 232014d46f79..ba265d0ae93b 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -47,6 +47,7 @@
 
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/bitmap.h>
 
 #include "hfi.h"
 #include "common.h"
@@ -189,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
 	unsigned long flags;
 
 	spin_lock_irqsave(&dd->uctxt_lock, flags);
-	if (!rcd->cnt)
+	if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS))
 		goto done;
 
 	if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4573e4c9f35c..650305cc0373 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp)
 
 void notify_qp_reset(struct rvt_qp *qp)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	priv->r_adefered = 0;
+	qp->r_adefered = 0;
 	clear_ahg(qp);
 }
 
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 75a729cd0c3d..069bdaf061ab 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
 	struct ib_header hdr;
 	struct ib_other_headers *ohdr;
 	unsigned long flags;
-	struct hfi1_qp_priv *priv = qp->priv;
 
 	/* clear the defer count */
-	priv->r_adefered = 0;
+	qp->r_adefered = 0;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
 	if (qp->s_flags & RVT_S_RESP_PENDING)
@@ -1604,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
 
 static inline void rc_cancel_ack(struct rvt_qp *qp)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	priv->r_adefered = 0;
+	qp->r_adefered = 0;
 	if (list_empty(&qp->rspwait))
 		return;
 	list_del_init(&qp->rspwait);
@@ -2314,13 +2311,11 @@ send_last:
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
 	if (psn & IB_BTH_REQ_ACK) {
-		struct hfi1_qp_priv *priv = qp->priv;
-
 		if (packet->numpkt == 0) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		if (priv->r_adefered >= HFI1_PSN_CREDIT) {
+		if (qp->r_adefered >= HFI1_PSN_CREDIT) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
@@ -2328,7 +2323,7 @@ send_last:
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		priv->r_adefered++;
+		qp->r_adefered++;
 		rc_defered_ack(rcd, qp);
 	}
 	return;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 891ba0a81bbd..3a17daba28a9 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -800,6 +800,43 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 /* when sending, force a reschedule every one of these periods */
 #define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 
+/**
+ * schedule_send_yield - test for a yield required for QP send engine
+ * @timeout: Final time for timeout slice for jiffies
+ * @qp: a pointer to QP
+ * @ps: a pointer to a structure with commonly lookup values for
+ *      the the send engine progress
+ *
+ * This routine checks if the time slice for the QP has expired
+ * for RC QPs, if so an additional work entry is queued. At this
+ * point, other QPs have an opportunity to be scheduled. It
+ * returns true if a yield is required, otherwise, false
+ * is returned.
+ */
+static bool schedule_send_yield(struct rvt_qp *qp,
+				struct hfi1_pkt_state *ps)
+{
+	if (unlikely(time_after(jiffies, ps->timeout))) {
+		if (!ps->in_thread ||
+		    workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
+			spin_lock_irqsave(&qp->s_lock, ps->flags);
+			qp->s_flags &= ~RVT_S_BUSY;
+			hfi1_schedule_send(qp);
+			spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+			this_cpu_inc(*ps->ppd->dd->send_schedule);
+			trace_hfi1_rc_expired_time_slice(qp, true);
+			return true;
+		}
+
+		cond_resched();
+		this_cpu_inc(*ps->ppd->dd->send_schedule);
+		ps->timeout = jiffies + ps->timeout_int;
+	}
+
+	trace_hfi1_rc_expired_time_slice(qp, false);
+	return false;
+}
+
 void hfi1_do_send_from_rvt(struct rvt_qp *qp)
 {
 	hfi1_do_send(qp, false);
@@ -827,13 +864,13 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	struct hfi1_pkt_state ps;
 	struct hfi1_qp_priv *priv = qp->priv;
 	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-	unsigned long timeout;
-	unsigned long timeout_int;
-	int cpu;
 
 	ps.dev = to_idev(qp->ibqp.device);
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
+	ps.in_thread = in_thread;
+
+	trace_hfi1_rc_do_send(qp, in_thread);
 
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
@@ -844,7 +881,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			return;
 		}
 		make_req = hfi1_make_rc_req;
-		timeout_int = (qp->timeout_jiffies);
+		ps.timeout_int = qp->timeout_jiffies;
 		break;
 	case IB_QPT_UC:
 		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
@@ -854,11 +891,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			return;
 		}
 		make_req = hfi1_make_uc_req;
-		timeout_int = SEND_RESCHED_TIMEOUT;
+		ps.timeout_int = SEND_RESCHED_TIMEOUT;
 		break;
 	default:
 		make_req = hfi1_make_ud_req;
-		timeout_int = SEND_RESCHED_TIMEOUT;
+		ps.timeout_int = SEND_RESCHED_TIMEOUT;
 	}
 
 	spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -871,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 
 	qp->s_flags |= RVT_S_BUSY;
 
-	timeout = jiffies + (timeout_int) / 8;
-	cpu = priv->s_sde ? priv->s_sde->cpu :
+	ps.timeout_int = ps.timeout_int / 8;
+	ps.timeout = jiffies + ps.timeout_int;
+	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
 			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+
 	/* insure a pre-built packet is handled  */
 	ps.s_txreq = get_waiting_verbs_txreq(qp);
 	do {
@@ -889,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			/* Record that s_ahg is empty. */
 			qp->s_hdrwords = 0;
 			/* allow other tasks to run */
-			if (unlikely(time_after(jiffies, timeout))) {
-				if (!in_thread ||
-				    workqueue_congested(
-						cpu,
-						ps.ppd->hfi1_wq)) {
-					spin_lock_irqsave(
-						&qp->s_lock,
-						ps.flags);
-					qp->s_flags &= ~RVT_S_BUSY;
-					hfi1_schedule_send(qp);
-					spin_unlock_irqrestore(
-						&qp->s_lock,
-						ps.flags);
-					this_cpu_inc(
-						*ps.ppd->dd->send_schedule);
-					return;
-				}
-				cond_resched();
-				this_cpu_inc(
-					*ps.ppd->dd->send_schedule);
-				timeout = jiffies + (timeout_int) / 8;
-			}
+			if (schedule_send_yield(qp, &ps))
+				return;
+
 			spin_lock_irqsave(&qp->s_lock, ps.flags);
 		}
 	} while (make_req(qp, &ps));
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 26ae789e47cf..4eb4cc798035 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -57,12 +57,14 @@
 
 #define UCTXT_FMT \
 	"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, "	\
-	"rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+	"rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u"
 TRACE_EVENT(hfi1_uctxtdata,
-	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
-	    TP_ARGS(dd, uctxt),
+	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt,
+		     unsigned int subctxt),
+	    TP_ARGS(dd, uctxt, subctxt),
 	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
 			     __field(unsigned int, ctxt)
+			     __field(unsigned int, subctxt)
 			     __field(u32, credits)
 			     __field(u64, hw_free)
 			     __field(void __iomem *, piobase)
@@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata,
 			     __field(u64, rcvhdrq_dma)
 			     __field(u32, eager_cnt)
 			     __field(u64, rcvegr_dma)
+			     __field(unsigned int, subctxt_cnt)
 			     ),
 	    TP_fast_assign(DD_DEV_ASSIGN(dd);
 			   __entry->ctxt = uctxt->ctxt;
+			   __entry->subctxt = subctxt;
 			   __entry->credits = uctxt->sc->credits;
 			   __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
 			   __entry->piobase = uctxt->sc->base_addr;
@@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata,
 			   __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
 			   __entry->eager_cnt = uctxt->egrbufs.alloced;
 			   __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
+			   __entry->subctxt_cnt = uctxt->subctxt_cnt;
 			   ),
-	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
+	    TP_printk("[%s] ctxt %u:%u " UCTXT_FMT,
 		      __get_str(dev),
 		      __entry->ctxt,
+		      __entry->subctxt,
 		      __entry->credits,
 		      __entry->hw_free,
 		      __entry->piobase,
 		      __entry->rcvhdrq_cnt,
 		      __entry->rcvhdrq_dma,
 		      __entry->eager_cnt,
-		      __entry->rcvegr_dma
+		      __entry->rcvegr_dma,
+		      __entry->subctxt_cnt
 		      )
 );
 
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 2c9ac57657d3..c59809a7f121 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -676,6 +676,40 @@ TRACE_EVENT(
 	)
 );
 
+DECLARE_EVENT_CLASS(
+	hfi1_do_send_template,
+	TP_PROTO(struct rvt_qp *qp, bool flag),
+	TP_ARGS(qp, flag),
+	TP_STRUCT__entry(
+		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(bool, flag)
+	),
+	TP_fast_assign(
+		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->flag = flag;
+	),
+	TP_printk(
+		"[%s] qpn %x flag %d",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->flag
+	)
+);
+
+DEFINE_EVENT(
+	hfi1_do_send_template, hfi1_rc_do_send,
+	TP_PROTO(struct rvt_qp *qp, bool flag),
+	TP_ARGS(qp, flag)
+);
+
+DEFINE_EVENT(
+	hfi1_do_send_template, hfi1_rc_expired_time_slice,
+	TP_PROTO(struct rvt_qp *qp, bool flag),
+	TP_ARGS(qp, flag)
+);
+
 #endif /* __HFI1_TRACE_TX_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 35c6e7ec8ad6..a8f0aa4722f6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -53,7 +53,7 @@
 
 struct tid_group {
 	struct list_head list;
-	unsigned base;
+	u32 base;
 	u8 size;
 	u8 used;
 	u8 map;
@@ -82,20 +82,25 @@ struct tid_pageset {
 		 (unsigned long)(len) - 1) & PAGE_MASK) -	       \
 	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
 
-static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
-			    struct hfi1_filedata *);
-static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
-static int set_rcvarray_entry(struct file *, unsigned long, u32,
-			      struct tid_group *, struct page **, unsigned);
-static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+			    struct exp_tid_set *set,
+			    struct hfi1_filedata *fd);
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+			    struct tid_pageset *list);
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
+			      u32 rcventry, struct tid_group *grp,
+			      struct page **pages, unsigned npages);
+static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 				    struct tid_rb_node *tnode);
-static void tid_rb_remove(void *, struct mmu_rb_node *);
-static int tid_rb_invalidate(void *, struct mmu_rb_node *);
-static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
-			    struct tid_pageset *, unsigned, u16, struct page **,
-			    u32 *, unsigned *, unsigned *);
-static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
+static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
+			    struct tid_group *grp, struct tid_pageset *sets,
+			    unsigned start, u16 count, struct page **pages,
+			    u32 *tidlist, unsigned *tididx, unsigned *pmapped);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+			      struct tid_group **grp);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
 static struct mmu_rb_ops tid_rb_ops = {
@@ -149,52 +154,60 @@ static inline void tid_group_move(struct tid_group *group,
 	tid_group_add_tail(group, s2);
 }
 
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = fd->dd;
+	u32 tidbase;
+	u32 i;
+	struct tid_group *grp, *gptr;
+
+	exp_tid_group_init(&uctxt->tid_group_list);
+	exp_tid_group_init(&uctxt->tid_used_list);
+	exp_tid_group_init(&uctxt->tid_full_list);
+
+	tidbase = uctxt->expected_base;
+	for (i = 0; i < uctxt->expected_count /
+		     dd->rcv_entries.group_size; i++) {
+		grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+		if (!grp)
+			goto grp_failed;
+
+		grp->size = dd->rcv_entries.group_size;
+		grp->base = tidbase;
+		tid_group_add_tail(grp, &uctxt->tid_group_list);
+		tidbase += dd->rcv_entries.group_size;
+	}
+
+	return 0;
+
+grp_failed:
+	list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+				 list) {
+		list_del_init(&grp->list);
+		kfree(grp);
+	}
+
+	return -ENOMEM;
+}
+
 /*
  * Initialize context and file private data needed for Expected
  * receive caching. This needs to be done after the context has
  * been configured with the eager/expected RcvEntry counts.
  */
-int hfi1_user_exp_rcv_init(struct file *fp)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tidbase;
-	int i, ret = 0;
+	int ret = 0;
 
 	spin_lock_init(&fd->tid_lock);
 	spin_lock_init(&fd->invalid_lock);
 
-	if (!uctxt->subctxt_cnt || !fd->subctxt) {
-		exp_tid_group_init(&uctxt->tid_group_list);
-		exp_tid_group_init(&uctxt->tid_used_list);
-		exp_tid_group_init(&uctxt->tid_full_list);
-
-		tidbase = uctxt->expected_base;
-		for (i = 0; i < uctxt->expected_count /
-			     dd->rcv_entries.group_size; i++) {
-			struct tid_group *grp;
-
-			grp = kzalloc(sizeof(*grp), GFP_KERNEL);
-			if (!grp) {
-				/*
-				 * If we fail here, the groups already
-				 * allocated will be freed by the close
-				 * call.
-				 */
-				ret = -ENOMEM;
-				goto done;
-			}
-			grp->size = dd->rcv_entries.group_size;
-			grp->base = tidbase;
-			tid_group_add_tail(grp, &uctxt->tid_group_list);
-			tidbase += dd->rcv_entries.group_size;
-		}
-	}
-
 	fd->entry_to_rb = kcalloc(uctxt->expected_count,
-				     sizeof(struct rb_node *),
-				     GFP_KERNEL);
+				  sizeof(struct rb_node *),
+				  GFP_KERNEL);
 	if (!fd->entry_to_rb)
 		return -ENOMEM;
 
@@ -204,8 +217,9 @@ int hfi1_user_exp_rcv_init(struct file *fp)
 					   sizeof(*fd->invalid_tids),
 					   GFP_KERNEL);
 		if (!fd->invalid_tids) {
-			ret = -ENOMEM;
-			goto done;
+			kfree(fd->entry_to_rb);
+			fd->entry_to_rb = NULL;
+			return -ENOMEM;
 		}
 
 		/*
@@ -248,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp)
 		fd->tid_limit = uctxt->expected_count;
 	}
 	spin_unlock(&fd->tid_lock);
-done:
+
 	return ret;
 }
 
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
 {
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct tid_group *grp, *gptr;
 
-	if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
-		return 0;
+	list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+				 list) {
+		list_del_init(&grp->list);
+		kfree(grp);
+	}
+	hfi1_clear_tids(uctxt);
+}
+
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
 	/*
 	 * The notifier would have been removed when the process'es mm
 	 * was freed.
 	 */
-	if (fd->handler)
+	if (fd->handler) {
 		hfi1_mmu_rb_unregister(fd->handler);
-
-	kfree(fd->invalid_tids);
-
-	if (!uctxt->cnt) {
+	} else {
 		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
 			unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
 		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
 			unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
-		list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
-					 list) {
-			list_del_init(&grp->list);
-			kfree(grp);
-		}
-		hfi1_clear_tids(uctxt);
 	}
 
+	kfree(fd->invalid_tids);
+	fd->invalid_tids = NULL;
+
 	kfree(fd->entry_to_rb);
-	return 0;
+	fd->entry_to_rb = NULL;
 }
 
 /*
@@ -351,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
  *          can fit into the group. If the group becomes fully
  *          used, move it to tid_full_list.
  */
-int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo)
 {
 	int ret = 0, need_group = 0, pinned;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
@@ -451,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
 		struct tid_group *grp =
 			tid_group_pop(&uctxt->tid_group_list);
 
-		ret = program_rcvarray(fp, vaddr, grp, pagesets,
+		ret = program_rcvarray(fd, vaddr, grp, pagesets,
 				       pageidx, dd->rcv_entries.group_size,
 				       pages, tidlist, &tididx, &mapped);
 		/*
@@ -497,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
 			unsigned use = min_t(unsigned, pageset_count - pageidx,
 					     grp->size - grp->used);
 
-			ret = program_rcvarray(fp, vaddr, grp, pagesets,
+			ret = program_rcvarray(fd, vaddr, grp, pagesets,
 					       pageidx, use, pages, tidlist,
 					       &tididx, &mapped);
 			if (ret < 0) {
@@ -547,7 +564,7 @@ nomem:
 			 * everything done so far so we don't leak resources.
 			 */
 			tinfo->tidlist = (unsigned long)&tidlist;
-			hfi1_user_exp_rcv_clear(fp, tinfo);
+			hfi1_user_exp_rcv_clear(fd, tinfo);
 			tinfo->tidlist = 0;
 			ret = -EFAULT;
 			goto bail;
@@ -571,10 +588,10 @@ bail:
 	return ret > 0 ? 0 : ret;
 }
 
-int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo)
 {
 	int ret = 0;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	u32 *tidinfo;
 	unsigned tididx;
@@ -589,7 +606,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
 
 	mutex_lock(&uctxt->exp_lock);
 	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
-		ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+		ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
 		if (ret) {
 			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
 				  ret);
@@ -606,9 +623,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
 	return ret;
 }
 
-int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+			      struct hfi1_tid_info *tinfo)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	unsigned long *ev = uctxt->dd->events +
 		(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
@@ -723,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
 
 /**
  * program_rcvarray() - program an RcvArray group with receive buffers
- * @fp: file pointer
+ * @fd: filedata pointer
  * @vaddr: starting user virtual address
  * @grp: RcvArray group
  * @sets: array of struct tid_pageset holding information on physically
@@ -748,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
  * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
  * number of RcvArray entries programmed.
  */
-static int program_rcvarray(struct file *fp, unsigned long vaddr,
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
 			    struct tid_group *grp,
 			    struct tid_pageset *sets,
 			    unsigned start, u16 count, struct page **pages,
 			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	u16 idx;
@@ -795,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
 		npages = sets[setidx].count;
 		pageidx = sets[setidx].idx;
 
-		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+		ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
 					 rcventry, grp, pages + pageidx,
 					 npages);
 		if (ret)
@@ -817,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
 	return idx;
 }
 
-static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
 			      u32 rcventry, struct tid_group *grp,
 			      struct page **pages, unsigned npages)
 {
 	int ret;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct tid_rb_node *node;
 	struct hfi1_devdata *dd = uctxt->dd;
@@ -876,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
 	return 0;
 }
 
-static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
 			      struct tid_group **grp)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	struct tid_rb_node *node;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..5250c897298d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -70,10 +70,15 @@
 		(tid) |= EXP_TID_SET(field, (value));			\
 	} while (0)
 
-int hfi1_user_exp_rcv_init(struct file *);
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
-int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+			      struct hfi1_tid_info *tinfo);
 
 #endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0749689d7643..d55339f5d73b 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 /* KDETH OM multipliers and switch over point */
 #define KDETH_OM_SMALL     4
+#define KDETH_OM_SMALL_SHIFT     2
 #define KDETH_OM_LARGE     64
+#define KDETH_OM_LARGE_SHIFT     6
 #define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 
 /* Tx request flag bits */
@@ -153,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 /* SDMA request flag bits */
 #define SDMA_REQ_FOR_THREAD 1
 #define SDMA_REQ_SEND_DONE  2
-#define SDMA_REQ_HAVE_AHG   3
-#define SDMA_REQ_HAS_ERROR  4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAS_ERROR  3
+#define SDMA_REQ_DONE_ERROR 4
 
 #define SDMA_PKT_Q_INACTIVE BIT(0)
 #define SDMA_PKT_Q_ACTIVE   BIT(1)
@@ -214,7 +215,7 @@ struct user_sdma_request {
 	 * each request will need it's own engine pointer.
 	 */
 	struct sdma_engine *sde;
-	u8 ahg_idx;
+	s8 ahg_idx;
 	u32 ahg[9];
 	/*
 	 * KDETH.Offset (Eager) field
@@ -229,12 +230,6 @@ struct user_sdma_request {
 	 */
 	u32 tidoffset;
 	/*
-	 * KDETH.OM
-	 * Remember this because the header template always sets it
-	 * to 0.
-	 */
-	u8 omfactor;
-	/*
 	 * We copy the iovs for this request (based on
 	 * info.iovcnt). These are only the data vectors
 	 */
@@ -284,39 +279,43 @@ struct user_sdma_txreq {
 	hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
 		 (pq)->subctxt, ##__VA_ARGS__)
 
-static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
-static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int);
-static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
-static void user_sdma_free_request(struct user_sdma_request *, bool);
-static int pin_vector_pages(struct user_sdma_request *,
-			    struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
-			       unsigned);
-static int check_header_template(struct user_sdma_request *,
-				 struct hfi1_pkt_header *, u32, u32);
-static int set_txreq_header(struct user_sdma_request *,
-			    struct user_sdma_txreq *, u32);
-static int set_txreq_header_ahg(struct user_sdma_request *,
-				struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
-				  struct hfi1_user_sdma_comp_q *,
-				  u16, enum hfi1_sdma_comp_state, int);
-static inline u32 set_pkt_bth_psn(__be32, u8, u32);
+static int user_sdma_send_pkts(struct user_sdma_request *req,
+			       unsigned maxpkts);
+static int num_user_pages(const struct iovec *iov);
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
+static int pin_vector_pages(struct user_sdma_request *req,
+			    struct user_sdma_iovec *iovec);
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+			       unsigned start, unsigned npages);
+static int check_header_template(struct user_sdma_request *req,
+				 struct hfi1_pkt_header *hdr, u32 lrhlen,
+				 u32 datalen);
+static int set_txreq_header(struct user_sdma_request *req,
+			    struct user_sdma_txreq *tx, u32 datalen);
+static int set_txreq_header_ahg(struct user_sdma_request *req,
+				struct user_sdma_txreq *tx, u32 len);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+				  struct hfi1_user_sdma_comp_q *cq,
+				  u16 idx, enum hfi1_sdma_comp_state state,
+				  int ret);
+static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
 static int defer_packet_queue(
-	struct sdma_engine *,
-	struct iowait *,
-	struct sdma_txreq *,
-	unsigned seq);
-static void activate_packet_queue(struct iowait *, int);
-static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(void *, struct mmu_rb_node *);
+	struct sdma_engine *sde,
+	struct iowait *wait,
+	struct sdma_txreq *txreq,
+	unsigned int seq);
+static void activate_packet_queue(struct iowait *wait, int reason);
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+			   unsigned long len);
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
 static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
 			 void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *);
-static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
 
 static struct mmu_rb_ops sdma_rb_ops = {
 	.filter = sdma_rb_filter,
@@ -372,45 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj)
 	memset(tx, 0, sizeof(*tx));
 }
 
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+				struct hfi1_filedata *fd)
 {
-	struct hfi1_filedata *fd;
-	int ret = 0;
+	int ret = -ENOMEM;
 	char buf[64];
 	struct hfi1_devdata *dd;
 	struct hfi1_user_sdma_comp_q *cq;
 	struct hfi1_user_sdma_pkt_q *pq;
 	unsigned long flags;
 
-	if (!uctxt || !fp) {
-		ret = -EBADF;
-		goto done;
-	}
-
-	fd = fp->private_data;
+	if (!uctxt || !fd)
+		return -EBADF;
 
-	if (!hfi1_sdma_comp_ring_size) {
-		ret = -EINVAL;
-		goto done;
-	}
+	if (!hfi1_sdma_comp_ring_size)
+		return -EINVAL;
 
 	dd = uctxt->dd;
 
 	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
 	if (!pq)
-		goto pq_nomem;
-
-	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
-			   sizeof(*pq->reqs),
-			   GFP_KERNEL);
-	if (!pq->reqs)
-		goto pq_reqs_nomem;
-
-	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
-				 sizeof(*pq->req_in_use),
-				 GFP_KERNEL);
-	if (!pq->req_in_use)
-		goto pq_reqs_no_in_use;
+		return -ENOMEM;
 
 	INIT_LIST_HEAD(&pq->list);
 	pq->dd = dd;
@@ -426,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
 		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
+
+	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+			   sizeof(*pq->reqs),
+			   GFP_KERNEL);
+	if (!pq->reqs)
+		goto pq_reqs_nomem;
+
+	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+				 sizeof(*pq->req_in_use),
+				 GFP_KERNEL);
+	if (!pq->req_in_use)
+		goto pq_reqs_no_in_use;
+
 	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
 		 fd->subctxt);
 	pq->txreq_cache = kmem_cache_create(buf,
-			       sizeof(struct user_sdma_txreq),
+					    sizeof(struct user_sdma_txreq),
 					    L1_CACHE_BYTES,
 					    SLAB_HWCACHE_ALIGN,
 					    sdma_kmem_cache_ctor);
@@ -438,7 +432,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 			   uctxt->ctxt);
 		goto pq_txreq_nomem;
 	}
-	fd->pq = pq;
+
 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq)
 		goto cq_nomem;
@@ -449,20 +443,25 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 		goto cq_comps_nomem;
 
 	cq->nentries = hfi1_sdma_comp_ring_size;
-	fd->cq = cq;
 
 	ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
 				   &pq->handler);
 	if (ret) {
 		dd_dev_err(dd, "Failed to register with MMU %d", ret);
-		goto done;
+		goto pq_mmu_fail;
 	}
 
+	fd->pq = pq;
+	fd->cq = cq;
+
 	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 	list_add(&pq->list, &uctxt->sdma_queues);
 	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
-	goto done;
 
+	return 0;
+
+pq_mmu_fail:
+	vfree(cq->comps);
 cq_comps_nomem:
 	kfree(cq);
 cq_nomem:
@@ -473,10 +472,7 @@ pq_reqs_no_in_use:
 	kfree(pq->reqs);
 pq_reqs_nomem:
 	kfree(pq);
-	fd->pq = NULL;
-pq_nomem:
-	ret = -ENOMEM;
-done:
+
 	return ret;
 }
 
@@ -536,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid)
 	return mapping[hash];
 }
 
-int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
-				   unsigned long dim, unsigned long *count)
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+				   struct iovec *iovec, unsigned long dim,
+				   unsigned long *count)
 {
 	int ret = 0, i;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
 	struct hfi1_user_sdma_comp_q *cq = fd->cq;
@@ -616,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	req->pq = pq;
 	req->cq = cq;
 	req->status = -1;
+	req->ahg_idx = -1;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
@@ -766,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	}
 
 	/* We don't need an AHG entry if the request contains only one packet */
-	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
-		int ahg = sdma_ahg_alloc(req->sde);
-
-		if (likely(ahg >= 0)) {
-			req->ahg_idx = (u8)ahg;
-			set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
-		}
-	}
+	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
+		req->ahg_idx = sdma_ahg_alloc(req->sde);
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
 	atomic_inc(&pq->n_reqs);
@@ -991,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 			}
 		}
 
-		if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
+		if (req->ahg_idx >= 0) {
 			if (!req->seqnum) {
 				u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
 				u32 lrhlen = get_lrh_len(req->hdr,
@@ -1121,7 +1112,7 @@ dosend:
 		 * happen due to the sequential manner in which
 		 * descriptors are processed.
 		 */
-		if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+		if (req->ahg_idx >= 0)
 			sdma_ahg_free(req->sde, req->ahg_idx);
 	}
 	return ret;
@@ -1323,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 {
 	struct hfi1_user_sdma_pkt_q *pq = req->pq;
 	struct hfi1_pkt_header *hdr = &tx->hdr;
+	u8 omfactor; /* KDETH.OM */
 	u16 pbclen;
 	int ret;
 	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1400,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req,
 			}
 			tidval = req->tids[req->tididx];
 		}
-		req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
-			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+			KDETH_OM_SMALL_SHIFT;
 		/* Set KDETH.TIDCtrl based on value for this TID. */
 		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
 			  EXP_TID_GET(tidval, CTRL));
@@ -1416,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req,
 		 * transfer.
 		 */
 		SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
-			 req->tidoffset, req->tidoffset / req->omfactor,
-			 req->omfactor != KDETH_OM_SMALL);
+			 req->tidoffset, req->tidoffset >> omfactor,
+			 omfactor != KDETH_OM_SMALL_SHIFT);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
-			  req->tidoffset / req->omfactor);
+			  req->tidoffset >> omfactor);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-			  req->omfactor != KDETH_OM_SMALL);
+			  omfactor != KDETH_OM_SMALL_SHIFT);
 	}
 done:
 	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1433,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 				struct user_sdma_txreq *tx, u32 len)
 {
 	int diff = 0;
+	u8 omfactor; /* KDETH.OM */
 	struct hfi1_user_sdma_pkt_q *pq = req->pq;
 	struct hfi1_pkt_header *hdr = &req->hdr;
 	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
@@ -1484,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 			}
 			tidval = req->tids[req->tididx];
 		}
-		req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+		omfactor = ((EXP_TID_GET(tidval, LEN) *
 				  PAGE_SIZE) >=
-				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
-			KDETH_OM_SMALL;
+				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+				 KDETH_OM_SMALL_SHIFT;
 		/* KDETH.OM and KDETH.OFFSET (TID) */
 		AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
-			       ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
-				((req->tidoffset / req->omfactor) & 0x7fff)));
+			       ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+				((req->tidoffset >> omfactor)
+				 & 0x7fff)));
 		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
 		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
 				   (EXP_TID_GET(tidval, IDX) & 0x3ff));
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 39001714f551..e5b10aefe212 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,5 +1,7 @@
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -56,7 +58,7 @@ extern uint extended_psn;
 struct hfi1_user_sdma_pkt_q {
 	struct list_head list;
 	unsigned ctxt;
-	unsigned subctxt;
+	u16 subctxt;
 	u16 n_max_reqs;
 	atomic_t n_reqs;
 	u16 reqidx;
@@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q {
 	struct hfi1_sdma_comp_entry *comps;
 };
 
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
-int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
-				   unsigned long *);
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+				struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+				   struct iovec *iovec, unsigned long dim,
+				   unsigned long *count);
+
+#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 52ff275caf54..cd635d0c1d3b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -125,7 +125,6 @@ struct hfi1_qp_priv {
 	struct sdma_engine *s_sde;                /* current sde */
 	struct send_context *s_sendcontext;       /* current sendcontext */
 	u8 s_sc;		                  /* SC[0..4] for next packet */
-	u8 r_adefered;                            /* number of acks defered */
 	struct iowait s_iowait;
 	struct rvt_qp *owner;
 };
@@ -140,6 +139,10 @@ struct hfi1_pkt_state {
 	struct hfi1_pportdata *ppd;
 	struct verbs_txreq *s_txreq;
 	unsigned long flags;
+	unsigned long timeout;
+	unsigned long timeout_int;
+	int cpu;
+	bool in_thread;
 };
 
 #define HFI1_PSN_CREDIT  16
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 392f4d57f3e3..b601c2929f8f 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -67,9 +67,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
 	unsigned int rcvctrl_ops = 0;
 	int ret;
 
-	ret = hfi1_init_ctxt(uctxt->sc);
-	if (ret)
-		goto done;
+	hfi1_init_ctxt(uctxt->sc);
 
 	uctxt->do_interrupt = &handle_receive_interrupt;
 
@@ -82,8 +80,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
 	if (ret)
 		goto done;
 
-	set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
-
 	if (uctxt->rcvhdrtail_kvaddr)
 		clear_rcvhdrtail(uctxt);
 
@@ -209,7 +205,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
 	uctxt->event_flags = 0;
 
 	hfi1_clear_tids(uctxt);
-	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+	hfi1_clear_ctxt_pkey(dd, uctxt);
 
 	hfi1_stats.sps_ctxts--;
 	hfi1_free_ctxtdata(dd, uctxt);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9f3ba320ce70..d45772da0963 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3530,6 +3530,26 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	return num_counters;
 }
 
+static struct net_device*
+mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
+			  u8 port_num,
+			  enum rdma_netdev_t type,
+			  const char *name,
+			  unsigned char name_assign_type,
+			  void (*setup)(struct net_device *))
+{
+	if (type != RDMA_NETDEV_IPOIB)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
+				      name, setup);
+}
+
+static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+{
+	return mlx5_rdma_netdev_free(netdev);
+}
+
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_ib_dev *dev;
@@ -3660,6 +3680,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
+	dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+	dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
 		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ced15c4446bd..e37cc89987e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -368,7 +368,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
 			((void *)(uintptr_t)iova) : addr;
 
 		if (crcp)
-			crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+			*crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
 					*crcp, src, length);
 
 		memcpy(dest, src, length);
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 13ed2cc6eaa2..1b596fbbe251 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -114,7 +114,6 @@ enum rxe_device_param {
 	RXE_MAX_UCONTEXT		= 512,
 
 	RXE_NUM_PORT			= 1,
-	RXE_NUM_COMP_VECTORS		= 1,
 
 	RXE_MIN_QP_INDEX		= 16,
 	RXE_MAX_QP_INDEX		= 0x00020000,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 299b0f8423f2..83d709e74dfb 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1239,7 +1239,7 @@ int rxe_register_device(struct rxe_dev *rxe)
 	dev->owner = THIS_MODULE;
 	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
-	dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
+	dev->num_comp_vectors = num_possible_cpus();
 	dev->dev.parent = rxe_dma_device(rxe);
 	dev->local_dma_lkey = 0;
 	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 379c02fb4181..874b24366e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
 	return -EOPNOTSUPP;
 }
 
+/* Return lane speed in unit of 1e6 bit/sec */
+static inline int ib_speed_enum_to_int(int speed)
+{
+	switch (speed) {
+	case IB_SPEED_SDR:
+		return SPEED_2500;
+	case IB_SPEED_DDR:
+		return SPEED_5000;
+	case IB_SPEED_QDR:
+	case IB_SPEED_FDR10:
+		return SPEED_10000;
+	case IB_SPEED_FDR:
+		return SPEED_14000;
+	case IB_SPEED_EDR:
+		return SPEED_25000;
+	}
+
+	return SPEED_UNKNOWN;
+}
+
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(netdev);
+	struct ib_port_attr attr;
+	int ret, speed, width;
+
+	if (!netif_carrier_ok(netdev)) {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		return 0;
+	}
+
+	ret = ib_query_port(priv->ca, priv->port, &attr);
+	if (ret < 0)
+		return -EINVAL;
+
+	speed = ib_speed_enum_to_int(attr.active_speed);
+	width = ib_width_enum_to_int(attr.active_width);
+
+	if (speed < 0 || width < 0)
+		return -EINVAL;
+
+	/* Except the following are set, the other members of
+	 * the struct ethtool_link_settings are initialized to
+	 * zero in the function __ethtool_get_link_ksettings.
+	 */
+	cmd->base.speed		 = speed * width;
+	cmd->base.duplex	 = DUPLEX_FULL;
+
+	cmd->base.phy_address	 = 0xFF;
+
+	cmd->base.autoneg	 = AUTONEG_ENABLE;
+	cmd->base.port		 = PORT_OTHER;
+
+	return 0;
+}
+
 static const struct ethtool_ops ipoib_ethtool_ops = {
+	.get_link_ksettings	= ipoib_get_link_ksettings,
 	.get_drvinfo		= ipoib_get_drvinfo,
 	.get_coalesce		= ipoib_get_coalesce,
 	.set_coalesce		= ipoib_set_coalesce,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index a84b652f9b54..fc52d742b7f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -35,6 +35,6 @@ config MLX5_CORE_EN_DCB
 config MLX5_CORE_IPOIB
 	bool "Mellanox Technologies ConnectX-4 IPoIB offloads support"
 	depends on MLX5_CORE_EN
-	default y
+	default n
 	---help---
 	  MLX5 IPoIB offloads & acceleration support.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 3c84e36af018..019c230da498 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "en.h"
 #include "ipoib.h"
@@ -359,10 +360,10 @@ unlock:
 	return 0;
 }
 
-#ifdef notusedyet
 /* IPoIB RDMA netdev callbacks */
 static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
-			      union ib_gid *gid, u16 lid, int set_qkey)
+			      union ib_gid *gid, u16 lid, int set_qkey,
+			      u32 qkey)
 {
 	struct mlx5e_priv    *epriv = mlx5i_epriv(netdev);
 	struct mlx5_core_dev *mdev  = epriv->mdev;
@@ -375,6 +376,12 @@ static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
 		mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
 			       ipriv->qp.qpn, gid->raw);
 
+	if (set_qkey) {
+		mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
+			      netdev->name, qkey);
+		ipriv->qkey = qkey;
+	}
+
 	return err;
 }
 
@@ -397,15 +404,15 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
 }
 
 static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
-	       struct ib_ah *address, u32 dqpn, u32 dqkey)
+		      struct ib_ah *address, u32 dqpn)
 {
 	struct mlx5e_priv *epriv = mlx5i_epriv(dev);
 	struct mlx5e_txqsq *sq   = epriv->txq2sq[skb_get_queue_mapping(skb)];
 	struct mlx5_ib_ah *mah   = to_mah(address);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
 
-	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey);
+	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
 }
-#endif
 
 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
@@ -414,22 +421,23 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 
 	if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
 		mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	return 0;
 }
 
-static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-						 struct ib_device *ibdev,
-						 const char *name,
-						 void (*setup)(struct net_device *))
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *))
 {
 	const struct mlx5e_profile *profile = &mlx5i_nic_profile;
 	int nch = profile->max_nch(mdev);
 	struct net_device *netdev;
 	struct mlx5i_priv *ipriv;
 	struct mlx5e_priv *epriv;
+	struct rdma_netdev *rn;
 	int err;
 
 	if (mlx5i_check_required_hca_cap(mdev)) {
@@ -464,13 +472,13 @@ static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	mlx5e_attach_netdev(epriv);
 	netif_carrier_off(netdev);
 
-	/* TODO: set rdma_netdev func pointers
-	 * rn = &ipriv->rn;
-	 * rn->hca  = ibdev;
-	 * rn->send = mlx5i_xmit;
-	 * rn->attach_mcast = mlx5i_attach_mcast;
-	 * rn->detach_mcast = mlx5i_detach_mcast;
-	 */
+	/* set rdma_netdev func pointers */
+	rn = &ipriv->rn;
+	rn->hca  = ibdev;
+	rn->send = mlx5i_xmit;
+	rn->attach_mcast = mlx5i_attach_mcast;
+	rn->detach_mcast = mlx5i_detach_mcast;
+
 	return netdev;
 
 err_free_netdev:
@@ -482,7 +490,7 @@ free_mdev_resources:
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
 
-static void mlx5_rdma_netdev_free(struct net_device *netdev)
+void mlx5_rdma_netdev_free(struct net_device *netdev)
 {
 	struct mlx5e_priv          *priv    = mlx5i_epriv(netdev);
 	const struct mlx5e_profile *profile = priv->profile;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
index bae0a5cbc8ad..213191a78464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
@@ -40,7 +40,9 @@
 
 /* ipoib rdma netdev's private data structure */
 struct mlx5i_priv {
+	struct rdma_netdev rn; /* keep this first */
 	struct mlx5_core_qp qp;
+	u32    qkey;
 	char  *mlx5e_priv[0];
 };
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 18fc65b84b79..bcdf739ee41a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1097,6 +1097,25 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
+#ifndef CONFIG_MLX5_CORE_IPOIB
+static inline
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *))
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
+#else
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *));
+void mlx5_rdma_netdev_free(struct net_device *netdev);
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+
 struct mlx5_profile {
 	u64	mask;
 	u8	log_max_qp;
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1d8141a88d3c..be6472e5b06b 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -324,6 +324,7 @@ struct rvt_qp {
 	u8 r_state;             /* opcode of last packet received */
 	u8 r_flags;
 	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+	u8 r_adefered;          /* defered ack count */
 
 	struct list_head rspwait;       /* link for waiting to respond */
 
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 5f4ea28eabe4..d179d7767f51 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1494,6 +1494,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_2500		2500
 #define SPEED_5000		5000
 #define SPEED_10000		10000
+#define SPEED_14000		14000
 #define SPEED_20000		20000
 #define SPEED_25000		25000
 #define SPEED_40000		40000