-rw-r--r--  drivers/infiniband/core/Makefile | 9
-rw-r--r--  drivers/infiniband/core/addr.c | 2
-rw-r--r--  drivers/infiniband/core/cache.c | 151
-rw-r--r--  drivers/infiniband/core/cm.c | 1000
-rw-r--r--  drivers/infiniband/core/cm_msgs.h | 755
-rw-r--r--  drivers/infiniband/core/cma.c | 90
-rw-r--r--  drivers/infiniband/core/cma_trace.c | 16
-rw-r--r--  drivers/infiniband/core/cma_trace.h | 391
-rw-r--r--  drivers/infiniband/core/core_priv.h | 3
-rw-r--r--  drivers/infiniband/core/cq.c | 27
-rw-r--r--  drivers/infiniband/core/device.c | 42
-rw-r--r--  drivers/infiniband/core/ib_core_uverbs.c | 2
-rw-r--r--  drivers/infiniband/core/nldev.c | 3
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 235
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 45
-rw-r--r--  drivers/infiniband/core/sa_query.c | 4
-rw-r--r--  drivers/infiniband/core/trace.c | 14
-rw-r--r--  drivers/infiniband/core/umem.c | 9
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 22
-rw-r--r--  drivers/infiniband/core/uverbs.h | 48
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 320
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 45
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 301
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 44
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_async_fd.c | 52
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 19
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_device.c | 38
-rw-r--r--  drivers/infiniband/core/uverbs_uapi.c | 7
-rw-r--r--  drivers/infiniband/core/verbs.c | 55
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 2
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 37
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 15
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 198
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/common.h | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 237
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 195
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 87
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.c | 106
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rx.h | 15
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 44
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 51
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 874
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 159
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 106
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 92
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c | 29
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 18
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 20
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 159
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_virt.c | 28
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 224
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 25
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 38
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 81
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 44
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 4
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 13
-rw-r--r--  drivers/infiniband/sw/rdmavt/rc.c | 9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw.h | 26
-rw-r--r--  drivers/infiniband/sw/siw/siw_cm.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_cq.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp.c | 13
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_rx.c | 6
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 2
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 61
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 2
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 2
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 3
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 2
-rw-r--r--  include/linux/mlx4/cq.h | 5
-rw-r--r--  include/linux/mlx5/driver.h | 5
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 24
-rw-r--r--  include/rdma/ib_cm.h | 34
-rw-r--r--  include/rdma/ib_verbs.h | 42
-rw-r--r--  include/rdma/iba.h | 146
-rw-r--r--  include/rdma/ibta_vol1_c12.h | 213
-rw-r--r--  include/rdma/rdmavt_qp.h | 22
-rw-r--r--  include/rdma/uverbs_named_ioctl.h | 6
-rw-r--r--  include/rdma/uverbs_std_types.h | 13
-rw-r--r--  include/rdma/uverbs_types.h | 34
-rw-r--r--  include/trace/events/rdma_core.h | 394
-rw-r--r--  include/uapi/rdma/ib_user_ioctl_cmds.h | 15
-rw-r--r--  include/uapi/rdma/ib_user_ioctl_verbs.h | 12
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_cmds.h | 17
-rw-r--r--  include/uapi/rdma/qedr-abi.h | 18
103 files changed, 4659 insertions, 3200 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 9a8871e21545..d1b14887960e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,8 @@ ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o \
 				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
 				multicast.o mad.o smi.o agent.o mad_rmpp.o \
-				nldev.o restrack.o counters.o ib_core_uverbs.o
+				nldev.o restrack.o counters.o ib_core_uverbs.o \
+				trace.o
 
 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
 ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -20,7 +21,8 @@ ib_cm-y :=			cm.o
 
 iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o
 
-rdma_cm-y :=			cma.o
+CFLAGS_cma_trace.o +=		-I$(src)
+rdma_cm-y :=			cma.o cma_trace.o
 
 rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
 
@@ -33,6 +35,7 @@ ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				uverbs_std_types_cq.o \
 				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
 				uverbs_std_types_mr.o uverbs_std_types_counters.o \
-				uverbs_uapi.o uverbs_std_types_device.o
+				uverbs_uapi.o uverbs_std_types_device.o \
+				uverbs_std_types_async_fd.o
 ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 606fa6d86685..1753a9801b70 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
 	if (ib_nl_is_good_ip_resp(nlh))
 		ib_nl_process_good_ip_rsep(nlh);
 
-	return skb->len;
+	return 0;
 }
 
 static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index d535995711c3..17bfedd24cc3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -51,9 +51,8 @@ struct ib_pkey_cache {
 
 struct ib_update_work {
 	struct work_struct work;
-	struct ib_device  *device;
-	u8                 port_num;
-	bool		   enforce_security;
+	struct ib_event event;
+	bool enforce_security;
 };
 
 union ib_gid zgid;
@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 	event.element.port_num	= port;
 	event.event		= IB_EVENT_GID_CHANGE;
 
-	ib_dispatch_event(&event);
+	ib_dispatch_event_clients(&event);
 }
 
 static const char * const gid_type_str[] = {
@@ -1034,7 +1033,7 @@ int ib_get_cached_pkey(struct ib_device *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 
 	cache = device->port_data[port_num].cache.pkey;
 
@@ -1043,7 +1042,7 @@ int ib_get_cached_pkey(struct ib_device *device,
 	else
 		*pkey = cache->table[index];
 
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return ret;
 }
@@ -1058,9 +1057,9 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 	*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return 0;
 }
@@ -1080,7 +1079,7 @@ int ib_find_cached_pkey(struct ib_device *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 
 	cache = device->port_data[port_num].cache.pkey;
 
@@ -1101,7 +1100,7 @@ int ib_find_cached_pkey(struct ib_device *device,
 		ret = 0;
 	}
 
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return ret;
 }
@@ -1120,7 +1119,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 
 	cache = device->port_data[port_num].cache.pkey;
 
@@ -1133,7 +1132,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
 			break;
 		}
 
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return ret;
 }
@@ -1149,9 +1148,9 @@ int ib_get_cached_lmc(struct ib_device *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 	*lmc = device->port_data[port_num].cache.lmc;
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return ret;
 }
@@ -1167,9 +1166,9 @@ int ib_get_cached_port_state(struct ib_device   *device,
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	read_lock_irqsave(&device->cache.lock, flags);
+	read_lock_irqsave(&device->cache_lock, flags);
 	*port_state = device->port_data[port_num].cache.port_state;
-	read_unlock_irqrestore(&device->cache.lock, flags);
+	read_unlock_irqrestore(&device->cache_lock, flags);
 
 	return ret;
 }
@@ -1381,9 +1380,8 @@ err:
 	return ret;
 }
 
-static void ib_cache_update(struct ib_device *device,
-			    u8                port,
-			    bool	      enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
 {
 	struct ib_port_attr       *tprops = NULL;
 	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
@@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device,
 	int                        ret;
 
 	if (!rdma_is_port_valid(device, port))
-		return;
+		return -EINVAL;
 
 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
 	if (!tprops)
-		return;
+		return -ENOMEM;
 
 	ret = ib_query_port(device, port, tprops);
 	if (ret) {
@@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device,
 	pkey_cache = kmalloc(struct_size(pkey_cache, table,
 					 tprops->pkey_tbl_len),
 			     GFP_KERNEL);
-	if (!pkey_cache)
+	if (!pkey_cache) {
+		ret = -ENOMEM;
 		goto err;
+	}
 
 	pkey_cache->table_len = tprops->pkey_tbl_len;
 
@@ -1428,7 +1428,7 @@ static void ib_cache_update(struct ib_device *device,
 		}
 	}
 
-	write_lock_irq(&device->cache.lock);
+	write_lock_irq(&device->cache_lock);
 
 	old_pkey_cache = device->port_data[port].cache.pkey;
 
@@ -1437,7 +1437,7 @@ static void ib_cache_update(struct ib_device *device,
 	device->port_data[port].cache.port_state = tprops->state;
 
 	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
-	write_unlock_irq(&device->cache.lock);
+	write_unlock_irq(&device->cache_lock);
 
 	if (enforce_security)
 		ib_security_cache_change(device,
@@ -1446,57 +1446,91 @@ static void ib_cache_update(struct ib_device *device,
 
 	kfree(old_pkey_cache);
 	kfree(tprops);
-	return;
+	return 0;
 
 err:
 	kfree(pkey_cache);
 	kfree(tprops);
+	return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+	struct ib_update_work *work =
+		container_of(_work, struct ib_update_work, work);
+	int ret;
+
+	/* Before distributing the cache update event, first sync
+	 * the cache.
+	 */
+	ret = ib_cache_update(work->event.device, work->event.element.port_num,
+			      work->enforce_security);
+
+	/* GID event is notified already for individual GID entries by
+	 * dispatch_gid_change_event(). Hence, notify for rest of the
+	 * events.
+	 */
+	if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+		ib_dispatch_event_clients(&work->event);
+
+	kfree(work);
 }
 
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
 {
 	struct ib_update_work *work =
 		container_of(_work, struct ib_update_work, work);
 
-	ib_cache_update(work->device,
-			work->port_num,
-			work->enforce_security);
+	ib_dispatch_event_clients(&work->event);
 	kfree(work);
 }
 
-static void ib_cache_event(struct ib_event_handler *handler,
-			   struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+	return (event->event == IB_EVENT_PORT_ERR    ||
+		event->event == IB_EVENT_PORT_ACTIVE ||
+		event->event == IB_EVENT_LID_CHANGE  ||
+		event->event == IB_EVENT_PKEY_CHANGE ||
+		event->event == IB_EVENT_CLIENT_REREGISTER ||
+		event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
 {
 	struct ib_update_work *work;
 
-	if (event->event == IB_EVENT_PORT_ERR    ||
-	    event->event == IB_EVENT_PORT_ACTIVE ||
-	    event->event == IB_EVENT_LID_CHANGE  ||
-	    event->event == IB_EVENT_PKEY_CHANGE ||
-	    event->event == IB_EVENT_CLIENT_REREGISTER ||
-	    event->event == IB_EVENT_GID_CHANGE) {
-		work = kmalloc(sizeof *work, GFP_ATOMIC);
-		if (work) {
-			INIT_WORK(&work->work, ib_cache_task);
-			work->device   = event->device;
-			work->port_num = event->element.port_num;
-			if (event->event == IB_EVENT_PKEY_CHANGE ||
-			    event->event == IB_EVENT_GID_CHANGE)
-				work->enforce_security = true;
-			else
-				work->enforce_security = false;
-
-			queue_work(ib_wq, &work->work);
-		}
-	}
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	if (is_cache_update_event(event))
+		INIT_WORK(&work->work, ib_cache_event_task);
+	else
+		INIT_WORK(&work->work, ib_generic_event_task);
+
+	work->event = *event;
+	if (event->event == IB_EVENT_PKEY_CHANGE ||
+	    event->event == IB_EVENT_GID_CHANGE)
+		work->enforce_security = true;
+
+	queue_work(ib_wq, &work->work);
 }
+EXPORT_SYMBOL(ib_dispatch_event);
 
 int ib_cache_setup_one(struct ib_device *device)
 {
 	unsigned int p;
 	int err;
 
-	rwlock_init(&device->cache.lock);
+	rwlock_init(&device->cache_lock);
 
 	err = gid_table_setup_one(device);
 	if (err)
@@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device)
 	rdma_for_each_port (device, p)
 		ib_cache_update(device, p, true);
 
-	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-			      device, ib_cache_event);
-	ib_register_event_handler(&device->cache.event_handler);
 	return 0;
 }
 
@@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device)
 
 void ib_cache_cleanup_one(struct ib_device *device)
 {
-	/* The cleanup function unregisters the event handler,
-	 * waits for all in-progress workqueue elements and cleans
-	 * up the GID cache. This function should be called after
-	 * the device was removed from the devices list and all
-	 * clients were removed, so the cache exists but is
+	/* The cleanup function waits for all in-progress workqueue
+	 * elements and cleans up the GID cache. This function should be
+	 * called after the device was removed from the devices list and
+	 * all clients were removed, so the cache exists but is
 	 * non-functional and shouldn't be updated anymore.
 	 */
-	ib_unregister_event_handler(&device->cache.event_handler);
 	flush_workqueue(ib_wq);
 	gid_table_cleanup_one(device);
 
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 455b3659d84b..68cc1b2d6824 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -241,6 +241,7 @@ struct cm_id_private {
 	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
 	 * Protected by the cm.lock spinlock. */
 	int listen_sharecount;
+	struct rcu_head rcu;
 
 	struct ib_mad_send_buf *msg;
 	struct cm_timewait_info *timewait_info;
@@ -593,28 +594,16 @@ static void cm_free_id(__be32 local_id)
 	xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
 }
 
-static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
 {
 	struct cm_id_private *cm_id_priv;
 
+	rcu_read_lock();
 	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
-	if (cm_id_priv) {
-		if (cm_id_priv->id.remote_id == remote_id)
-			refcount_inc(&cm_id_priv->refcount);
-		else
-			cm_id_priv = NULL;
-	}
-
-	return cm_id_priv;
-}
-
-static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
-{
-	struct cm_id_private *cm_id_priv;
-
-	spin_lock_irq(&cm.lock);
-	cm_id_priv = cm_get_id(local_id, remote_id);
-	spin_unlock_irq(&cm.lock);
+	if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
+	    !refcount_inc_not_zero(&cm_id_priv->refcount))
+		cm_id_priv = NULL;
+	rcu_read_unlock();
 
 	return cm_id_priv;
 }
@@ -1089,7 +1078,7 @@ retest:
 	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
 	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
 	kfree(cm_id_priv->private_data);
-	kfree(cm_id_priv);
+	kfree_rcu(cm_id_priv, rcu);
 }
 
 void ib_destroy_cm_id(struct ib_cm_id *cm_id)
@@ -1262,54 +1251,72 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv));
 
-	req_msg->local_comm_id = cm_id_priv->id.local_id;
-	req_msg->service_id = param->service_id;
-	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
-	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
-	cm_req_set_init_depth(req_msg, param->initiator_depth);
-	cm_req_set_remote_resp_timeout(req_msg,
-				       param->remote_cm_response_timeout);
+	IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
+	IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
+		be64_to_cpu(cm_id_priv->id.device->node_guid));
+	IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
+	IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
+	IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
+		param->remote_cm_response_timeout);
 	cm_req_set_qp_type(req_msg, param->qp_type);
-	cm_req_set_flow_ctrl(req_msg, param->flow_control);
-	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
-	cm_req_set_local_resp_timeout(req_msg,
-				      param->local_cm_response_timeout);
-	req_msg->pkey = param->primary_path->pkey;
-	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
-	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+	IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
+	IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
+	IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
+		param->local_cm_response_timeout);
+	IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
+		be16_to_cpu(param->primary_path->pkey));
+	IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
+		param->primary_path->mtu);
+	IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
 
 	if (param->qp_type != IB_QPT_XRC_INI) {
-		cm_req_set_resp_res(req_msg, param->responder_resources);
-		cm_req_set_retry_count(req_msg, param->retry_count);
-		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
-		cm_req_set_srq(req_msg, param->srq);
+		IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
+			param->responder_resources);
+		IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
+		IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
+			param->rnr_retry_count);
+		IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
 	}
 
-	req_msg->primary_local_gid = pri_path->sgid;
-	req_msg->primary_remote_gid = pri_path->dgid;
+	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
+		pri_path->sgid;
+	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
+		pri_path->dgid;
 	if (pri_ext) {
-		req_msg->primary_local_gid.global.interface_id
-			= OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
-		req_msg->primary_remote_gid.global.interface_id
-			= OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
+		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
+			->global.interface_id =
+			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
+		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
+			->global.interface_id =
+			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
 	}
 	if (pri_path->hop_limit <= 1) {
-		req_msg->primary_local_lid = pri_ext ? 0 :
-			htons(ntohl(sa_path_get_slid(pri_path)));
-		req_msg->primary_remote_lid = pri_ext ? 0 :
-			htons(ntohl(sa_path_get_dlid(pri_path)));
+		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+			be16_to_cpu(pri_ext ? 0 :
+					      htons(ntohl(sa_path_get_slid(
+						      pri_path)))));
+		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+			be16_to_cpu(pri_ext ? 0 :
+					      htons(ntohl(sa_path_get_dlid(
+						      pri_path)))));
 	} else {
 		/* Work-around until there's a way to obtain remote LID info */
-		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
-		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+			be16_to_cpu(IB_LID_PERMISSIVE));
+		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+			be16_to_cpu(IB_LID_PERMISSIVE));
 	}
-	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
-	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
-	req_msg->primary_traffic_class = pri_path->traffic_class;
-	req_msg->primary_hop_limit = pri_path->hop_limit;
-	cm_req_set_primary_sl(req_msg, pri_path->sl);
-	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
-	cm_req_set_primary_local_ack_timeout(req_msg,
+	IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
+		be32_to_cpu(pri_path->flow_label));
+	IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
+	IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
+	IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
+	IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
+	IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
+		(pri_path->hop_limit <= 1));
+	IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
 		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
 			       pri_path->packet_life_time));
 
@@ -1320,38 +1327,55 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 			alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
 						      alt_path->opa.slid);
 
-		req_msg->alt_local_gid = alt_path->sgid;
-		req_msg->alt_remote_gid = alt_path->dgid;
+		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
+			alt_path->sgid;
+		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
+			alt_path->dgid;
 		if (alt_ext) {
-			req_msg->alt_local_gid.global.interface_id
-				= OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
-			req_msg->alt_remote_gid.global.interface_id
-				= OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
+			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+					req_msg)
+				->global.interface_id =
+				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
+			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
+					req_msg)
+				->global.interface_id =
+				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
 		}
 		if (alt_path->hop_limit <= 1) {
-			req_msg->alt_local_lid = alt_ext ? 0 :
-				htons(ntohl(sa_path_get_slid(alt_path)));
-			req_msg->alt_remote_lid = alt_ext ? 0 :
-				htons(ntohl(sa_path_get_dlid(alt_path)));
+			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+				be16_to_cpu(
+					alt_ext ? 0 :
+						  htons(ntohl(sa_path_get_slid(
+							  alt_path)))));
+			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+				be16_to_cpu(
+					alt_ext ? 0 :
+						  htons(ntohl(sa_path_get_dlid(
+							  alt_path)))));
 		} else {
-			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
-			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+				be16_to_cpu(IB_LID_PERMISSIVE));
+			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+				be16_to_cpu(IB_LID_PERMISSIVE));
 		}
-		cm_req_set_alt_flow_label(req_msg,
-					  alt_path->flow_label);
-		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
-		req_msg->alt_traffic_class = alt_path->traffic_class;
-		req_msg->alt_hop_limit = alt_path->hop_limit;
-		cm_req_set_alt_sl(req_msg, alt_path->sl);
-		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
-		cm_req_set_alt_local_ack_timeout(req_msg,
+		IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
+			be32_to_cpu(alt_path->flow_label));
+		IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
+		IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
+			alt_path->traffic_class);
+		IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
+			alt_path->hop_limit);
+		IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
+		IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
+			(alt_path->hop_limit <= 1));
+		IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
 			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
 				       alt_path->packet_life_time));
 	}
 
 	if (param->private_data && param->private_data_len)
-		memcpy(req_msg->private_data, param->private_data,
-		       param->private_data_len);
+		IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
+			    param->private_data_len);
 }
 
 static int cm_validate_req_param(struct ib_cm_req_param *param)
@@ -1443,8 +1467,8 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
 
-	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
-	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
@@ -1482,14 +1506,16 @@ static int cm_issue_rej(struct cm_port *port,
 	rej_msg = (struct cm_rej_msg *) msg->mad;
 
 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
-	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
-	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
-	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
-	rej_msg->reason = cpu_to_be16(reason);
+	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
+	IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
+	IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
+	IBA_SET(CM_REJ_REASON, rej_msg, reason);
 
 	if (ari && ari_length) {
-		cm_rej_set_reject_info_len(rej_msg, ari_length);
-		memcpy(rej_msg->ari, ari, ari_length);
+		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
 	}
 
 	ret = ib_post_send_mad(msg, NULL);
@@ -1501,8 +1527,10 @@ static int cm_issue_rej(struct cm_port *port,
 
 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
 {
-	return ((req_msg->alt_local_lid) ||
-		(ib_is_opa_gid(&req_msg->alt_local_gid)));
+	return ((cpu_to_be16(
+			IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
+		(ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+					       req_msg))));
 }
 
 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
@@ -1522,14 +1550,18 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
 
 	if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
 		sa_path_set_dlid(primary_path,
-				 ntohs(req_msg->primary_local_lid));
+				 IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+					 req_msg));
 		sa_path_set_slid(primary_path,
-				 ntohs(req_msg->primary_remote_lid));
+				 IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+					 req_msg));
 	} else {
-		lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
 		sa_path_set_dlid(primary_path, lid);
 
-		lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
 		sa_path_set_slid(primary_path, lid);
 	}
 
@@ -1537,13 +1569,19 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
 		return;
 
 	if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
-		sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
-		sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
+		sa_path_set_dlid(alt_path,
+				 IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+					 req_msg));
+		sa_path_set_slid(alt_path,
+				 IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+					 req_msg));
 	} else {
-		lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
 		sa_path_set_dlid(alt_path, lid);
 
-		lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
 		sa_path_set_slid(alt_path, lid);
 	}
 }
@@ -1552,44 +1590,58 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
 				     struct sa_path_rec *primary_path,
 				     struct sa_path_rec *alt_path)
 {
-	primary_path->dgid = req_msg->primary_local_gid;
-	primary_path->sgid = req_msg->primary_remote_gid;
-	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
-	primary_path->hop_limit = req_msg->primary_hop_limit;
-	primary_path->traffic_class = req_msg->primary_traffic_class;
+	primary_path->dgid =
+		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
+	primary_path->sgid =
+		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
+	primary_path->flow_label =
+		cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
+	primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
+	primary_path->traffic_class =
+		IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
 	primary_path->reversible = 1;
-	primary_path->pkey = req_msg->pkey;
-	primary_path->sl = cm_req_get_primary_sl(req_msg);
+	primary_path->pkey =
+		cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+	primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
 	primary_path->mtu_selector = IB_SA_EQ;
-	primary_path->mtu = cm_req_get_path_mtu(req_msg);
+	primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
 	primary_path->rate_selector = IB_SA_EQ;
-	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+	primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
 	primary_path->packet_life_time_selector = IB_SA_EQ;
 	primary_path->packet_life_time =
-		cm_req_get_primary_local_ack_timeout(req_msg);
+		IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
-	primary_path->service_id = req_msg->service_id;
+	primary_path->service_id =
+		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
 	if (sa_path_is_roce(primary_path))
 		primary_path->roce.route_resolved = false;
 
 	if (cm_req_has_alt_path(req_msg)) {
-		alt_path->dgid = req_msg->alt_local_gid;
-		alt_path->sgid = req_msg->alt_remote_gid;
-		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
-		alt_path->hop_limit = req_msg->alt_hop_limit;
-		alt_path->traffic_class = req_msg->alt_traffic_class;
+		alt_path->dgid = *IBA_GET_MEM_PTR(
+			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
+		alt_path->sgid = *IBA_GET_MEM_PTR(
+			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
+		alt_path->flow_label = cpu_to_be32(
+			IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
+		alt_path->hop_limit =
+			IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
+		alt_path->traffic_class =
+			IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
 		alt_path->reversible = 1;
-		alt_path->pkey = req_msg->pkey;
-		alt_path->sl = cm_req_get_alt_sl(req_msg);
+		alt_path->pkey =
+			cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+		alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
 		alt_path->mtu_selector = IB_SA_EQ;
-		alt_path->mtu = cm_req_get_path_mtu(req_msg);
+		alt_path->mtu =
+			IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
 		alt_path->rate_selector = IB_SA_EQ;
-		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+		alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
 		alt_path->packet_life_time_selector = IB_SA_EQ;
 		alt_path->packet_life_time =
-			cm_req_get_alt_local_ack_timeout(req_msg);
+			IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
-		alt_path->service_id = req_msg->service_id;
+		alt_path->service_id =
+			cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
 
 		if (sa_path_is_roce(alt_path))
 			alt_path->roce.route_resolved = false;
@@ -1664,23 +1716,25 @@ static void cm_format_req_event(struct cm_work *work,
 	} else {
 		param->alternate_path = NULL;
 	}
-	param->remote_ca_guid = req_msg->local_ca_guid;
-	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
-	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+	param->remote_ca_guid =
+		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+	param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
+	param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
 	param->qp_type = cm_req_get_qp_type(req_msg);
-	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
-	param->responder_resources = cm_req_get_init_depth(req_msg);
-	param->initiator_depth = cm_req_get_resp_res(req_msg);
+	param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
+	param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+	param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
 	param->local_cm_response_timeout =
-					cm_req_get_remote_resp_timeout(req_msg);
-	param->flow_control = cm_req_get_flow_ctrl(req_msg);
+		IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
+	param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
 	param->remote_cm_response_timeout =
-					cm_req_get_local_resp_timeout(req_msg);
-	param->retry_count = cm_req_get_retry_count(req_msg);
-	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
-	param->srq = cm_req_get_srq(req_msg);
+		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
+	param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+	param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+	param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
 	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
-	work->cm_event.private_data = &req_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
 }
 
 static void cm_process_work(struct cm_id_private *cm_id_priv,
@@ -1714,13 +1768,16 @@ static void cm_format_mra(struct cm_mra_msg *mra_msg,
 			  const void *private_data, u8 private_data_len)
 {
 	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
-	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
-	mra_msg->local_comm_id = cm_id_priv->id.local_id;
-	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	cm_mra_set_service_timeout(mra_msg, service_timeout);
+	IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
+	IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
+	IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
 
 	if (private_data && private_data_len)
-		memcpy(mra_msg->private_data, private_data, private_data_len);
+		IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
+			    private_data_len);
 }
 
 static void cm_format_rej(struct cm_rej_msg *rej_msg,
@@ -1732,36 +1789,42 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg,
 			  u8 private_data_len)
 {
 	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
-	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
 
 	switch(cm_id_priv->id.state) {
 	case IB_CM_REQ_RCVD:
-		rej_msg->local_comm_id = 0;
-		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
+		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
 		break;
 	case IB_CM_MRA_REQ_SENT:
-		rej_msg->local_comm_id = cm_id_priv->id.local_id;
-		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+			be32_to_cpu(cm_id_priv->id.local_id));
+		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
 		break;
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
-		rej_msg->local_comm_id = cm_id_priv->id.local_id;
-		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+			be32_to_cpu(cm_id_priv->id.local_id));
+		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
 		break;
 	default:
-		rej_msg->local_comm_id = cm_id_priv->id.local_id;
-		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+			be32_to_cpu(cm_id_priv->id.local_id));
+		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
+			CM_MSG_RESPONSE_OTHER);
 		break;
 	}
 
-	rej_msg->reason = cpu_to_be16(reason);
+	IBA_SET(CM_REJ_REASON, rej_msg, reason);
 	if (ari && ari_length) {
-		cm_rej_set_reject_info_len(rej_msg, ari_length);
-		memcpy(rej_msg->ari, ari, ari_length);
+		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
 	}
 
 	if (private_data && private_data_len)
-		memcpy(rej_msg->private_data, private_data, private_data_len);
+		IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
+			    private_data_len);
 }
 
 static void cm_dup_req_handler(struct cm_work *work,
@@ -1821,7 +1884,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	spin_lock_irq(&cm.lock);
 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
 	if (timewait_info) {
-		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
 					   timewait_info->work.remote_id);
 		spin_unlock_irq(&cm.lock);
 		if (cur_cm_id_priv) {
@@ -1835,7 +1898,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
 	if (timewait_info) {
 		cm_cleanup_timewait(cm_id_priv->timewait_info);
-		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
 					   timewait_info->work.remote_id);
 
 		spin_unlock_irq(&cm.lock);
@@ -1851,8 +1914,9 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	}
 
 	/* Find matching listen request. */
-	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
-					   req_msg->service_id);
+	listen_cm_id_priv = cm_find_listen(
+		cm_id_priv->id.device,
+		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
 	if (!listen_cm_id_priv) {
 		cm_cleanup_timewait(cm_id_priv->timewait_info);
 		spin_unlock_irq(&cm.lock);
@@ -1877,24 +1941,32 @@ out:
  */
 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
 {
-	if (!cm_req_get_primary_subnet_local(req_msg)) {
-		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
-			req_msg->primary_local_lid = ib_lid_be16(wc->slid);
-			cm_req_set_primary_sl(req_msg, wc->sl);
+	if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
+		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+					req_msg)) == IB_LID_PERMISSIVE) {
+			IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+				be16_to_cpu(ib_lid_be16(wc->slid)));
+			IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
 		}
 
-		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
-			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+					req_msg)) == IB_LID_PERMISSIVE)
+			IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+				wc->dlid_path_bits);
 	}
 
-	if (!cm_req_get_alt_subnet_local(req_msg)) {
-		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
-			req_msg->alt_local_lid = ib_lid_be16(wc->slid);
-			cm_req_set_alt_sl(req_msg, wc->sl);
+	if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
+		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+					req_msg)) == IB_LID_PERMISSIVE) {
+			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+				be16_to_cpu(ib_lid_be16(wc->slid)));
+			IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
 		}
 
-		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
-			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+					req_msg)) == IB_LID_PERMISSIVE)
+			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+				wc->dlid_path_bits);
 	}
 }
 
@@ -1914,7 +1986,8 @@ static int cm_req_handler(struct cm_work *work)
 		return PTR_ERR(cm_id);
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	cm_id_priv->id.remote_id = req_msg->local_comm_id;
+	cm_id_priv->id.remote_id =
+		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
 	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
 				      work->mad_recv_wc->recv_buf.grh,
 				      &cm_id_priv->av);
@@ -1926,9 +1999,12 @@ static int cm_req_handler(struct cm_work *work)
 		ret = PTR_ERR(cm_id_priv->timewait_info);
 		goto destroy;
 	}
-	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
-	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
-	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+	cm_id_priv->timewait_info->work.remote_id =
+		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+	cm_id_priv->timewait_info->remote_ca_guid =
+		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+	cm_id_priv->timewait_info->remote_qpn =
+		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
 
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
@@ -1940,7 +2016,8 @@ static int cm_req_handler(struct cm_work *work)
 
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
 	cm_id_priv->id.context = listen_cm_id_priv->id.context;
-	cm_id_priv->id.service_id = req_msg->service_id;
+	cm_id_priv->id.service_id =
+		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
 
 	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
@@ -1957,10 +2034,11 @@ static int cm_req_handler(struct cm_work *work)
 		work->path[0].rec_type =
 			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
 	} else {
-		cm_path_set_rec_type(work->port->cm_dev->ib_device,
-				     work->port->port_num,
-				     &work->path[0],
-				     &req_msg->primary_local_gid);
+		cm_path_set_rec_type(
+			work->port->cm_dev->ib_device, work->port->port_num,
+			&work->path[0],
+			IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
+					req_msg));
 	}
 	if (cm_req_has_alt_path(req_msg))
 		work->path[1].rec_type = work->path[0].rec_type;
@@ -2000,16 +2078,19 @@ static int cm_req_handler(struct cm_work *work)
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
-					cm_req_get_local_resp_timeout(req_msg));
-	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
-	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
-	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
-	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
-	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
-	cm_id_priv->pkey = req_msg->pkey;
-	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
-	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
-	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+	cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+	cm_id_priv->remote_qpn =
+		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+	cm_id_priv->initiator_depth =
+		IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+	cm_id_priv->responder_resources =
+		IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+	cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+	cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+	cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+	cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
 	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
 
 	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
@@ -2032,29 +2113,35 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
 			  struct ib_cm_rep_param *param)
 {
 	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
-	rep_msg->local_comm_id = cm_id_priv->id.local_id;
-	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
-	rep_msg->resp_resources = param->responder_resources;
-	cm_rep_set_target_ack_delay(rep_msg,
-				    cm_id_priv->av.port->cm_dev->ack_delay);
-	cm_rep_set_failover(rep_msg, param->failover_accepted);
-	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
-	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+	IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
+	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
+	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
+		param->responder_resources);
+	IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
+		cm_id_priv->av.port->cm_dev->ack_delay);
+	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
+	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
+	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
+		be64_to_cpu(cm_id_priv->id.device->node_guid));
 
 	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
-		rep_msg->initiator_depth = param->initiator_depth;
-		cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
-		cm_rep_set_srq(rep_msg, param->srq);
-		cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+		IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
+			param->initiator_depth);
+		IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
+			param->flow_control);
+		IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
+		IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
 	} else {
-		cm_rep_set_srq(rep_msg, 1);
-		cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+		IBA_SET(CM_REP_SRQ, rep_msg, 1);
+		IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
 	}
 
 	if (param->private_data && param->private_data_len)
-		memcpy(rep_msg->private_data, param->private_data,
-		       param->private_data_len);
+		IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
+			    param->private_data_len);
 }
 
 int ib_send_cm_rep(struct ib_cm_id *cm_id,
@@ -2100,7 +2187,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 	cm_id_priv->msg = msg;
 	cm_id_priv->initiator_depth = param->initiator_depth;
 	cm_id_priv->responder_resources = param->responder_resources;
-	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
 	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
 
 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2114,11 +2201,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
 			  u8 private_data_len)
 {
 	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
-	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
-	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
 
 	if (private_data && private_data_len)
-		memcpy(rtu_msg->private_data, private_data, private_data_len);
+		IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
+			    private_data_len);
 }
 
 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
@@ -2181,18 +2271,20 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
 
 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.rep_rcvd;
-	param->remote_ca_guid = rep_msg->local_ca_guid;
-	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+	param->remote_ca_guid =
+		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
+	param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
 	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
-	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
-	param->responder_resources = rep_msg->initiator_depth;
-	param->initiator_depth = rep_msg->resp_resources;
-	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
-	param->failover_accepted = cm_rep_get_failover(rep_msg);
-	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
-	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
-	param->srq = cm_rep_get_srq(rep_msg);
-	work->cm_event.private_data = &rep_msg->private_data;
+	param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
+	param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+	param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+	param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
+	param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
+	param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
+	param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+	param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
 }
 
 static void cm_dup_rep_handler(struct cm_work *work)
@@ -2203,8 +2295,9 @@ static void cm_dup_rep_handler(struct cm_work *work)
 	int ret;
 
 	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
-				   rep_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
+		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
 	if (!cm_id_priv)
 		return;
 
@@ -2248,11 +2341,12 @@ static int cm_rep_handler(struct cm_work *work)
 	struct cm_timewait_info *timewait_info;
 
 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
 	if (!cm_id_priv) {
 		cm_dup_rep_handler(work);
 		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
-			 be32_to_cpu(rep_msg->remote_comm_id));
+			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 		return -EINVAL;
 	}
 
@@ -2266,15 +2360,18 @@ static int cm_rep_handler(struct cm_work *work)
 	default:
 		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
-		pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
-			 __func__, cm_id_priv->id.state,
-			 be32_to_cpu(rep_msg->local_comm_id),
-			 be32_to_cpu(rep_msg->remote_comm_id));
+		pr_debug(
+			"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
+			__func__, cm_id_priv->id.state,
+			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 		goto error;
 	}
 
-	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
-	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+	cm_id_priv->timewait_info->work.remote_id =
+		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
+	cm_id_priv->timewait_info->remote_ca_guid =
+		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
 
 	spin_lock(&cm.lock);
@@ -2284,7 +2381,7 @@ static int cm_rep_handler(struct cm_work *work)
 		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
 		pr_debug("%s: Failed to insert remote id %d\n", __func__,
-			 be32_to_cpu(rep_msg->remote_comm_id));
+			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 		goto error;
 	}
 	/* Check for a stale connection. */
@@ -2293,7 +2390,7 @@ static int cm_rep_handler(struct cm_work *work)
 		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
 			 &cm.remote_id_table);
 		cm_id_priv->timewait_info->inserted_remote_id = 0;
-		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
 					   timewait_info->work.remote_id);
 
 		spin_unlock(&cm.lock);
@@ -2302,9 +2399,10 @@ static int cm_rep_handler(struct cm_work *work)
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
 		ret = -EINVAL;
-		pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
-			 __func__, be32_to_cpu(rep_msg->local_comm_id),
-			 be32_to_cpu(rep_msg->remote_comm_id));
+		pr_debug(
+			"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
+			__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 
 		if (cur_cm_id_priv) {
 			cm_id = &cur_cm_id_priv->id;
@@ -2317,13 +2415,17 @@ static int cm_rep_handler(struct cm_work *work)
 	spin_unlock(&cm.lock);
 
 	cm_id_priv->id.state = IB_CM_REP_RCVD;
-	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+	cm_id_priv->id.remote_id =
+		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
 	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
-	cm_id_priv->initiator_depth = rep_msg->resp_resources;
-	cm_id_priv->responder_resources = rep_msg->initiator_depth;
-	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
-	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
-	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+	cm_id_priv->initiator_depth =
+		IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+	cm_id_priv->responder_resources =
+		IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+	cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+	cm_id_priv->target_ack_delay =
+		IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
 	cm_id_priv->av.timeout =
 			cm_ack_timeout(cm_id_priv->target_ack_delay,
 				       cm_id_priv->av.timeout - 1);
@@ -2389,12 +2491,14 @@ static int cm_rtu_handler(struct cm_work *work)
 	int ret;
 
 	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
-				   rtu_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
+		cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
 	if (!cm_id_priv)
 		return -EINVAL;
 
-	work->cm_event.private_data = &rtu_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
@@ -2429,12 +2533,16 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
 {
 	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv));
-	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
-	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+	IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
+	IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
+		be32_to_cpu(cm_id_priv->remote_qpn));
 
 	if (private_data && private_data_len)
-		memcpy(dreq_msg->private_data, private_data, private_data_len);
+		IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
+			    private_data_len);
 }
 
 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
@@ -2494,11 +2602,14 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
 			  u8 private_data_len)
 {
 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
-	drep_msg->local_comm_id = cm_id_priv->id.local_id;
-	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
 
 	if (private_data && private_data_len)
-		memcpy(drep_msg->private_data, private_data, private_data_len);
+		IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
+			    private_data_len);
 }
 
 int ib_send_cm_drep(struct ib_cm_id *cm_id,
@@ -2566,8 +2677,10 @@ static int cm_issue_drep(struct cm_port *port,
 	drep_msg = (struct cm_drep_msg *) msg->mad;
 
 	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
-	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
-	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
+	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
@@ -2584,22 +2697,26 @@ static int cm_dreq_handler(struct cm_work *work)
 	int ret;
 
 	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
-				   dreq_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
+		cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
 	if (!cm_id_priv) {
 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
 				counter[CM_DREQ_COUNTER]);
 		cm_issue_drep(work->port, work->mad_recv_wc);
-		pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
-			 __func__, be32_to_cpu(dreq_msg->local_comm_id),
-			 be32_to_cpu(dreq_msg->remote_comm_id));
+		pr_debug(
+			"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
+			__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 		return -EINVAL;
 	}
 
-	work->cm_event.private_data = &dreq_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
 
 	spin_lock_irq(&cm_id_priv->lock);
-	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+	if (cm_id_priv->local_qpn !=
+	    cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
 		goto unlock;
 
 	switch (cm_id_priv->id.state) {
@@ -2665,12 +2782,14 @@ static int cm_drep_handler(struct cm_work *work)
 	int ret;
 
 	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
-				   drep_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
+		cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
 	if (!cm_id_priv)
 		return -EINVAL;
 
-	work->cm_event.private_data = &drep_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
@@ -2766,10 +2885,11 @@ static void cm_format_rej_event(struct cm_work *work)
 
 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.rej_rcvd;
-	param->ari = rej_msg->ari;
-	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
-	param->reason = __be16_to_cpu(rej_msg->reason);
-	work->cm_event.private_data = &rej_msg->private_data;
+	param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
+	param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
+	param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
 }
 
 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
@@ -2778,29 +2898,29 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 	struct cm_id_private *cm_id_priv;
 	__be32 remote_id;
 
-	remote_id = rej_msg->local_comm_id;
+	remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
 
-	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
+	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
 		spin_lock_irq(&cm.lock);
-		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
-						  remote_id);
+		timewait_info = cm_find_remote_id(
+			*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
+			remote_id);
 		if (!timewait_info) {
 			spin_unlock_irq(&cm.lock);
 			return NULL;
 		}
-		cm_id_priv = xa_load(&cm.local_id_table,
-				cm_local_id(timewait_info->work.local_id));
-		if (cm_id_priv) {
-			if (cm_id_priv->id.remote_id == remote_id)
-				refcount_inc(&cm_id_priv->refcount);
-			else
-				cm_id_priv = NULL;
-		}
+		cm_id_priv =
+			cm_acquire_id(timewait_info->work.local_id, remote_id);
 		spin_unlock_irq(&cm.lock);
-	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
-		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+	} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
+		   CM_MSG_RESPONSE_REQ)
+		cm_id_priv = cm_acquire_id(
+			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+			0);
 	else
-		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+		cm_id_priv = cm_acquire_id(
+			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+			remote_id);
 
 	return cm_id_priv;
 }
@@ -2828,7 +2948,7 @@ static int cm_rej_handler(struct cm_work *work)
 		/* fall through */
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
-		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+		if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
 			cm_enter_timewait(cm_id_priv);
 		else
 			cm_reset_to_idle(cm_id_priv);
@@ -2958,13 +3078,16 @@ EXPORT_SYMBOL(ib_send_cm_mra);
 
 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
 {
-	switch (cm_mra_get_msg_mraed(mra_msg)) {
+	switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
 	case CM_MSG_RESPONSE_REQ:
-		return cm_acquire_id(mra_msg->remote_comm_id, 0);
+		return cm_acquire_id(
+			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+			0);
 	case CM_MSG_RESPONSE_REP:
 	case CM_MSG_RESPONSE_OTHER:
-		return cm_acquire_id(mra_msg->remote_comm_id,
-				     mra_msg->local_comm_id);
+		return cm_acquire_id(
+			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+			cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
 	default:
 		return NULL;
 	}
@@ -2981,30 +3104,34 @@ static int cm_mra_handler(struct cm_work *work)
 	if (!cm_id_priv)
 		return -EINVAL;
 
-	work->cm_event.private_data = &mra_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
 	work->cm_event.param.mra_rcvd.service_timeout =
-					cm_mra_get_service_timeout(mra_msg);
-	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+		IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
+	timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
 		  cm_convert_to_ms(cm_id_priv->av.timeout);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REQ_SENT:
-		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
+		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+			    CM_MSG_RESPONSE_REQ ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
 				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
 		break;
 	case IB_CM_REP_SENT:
-		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
+		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+			    CM_MSG_RESPONSE_REP ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
 				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
 		break;
 	case IB_CM_ESTABLISHED:
-		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+			    CM_MSG_RESPONSE_OTHER ||
 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
 				  cm_id_priv->msg, timeout)) {
@@ -3046,117 +3173,23 @@ out:
 	return -EINVAL;
 }
 
-static void cm_format_lap(struct cm_lap_msg *lap_msg,
-			  struct cm_id_private *cm_id_priv,
-			  struct sa_path_rec *alternate_path,
-			  const void *private_data,
-			  u8 private_data_len)
-{
-	bool alt_ext = false;
-
-	if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
-		alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
-					      alternate_path->opa.slid);
-	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
-			  cm_form_tid(cm_id_priv));
-	lap_msg->local_comm_id = cm_id_priv->id.local_id;
-	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
-	/* todo: need remote CM response timeout */
-	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
-	lap_msg->alt_local_lid =
-		htons(ntohl(sa_path_get_slid(alternate_path)));
-	lap_msg->alt_remote_lid =
-		htons(ntohl(sa_path_get_dlid(alternate_path)));
-	lap_msg->alt_local_gid = alternate_path->sgid;
-	lap_msg->alt_remote_gid = alternate_path->dgid;
-	if (alt_ext) {
-		lap_msg->alt_local_gid.global.interface_id
-			= OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
-		lap_msg->alt_remote_gid.global.interface_id
-			= OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
-	}
-	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
-	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
-	lap_msg->alt_hop_limit = alternate_path->hop_limit;
-	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
-	cm_lap_set_sl(lap_msg, alternate_path->sl);
-	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
-	cm_lap_set_local_ack_timeout(lap_msg,
-		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
-			       alternate_path->packet_life_time));
-
-	if (private_data && private_data_len)
-		memcpy(lap_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_lap(struct ib_cm_id *cm_id,
-		   struct sa_path_rec *alternate_path,
-		   const void *private_data,
-		   u8 private_data_len)
-{
-	struct cm_id_private *cm_id_priv;
-	struct ib_mad_send_buf *msg;
-	unsigned long flags;
-	int ret;
-
-	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
-		return -EINVAL;
-
-	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	if (cm_id->state != IB_CM_ESTABLISHED ||
-	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
-	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
-				 cm_id_priv);
-	if (ret)
-		goto out;
-	cm_id_priv->alt_av.timeout =
-			cm_ack_timeout(cm_id_priv->target_ack_delay,
-				       cm_id_priv->alt_av.timeout - 1);
-
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
-		goto out;
-
-	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
-		      alternate_path, private_data, private_data_len);
-	msg->timeout_ms = cm_id_priv->timeout_ms;
-	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
-
-	ret = ib_post_send_mad(msg, NULL);
-	if (ret) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		cm_free_msg(msg);
-		return ret;
-	}
-
-	cm_id->lap_state = IB_CM_LAP_SENT;
-	cm_id_priv->msg = msg;
-
-out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_lap);
-
 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
 					struct sa_path_rec *path)
 {
 	u32 lid;
 
 	if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
-		sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
-		sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
+		sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
+					       lap_msg));
+		sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
+					       lap_msg));
 	} else {
-		lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
 		sa_path_set_dlid(path, lid);
 
-		lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
+		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+			CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
 		sa_path_set_slid(path, lid);
 	}
 }
@@ -3165,20 +3198,23 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
 				    struct sa_path_rec *path,
 				    struct cm_lap_msg *lap_msg)
 {
-	path->dgid = lap_msg->alt_local_gid;
-	path->sgid = lap_msg->alt_remote_gid;
-	path->flow_label = cm_lap_get_flow_label(lap_msg);
-	path->hop_limit = lap_msg->alt_hop_limit;
-	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+	path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
+	path->sgid =
+		*IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
+	path->flow_label =
+		cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
+	path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
+	path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
 	path->reversible = 1;
 	path->pkey = cm_id_priv->pkey;
-	path->sl = cm_lap_get_sl(lap_msg);
+	path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
 	path->mtu_selector = IB_SA_EQ;
 	path->mtu = cm_id_priv->path_mtu;
 	path->rate_selector = IB_SA_EQ;
-	path->rate = cm_lap_get_packet_rate(lap_msg);
+	path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
 	path->packet_life_time_selector = IB_SA_EQ;
-	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+	path->packet_life_time =
+		IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
 	path->packet_life_time -= (path->packet_life_time > 0);
 	cm_format_path_lid_from_lap(lap_msg, path);
 }
@@ -3200,20 +3236,22 @@ static int cm_lap_handler(struct cm_work *work)
 
 	/* todo: verify LAP request and send reject APR if invalid. */
 	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
-				   lap_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
+		cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
 	if (!cm_id_priv)
 		return -EINVAL;
 
 	param = &work->cm_event.param.lap_rcvd;
 	memset(&work->path[0], 0, sizeof(work->path[1]));
 	cm_path_set_rec_type(work->port->cm_dev->ib_device,
-			     work->port->port_num,
-			     &work->path[0],
-			     &lap_msg->alt_local_gid);
+			     work->port->port_num, &work->path[0],
+			     IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
+					     lap_msg));
 	param->alternate_path = &work->path[0];
 	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
-	work->cm_event.private_data = &lap_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
@@ -3278,72 +3316,6 @@ deref:	cm_deref_id(cm_id_priv);
 	return -EINVAL;
 }
 
-static void cm_format_apr(struct cm_apr_msg *apr_msg,
-			  struct cm_id_private *cm_id_priv,
-			  enum ib_cm_apr_status status,
-			  void *info,
-			  u8 info_length,
-			  const void *private_data,
-			  u8 private_data_len)
-{
-	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
-	apr_msg->local_comm_id = cm_id_priv->id.local_id;
-	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	apr_msg->ap_status = (u8) status;
-
-	if (info && info_length) {
-		apr_msg->info_length = info_length;
-		memcpy(apr_msg->info, info, info_length);
-	}
-
-	if (private_data && private_data_len)
-		memcpy(apr_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_apr(struct ib_cm_id *cm_id,
-		   enum ib_cm_apr_status status,
-		   void *info,
-		   u8 info_length,
-		   const void *private_data,
-		   u8 private_data_len)
-{
-	struct cm_id_private *cm_id_priv;
-	struct ib_mad_send_buf *msg;
-	unsigned long flags;
-	int ret;
-
-	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
-	    (info && info_length > IB_CM_APR_INFO_LENGTH))
-		return -EINVAL;
-
-	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	if (cm_id->state != IB_CM_ESTABLISHED ||
-	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
-	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = cm_alloc_msg(cm_id_priv, &msg);
-	if (ret)
-		goto out;
-
-	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
-		      info, info_length, private_data, private_data_len);
-	ret = ib_post_send_mad(msg, NULL);
-	if (ret) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		cm_free_msg(msg);
-		return ret;
-	}
-
-	cm_id->lap_state = IB_CM_LAP_IDLE;
-out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_apr);
-
 static int cm_apr_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
@@ -3358,15 +3330,20 @@ static int cm_apr_handler(struct cm_work *work)
 		return -EINVAL;
 
 	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
-				   apr_msg->local_comm_id);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
+		cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
 	if (!cm_id_priv)
 		return -EINVAL; /* Unmatched reply. */
 
-	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
-	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
-	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
-	work->cm_event.private_data = &apr_msg->private_data;
+	work->cm_event.param.apr_rcvd.ap_status =
+		IBA_GET(CM_APR_AR_STATUS, apr_msg);
+	work->cm_event.param.apr_rcvd.apr_info =
+		IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
+	work->cm_event.param.apr_rcvd.info_len =
+		IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
@@ -3438,13 +3415,16 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
 {
 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv));
-	sidr_req_msg->request_id = cm_id_priv->id.local_id;
-	sidr_req_msg->pkey = param->path->pkey;
-	sidr_req_msg->service_id = param->service_id;
+	IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
+		be32_to_cpu(cm_id_priv->id.local_id));
+	IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
+		be16_to_cpu(param->path->pkey));
+	IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
+		be64_to_cpu(param->service_id));
 
 	if (param->private_data && param->private_data_len)
-		memcpy(sidr_req_msg->private_data, param->private_data,
-		       param->private_data_len);
+		IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
+			    param->private_data, param->private_data_len);
 }
 
 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
@@ -3508,13 +3488,15 @@ static void cm_format_sidr_req_event(struct cm_work *work,
 	sidr_req_msg = (struct cm_sidr_req_msg *)
 				work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.sidr_req_rcvd;
-	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+	param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
 	param->listen_id = listen_id;
-	param->service_id = sidr_req_msg->service_id;
+	param->service_id =
+		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
 	param->bth_pkey = cm_get_bth_pkey(work);
 	param->port = work->port->port_num;
 	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
-	work->cm_event.private_data = &sidr_req_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
 }
 
 static int cm_sidr_req_handler(struct cm_work *work)
@@ -3542,7 +3524,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	if (ret)
 		goto out;
 
-	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
+	cm_id_priv->id.remote_id =
+		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
 	cm_id_priv->tid = sidr_req_msg->hdr.tid;
 	atomic_inc(&cm_id_priv->work_count);
 
@@ -3555,8 +3538,9 @@ static int cm_sidr_req_handler(struct cm_work *work)
 		goto out; /* Duplicate message. */
 	}
 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
-	cur_cm_id_priv = cm_find_listen(cm_id->device,
-					sidr_req_msg->service_id);
+	cur_cm_id_priv = cm_find_listen(
+		cm_id->device,
+		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)));
 	if (!cur_cm_id_priv) {
 		spin_unlock_irq(&cm.lock);
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
@@ -3568,7 +3552,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 
 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
-	cm_id_priv->id.service_id = sidr_req_msg->service_id;
+	cm_id_priv->id.service_id =
+		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
 
 	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
@@ -3586,18 +3571,21 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
 {
 	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
 			  cm_id_priv->tid);
-	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
-	sidr_rep_msg->status = param->status;
-	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
-	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
-	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+	IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
+		be32_to_cpu(cm_id_priv->id.remote_id));
+	IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
+	IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
+	IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
+		be64_to_cpu(cm_id_priv->id.service_id));
+	IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
 
 	if (param->info && param->info_length)
-		memcpy(sidr_rep_msg->info, param->info, param->info_length);
+		IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
+			    param->info, param->info_length);
 
 	if (param->private_data && param->private_data_len)
-		memcpy(sidr_rep_msg->private_data, param->private_data,
-		       param->private_data_len);
+		IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
+			    param->private_data, param->private_data_len);
 }
 
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
@@ -3657,13 +3645,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work,
 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
 				work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.sidr_rep_rcvd;
-	param->status = sidr_rep_msg->status;
-	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
-	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
-	param->info = &sidr_rep_msg->info;
-	param->info_len = sidr_rep_msg->info_length;
+	param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
+	param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
+	param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
+	param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
+				      sidr_rep_msg);
+	param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
+				  sidr_rep_msg);
 	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
-	work->cm_event.private_data = &sidr_rep_msg->private_data;
+	work->cm_event.private_data =
+		IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
 }
 
 static int cm_sidr_rep_handler(struct cm_work *work)
@@ -3673,7 +3664,8 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 
 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
 				work->mad_recv_wc->recv_buf.mad;
-	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+	cm_id_priv = cm_acquire_id(
+		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
 	if (!cm_id_priv)
 		return -EINVAL; /* Unmatched reply. */
 
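A pattern worth noting in the cm.c hunks above: the IBA_GET()/IBA_SET() accessors exchange values in CPU byte order, so every call site that still works with __be32/__be64 fields bridges explicitly with cpu_to_be32()/be32_to_cpu() (and the 64-bit variants). A minimal userspace sketch of that round trip, with glibc's htobe32()/be32toh() standing in for the kernel helpers:

/*
 * Illustrative userspace sketch, not kernel code: an IBA_GET()-style read
 * hands back a CPU-order value, so storing it into a __be32 field such as
 * cm_id_priv->id.remote_id needs an explicit cpu_to_be32(), and feeding a
 * __be32 into IBA_SET() needs be32_to_cpu().
 */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t wire = htobe32(0x00345678);	/* comm ID as carried in the MAD */
	uint32_t host = be32toh(wire);		/* what an IBA_GET()-style read returns */
	uint32_t back = htobe32(host);		/* cpu_to_be32() before storing into a __be32 */

	printf("host 0x%08x, round-trips: %s\n", host,
	       back == wire ? "yes" : "no");
	return 0;
}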
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 92d7260ac913..0cc40656b5c5 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -8,6 +8,7 @@
 #ifndef CM_MSGS_H
 #define CM_MSGS_H
 
+#include <rdma/ibta_vol1_c12.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_cm.h>
 
@@ -18,120 +19,14 @@
 
 #define IB_CM_CLASS_VERSION	2 /* IB specification 1.2 */
 
-struct cm_req_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 rsvd4;
-	__be64 service_id;
-	__be64 local_ca_guid;
-	__be32 rsvd24;
-	__be32 local_qkey;
-	/* local QPN:24, responder resources:8 */
-	__be32 offset32;
-	/* local EECN:24, initiator depth:8 */
-	__be32 offset36;
-	/*
-	 * remote EECN:24, remote CM response timeout:5,
-	 * transport service type:2, end-to-end flow control:1
-	 */
-	__be32 offset40;
-	/* starting PSN:24, local CM response timeout:5, retry count:3 */
-	__be32 offset44;
-	__be16 pkey;
-	/* path MTU:4, RDC exists:1, RNR retry count:3. */
-	u8 offset50;
-	/* max CM Retries:4, SRQ:1, extended transport type:3 */
-	u8 offset51;
-
-	__be16 primary_local_lid;
-	__be16 primary_remote_lid;
-	union ib_gid primary_local_gid;
-	union ib_gid primary_remote_gid;
-	/* flow label:20, rsvd:6, packet rate:6 */
-	__be32 primary_offset88;
-	u8 primary_traffic_class;
-	u8 primary_hop_limit;
-	/* SL:4, subnet local:1, rsvd:3 */
-	u8 primary_offset94;
-	/* local ACK timeout:5, rsvd:3 */
-	u8 primary_offset95;
-
-	__be16 alt_local_lid;
-	__be16 alt_remote_lid;
-	union ib_gid alt_local_gid;
-	union ib_gid alt_remote_gid;
-	/* flow label:20, rsvd:6, packet rate:6 */
-	__be32 alt_offset132;
-	u8 alt_traffic_class;
-	u8 alt_hop_limit;
-	/* SL:4, subnet local:1, rsvd:3 */
-	u8 alt_offset138;
-	/* local ACK timeout:5, rsvd:3 */
-	u8 alt_offset139;
-
-	u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-
-} __packed;
-
-static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
-{
-	return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
-}
-
-static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
-{
-	req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
-					 (be32_to_cpu(req_msg->offset32) &
-					  0x000000FF));
-}
-
-static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
-{
-	return (u8) be32_to_cpu(req_msg->offset32);
-}
-
-static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
-{
-	req_msg->offset32 = cpu_to_be32(resp_res |
-					(be32_to_cpu(req_msg->offset32) &
-					 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
-{
-	return (u8) be32_to_cpu(req_msg->offset36);
-}
-
-static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
-					 u8 init_depth)
-{
-	req_msg->offset36 = cpu_to_be32(init_depth |
-					(be32_to_cpu(req_msg->offset36) &
-					 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
-{
-	return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
-						  u8 resp_timeout)
-{
-	req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
-					 (be32_to_cpu(req_msg->offset40) &
-					  0xFFFFFF07));
-}
-
 static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
 {
-	u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
+	u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
 	switch(transport_type) {
 	case 0: return IB_QPT_RC;
 	case 1: return IB_QPT_UC;
 	case 3:
-		switch (req_msg->offset51 & 0x7) {
+		switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) {
 		case 1: return IB_QPT_XRC_TGT;
 		default: return 0;
 		}
@@ -144,240 +39,17 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
 {
 	switch(qp_type) {
 	case IB_QPT_UC:
-		req_msg->offset40 = cpu_to_be32((be32_to_cpu(
-						  req_msg->offset40) &
-						   0xFFFFFFF9) | 0x2);
+		IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
 		break;
 	case IB_QPT_XRC_INI:
-		req_msg->offset40 = cpu_to_be32((be32_to_cpu(
-						 req_msg->offset40) &
-						   0xFFFFFFF9) | 0x6);
-		req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+		IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3);
+		IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1);
 		break;
 	default:
-		req_msg->offset40 = cpu_to_be32(be32_to_cpu(
-						 req_msg->offset40) &
-						  0xFFFFFFF9);
+		IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0);
 	}
 }
 
-static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
-{
-	return be32_to_cpu(req_msg->offset40) & 0x1;
-}
-
-static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
-					u8 flow_ctrl)
-{
-	req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
-					 (be32_to_cpu(req_msg->offset40) &
-					  0xFFFFFFFE));
-}
-
-static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
-{
-	return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
-}
-
-static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
-					   __be32 starting_psn)
-{
-	req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
-			    (be32_to_cpu(req_msg->offset44) & 0x000000FF));
-}
-
-static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
-{
-	return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
-						 u8 resp_timeout)
-{
-	req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
-			    (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
-}
-
-static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
-{
-	return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
-}
-
-static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
-					  u8 retry_count)
-{
-	req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
-			    (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
-}
-
-static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
-{
-	return req_msg->offset50 >> 4;
-}
-
-static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
-{
-	req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
-}
-
-static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
-{
-	return req_msg->offset50 & 0x7;
-}
-
-static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
-					      u8 rnr_retry_count)
-{
-	req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
-				  (rnr_retry_count & 0x7));
-}
-
-static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
-{
-	return req_msg->offset51 >> 4;
-}
-
-static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
-					     u8 retries)
-{
-	req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
-}
-
-static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
-{
-	return (req_msg->offset51 & 0x8) >> 3;
-}
-
-static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
-{
-	req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
-				  ((srq & 0x1) << 3));
-}
-
-static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
-{
-	return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
-}
-
-static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
-						 __be32 flow_label)
-{
-	req_msg->primary_offset88 = cpu_to_be32(
-				    (be32_to_cpu(req_msg->primary_offset88) &
-				     0x00000FFF) |
-				     (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
-{
-	return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
-}
-
-static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
-						  u8 rate)
-{
-	req_msg->primary_offset88 = cpu_to_be32(
-				    (be32_to_cpu(req_msg->primary_offset88) &
-				     0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
-{
-	return (u8) (req_msg->primary_offset94 >> 4);
-}
-
-static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
-{
-	req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
-					  (sl << 4));
-}
-
-static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
-{
-	return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
-						   u8 subnet_local)
-{
-	req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
-					  ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
-{
-	return (u8) (req_msg->primary_offset95 >> 3);
-}
-
-static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
-							u8 local_ack_timeout)
-{
-	req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
-					  (local_ack_timeout << 3));
-}
-
-static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
-{
-	return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
-}
-
-static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
-					     __be32 flow_label)
-{
-	req_msg->alt_offset132 = cpu_to_be32(
-				 (be32_to_cpu(req_msg->alt_offset132) &
-				  0x00000FFF) |
-				  (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
-{
-	return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
-}
-
-static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
-					      u8 rate)
-{
-	req_msg->alt_offset132 = cpu_to_be32(
-				 (be32_to_cpu(req_msg->alt_offset132) &
-				  0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
-{
-	return (u8) (req_msg->alt_offset138 >> 4);
-}
-
-static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
-{
-	req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
-				       (sl << 4));
-}
-
-static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
-{
-	return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
-					       u8 subnet_local)
-{
-	req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
-				       ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
-{
-	return (u8) (req_msg->alt_offset139 >> 3);
-}
-
-static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
-						    u8 local_ack_timeout)
-{
-	req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
-				       (local_ack_timeout << 3));
-}
-
 /* Message REJected or MRAed */
 enum cm_msg_response {
 	CM_MSG_RESPONSE_REQ = 0x0,
@@ -385,419 +57,12 @@ enum cm_msg_response {
 	CM_MSG_RESPONSE_OTHER = 0x2
 };
 
- struct cm_mra_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-	/* message MRAed:2, rsvd:6 */
-	u8 offset8;
-	/* service timeout:5, rsvd:3 */
-	u8 offset9;
-
-	u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
-{
-	return (u8) (mra_msg->offset8 >> 6);
-}
-
-static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
-{
-	mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
-{
-	return (u8) (mra_msg->offset9 >> 3);
-}
-
-static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
-					      u8 service_timeout)
-{
-	mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
-				 (service_timeout << 3));
-}
-
-struct cm_rej_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-	/* message REJected:2, rsvd:6 */
-	u8 offset8;
-	/* reject info length:7, rsvd:1. */
-	u8 offset9;
-	__be16 reason;
-	u8 ari[IB_CM_REJ_ARI_LENGTH];
-
-	u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
-{
-	return (u8) (rej_msg->offset8 >> 6);
-}
-
-static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
-{
-	rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
-{
-	return (u8) (rej_msg->offset9 >> 1);
-}
-
-static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
-					      u8 len)
-{
-	rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
-}
-
-struct cm_rep_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-	__be32 local_qkey;
-	/* local QPN:24, rsvd:8 */
-	__be32 offset12;
-	/* local EECN:24, rsvd:8 */
-	__be32 offset16;
-	/* starting PSN:24 rsvd:8 */
-	__be32 offset20;
-	u8 resp_resources;
-	u8 initiator_depth;
-	/* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
-	u8 offset26;
-	/* RNR retry count:3, SRQ:1, rsvd:5 */
-	u8 offset27;
-	__be64 local_ca_guid;
-
-	u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
-{
-	return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
-}
-
-static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
-{
-	rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
-			    (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
-}
-
-static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
-{
-	return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
-}
-
-static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
-{
-	rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
-			    (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
-}
-
 static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
 {
 	return (qp_type == IB_QPT_XRC_INI) ?
-		cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
-}
-
-static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
-{
-	return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
-}
-
-static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
-					   __be32 starting_psn)
-{
-	rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
-			    (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
-}
-
-static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
-{
-	return (u8) (rep_msg->offset26 >> 3);
-}
-
-static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
-					       u8 target_ack_delay)
-{
-	rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
-				  (target_ack_delay << 3));
-}
-
-static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
-{
-	return (u8) ((rep_msg->offset26 & 0x06) >> 1);
-}
-
-static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
-{
-	rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
-				  ((failover & 0x3) << 1));
-}
-
-static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
-{
-	return (u8) (rep_msg->offset26 & 0x01);
-}
-
-static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
-					    u8 flow_ctrl)
-{
-	rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
-				  (flow_ctrl & 0x1));
-}
-
-static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
-{
-	return (u8) (rep_msg->offset27 >> 5);
-}
-
-static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
-					      u8 rnr_retry_count)
-{
-	rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
-				  (rnr_retry_count << 5));
-}
-
-static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
-{
-	return (u8) ((rep_msg->offset27 >> 4) & 0x1);
-}
-
-static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
-{
-	rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
-				  ((srq & 0x1) << 4));
-}
-
-struct cm_rtu_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-
-	u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_dreq_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-	/* remote QPN/EECN:24, rsvd:8 */
-	__be32 offset8;
-
-	u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
-{
-	return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
-}
-
-static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
-{
-	dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
-			    (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
-}
-
-struct cm_drep_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-
-	u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_lap_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-
-	__be32 rsvd8;
-	/* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
-	__be32 offset12;
-	__be32 rsvd16;
-
-	__be16 alt_local_lid;
-	__be16 alt_remote_lid;
-	union ib_gid alt_local_gid;
-	union ib_gid alt_remote_gid;
-	/* flow label:20, rsvd:4, traffic class:8 */
-	__be32 offset56;
-	u8 alt_hop_limit;
-	/* rsvd:2, packet rate:6 */
-	u8 offset61;
-	/* SL:4, subnet local:1, rsvd:3 */
-	u8 offset62;
-	/* local ACK timeout:5, rsvd:3 */
-	u8 offset63;
-
-	u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
-{
-	return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
-}
-
-static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
-{
-	lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
-					 (be32_to_cpu(lap_msg->offset12) &
-					  0x000000FF));
-}
-
-static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
-{
-	return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
-}
-
-static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
-						  u8 resp_timeout)
-{
-	lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
-					 (be32_to_cpu(lap_msg->offset12) &
-					  0xFFFFFF07));
-}
-
-static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
-{
-	return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
-}
-
-static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
-					 __be32 flow_label)
-{
-	lap_msg->offset56 = cpu_to_be32(
-				 (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
-				 (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
-{
-	return (u8) be32_to_cpu(lap_msg->offset56);
-}
-
-static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
-					    u8 traffic_class)
-{
-	lap_msg->offset56 = cpu_to_be32(traffic_class |
-					 (be32_to_cpu(lap_msg->offset56) &
-					  0xFFFFFF00));
-}
-
-static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
-{
-	return lap_msg->offset61 & 0x3F;
-}
-
-static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
-					  u8 packet_rate)
-{
-	lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
-}
-
-static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
-{
-	return lap_msg->offset62 >> 4;
-}
-
-static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
-{
-	lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
-}
-
-static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
-{
-	return (lap_msg->offset62 >> 3) & 0x1;
-}
-
-static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
-					   u8 subnet_local)
-{
-	lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
-			     (lap_msg->offset61 & 0xF7);
-}
-static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
-{
-	return lap_msg->offset63 >> 3;
-}
-
-static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
-						u8 local_ack_timeout)
-{
-	lap_msg->offset63 = (local_ack_timeout << 3) |
-			    (lap_msg->offset63 & 0x07);
-}
-
-struct cm_apr_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 local_comm_id;
-	__be32 remote_comm_id;
-
-	u8 info_length;
-	u8 ap_status;
-	__be16 rsvd;
-	u8 info[IB_CM_APR_INFO_LENGTH];
-
-	u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
-} __packed;
-
-struct cm_sidr_req_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 request_id;
-	__be16 pkey;
-	__be16 rsvd;
-	__be64 service_id;
-
-	u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-} __packed;
-
-struct cm_sidr_rep_msg {
-	struct ib_mad_hdr hdr;
-
-	__be32 request_id;
-	u8 status;
-	u8 info_length;
-	__be16 rsvd;
-	/* QPN:24, rsvd:8 */
-	__be32 offset8;
-	__be64 service_id;
-	__be32 qkey;
-	u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
-
-	u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
-{
-	return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
-}
-
-static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
-				       __be32 qpn)
-{
-	sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
-					(be32_to_cpu(sidr_rep_msg->offset8) &
-					 0x000000FF));
+		       cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER,
+					   rep_msg)) :
+		       cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg));
 }
 
 #endif /* CM_MSGS_H */
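Every helper removed from cm_msgs.h above followed the same shift-and-mask recipe over hard-coded wire offsets; the IBA_* macros pulled in via <rdma/ibta_vol1_c12.h> describe those field positions in one place, keyed (as the header's name suggests) to the IBTA vol. 1 chapter 12 field tables. A standalone sketch of the retired pattern for a 24-bit QPN packed above an 8-bit reserved byte (compare the deleted cm_rep_get_local_qpn()/cm_rep_set_local_qpn(); htobe32()/be32toh() stand in for the kernel byte-order helpers):

/* Userspace illustration of the hand-coded accessor style being replaced. */
#include <endian.h>
#include <stdint.h>
#include <assert.h>

static uint32_t get_qpn(uint32_t be_word)
{
	return be32toh(be_word) >> 8;			/* QPN:24, rsvd:8 */
}

static uint32_t set_qpn(uint32_t be_word, uint32_t qpn)
{
	return htobe32((qpn << 8) | (be32toh(be_word) & 0x000000FF));
}

int main(void)
{
	uint32_t word = set_qpn(0, 0xABCDEF);

	assert(get_qpn(word) == 0xABCDEF);
	return 0;
}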
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 43a6f07e0afe..72f032160c4b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -36,6 +36,7 @@
 
 #include "core_priv.h"
 #include "cma_priv.h"
+#include "cma_trace.h"
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -877,6 +878,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	id_priv->id.route.addr.dev_addr.net = get_net(net);
 	id_priv->seq_num &= 0x00ffffff;
 
+	trace_cm_id_create(id_priv);
 	return &id_priv->id;
 }
 EXPORT_SYMBOL(__rdma_create_id);
@@ -928,27 +930,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id->device != pd->device)
-		return -EINVAL;
+	if (id->device != pd->device) {
+		ret = -EINVAL;
+		goto out_err;
+	}
 
 	qp_init_attr->port_num = id->port_num;
 	qp = ib_create_qp(pd, qp_init_attr);
-	if (IS_ERR(qp))
-		return PTR_ERR(qp);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
+		goto out_err;
+	}
 
 	if (id->qp_type == IB_QPT_UD)
 		ret = cma_init_ud_qp(id_priv, qp);
 	else
 		ret = cma_init_conn_qp(id_priv, qp);
 	if (ret)
-		goto err;
+		goto out_destroy;
 
 	id->qp = qp;
 	id_priv->qp_num = qp->qp_num;
 	id_priv->srq = (qp->srq != NULL);
+	trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
 	return 0;
-err:
+out_destroy:
 	ib_destroy_qp(qp);
+out_err:
+	trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_create_qp);
@@ -958,6 +967,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
 	struct rdma_id_private *id_priv;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
+	trace_cm_qp_destroy(id_priv);
 	mutex_lock(&id_priv->qp_mutex);
 	ib_destroy_qp(id_priv->id.qp);
 	id_priv->id.qp = NULL;
@@ -1811,6 +1821,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	enum rdma_cm_state state;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
+	trace_cm_id_destroy(id_priv);
 	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
@@ -1863,6 +1874,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 	if (ret)
 		goto reject;
 
+	trace_cm_send_rtu(id_priv);
 	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
 	if (ret)
 		goto reject;
@@ -1871,6 +1883,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 reject:
 	pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
 	cma_modify_qp_err(id_priv);
+	trace_cm_send_rej(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -1890,6 +1903,17 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
 	event->param.conn.qp_num = rep_data->remote_qpn;
 }
 
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+				struct rdma_cm_event *event)
+{
+	int ret;
+
+	trace_cm_event_handler(id_priv, event);
+	ret = id_priv->id.event_handler(&id_priv->id, event);
+	trace_cm_event_done(id_priv, event, ret);
+	return ret;
+}
+
 static int cma_ib_handler(struct ib_cm_id *cm_id,
 			  const struct ib_cm_event *ib_event)
 {
@@ -1912,8 +1936,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
 		break;
 	case IB_CM_REP_RECEIVED:
 		if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
-		    (id_priv->id.qp_type != IB_QPT_UD))
+		    (id_priv->id.qp_type != IB_QPT_UD)) {
+			trace_cm_send_mra(id_priv);
 			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+		}
 		if (id_priv->id.qp) {
 			event.status = cma_rep_recv(id_priv);
 			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1958,7 +1984,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	ret = cma_cm_event_handler(id_priv, &event);
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
@@ -2119,6 +2145,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	if (IS_ERR(listen_id))
 		return PTR_ERR(listen_id);
 
+	trace_cm_req_handler(listen_id, ib_event->event);
 	if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
 		ret = -EINVAL;
 		goto net_dev_put;
@@ -2161,7 +2188,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	 * until we're done accessing it.
 	 */
 	atomic_inc(&conn_id->refcount);
-	ret = conn_id->id.event_handler(&conn_id->id, &event);
+	ret = cma_cm_event_handler(conn_id, &event);
 	if (ret)
 		goto err3;
 	/*
@@ -2170,8 +2197,10 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	 */
 	mutex_lock(&lock);
 	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
-	    (conn_id->id.qp_type != IB_QPT_UD))
+	    (conn_id->id.qp_type != IB_QPT_UD)) {
+		trace_cm_send_mra(cm_id->context);
 		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+	}
 	mutex_unlock(&lock);
 	mutex_unlock(&conn_id->handler_mutex);
 	mutex_unlock(&listen_id->handler_mutex);
@@ -2286,7 +2315,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	event.status = iw_event->status;
 	event.param.conn.private_data = iw_event->private_data;
 	event.param.conn.private_data_len = iw_event->private_data_len;
-	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	ret = cma_cm_event_handler(id_priv, &event);
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.iw = NULL;
@@ -2363,7 +2392,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	 * until we're done accessing it.
 	 */
 	atomic_inc(&conn_id->refcount);
-	ret = conn_id->id.event_handler(&conn_id->id, &event);
+	ret = cma_cm_event_handler(conn_id, &event);
 	if (ret) {
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
@@ -2435,6 +2464,7 @@ static int cma_listen_handler(struct rdma_cm_id *id,
 
 	id->context = id_priv->id.context;
 	id->event_handler = id_priv->id.event_handler;
+	trace_cm_event_handler(id_priv, event);
 	return id_priv->id.event_handler(id, event);
 }
 
@@ -2611,7 +2641,7 @@ static void cma_work_handler(struct work_struct *_work)
 	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
 		goto out;
 
-	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+	if (cma_cm_event_handler(id_priv, &work->event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
@@ -2634,7 +2664,7 @@ static void cma_ndev_work_handler(struct work_struct *_work)
 	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)
 		goto out;
 
-	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+	if (cma_cm_event_handler(id_priv, &work->event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		destroy = 1;
 	}
@@ -3089,7 +3119,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	} else
 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 
-	if (id_priv->id.event_handler(&id_priv->id, &event)) {
+	if (cma_cm_event_handler(id_priv, &event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
 		rdma_destroy_id(&id_priv->id);
@@ -3118,6 +3148,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
 	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
+	atomic_inc(&id_priv->refcount);
 	cma_init_resolve_addr_work(work, id_priv);
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -3144,6 +3175,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
 	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
 		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
 
+	atomic_inc(&id_priv->refcount);
 	cma_init_resolve_addr_work(work, id_priv);
 	queue_work(cma_wq, &work->work);
 	return 0;
@@ -3736,7 +3768,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	ret = cma_cm_event_handler(id_priv, &event);
 
 	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
 	if (ret) {
@@ -3800,6 +3832,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
 
+	trace_cm_send_sidr_req(id_priv);
 	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
 	if (ret) {
 		ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3873,6 +3906,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
 	req.srq = id_priv->srq ? 1 : 0;
 
+	trace_cm_send_req(id_priv);
 	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
 out:
 	if (ret && !IS_ERR(id)) {
@@ -3986,6 +4020,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
 	rep.srq = id_priv->srq ? 1 : 0;
 
+	trace_cm_send_rep(id_priv);
 	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 out:
 	return ret;
@@ -4035,6 +4070,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	rep.private_data = private_data;
 	rep.private_data_len = private_data_len;
 
+	trace_cm_send_sidr_rep(id_priv);
 	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
@@ -4120,13 +4156,15 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 		return -EINVAL;
 
 	if (rdma_cap_ib_cm(id->device, id->port_num)) {
-		if (id->qp_type == IB_QPT_UD)
+		if (id->qp_type == IB_QPT_UD) {
 			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
 						private_data, private_data_len);
-		else
+		} else {
+			trace_cm_send_rej(id_priv);
 			ret = ib_send_cm_rej(id_priv->cm_id.ib,
 					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
 					     0, private_data, private_data_len);
+		}
 	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
 		ret = iw_cm_reject(id_priv->cm_id.iw,
 				   private_data, private_data_len);
@@ -4151,8 +4189,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
-		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
-			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+		trace_cm_disconnect(id_priv);
+		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+			if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+				trace_cm_sent_drep(id_priv);
+		} else {
+			trace_cm_sent_dreq(id_priv);
+		}
 	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
 		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
 	} else
@@ -4218,7 +4261,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	} else
 		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
-	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	ret = cma_cm_event_handler(id_priv, &event);
 
 	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
 	if (ret) {
@@ -4623,6 +4666,7 @@ static void cma_add_one(struct ib_device *device)
 		cma_listen_on_dev(id_priv, cma_dev);
 	mutex_unlock(&lock);
 
+	trace_cm_add_one(device);
 	return;
 
 free_gid_type:
@@ -4653,7 +4697,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 		goto out;
 
 	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
-	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	ret = cma_cm_event_handler(id_priv, &event);
 out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
@@ -4691,6 +4735,8 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
 {
 	struct cma_device *cma_dev = client_data;
 
+	trace_cm_remove_one(device);
+
 	if (!cma_dev)
 		return;
 
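The recurring cma.c change above is mechanical: direct calls to id_priv->id.event_handler() are funneled through the new cma_cm_event_handler() wrapper so one tracepoint fires before the consumer's callback runs and a second records its return code. A minimal sketch of that wrap-the-callback shape, with hypothetical names and printf() standing in for the tracepoints:

/* Userspace sketch only; not the rdma_cm API. */
#include <stdio.h>

typedef int (*event_handler_t)(void *ctx, int event);

static int traced_event(event_handler_t cb, void *ctx, int event)
{
	int ret;

	printf("cm_event_handler: event=%d\n", event);		 /* trace_cm_event_handler() */
	ret = cb(ctx, event);
	printf("cm_event_done: event=%d ret=%d\n", event, ret); /* trace_cm_event_done() */
	return ret;
}

static int demo_handler(void *ctx, int event)
{
	(void)ctx;
	return event ? -1 : 0;
}

int main(void)
{
	return traced_event(demo_handler, NULL, 0);
}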
diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c
new file mode 100644
index 000000000000..b314a281e10e
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include "cma_priv.h"
+
+#include "cma_trace.h"
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
new file mode 100644
index 000000000000..81e36bf13159
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.h
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_cma
+
+#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_RDMA_CMA_H
+
+#include <linux/tracepoint.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_event_type, from include/rdma/ib_cm.h
+ */
+#define IB_CM_EVENT_LIST			\
+	ib_cm_event(REQ_ERROR)			\
+	ib_cm_event(REQ_RECEIVED)		\
+	ib_cm_event(REP_ERROR)			\
+	ib_cm_event(REP_RECEIVED)		\
+	ib_cm_event(RTU_RECEIVED)		\
+	ib_cm_event(USER_ESTABLISHED)		\
+	ib_cm_event(DREQ_ERROR)			\
+	ib_cm_event(DREQ_RECEIVED)		\
+	ib_cm_event(DREP_RECEIVED)		\
+	ib_cm_event(TIMEWAIT_EXIT)		\
+	ib_cm_event(MRA_RECEIVED)		\
+	ib_cm_event(REJ_RECEIVED)		\
+	ib_cm_event(LAP_ERROR)			\
+	ib_cm_event(LAP_RECEIVED)		\
+	ib_cm_event(APR_RECEIVED)		\
+	ib_cm_event(SIDR_REQ_ERROR)		\
+	ib_cm_event(SIDR_REQ_RECEIVED)		\
+	ib_cm_event_end(SIDR_REP_RECEIVED)
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x)		TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_event_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_EVENT_LIST
+
+#undef ib_cm_event
+#undef ib_cm_event_end
+
+#define ib_cm_event(x)		{ IB_CM_##x, #x },
+#define ib_cm_event_end(x)	{ IB_CM_##x, #x }
+
+#define rdma_show_ib_cm_event(x) \
+		__print_symbolic(x, IB_CM_EVENT_LIST)
+
+
+DECLARE_EVENT_CLASS(cma_fsm_class,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv
+	),
+
+	TP_ARGS(id_priv),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, tos)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->tos = id_priv->tos;
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos
+	)
+);
+
+#define DEFINE_CMA_FSM_EVENT(name)						\
+		DEFINE_EVENT(cma_fsm_class, cm_##name,				\
+				TP_PROTO(					\
+					const struct rdma_id_private *id_priv	\
+				),						\
+				TP_ARGS(id_priv))
+
+DEFINE_CMA_FSM_EVENT(send_rtu);
+DEFINE_CMA_FSM_EVENT(send_rej);
+DEFINE_CMA_FSM_EVENT(send_mra);
+DEFINE_CMA_FSM_EVENT(send_sidr_req);
+DEFINE_CMA_FSM_EVENT(send_sidr_rep);
+DEFINE_CMA_FSM_EVENT(disconnect);
+DEFINE_CMA_FSM_EVENT(sent_drep);
+DEFINE_CMA_FSM_EVENT(sent_dreq);
+DEFINE_CMA_FSM_EVENT(id_destroy);
+
+TRACE_EVENT(cm_id_create,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv
+	),
+
+	TP_ARGS(id_priv),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+	),
+
+	TP_printk("cm.id=%u",
+		__entry->cm_id
+	)
+);
+
+DECLARE_EVENT_CLASS(cma_qp_class,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv
+	),
+
+	TP_ARGS(id_priv),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, tos)
+		__field(u32, qp_num)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->tos = id_priv->tos;
+		__entry->qp_num = id_priv->qp_num;
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+		__entry->qp_num
+	)
+);
+
+#define DEFINE_CMA_QP_EVENT(name)						\
+		DEFINE_EVENT(cma_qp_class, cm_##name,				\
+				TP_PROTO(					\
+					const struct rdma_id_private *id_priv	\
+				),						\
+				TP_ARGS(id_priv))
+
+DEFINE_CMA_QP_EVENT(send_req);
+DEFINE_CMA_QP_EVENT(send_rep);
+DEFINE_CMA_QP_EVENT(qp_destroy);
+
+/*
+ * enum ib_qp_type, from include/rdma/ib_verbs.h
+ */
+#define IB_QP_TYPE_LIST				\
+	ib_qp_type(SMI)				\
+	ib_qp_type(GSI)				\
+	ib_qp_type(RC)				\
+	ib_qp_type(UC)				\
+	ib_qp_type(UD)				\
+	ib_qp_type(RAW_IPV6)			\
+	ib_qp_type(RAW_ETHERTYPE)		\
+	ib_qp_type(RAW_PACKET)			\
+	ib_qp_type(XRC_INI)			\
+	ib_qp_type_end(XRC_TGT)
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x)		TRACE_DEFINE_ENUM(IB_QPT_##x);
+#define ib_qp_type_end(x)	TRACE_DEFINE_ENUM(IB_QPT_##x);
+
+IB_QP_TYPE_LIST
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x)		{ IB_QPT_##x, #x },
+#define ib_qp_type_end(x)	{ IB_QPT_##x, #x }
+
+#define rdma_show_qp_type(x) \
+		__print_symbolic(x, IB_QP_TYPE_LIST)
+
+
+TRACE_EVENT(cm_qp_create,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv,
+		const struct ib_pd *pd,
+		const struct ib_qp_init_attr *qp_init_attr,
+		int rc
+	),
+
+	TP_ARGS(id_priv, pd, qp_init_attr, rc),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, pd_id)
+		__field(u32, tos)
+		__field(u32, qp_num)
+		__field(u32, send_wr)
+		__field(u32, recv_wr)
+		__field(int, rc)
+		__field(unsigned long, qp_type)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->pd_id = pd->res.id;
+		__entry->tos = id_priv->tos;
+		__entry->send_wr = qp_init_attr->cap.max_send_wr;
+		__entry->recv_wr = qp_init_attr->cap.max_recv_wr;
+		__entry->rc = rc;
+		if (!rc) {
+			__entry->qp_num = id_priv->qp_num;
+			__entry->qp_type = id_priv->id.qp_type;
+		} else {
+			__entry->qp_num = 0;
+			__entry->qp_type = 0;
+		}
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s"
+		" send_wr=%u recv_wr=%u qp_num=%u rc=%d",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+		__entry->tos, __entry->pd_id,
+		rdma_show_qp_type(__entry->qp_type), __entry->send_wr,
+		__entry->recv_wr, __entry->qp_num, __entry->rc
+	)
+);
+
+TRACE_EVENT(cm_req_handler,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv,
+		int event
+	),
+
+	TP_ARGS(id_priv, event),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, tos)
+		__field(unsigned long, event)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->tos = id_priv->tos;
+		__entry->event = event;
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+		rdma_show_ib_cm_event(__entry->event), __entry->event
+	)
+);
+
+TRACE_EVENT(cm_event_handler,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv,
+		const struct rdma_cm_event *event
+	),
+
+	TP_ARGS(id_priv, event),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, tos)
+		__field(unsigned long, event)
+		__field(int, status)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->tos = id_priv->tos;
+		__entry->event = event->event;
+		__entry->status = event->status;
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+		rdma_show_cm_event(__entry->event), __entry->event,
+		__entry->status
+	)
+);
+
+TRACE_EVENT(cm_event_done,
+	TP_PROTO(
+		const struct rdma_id_private *id_priv,
+		const struct rdma_cm_event *event,
+		int result
+	),
+
+	TP_ARGS(id_priv, event, result),
+
+	TP_STRUCT__entry(
+		__field(u32, cm_id)
+		__field(u32, tos)
+		__field(unsigned long, event)
+		__field(int, result)
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = id_priv->res.id;
+		__entry->tos = id_priv->tos;
+		__entry->event = event->event;
+		__entry->result = result;
+		memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d",
+		__entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+		rdma_show_cm_event(__entry->event), __entry->result
+	)
+);
+
+DECLARE_EVENT_CLASS(cma_client_class,
+	TP_PROTO(
+		const struct ib_device *device
+	),
+
+	TP_ARGS(device),
+
+	TP_STRUCT__entry(
+		__string(name, device->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, device->name);
+	),
+
+	TP_printk("device name=%s",
+		__get_str(name)
+	)
+);
+
+#define DEFINE_CMA_CLIENT_EVENT(name)						\
+		DEFINE_EVENT(cma_client_class, cm_##name,			\
+				TP_PROTO(					\
+					const struct ib_device *device		\
+				),						\
+				TP_ARGS(device))
+
+DEFINE_CMA_CLIENT_EVENT(add_one);
+DEFINE_CMA_CLIENT_EVENT(remove_one);
+
+#endif /* _TRACE_RDMA_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE cma_trace
+
+#include <trace/define_trace.h>
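
For orientation, a brief sketch of how the event classes above are meant to be extended and consumed; the cm_send_lap name and the example function are hypothetical and not part of this patch, and the sample record is inferred from the TP_printk() format strings.

/* One line in cma_trace.h defines a new FSM-style tracepoint: */
DEFINE_CMA_FSM_EVENT(send_lap);

/* ... which a cma.c call site would fire just before the matching ib_cm call: */
static void example_trace_lap(const struct rdma_id_private *id_priv)
{
	trace_cm_send_lap(id_priv);
}

/*
 * With the event enabled, each record renders per the TP_printk() above as:
 *   cm_send_lap: cm.id=0 src=192.168.1.10:45678 dst=192.168.1.20:20049 tos=0
 */
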
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 3645e092e1c7..b1457b3464d3 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
 int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
 
 #ifdef CONFIG_CGROUP_RDMA
 void ib_device_register_rdmacg(struct ib_device *device);
@@ -320,7 +321,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
 					  struct ib_pd *pd,
 					  struct ib_qp_init_attr *attr,
 					  struct ib_udata *udata,
-					  struct ib_uobject *uobj)
+					  struct ib_uqp_object *uobj)
 {
 	enum ib_qp_type qp_type = attr->qp_type;
 	struct ib_qp *qp;
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index bbfded6d5d3d..4f25b2400694 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -7,6 +7,8 @@
 #include <linux/slab.h>
 #include <rdma/ib_verbs.h>
 
+#include <trace/events/rdma_core.h>
+
 /* # of WCs to poll for with a single call to ib_poll_cq */
 #define IB_POLL_BATCH			16
 #define IB_POLL_BATCH_DIRECT		8
@@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w)
 
 	dim->state = DIM_START_MEASURE;
 
+	trace_cq_modify(cq, comps, usec);
 	cq->device->ops.modify_cq(cq, comps, usec);
 }
 
@@ -65,18 +68,29 @@ static void rdma_dim_init(struct ib_cq *cq)
 	INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
 }
 
+static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+	int rc;
+
+	rc = ib_poll_cq(cq, num_entries, wc);
+	trace_cq_poll(cq, num_entries, rc);
+	return rc;
+}
+
 static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
 			   int batch)
 {
 	int i, n, completed = 0;
 
+	trace_cq_process(cq);
+
 	/*
 	 * budget might be (-1) if the caller does not
 	 * want to bound this call, thus we need unsigned
 	 * minimum here.
 	 */
-	while ((n = ib_poll_cq(cq, min_t(u32, batch,
-					 budget - completed), wcs)) > 0) {
+	while ((n = __poll_cq(cq, min_t(u32, batch,
+					budget - completed), wcs)) > 0) {
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = &wcs[i];
 
@@ -131,8 +145,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
 	completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
 	if (completed < budget) {
 		irq_poll_complete(&cq->iop);
-		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
+			trace_cq_reschedule(cq);
 			irq_poll_sched(&cq->iop);
+		}
 	}
 
 	if (dim)
@@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
 
 static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
 {
+	trace_cq_schedule(cq);
 	irq_poll_sched(&cq->iop);
 }
 
@@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work)
 
 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 {
+	trace_cq_schedule(cq);
 	queue_work(cq->comp_wq, &cq->work);
 }
 
@@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 		goto out_destroy_cq;
 	}
 
+	trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
 	return cq;
 
 out_destroy_cq:
@@ -248,6 +267,7 @@ out_free_wc:
 	kfree(cq->wc);
 out_free_cq:
 	kfree(cq);
+	trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(__ib_alloc_cq_user);
@@ -304,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
 		WARN_ON_ONCE(1);
 	}
 
+	trace_cq_free(cq);
 	rdma_restrack_del(&cq->res);
 	cq->device->ops.destroy_cq(cq, udata);
 	if (cq->dim)
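
For context, a consumer-side sketch (function name and sizing are illustrative, not part of this patch) showing where the new CQ tracepoints fire across a normal ib_alloc_cq()/ib_free_cq() life cycle:

static int example_setup_cq(struct ib_device *device)
{
	struct ib_cq *cq;

	/* trace_cq_alloc() or trace_cq_alloc_error() fires in here */
	cq = ib_alloc_cq(device, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq))
		return PTR_ERR(cq);

	/*
	 * As completions arrive, trace_cq_schedule(), trace_cq_process()
	 * and trace_cq_poll() record each polling pass.
	 */

	ib_free_cq(cq);		/* trace_cq_free() fires in here */
	return 0;
}
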
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 84dd74fe13b8..f6c255202d7f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -587,7 +587,8 @@ struct ib_device *_ib_alloc_device(size_t size)
 	rdma_init_coredev(&device->coredev, device, &init_net);
 
 	INIT_LIST_HEAD(&device->event_handler_list);
-	spin_lock_init(&device->event_handler_lock);
+	spin_lock_init(&device->qp_open_list_lock);
+	init_rwsem(&device->event_handler_rwsem);
 	mutex_init(&device->unregistration_lock);
 	/*
 	 * client_data needs to be alloc because we don't want our mark to be
@@ -1931,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data);
  *
  * ib_register_event_handler() registers an event handler that will be
  * called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification).  This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
  */
 void ib_register_event_handler(struct ib_event_handler *event_handler)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+	down_write(&event_handler->device->event_handler_rwsem);
 	list_add_tail(&event_handler->list,
 		      &event_handler->device->event_handler_list);
-	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+	up_write(&event_handler->device->event_handler_rwsem);
 }
 EXPORT_SYMBOL(ib_register_event_handler);
 
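A minimal registration sketch (handler and function names are illustrative); per the updated kernel-doc above, the callback is now invoked from workqueue context under the new rwsem rather than under a spinlock, so it may sleep:

static void example_async_handler(struct ib_event_handler *handler,
				  struct ib_event *event)
{
	/* May sleep: runs from the dispatching workqueue, not IRQ context. */
	dev_info(&event->device->dev, "async event %d\n", event->event);
}

static struct ib_event_handler example_handler;

static void example_register(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&example_handler, device, example_async_handler);
	ib_register_event_handler(&example_handler);
}
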
@@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler);
  */
 void ib_unregister_event_handler(struct ib_event_handler *event_handler)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+	down_write(&event_handler->device->event_handler_rwsem);
 	list_del(&event_handler->list);
-	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+	up_write(&event_handler->device->event_handler_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_event_handler);
 
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
 {
-	unsigned long flags;
 	struct ib_event_handler *handler;
 
-	spin_lock_irqsave(&event->device->event_handler_lock, flags);
+	down_read(&event->device->event_handler_rwsem);
 
 	list_for_each_entry(handler, &event->device->event_handler_list, list)
 		handler->handler(handler, event);
 
-	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+	up_read(&event->device->event_handler_rwsem);
 }
-EXPORT_SYMBOL(ib_dispatch_event);
 
 static int iw_query_port(struct ib_device *device,
 			   u8 port_num,
@@ -1990,7 +1977,6 @@ static int iw_query_port(struct ib_device *device,
 {
 	struct in_device *inetdev;
 	struct net_device *netdev;
-	int err;
 
 	memset(port_attr, 0, sizeof(*port_attr));
 
@@ -2021,11 +2007,7 @@ static int iw_query_port(struct ib_device *device,
 	}
 
 	dev_put(netdev);
-	err = device->ops.query_port(device, port_num, port_attr);
-	if (err)
-		return err;
-
-	return 0;
+	return device->ops.query_port(device, port_num, port_attr);
 }
 
 static int __ib_query_port(struct ib_device *device,
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
index b7cb59844ece..b51bd7087a88 100644
--- a/drivers/infiniband/core/ib_core_uverbs.c
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -232,7 +232,9 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
 	if (!entry)
 		return;
 
+	xa_lock(&entry->ucontext->mmap_xa);
 	entry->driver_removed = true;
+	xa_unlock(&entry->ucontext->mmap_xa);
 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
 }
 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index cbf6041a5d4a..37b433aa7306 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -41,6 +41,7 @@
 #include "core_priv.h"
 #include "cma_priv.h"
 #include "restrack.h"
+#include "uverbs.h"
 
 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
 			       struct rdma_restrack_entry*, uint32_t);
@@ -599,7 +600,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
 		goto err;
 	if (!rdma_is_kernel_res(res) &&
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
-			cq->uobject->context->res.id))
+			cq->uobject->uevent.uobject.context->res.id))
 		goto err;
 
 	if (fill_res_name_pid(msg, res))
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6c72773faf29..5128cb16bb48 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -42,26 +42,21 @@
 #include "core_priv.h"
 #include "rdma_core.h"
 
-void uverbs_uobject_get(struct ib_uobject *uobject)
-{
-	kref_get(&uobject->ref);
-}
-
 static void uverbs_uobject_free(struct kref *ref)
 {
-	struct ib_uobject *uobj =
-		container_of(ref, struct ib_uobject, ref);
-
-	if (uobj->uapi_object->type_class->needs_kfree_rcu)
-		kfree_rcu(uobj, rcu);
-	else
-		kfree(uobj);
+	kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
 }
 
+/*
+ * uverbs_uobject_put() is called to indicate that the caller no longer needs
+ * this uobject. When the reference count drops to zero, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
 void uverbs_uobject_put(struct ib_uobject *uobject)
 {
 	kref_put(&uobject->ref, uverbs_uobject_free);
 }
+EXPORT_SYMBOL(uverbs_uobject_put);
 
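A tiny pairing sketch for the comment above (function names are illustrative, not part of this patch):

static void example_pin_uobject(struct ib_uobject *uobj)
{
	/* Keep the uobject's memory alive beyond the current handler. */
	uverbs_uobject_get(uobj);
}

static void example_unpin_uobject(struct ib_uobject *uobj)
{
	/* The final put frees the uobject via uverbs_uobject_free(). */
	uverbs_uobject_put(uobj);
}
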
 static int uverbs_try_lock_object(struct ib_uobject *uobj,
 				  enum rdma_lookup_mode mode)
@@ -135,7 +130,11 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	lockdep_assert_held(&ufile->hw_destroy_rwsem);
 	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
-	if (uobj->object) {
+	if (reason == RDMA_REMOVE_ABORT) {
+		WARN_ON(!list_empty(&uobj->list));
+		WARN_ON(!uobj->context);
+		uobj->uapi_object->type_class->alloc_abort(uobj);
+	} else if (uobj->object) {
 		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
 								attrs);
 		if (ret) {
@@ -151,12 +150,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 		uobj->object = NULL;
 	}
 
-	if (reason == RDMA_REMOVE_ABORT) {
-		WARN_ON(!list_empty(&uobj->list));
-		WARN_ON(!uobj->context);
-		uobj->uapi_object->type_class->alloc_abort(uobj);
-	}
-
 	uobj->context = NULL;
 
 	/*
@@ -263,15 +256,20 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
 }
 
 /* alloc_uobj must be undone by uverbs_destroy_uobject() */
-static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
 				     const struct uverbs_api_object *obj)
 {
+	struct ib_uverbs_file *ufile = attrs->ufile;
 	struct ib_uobject *uobj;
-	struct ib_ucontext *ucontext;
 
-	ucontext = ib_uverbs_get_ucontext_file(ufile);
-	if (IS_ERR(ucontext))
-		return ERR_CAST(ucontext);
+	if (!attrs->context) {
+		struct ib_ucontext *ucontext =
+			ib_uverbs_get_ucontext_file(ufile);
+
+		if (IS_ERR(ucontext))
+			return ERR_CAST(ucontext);
+		attrs->context = ucontext;
+	}
 
 	uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
 	if (!uobj)
@@ -281,7 +279,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 	 * The object is added to the list in the commit stage.
 	 */
 	uobj->ufile = ufile;
-	uobj->context = ucontext;
+	uobj->context = attrs->context;
 	INIT_LIST_HEAD(&uobj->list);
 	uobj->uapi_object = obj;
 	/*
@@ -358,9 +356,9 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
 
 	uobject = f->private_data;
 	/*
-	 * fget(id) ensures we are not currently running uverbs_close_fd,
-	 * and the caller is expected to ensure that uverbs_close_fd is never
-	 * done while a call top lookup is possible.
+	 * fget(id) ensures we are not currently running
+	 * uverbs_uobject_fd_release(), and the caller is expected to ensure
+	 * that release is never done while a call to lookup is possible.
 	 */
 	if (f->f_op != fd_type->fops) {
 		fput(f);
@@ -424,12 +422,12 @@ free:
 
 static struct ib_uobject *
 alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
-			struct ib_uverbs_file *ufile)
+			struct uverbs_attr_bundle *attrs)
 {
 	int ret;
 	struct ib_uobject *uobj;
 
-	uobj = alloc_uobj(ufile, obj);
+	uobj = alloc_uobj(attrs, obj);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -445,7 +443,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
 	return uobj;
 
 remove:
-	xa_erase(&ufile->idr, uobj->id);
+	xa_erase(&attrs->ufile->idr, uobj->id);
 uobj_put:
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
@@ -453,31 +451,48 @@ uobj_put:
 
 static struct ib_uobject *
 alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
-		       struct ib_uverbs_file *ufile)
+		       struct uverbs_attr_bundle *attrs)
 {
+	const struct uverbs_obj_fd_type *fd_type =
+		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
 	int new_fd;
 	struct ib_uobject *uobj;
+	struct file *filp;
+
+	if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release))
+		return ERR_PTR(-EINVAL);
 
 	new_fd = get_unused_fd_flags(O_CLOEXEC);
 	if (new_fd < 0)
 		return ERR_PTR(new_fd);
 
-	uobj = alloc_uobj(ufile, obj);
-	if (IS_ERR(uobj)) {
-		put_unused_fd(new_fd);
-		return uobj;
+	uobj = alloc_uobj(attrs, obj);
+	if (IS_ERR(uobj))
+		goto err_fd;
+
+	/* Note that uverbs_uobject_fd_release() is called during abort */
+	filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
+				  fd_type->flags);
+	if (IS_ERR(filp)) {
+		uobj = ERR_CAST(filp);
+		goto err_uobj;
 	}
+	uobj->object = filp;
 
 	uobj->id = new_fd;
-	uobj->ufile = ufile;
+	return uobj;
 
+err_uobj:
+	uverbs_uobject_put(uobj);
+err_fd:
+	put_unused_fd(new_fd);
 	return uobj;
 }
 
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
-					    struct ib_uverbs_file *ufile,
 					    struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uverbs_file *ufile = attrs->ufile;
 	struct ib_uobject *ret;
 
 	if (IS_ERR(obj))
@@ -491,13 +506,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
 		return ERR_PTR(-EIO);
 
-	ret = obj->type_class->alloc_begin(obj, ufile);
+	ret = obj->type_class->alloc_begin(obj, attrs);
 	if (IS_ERR(ret)) {
 		up_read(&ufile->hw_destroy_rwsem);
 		return ret;
 	}
-	if (attrs)
-		attrs->context = ret->context;
 	return ret;
 }
 
@@ -544,6 +557,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj)
 
 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 {
+	struct file *filp = uobj->object;
+
+	fput(filp);
 	put_unused_fd(uobj->id);
 }
 
@@ -553,7 +569,7 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
 {
 	const struct uverbs_obj_fd_type *fd_type = container_of(
 		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
-	int ret = fd_type->context_closed(uobj, why);
+	int ret = fd_type->destroy_object(uobj, why);
 
 	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
@@ -565,7 +581,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj)
 {
 }
 
-static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
 	void *old;
@@ -579,33 +595,14 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 	 */
 	old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
 	WARN_ON(old != NULL);
-
-	return 0;
 }
 
-static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 {
-	const struct uverbs_obj_fd_type *fd_type = container_of(
-		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
 	int fd = uobj->id;
-	struct file *filp;
-
-	/*
-	 * The kref for uobj is moved into filp->private data and put in
-	 * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
-	 * must be guaranteed to be called from the provided fops release
-	 * callback.
-	 */
-	filp = anon_inode_getfile(fd_type->name,
-				  fd_type->fops,
-				  uobj,
-				  fd_type->flags);
-	if (IS_ERR(filp))
-		return PTR_ERR(filp);
-
-	uobj->object = filp;
+	struct file *filp = uobj->object;
 
-	/* Matching put will be done in uverbs_close_fd() */
+	/* Matching put will be done in uverbs_uobject_fd_release() */
 	kref_get(&uobj->ufile->ref);
 
 	/* This shouldn't be used anymore. Use the file object instead */
@@ -613,11 +610,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
 
 	/*
 	 * NOTE: Once we install the file we lose ownership of our kref on
-	 * uobj. It will be put by uverbs_close_fd()
+	 * uobj. It will be put by uverbs_uobject_fd_release()
 	 */
+	filp->private_data = uobj;
 	fd_install(fd, filp);
-
-	return 0;
 }
 
 /*
@@ -625,19 +621,13 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
  * caller can no longer assume uobj is valid. If this function fails it
  * destroys the uobject, including the attached HW object.
  */
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
-					   struct uverbs_attr_bundle *attrs)
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+			       struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uverbs_file *ufile = attrs->ufile;
-	int ret;
 
 	/* alloc_commit consumes the uobj kref */
-	ret = uobj->uapi_object->type_class->alloc_commit(uobj);
-	if (ret) {
-		uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
-		up_read(&ufile->hw_destroy_rwsem);
-		return ret;
-	}
+	uobj->uapi_object->type_class->alloc_commit(uobj);
 
 	/* kref is held so long as the uobj is on the uobj list. */
 	uverbs_uobject_get(uobj);
@@ -650,8 +640,6 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
 
 	/* Matches the down_read in rdma_alloc_begin_uobject */
 	up_read(&ufile->hw_destroy_rwsem);
-
-	return 0;
 }
 
 /*
@@ -663,7 +651,6 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
 
-	uobj->object = NULL;
 	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
 
 	/* Matches the down_read in rdma_alloc_begin_uobject */
@@ -681,7 +668,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj,
 	struct file *filp = uobj->object;
 
 	WARN_ON(mode != UVERBS_LOOKUP_READ);
-	/* This indirectly calls uverbs_close_fd and free the object */
+	/*
+	 * This indirectly calls uverbs_uobject_fd_release() and frees the
+	 * object.
+	 */
 	fput(filp);
 }
 
@@ -744,33 +734,32 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
 	.lookup_put = lookup_put_idr_uobject,
 	.destroy_hw = destroy_hw_idr_uobject,
 	.remove_handle = remove_handle_idr_uobject,
-	/*
-	 * When we destroy an object, we first just lock it for WRITE and
-	 * actually DESTROY it in the finalize stage. So, the problematic
-	 * scenario is when we just started the finalize stage of the
-	 * destruction (nothing was executed yet). Now, the other thread
-	 * fetched the object for READ access, but it didn't lock it yet.
-	 * The DESTROY thread continues and starts destroying the object.
-	 * When the other thread continue - without the RCU, it would
-	 * access freed memory. However, the rcu_read_lock delays the free
-	 * until the rcu_read_lock of the READ operation quits. Since the
-	 * exclusive lock of the object is still taken by the DESTROY flow, the
-	 * READ operation will get -EBUSY and it'll just bail out.
-	 */
-	.needs_kfree_rcu = true,
 };
 EXPORT_SYMBOL(uverbs_idr_class);
 
-void uverbs_close_fd(struct file *f)
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
 {
-	struct ib_uobject *uobj = f->private_data;
-	struct ib_uverbs_file *ufile = uobj->ufile;
-	struct uverbs_attr_bundle attrs = {
-		.context = uobj->context,
-		.ufile = ufile,
-	};
+	struct ib_uverbs_file *ufile;
+	struct ib_uobject *uobj;
+
+	/*
+	 * This can only happen if the fput came from alloc_abort_fd_uobject()
+	 */
+	if (!filp->private_data)
+		return 0;
+	uobj = filp->private_data;
+	ufile = uobj->ufile;
 
 	if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+		struct uverbs_attr_bundle attrs = {
+			.context = uobj->context,
+			.ufile = ufile,
+		};
+
 		/*
 		 * lookup_get_fd_uobject holds the kref on the struct file any
 		 * time a FD uobj is locked, which prevents this release
@@ -782,13 +771,14 @@ void uverbs_close_fd(struct file *f)
 		up_read(&ufile->hw_destroy_rwsem);
 	}
 
-	/* Matches the get in alloc_begin_fd_uobject */
+	/* Matches the get in alloc_commit_fd_uobject() */
 	kref_put(&ufile->ref, ib_uverbs_release_file);
 
 	/* Pairs with filp->private_data in alloc_begin_fd_uobject */
 	uverbs_uobject_put(uobj);
+	return 0;
 }
-EXPORT_SYMBOL(uverbs_close_fd);
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
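
To make the release-method contract concrete, a sketch (the struct name is an assumption) of how an FD-based uobject type wires its file_operations; alloc_begin_fd_uobject() above now WARNs if .release is anything else:

static const struct file_operations example_uobj_fops = {
	.owner	 = THIS_MODULE,
	/* .read, .poll, etc. as the object type requires */
	.release = uverbs_uobject_fd_release,
};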
 
 /*
  * Drop the ucontext off the ufile and completely disconnect it from the
@@ -855,9 +845,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 }
 
 /*
- * Destroy the uncontext and every uobject associated with it. If called with
- * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
- * been completed and ufile->ucontext is NULL.
+ * Destroy the uncontext and every uobject associated with it.
  *
  * This is internally locked and can be called in parallel from multiple
  * contexts.
@@ -865,22 +853,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 			     enum rdma_remove_reason reason)
 {
-	if (reason == RDMA_REMOVE_CLOSE) {
-		/*
-		 * During destruction we might trigger something that
-		 * synchronously calls release on any file descriptor. For
-		 * this reason all paths that come from file_operations
-		 * release must use try_lock. They can progress knowing that
-		 * there is an ongoing uverbs_destroy_ufile_hw that will clean
-		 * up the driver resources.
-		 */
-		if (!mutex_trylock(&ufile->ucontext_lock))
-			return;
-
-	} else {
-		mutex_lock(&ufile->ucontext_lock);
-	}
-
 	down_write(&ufile->hw_destroy_rwsem);
 
 	/*
@@ -909,7 +881,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 
 done:
 	up_write(&ufile->hw_destroy_rwsem);
-	mutex_unlock(&ufile->ucontext_lock);
 }
 
 const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -920,7 +891,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
 	.lookup_put = lookup_put_fd_uobject,
 	.destroy_hw = destroy_hw_fd_uobject,
 	.remove_handle = remove_handle_fd_uobject,
-	.needs_kfree_rcu = false,
 };
 EXPORT_SYMBOL(uverbs_fd_class);
 
@@ -943,19 +913,17 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
 		return rdma_lookup_get_uobject(obj, attrs->ufile, id,
 					       UVERBS_LOOKUP_WRITE, attrs);
 	case UVERBS_ACCESS_NEW:
-		return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
+		return rdma_alloc_begin_uobject(obj, attrs);
 	default:
 		WARN_ON(true);
 		return ERR_PTR(-EOPNOTSUPP);
 	}
 }
 
-int uverbs_finalize_object(struct ib_uobject *uobj,
-			   enum uverbs_obj_access access, bool commit,
-			   struct uverbs_attr_bundle *attrs)
+void uverbs_finalize_object(struct ib_uobject *uobj,
+			    enum uverbs_obj_access access, bool commit,
+			    struct uverbs_attr_bundle *attrs)
 {
-	int ret = 0;
-
 	/*
 	 * refcounts should be handled at the object level and not at the
 	 * uobject level. Refcounts of the objects themselves are done in
@@ -975,14 +943,11 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 		break;
 	case UVERBS_ACCESS_NEW:
 		if (commit)
-			ret = rdma_alloc_commit_uobject(uobj, attrs);
+			rdma_alloc_commit_uobject(uobj, attrs);
 		else
 			rdma_alloc_abort_uobject(uobj, attrs);
 		break;
 	default:
 		WARN_ON(true);
-		ret = -EOPNOTSUPP;
 	}
-
-	return ret;
 }
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index e63fbda25e1d..33978e0f1262 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -51,29 +51,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
 
 /*
- * uverbs_uobject_get is called in order to increase the reference count on
- * an uobject. This is useful when a handler wants to keep the uobject's memory
- * alive, regardless if this uobject is still alive in the context's objects
- * repository. Objects are put via uverbs_uobject_put.
- */
-void uverbs_uobject_get(struct ib_uobject *uobject);
-
-/*
- * In order to indicate we no longer needs this uobject, uverbs_uobject_put
- * is called. When the reference count is decreased, the uobject is freed.
- * For example, this is used when attaching a completion channel to a CQ.
- */
-void uverbs_uobject_put(struct ib_uobject *uobject);
-
-/* Indicate this fd is no longer used by this consumer, but its memory isn't
- * necessarily released yet. When the last reference is put, we release the
- * memory. After this call is executed, calling uverbs_uobject_get isn't
- * allowed.
- * This must be called from the release file_operations of the file!
- */
-void uverbs_close_fd(struct file *f);
-
-/*
  * Get an ib_uobject that corresponds to the given id from ufile, assuming
  * the object is from the given type. Lock it to the required access when
  * applicable.
@@ -86,24 +63,9 @@ struct ib_uobject *
 uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
 			     s64 id, struct uverbs_attr_bundle *attrs);
 
-/*
- * Note that certain finalize stages could return a status:
- *   (a) alloc_commit could return a failure if the object is committed at the
- *       same time when the context is destroyed.
- *   (b) remove_commit could fail if the object wasn't destroyed successfully.
- * Since multiple objects could be finalized in one transaction, it is very NOT
- * recommended to have several finalize actions which have side effects.
- * For example, it's NOT recommended to have a certain action which has both
- * a commit action and a destroy action or two destroy objects in the same
- * action. The rule of thumb is to have one destroy or commit action with
- * multiple lookups.
- * The first non zero return value of finalize_object is returned from this
- * function. For example, this could happen when we couldn't destroy an
- * object.
- */
-int uverbs_finalize_object(struct ib_uobject *uobj,
-			   enum uverbs_obj_access access, bool commit,
-			   struct uverbs_attr_bundle *attrs);
+void uverbs_finalize_object(struct ib_uobject *uobj,
+			    enum uverbs_obj_access access, bool commit,
+			    struct uverbs_attr_bundle *attrs);
 
 int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
 
@@ -189,6 +151,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
 			      unsigned int num_attrs);
 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
 
+extern const struct uapi_definition uverbs_def_obj_async_fd[];
 extern const struct uapi_definition uverbs_def_obj_counters[];
 extern const struct uapi_definition uverbs_def_obj_cq[];
 extern const struct uapi_definition uverbs_def_obj_device[];
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8917125ea16d..30d4c126a2db 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1068,7 +1068,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
 	}
 
 settimeout_out:
-	return skb->len;
+	return 0;
 }
 
 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
@@ -1139,7 +1139,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
 	}
 
 resp_out:
-	return skb->len;
+	return 0;
 }
 
 static void free_sm_ah(struct kref *kref)
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
new file mode 100644
index 000000000000..6c3514beac4d
--- /dev/null
+++ b/drivers/infiniband/core/trace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/ib_verbs.h>
+
+#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c3769a5f096d..06b6125b5ae1 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -166,10 +166,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 		 * for any address.
 		 */
 		mask |= (sg_dma_address(sg) + pgoff) ^ va;
-		if (i && i != (umem->nmap - 1))
-			/* restrict by length as well for interior SGEs */
-			mask |= sg_dma_len(sg);
 		va += sg_dma_len(sg) - pgoff;
+		/* Except for the last entry, the ending iova alignment sets
+		 * the maximum possible page size as the low bits of the iova
+		 * must be zero when starting the next chunk.
+		 */
+		if (i != (umem->nmap - 1))
+			mask |= va;
 		pgoff = 0;
 	}
 	best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index a71ce0ae2031..b8c657b28380 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -227,21 +227,10 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
 	umem_odp->notifier.ops = ops;
 
 	umem_odp->page_shift = PAGE_SHIFT;
-	if (access & IB_ACCESS_HUGETLB) {
-		struct vm_area_struct *vma;
-		struct hstate *h;
-
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, ib_umem_start(umem_odp));
-		if (!vma || !is_vm_hugetlb_page(vma)) {
-			up_read(&mm->mmap_sem);
-			ret = -EINVAL;
-			goto err_free;
-		}
-		h = hstate_vma(vma);
-		umem_odp->page_shift = huge_page_shift(h);
-		up_read(&mm->mmap_sem);
-	}
+#ifdef CONFIG_HUGETLB_PAGE
+	if (access & IB_ACCESS_HUGETLB)
+		umem_odp->page_shift = HPAGE_SHIFT;
+#endif
 
 	umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
 	ret = ib_init_umem_odp(umem_odp, ops);
@@ -251,7 +240,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
 
 err_put_pid:
 	put_pid(umem_odp->tgid);
-err_free:
 	kfree(umem_odp);
 	return ERR_PTR(ret);
 }
@@ -424,7 +412,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 
 	while (bcnt > 0) {
 		const size_t gup_num_pages = min_t(size_t,
-				(bcnt + BIT(page_shift) - 1) >> page_shift,
+				ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
 				PAGE_SIZE / sizeof(struct page *));
 
 		down_read(&owning_mm->mmap_sem);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 63f7f7db5902..7df71983212d 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -111,7 +111,6 @@ struct ib_uverbs_device {
 	struct srcu_struct			disassociate_srcu;
 	struct mutex				lists_mutex; /* protect lists */
 	struct list_head			uverbs_file_list;
-	struct list_head			uverbs_events_file_list;
 	struct uverbs_api			*uapi;
 };
 
@@ -124,10 +123,9 @@ struct ib_uverbs_event_queue {
 };
 
 struct ib_uverbs_async_event_file {
+	struct ib_uobject			uobj;
 	struct ib_uverbs_event_queue		ev_queue;
-	struct ib_uverbs_file		       *uverbs_file;
-	struct kref				ref;
-	struct list_head			list;
+	struct ib_event_handler			event_handler;
 };
 
 struct ib_uverbs_completion_event_file {
@@ -144,8 +142,7 @@ struct ib_uverbs_file {
 	 * ucontext_lock held
 	 */
 	struct ib_ucontext		       *ucontext;
-	struct ib_event_handler			event_handler;
-	struct ib_uverbs_async_event_file       *async_file;
+	struct ib_uverbs_async_event_file      *async_file;
 	struct list_head			list;
 
 	/*
@@ -183,6 +180,7 @@ struct ib_uverbs_mcast_entry {
 
 struct ib_uevent_object {
 	struct ib_uobject	uobject;
+	/* List member for ib_uverbs_async_event_file list */
 	struct list_head	event_list;
 	u32			events_reported;
 };
@@ -210,25 +208,24 @@ struct ib_uwq_object {
 };
 
 struct ib_ucq_object {
-	struct ib_uobject	uobject;
+	struct ib_uevent_object uevent;
 	struct list_head	comp_list;
-	struct list_head	async_list;
 	u32			comp_events_reported;
-	u32			async_events_reported;
 };
 
 extern const struct file_operations uverbs_event_fops;
+extern const struct file_operations uverbs_async_event_fops;
 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
-					      struct ib_device *ib_dev);
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
 void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
 
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
-			   struct ib_uverbs_completion_event_file *ev_file,
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
 			   struct ib_ucq_object *uobj);
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
-			      struct ib_uevent_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
 void ib_uverbs_release_file(struct kref *ref);
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
@@ -236,8 +233,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
-			     struct ib_event *event);
 int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
 			   enum rdma_remove_reason why,
 			   struct uverbs_attr_bundle *attrs);
@@ -276,23 +271,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
 					  size_t kern_filter_sz,
 					  union ib_flow_spec *ib_spec);
 
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-
 /*
  * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
  * PortInfo CapabilityMask, but was extended with unique bits.
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 06ed32c8662f..c8693f5231dd 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -203,82 +203,55 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
 #define ib_uverbs_lookup_comp_file(_fd, _ufile)                                \
 	_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
 
-static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
 {
-	struct ib_uverbs_file *file = attrs->ufile;
-	struct ib_uverbs_get_context      cmd;
-	struct ib_uverbs_get_context_resp resp;
-	struct ib_ucontext		 *ucontext;
-	struct file			 *filp;
-	struct ib_rdmacg_object		 cg_obj;
+	struct ib_uverbs_file *ufile = attrs->ufile;
+	struct ib_ucontext *ucontext;
 	struct ib_device *ib_dev;
-	int ret;
 
-	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
-	if (ret)
-		return ret;
-
-	mutex_lock(&file->ucontext_lock);
-	ib_dev = srcu_dereference(file->device->ib_dev,
-				  &file->device->disassociate_srcu);
-	if (!ib_dev) {
-		ret = -EIO;
-		goto err;
-	}
-
-	if (file->ucontext) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
-	if (ret)
-		goto err;
+	ib_dev = srcu_dereference(ufile->device->ib_dev,
+				  &ufile->device->disassociate_srcu);
+	if (!ib_dev)
+		return -EIO;
 
 	ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
-	if (!ucontext) {
-		ret = -ENOMEM;
-		goto err_alloc;
-	}
-
-	attrs->context = ucontext;
+	if (!ucontext)
+		return -ENOMEM;
 
 	ucontext->res.type = RDMA_RESTRACK_CTX;
 	ucontext->device = ib_dev;
-	ucontext->cg_obj = cg_obj;
-	/* ufile is required when some objects are released */
-	ucontext->ufile = file;
-
-	ucontext->closing = false;
-	ucontext->cleanup_retryable = false;
-
+	ucontext->ufile = ufile;
 	xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+	attrs->context = ucontext;
+	return 0;
+}
 
-	ret = get_unused_fd_flags(O_CLOEXEC);
-	if (ret < 0)
-		goto err_free;
-	resp.async_fd = ret;
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
+{
+	struct ib_ucontext *ucontext = attrs->context;
+	struct ib_uverbs_file *file = attrs->ufile;
+	int ret;
 
-	filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
-	if (IS_ERR(filp)) {
-		ret = PTR_ERR(filp);
-		goto err_fd;
+	if (!down_read_trylock(&file->hw_destroy_rwsem))
+		return -EIO;
+	mutex_lock(&file->ucontext_lock);
+	if (file->ucontext) {
+		ret = -EINVAL;
+		goto err;
 	}
 
-	resp.num_comp_vectors = file->device->num_comp_vectors;
-
-	ret = uverbs_response(attrs, &resp, sizeof(resp));
+	ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device,
+				   RDMACG_RESOURCE_HCA_HANDLE);
 	if (ret)
-		goto err_file;
+		goto err;
 
-	ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
+	ret = ucontext->device->ops.alloc_ucontext(ucontext,
+						   &attrs->driver_udata);
 	if (ret)
-		goto err_file;
+		goto err_uncharge;
 
 	rdma_restrack_uadd(&ucontext->res);
 
-	fd_install(resp.async_fd, filp);
-
 	/*
 	 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
 	 * only after all writes to setup the ucontext have completed
@@ -286,24 +259,62 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	smp_store_release(&file->ucontext, ucontext);
 
 	mutex_unlock(&file->ucontext_lock);
-
+	up_read(&file->hw_destroy_rwsem);
 	return 0;
 
-err_file:
-	ib_uverbs_free_async_event_file(file);
-	fput(filp);
+err_uncharge:
+	ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+			   RDMACG_RESOURCE_HCA_HANDLE);
+err:
+	mutex_unlock(&file->ucontext_lock);
+	up_read(&file->hw_destroy_rwsem);
+	return ret;
+}
 
-err_fd:
-	put_unused_fd(resp.async_fd);
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_get_context_resp resp;
+	struct ib_uverbs_get_context cmd;
+	struct ib_device *ib_dev;
+	struct ib_uobject *uobj;
+	int ret;
 
-err_free:
-	kfree(ucontext);
+	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+	if (ret)
+		return ret;
 
-err_alloc:
-	ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+	ret = ib_alloc_ucontext(attrs);
+	if (ret)
+		return ret;
 
-err:
-	mutex_unlock(&file->ucontext_lock);
+	uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev);
+	if (IS_ERR(uobj)) {
+		ret = PTR_ERR(uobj);
+		goto err_ucontext;
+	}
+
+	resp = (struct ib_uverbs_get_context_resp){
+		.num_comp_vectors = attrs->ufile->device->num_comp_vectors,
+		.async_fd = uobj->id,
+	};
+	ret = uverbs_response(attrs, &resp, sizeof(resp));
+	if (ret)
+		goto err_uobj;
+
+	ret = ib_init_ucontext(attrs);
+	if (ret)
+		goto err_uobj;
+
+	ib_uverbs_init_async_event_file(
+		container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
+
+err_uobj:
+	rdma_alloc_abort_uobject(uobj, attrs);
+err_ucontext:
+	kfree(attrs->context);
+	attrs->context = NULL;
 	return ret;
 }
 
@@ -446,7 +457,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 	if (ret)
 		goto err_copy;
 
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 
 err_copy:
 	ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs));
@@ -642,7 +654,8 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
 
 	mutex_unlock(&ibudev->xrcd_tree_mutex);
 
-	return uobj_alloc_commit(&obj->uobject, attrs);
+	rdma_alloc_commit_uobject(&obj->uobject, attrs);
+	return 0;
 
 err_copy:
 	if (inode) {
@@ -774,7 +787,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
 
 	uobj_put_obj_read(pd);
 
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 
 err_copy:
 	ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
@@ -928,7 +942,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 		goto err_copy;
 
 	uobj_put_obj_read(pd);
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 
 err_copy:
 	uverbs_dealloc_mw(mw);
@@ -980,7 +995,8 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
 		return ret;
 	}
 
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 }
 
 static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
@@ -1010,11 +1026,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
 		}
 	}
 
-	obj->uobject.user_handle = cmd->user_handle;
-	obj->comp_events_reported  = 0;
-	obj->async_events_reported = 0;
+	obj->uevent.uobject.user_handle = cmd->user_handle;
 	INIT_LIST_HEAD(&obj->comp_list);
-	INIT_LIST_HEAD(&obj->async_list);
+	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	attr.cqe = cmd->cqe;
 	attr.comp_vector = cmd->comp_vector;
@@ -1026,7 +1040,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
 		goto err_file;
 	}
 	cq->device        = ib_dev;
-	cq->uobject       = &obj->uobject;
+	cq->uobject       = obj;
 	cq->comp_handler  = ib_uverbs_comp_handler;
 	cq->event_handler = ib_uverbs_cq_event_handler;
 	cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
@@ -1036,9 +1050,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
 	if (ret)
 		goto err_free;
 
-	obj->uobject.object = cq;
+	obj->uevent.uobject.object = cq;
 	memset(&resp, 0, sizeof resp);
-	resp.base.cq_handle = obj->uobject.id;
+	resp.base.cq_handle = obj->uevent.uobject.id;
 	resp.base.cqe       = cq->cqe;
 	resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 
@@ -1049,9 +1063,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
 	if (ret)
 		goto err_cb;
 
-	ret = uobj_alloc_commit(&obj->uobject, attrs);
-	if (ret)
-		return ERR_PTR(ret);
+	rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
 	return obj;
 
 err_cb:
@@ -1061,10 +1073,10 @@ err_free:
 	kfree(cq);
 err_file:
 	if (ev_file)
-		ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
+		ib_uverbs_release_ucq(ev_file, obj);
 
 err:
-	uobj_alloc_abort(&obj->uobject, attrs);
+	uobj_alloc_abort(&obj->uevent.uobject, attrs);
 
 	return ERR_PTR(ret);
 }
@@ -1133,7 +1145,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
 
 	ret = uverbs_response(attrs, &resp, sizeof(resp));
 out:
-	uobj_put_obj_read(cq);
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	return ret;
 }
@@ -1216,7 +1229,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
 		ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
 
 out_put:
-	uobj_put_obj_read(cq);
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	return ret;
 }
 
@@ -1237,8 +1251,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-	uobj_put_obj_read(cq);
-
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	return 0;
 }
 
@@ -1258,10 +1272,10 @@ static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	obj = container_of(uobj, struct ib_ucq_object, uobject);
+	obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
 	memset(&resp, 0, sizeof(resp));
 	resp.comp_events_reported  = obj->comp_events_reported;
-	resp.async_events_reported = obj->async_events_reported;
+	resp.async_events_reported = obj->uevent.events_reported;
 
 	uobj_put_destroy(uobj);
 
@@ -1375,7 +1389,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 	}
 
 	attr.event_handler = ib_uverbs_qp_event_handler;
-	attr.qp_context    = attrs->ufile;
 	attr.send_cq       = scq;
 	attr.recv_cq       = rcq;
 	attr.srq           = srq;
@@ -1391,7 +1404,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 	attr.cap.max_recv_sge    = cmd->max_recv_sge;
 	attr.cap.max_inline_data = cmd->max_inline_data;
 
-	obj->uevent.events_reported     = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	INIT_LIST_HEAD(&obj->mcast_list);
 
@@ -1421,7 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 		qp = ib_create_qp(pd, &attr);
 	else
 		qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
-				   &obj->uevent.uobject);
+				   obj);
 
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
@@ -1439,7 +1451,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 		qp->srq		  = attr.srq;
 		qp->rwq_ind_tbl	  = ind_tbl;
 		qp->event_handler = attr.event_handler;
-		qp->qp_context	  = attr.qp_context;
 		qp->qp_type	  = attr.qp_type;
 		atomic_set(&qp->usecnt, 0);
 		atomic_inc(&pd->usecnt);
@@ -1454,7 +1465,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 			atomic_inc(&ind_tbl->usecnt);
 	} else {
 		/* It is done in _ib_create_qp for other QP types */
-		qp->uobject = &obj->uevent.uobject;
+		qp->uobject = obj;
 	}
 
 	obj->uevent.uobject.object = qp;
@@ -1483,15 +1494,19 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 	if (pd)
 		uobj_put_obj_read(pd);
 	if (scq)
-		uobj_put_obj_read(scq);
+		rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (rcq && rcq != scq)
-		uobj_put_obj_read(rcq);
+		rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (srq)
-		uobj_put_obj_read(srq);
+		rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (ind_tbl)
 		uobj_put_obj_read(ind_tbl);
 
-	return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+	rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+	return 0;
 err_cb:
 	ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
 
@@ -1501,11 +1516,14 @@ err_put:
 	if (pd)
 		uobj_put_obj_read(pd);
 	if (scq)
-		uobj_put_obj_read(scq);
+		rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (rcq && rcq != scq)
-		uobj_put_obj_read(rcq);
+		rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (srq)
-		uobj_put_obj_read(srq);
+		rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 	if (ind_tbl)
 		uobj_put_obj_read(ind_tbl);
 
@@ -1567,7 +1585,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
 	struct ib_xrcd		       *xrcd;
 	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
 	struct ib_qp                   *qp;
-	struct ib_qp_open_attr          attr;
+	struct ib_qp_open_attr          attr = {};
 	int ret;
 	struct ib_device *ib_dev;
 
@@ -1593,11 +1611,9 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
 	}
 
 	attr.event_handler = ib_uverbs_qp_event_handler;
-	attr.qp_context    = attrs->ufile;
 	attr.qp_num        = cmd.qpn;
 	attr.qp_type       = cmd.qp_type;
 
-	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	INIT_LIST_HEAD(&obj->mcast_list);
 
@@ -1620,10 +1636,11 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
 
 	obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
 	atomic_inc(&obj->uxrcd->refcnt);
-	qp->uobject = &obj->uevent.uobject;
+	qp->uobject = obj;
 	uobj_put_read(xrcd_uobj);
 
-	return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+	rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+	return 0;
 
 err_destroy:
 	ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
@@ -1684,7 +1701,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
 
 	ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
 
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	if (ret)
 		goto out;
@@ -1921,7 +1939,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
 				      &attrs->driver_udata);
 
 release_qp:
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 out:
 	kfree(attr);
 
@@ -2185,7 +2204,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
 		ret = ret2;
 
 out_put:
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	while (wr) {
 		if (is_ud && ud_wr(wr)->ah)
@@ -2327,7 +2347,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
 	resp.bad_wr = 0;
 	ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
 
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	if (ret) {
 		for (next = wr; next; next = next->next) {
 			++resp.bad_wr;
@@ -2377,7 +2398,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
 	resp.bad_wr = 0;
 	ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
 
-	uobj_put_obj_read(srq);
+	rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	if (ret)
 		for (next = wr; next; next = next->next) {
@@ -2465,7 +2487,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
 		goto err_copy;
 
 	uobj_put_obj_read(pd);
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 
 err_copy:
 	rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE,
@@ -2507,7 +2530,7 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
 	if (!qp)
 		return -EINVAL;
 
-	obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+	obj = qp->uobject;
 
 	mutex_lock(&obj->mcast_lock);
 	list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2534,7 +2557,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
 
 out_put:
 	mutex_unlock(&obj->mcast_lock);
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	return ret;
 }
@@ -2556,7 +2580,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
 	if (!qp)
 		return -EINVAL;
 
-	obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+	obj = qp->uobject;
 	mutex_lock(&obj->mcast_lock);
 
 	list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2577,7 +2601,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
 
 out_put:
 	mutex_unlock(&obj->mcast_lock);
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	return ret;
 }
 
@@ -2943,7 +2968,6 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
 	wq_init_attr.wq_type = cmd.wq_type;
 	wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
 	wq_init_attr.create_flags = cmd.create_flags;
-	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
@@ -2952,7 +2976,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
 		goto err_put_cq;
 	}
 
-	wq->uobject = &obj->uevent.uobject;
+	wq->uobject = obj;
 	obj->uevent.uobject.object = wq;
 	wq->wq_type = wq_init_attr.wq_type;
 	wq->cq = cq;
@@ -2962,7 +2986,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
 	atomic_set(&wq->usecnt, 0);
 	atomic_inc(&pd->usecnt);
 	atomic_inc(&cq->usecnt);
-	wq->uobject = &obj->uevent.uobject;
+	wq->uobject = obj;
 	obj->uevent.uobject.object = wq;
 
 	memset(&resp, 0, sizeof(resp));
@@ -2976,13 +3000,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
 		goto err_copy;
 
 	uobj_put_obj_read(pd);
-	uobj_put_obj_read(cq);
-	return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
+	rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+	return 0;
 
 err_copy:
 	ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
 err_put_cq:
-	uobj_put_obj_read(cq);
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 err_put_pd:
 	uobj_put_obj_read(pd);
 err_uobj:
@@ -3048,7 +3075,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
 	}
 	ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
 					&attrs->driver_udata);
-	uobj_put_obj_read(wq);
+	rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	return ret;
 }
 
@@ -3149,9 +3177,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 	kfree(wqs_handles);
 
 	for (j = 0; j < num_read_wqs; j++)
-		uobj_put_obj_read(wqs[j]);
+		rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 
 err_copy:
 	ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3159,7 +3189,8 @@ err_uobj:
 	uobj_alloc_abort(uobj, attrs);
 put_wqs:
 	for (j = 0; j < num_read_wqs; j++)
-		uobj_put_obj_read(wqs[j]);
+		rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 err_free:
 	kfree(wqs_handles);
 	kfree(wqs);
@@ -3325,11 +3356,13 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
 	if (err)
 		goto err_copy;
 
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	kfree(flow_attr);
 	if (cmd.flow_attr.num_of_specs)
 		kfree(kern_flow_attr);
-	return uobj_alloc_commit(uobj, attrs);
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
 err_copy:
 	if (!qp->device->ops.destroy_flow(flow_id))
 		atomic_dec(&qp->usecnt);
@@ -3338,7 +3371,8 @@ err_free:
 err_free_flow_attr:
 	kfree(flow_attr);
 err_put:
-	uobj_put_obj_read(qp);
+	rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 err_uobj:
 	uobj_alloc_abort(uobj, attrs);
 err_free_attr:
@@ -3423,7 +3457,6 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
 	attr.attr.max_sge   = cmd->max_sge;
 	attr.attr.srq_limit = cmd->srq_limit;
 
-	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
@@ -3435,7 +3468,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
 	srq->device        = pd->device;
 	srq->pd            = pd;
 	srq->srq_type	   = cmd->srq_type;
-	srq->uobject       = &obj->uevent.uobject;
+	srq->uobject       = obj;
 	srq->event_handler = attr.event_handler;
 	srq->srq_context   = attr.srq_context;
 
@@ -3474,10 +3507,12 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
 		uobj_put_read(xrcd_uobj);
 
 	if (ib_srq_has_cq(cmd->srq_type))
-		uobj_put_obj_read(attr.ext.cq);
+		rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 
 	uobj_put_obj_read(pd);
-	return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+	rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
+	return 0;
 
 err_copy:
 	ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
@@ -3490,7 +3525,8 @@ err_put:
 
 err_put_cq:
 	if (ib_srq_has_cq(cmd->srq_type))
-		uobj_put_obj_read(attr.ext.cq);
+		rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+					UVERBS_LOOKUP_READ);
 
 err_put_xrcd:
 	if (cmd->srq_type == IB_SRQT_XRC) {
@@ -3558,7 +3594,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
 	ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
 					  &attrs->driver_udata);
 
-	uobj_put_obj_read(srq);
+	rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	return ret;
 }
@@ -3581,7 +3618,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
 
 	ret = ib_query_srq(srq, &attr);
 
-	uobj_put_obj_read(srq);
+	rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 
 	if (ret)
 		return ret;
@@ -3706,8 +3744,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
 
 	ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
 
-	uobj_put_obj_read(cq);
-
+	rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+				UVERBS_LOOKUP_READ);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 269938f59d3f..538affbc517e 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -220,24 +220,17 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
 	return ret;
 }
 
-static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
-				  struct uverbs_objs_arr_attr *attr,
-				  bool commit, struct uverbs_attr_bundle *attrs)
+static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+				   struct uverbs_objs_arr_attr *attr,
+				   bool commit,
+				   struct uverbs_attr_bundle *attrs)
 {
 	const struct uverbs_attr_spec *spec = &attr_uapi->spec;
-	int current_ret;
-	int ret = 0;
 	size_t i;
 
-	for (i = 0; i != attr->len; i++) {
-		current_ret = uverbs_finalize_object(attr->uobjects[i],
-						     spec->u2.objs_arr.access,
-						     commit, attrs);
-		if (!ret)
-			ret = current_ret;
-	}
-
-	return ret;
+	for (i = 0; i != attr->len; i++)
+		uverbs_finalize_object(attr->uobjects[i],
+				       spec->u2.objs_arr.access, commit, attrs);
 }
 
 static int uverbs_process_attr(struct bundle_priv *pbundle,
@@ -495,26 +488,22 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
 	return ret;
 }
 
-static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
+static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
 {
 	unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
 	struct bundle_alloc_head *memblock;
 	unsigned int i;
-	int ret = 0;
 
 	/* fast path for simple uobjects */
 	i = -1;
 	while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
 				  i + 1)) < key_bitmap_len) {
 		struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
-		int current_ret;
 
-		current_ret = uverbs_finalize_object(
+		uverbs_finalize_object(
 			attr->obj_attr.uobject,
 			attr->obj_attr.attr_elm->spec.u.obj.access, commit,
 			&pbundle->bundle);
-		if (!ret)
-			ret = current_ret;
 	}
 
 	i = -1;
@@ -523,7 +512,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 		struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
 		const struct uverbs_api_attr *attr_uapi;
 		void __rcu **slot;
-		int current_ret;
 
 		slot = uapi_get_attr_for_method(
 			pbundle,
@@ -534,11 +522,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 		attr_uapi = rcu_dereference_protected(*slot, true);
 
 		if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
-			current_ret = uverbs_free_idrs_array(
-				attr_uapi, &attr->objs_arr_attr, commit,
-				&pbundle->bundle);
-			if (!ret)
-				ret = current_ret;
+			uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr,
+					       commit, &pbundle->bundle);
 		}
 	}
 
@@ -548,8 +533,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 		memblock = memblock->next;
 		kvfree(tmp);
 	}
-
-	return ret;
 }
 
 static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
@@ -562,7 +545,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
 	struct bundle_priv *pbundle;
 	struct bundle_priv onstack;
 	void __rcu **slot;
-	int destroy_ret;
 	int ret;
 
 	if (unlikely(hdr->driver_id != uapi->driver_id))
@@ -610,10 +592,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
 	memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
 
 	ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
-	destroy_ret = bundle_destroy(pbundle, ret == 0);
-	if (unlikely(destroy_ret && !ret))
-		return destroy_ret;
-
+	bundle_destroy(pbundle, ret == 0);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 970d8e31dd65..2d4083bf4a04 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -125,17 +125,8 @@ static void ib_uverbs_release_dev(struct device *device)
 	kfree(dev);
 }
 
-static void ib_uverbs_release_async_event_file(struct kref *ref)
-{
-	struct ib_uverbs_async_event_file *file =
-		container_of(ref, struct ib_uverbs_async_event_file, ref);
-
-	kfree(file);
-}
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
-			  struct ib_uverbs_completion_event_file *ev_file,
-			  struct ib_ucq_object *uobj)
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
+			   struct ib_ucq_object *uobj)
 {
 	struct ib_uverbs_event *evt, *tmp;
 
@@ -150,25 +141,24 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 		uverbs_uobject_put(&ev_file->uobj);
 	}
 
-	spin_lock_irq(&file->async_file->ev_queue.lock);
-	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
-		list_del(&evt->list);
-		kfree(evt);
-	}
-	spin_unlock_irq(&file->async_file->ev_queue.lock);
+	ib_uverbs_release_uevent(&uobj->uevent);
 }
 
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
-			      struct ib_uevent_object *uobj)
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
 {
+	struct ib_uverbs_async_event_file *async_file =
+		READ_ONCE(uobj->uobject.ufile->async_file);
 	struct ib_uverbs_event *evt, *tmp;
 
-	spin_lock_irq(&file->async_file->ev_queue.lock);
+	if (!async_file)
+		return;
+
+	spin_lock_irq(&async_file->ev_queue.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 		list_del(&evt->list);
 		kfree(evt);
 	}
-	spin_unlock_irq(&file->async_file->ev_queue.lock);
+	spin_unlock_irq(&async_file->ev_queue.lock);
 }
 
 void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -208,8 +198,7 @@ void ib_uverbs_release_file(struct kref *ref)
 		ib_uverbs_comp_dev(file->device);
 
 	if (file->async_file)
-		kref_put(&file->async_file->ref,
-			 ib_uverbs_release_async_event_file);
+		uverbs_uobject_put(&file->async_file->uobj);
 	put_device(&file->device->dev);
 
 	if (file->disassociate_page)
@@ -220,7 +209,6 @@ void ib_uverbs_release_file(struct kref *ref)
 }
 
 static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
-				    struct ib_uverbs_file *uverbs_file,
 				    struct file *filp, char __user *buf,
 				    size_t count, loff_t *pos,
 				    size_t eventsz)
@@ -238,19 +226,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
 
 		if (wait_event_interruptible(ev_queue->poll_wait,
 					     (!list_empty(&ev_queue->event_list) ||
-			/* The barriers built into wait_event_interruptible()
-			 * and wake_up() guarentee this will see the null set
-			 * without using RCU
-			 */
-					     !uverbs_file->device->ib_dev)))
+					      ev_queue->is_closed)))
 			return -ERESTARTSYS;
 
+		spin_lock_irq(&ev_queue->lock);
+
 		/* If device was disassociated and no event exists set an error */
-		if (list_empty(&ev_queue->event_list) &&
-		    !uverbs_file->device->ib_dev)
+		if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
+			spin_unlock_irq(&ev_queue->lock);
 			return -EIO;
-
-		spin_lock_irq(&ev_queue->lock);
+		}
 	}
 
 	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
@@ -285,8 +270,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
 {
 	struct ib_uverbs_async_event_file *file = filp->private_data;
 
-	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
-				    buf, count, pos,
+	return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
 				    sizeof(struct ib_uverbs_async_event_desc));
 }
 
@@ -296,9 +280,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
 	struct ib_uverbs_completion_event_file *comp_ev_file =
 		filp->private_data;
 
-	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
-				    comp_ev_file->uobj.ufile, filp,
-				    buf, count, pos,
+	return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+				    pos,
 				    sizeof(struct ib_uverbs_comp_event_desc));
 }
 
@@ -321,7 +304,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
 static __poll_t ib_uverbs_async_event_poll(struct file *filp,
 					       struct poll_table_struct *wait)
 {
-	return ib_uverbs_event_poll(filp->private_data, filp, wait);
+	struct ib_uverbs_async_event_file *file = filp->private_data;
+
+	return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
 }
 
 static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
@@ -335,9 +320,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
 
 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
 {
-	struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+	struct ib_uverbs_async_event_file *file = filp->private_data;
 
-	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+	return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
 }
 
 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
@@ -348,70 +333,20 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
 	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
 }
 
-static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
-{
-	struct ib_uverbs_async_event_file *file = filp->private_data;
-	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
-	struct ib_uverbs_event *entry, *tmp;
-	int closed_already = 0;
-
-	mutex_lock(&uverbs_file->device->lists_mutex);
-	spin_lock_irq(&file->ev_queue.lock);
-	closed_already = file->ev_queue.is_closed;
-	file->ev_queue.is_closed = 1;
-	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
-		if (entry->counter)
-			list_del(&entry->obj_list);
-		kfree(entry);
-	}
-	spin_unlock_irq(&file->ev_queue.lock);
-	if (!closed_already) {
-		list_del(&file->list);
-		ib_unregister_event_handler(&uverbs_file->event_handler);
-	}
-	mutex_unlock(&uverbs_file->device->lists_mutex);
-
-	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
-	kref_put(&file->ref, ib_uverbs_release_async_event_file);
-
-	return 0;
-}
-
-static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
-{
-	struct ib_uobject *uobj = filp->private_data;
-	struct ib_uverbs_completion_event_file *file = container_of(
-		uobj, struct ib_uverbs_completion_event_file, uobj);
-	struct ib_uverbs_event *entry, *tmp;
-
-	spin_lock_irq(&file->ev_queue.lock);
-	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
-		if (entry->counter)
-			list_del(&entry->obj_list);
-		kfree(entry);
-	}
-	file->ev_queue.is_closed = 1;
-	spin_unlock_irq(&file->ev_queue.lock);
-
-	uverbs_close_fd(filp);
-
-	return 0;
-}
-
 const struct file_operations uverbs_event_fops = {
 	.owner	 = THIS_MODULE,
 	.read	 = ib_uverbs_comp_event_read,
 	.poll    = ib_uverbs_comp_event_poll,
-	.release = ib_uverbs_comp_event_close,
+	.release = uverbs_uobject_fd_release,
 	.fasync  = ib_uverbs_comp_event_fasync,
 	.llseek	 = no_llseek,
 };
 
-static const struct file_operations uverbs_async_event_fops = {
+const struct file_operations uverbs_async_event_fops = {
 	.owner	 = THIS_MODULE,
 	.read	 = ib_uverbs_async_event_read,
 	.poll    = ib_uverbs_async_event_poll,
-	.release = ib_uverbs_async_event_close,
+	.release = uverbs_uobject_fd_release,
 	.fasync  = ib_uverbs_async_event_fasync,
 	.llseek	 = no_llseek,
 };
@@ -438,9 +373,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 		return;
 	}
 
-	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+	uobj = cq->uobject;
 
-	entry->desc.comp.cq_handle = cq->uobject->user_handle;
+	entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
 	entry->counter		   = &uobj->comp_events_reported;
 
 	list_add_tail(&entry->list, &ev_queue->event_list);
@@ -451,102 +386,82 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
 }
 
-static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
-				    __u64 element, __u64 event,
-				    struct list_head *obj_list,
-				    u32 *counter)
+static void
+ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+			__u64 element, __u64 event, struct list_head *obj_list,
+			u32 *counter)
 {
 	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
-	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
-	if (file->async_file->ev_queue.is_closed) {
-		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+	if (!async_file)
+		return;
+
+	spin_lock_irqsave(&async_file->ev_queue.lock, flags);
+	if (async_file->ev_queue.is_closed) {
+		spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
 		return;
 	}
 
 	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry) {
-		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+		spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
 		return;
 	}
 
-	entry->desc.async.element    = element;
+	entry->desc.async.element = element;
 	entry->desc.async.event_type = event;
-	entry->desc.async.reserved   = 0;
-	entry->counter               = counter;
+	entry->desc.async.reserved = 0;
+	entry->counter = counter;
 
-	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
+	list_add_tail(&entry->list, &async_file->ev_queue.event_list);
 	if (obj_list)
 		list_add_tail(&entry->obj_list, obj_list);
-	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+	spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
 
-	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
-	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&async_file->ev_queue.poll_wait);
+	kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
 }
 
-void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+static void uverbs_uobj_event(struct ib_uevent_object *eobj,
+			      struct ib_event *event)
 {
-	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
-						  struct ib_ucq_object, uobject);
+	ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file),
+				eobj->uobject.user_handle, event->event,
+				&eobj->event_list, &eobj->events_reported);
+}
 
-	ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
-				event->event, &uobj->async_list,
-				&uobj->async_events_reported);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+	uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
 }
 
 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
 {
-	struct ib_uevent_object *uobj;
-
 	/* for XRC target qp's, check that qp is live */
 	if (!event->element.qp->uobject)
 		return;
 
-	uobj = container_of(event->element.qp->uobject,
-			    struct ib_uevent_object, uobject);
-
-	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
-				event->event, &uobj->event_list,
-				&uobj->events_reported);
+	uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
 }
 
 void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
 {
-	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
-						  struct ib_uevent_object, uobject);
-
-	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
-				event->event, &uobj->event_list,
-				&uobj->events_reported);
+	uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
 }
 
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
 {
-	struct ib_uevent_object *uobj;
-
-	uobj = container_of(event->element.srq->uobject,
-			    struct ib_uevent_object, uobject);
-
-	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
-				event->event, &uobj->event_list,
-				&uobj->events_reported);
-}
-
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
-			     struct ib_event *event)
-{
-	struct ib_uverbs_file *file =
-		container_of(handler, struct ib_uverbs_file, event_handler);
-
-	ib_uverbs_async_handler(file, event->element.port_num, event->event,
-				NULL, NULL);
+	uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
 }
 
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+				    struct ib_event *event)
 {
-	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
-	file->async_file = NULL;
+	ib_uverbs_async_handler(
+		container_of(handler, struct ib_uverbs_async_event_file,
+			     event_handler),
+		event->element.port_num, event->event, NULL, NULL);
 }
 
 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
@@ -558,45 +473,26 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
 	ev_queue->async_queue = NULL;
 }
 
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
-					      struct ib_device	*ib_dev)
+void ib_uverbs_init_async_event_file(
+	struct ib_uverbs_async_event_file *async_file)
 {
-	struct ib_uverbs_async_event_file *ev_file;
-	struct file *filp;
-
-	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
-	if (!ev_file)
-		return ERR_PTR(-ENOMEM);
-
-	ib_uverbs_init_event_queue(&ev_file->ev_queue);
-	ev_file->uverbs_file = uverbs_file;
-	kref_get(&ev_file->uverbs_file->ref);
-	kref_init(&ev_file->ref);
-	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
-				  ev_file, O_RDONLY);
-	if (IS_ERR(filp))
-		goto err_put_refs;
-
-	mutex_lock(&uverbs_file->device->lists_mutex);
-	list_add_tail(&ev_file->list,
-		      &uverbs_file->device->uverbs_events_file_list);
-	mutex_unlock(&uverbs_file->device->lists_mutex);
-
-	WARN_ON(uverbs_file->async_file);
-	uverbs_file->async_file = ev_file;
-	kref_get(&uverbs_file->async_file->ref);
-	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
-			      ib_dev,
-			      ib_uverbs_event_handler);
-	ib_register_event_handler(&uverbs_file->event_handler);
-	/* At that point async file stuff was fully set */
+	struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
+	struct ib_device *ib_dev = async_file->uobj.context->device;
+
+	ib_uverbs_init_event_queue(&async_file->ev_queue);
 
-	return filp;
+	/* The first async_event_file becomes the default one for the file. */
+	mutex_lock(&uverbs_file->ucontext_lock);
+	if (!uverbs_file->async_file) {
+		/* Pairs with the put in ib_uverbs_release_file */
+		uverbs_uobject_get(&async_file->uobj);
+		smp_store_release(&uverbs_file->async_file, async_file);
+	}
+	mutex_unlock(&uverbs_file->ucontext_lock);
 
-err_put_refs:
-	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
-	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
-	return filp;
+	INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
+			      ib_uverbs_event_handler);
+	ib_register_event_handler(&async_file->event_handler);
 }
 
 static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
@@ -1225,7 +1121,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	mutex_init(&uverbs_dev->xrcd_tree_mutex);
 	mutex_init(&uverbs_dev->lists_mutex);
 	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
-	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
 	rcu_assign_pointer(uverbs_dev->ib_dev, device);
 	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
@@ -1270,14 +1165,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 					struct ib_device *ib_dev)
 {
 	struct ib_uverbs_file *file;
-	struct ib_uverbs_async_event_file *event_file;
-	struct ib_event event;
 
 	/* Pending running commands to terminate */
 	uverbs_disassociate_api_pre(uverbs_dev);
-	event.event = IB_EVENT_DEVICE_FATAL;
-	event.element.port_num = 0;
-	event.device = ib_dev;
 
 	mutex_lock(&uverbs_dev->lists_mutex);
 	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
@@ -1293,31 +1183,14 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 		 */
 		mutex_unlock(&uverbs_dev->lists_mutex);
 
-		ib_uverbs_event_handler(&file->event_handler, &event);
+		ib_uverbs_async_handler(READ_ONCE(file->async_file), 0,
+					IB_EVENT_DEVICE_FATAL, NULL, NULL);
+
 		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
 		kref_put(&file->ref, ib_uverbs_release_file);
 
 		mutex_lock(&uverbs_dev->lists_mutex);
 	}
-
-	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
-		event_file = list_first_entry(&uverbs_dev->
-					      uverbs_events_file_list,
-					      struct ib_uverbs_async_event_file,
-					      list);
-		spin_lock_irq(&event_file->ev_queue.lock);
-		event_file->ev_queue.is_closed = 1;
-		spin_unlock_irq(&event_file->ev_queue.lock);
-
-		list_del(&event_file->list);
-		ib_unregister_event_handler(
-			&event_file->uverbs_file->event_handler);
-		event_file->uverbs_file->event_handler.device =
-			NULL;
-
-		wake_up_interruptible(&event_file->ev_queue.poll_wait);
-		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
-	}
 	mutex_unlock(&uverbs_dev->lists_mutex);
 
 	uverbs_disassociate_api(uverbs_dev->uapi);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 35b2e2c640cc..994d8744b246 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -105,7 +105,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
 	if (uqp->uxrcd)
 		atomic_dec(&uqp->uxrcd->refcnt);
 
-	ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent);
+	ib_uverbs_release_uevent(&uqp->uevent);
 	return ret;
 }
 
@@ -138,7 +138,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
 	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
-	ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent);
+	ib_uverbs_release_uevent(&uwq->uevent);
 	return ret;
 }
 
@@ -163,7 +163,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
 		atomic_dec(&us->uxrcd->refcnt);
 	}
 
-	ib_uverbs_release_uevent(attrs->ufile, uevent);
+	ib_uverbs_release_uevent(uevent);
 	return ret;
 }
 
@@ -202,24 +202,40 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 	return 0;
 }
 
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
-						   enum rdma_remove_reason why)
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
 {
-	struct ib_uverbs_completion_event_file *comp_event_file =
-		container_of(uobj, struct ib_uverbs_completion_event_file,
-			     uobj);
-	struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+	struct ib_uverbs_event *entry, *tmp;
 
 	spin_lock_irq(&event_queue->lock);
+	/*
+	 * The user must ensure that no new items are added to the event_list
+	 * once is_closed is set.
+	 */
 	event_queue->is_closed = 1;
 	spin_unlock_irq(&event_queue->lock);
+	wake_up_interruptible(&event_queue->poll_wait);
+	kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
 
-	if (why == RDMA_REMOVE_DRIVER_REMOVE) {
-		wake_up_interruptible(&event_queue->poll_wait);
-		kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+	spin_lock_irq(&event_queue->lock);
+	list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
+		if (entry->counter)
+			list_del(&entry->obj_list);
+		kfree(entry);
 	}
+	spin_unlock_irq(&event_queue->lock);
+}
+
+static int
+uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
+					  enum rdma_remove_reason why)
+{
+	struct ib_uverbs_completion_event_file *file =
+		container_of(uobj, struct ib_uverbs_completion_event_file,
+			     uobj);
+
+	ib_uverbs_free_event_queue(&file->ev_queue);
 	return 0;
-};
+}
 
 int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
 {
@@ -230,7 +246,7 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler);
 DECLARE_UVERBS_NAMED_OBJECT(
 	UVERBS_OBJECT_COMP_CHANNEL,
 	UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
-			     uverbs_hot_unplug_completion_event_file,
+			     uverbs_completion_event_file_destroy_uobj,
 			     &uverbs_event_fops,
 			     "[infinibandevent]",
 			     O_RDONLY));
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
new file mode 100644
index 000000000000..82ec0806b34b
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC);
+
+	ib_uverbs_init_async_event_file(
+		container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+	return 0;
+}
+
+static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+					   enum rdma_remove_reason why)
+{
+	struct ib_uverbs_async_event_file *event_file =
+		container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+	ib_unregister_event_handler(&event_file->event_handler);
+	ib_uverbs_free_event_queue(&event_file->ev_queue);
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+	UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+		       UVERBS_OBJECT_ASYNC_EVENT,
+		       UVERBS_ACCESS_NEW,
+		       UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_ASYNC_EVENT,
+	UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file),
+			     uverbs_async_event_destroy_uobj,
+			     &uverbs_async_event_fops,
+			     "[infinibandevent]",
+			     O_RDONLY),
+	&UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC));
+
+const struct uapi_definition uverbs_def_obj_async_fd[] = {
+	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT),
+	{}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index e39fe6a8aac4..da4110a0eea2 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -41,7 +41,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 	struct ib_cq *cq = uobject->object;
 	struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
 	struct ib_ucq_object *ucq =
-		container_of(uobject, struct ib_ucq_object, uobject);
+		container_of(uobject, struct ib_ucq_object, uevent.uobject);
 	int ret;
 
 	ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
@@ -49,7 +49,6 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 		return ret;
 
 	ib_uverbs_release_ucq(
-		attrs->ufile,
 		ev_queue ? container_of(ev_queue,
 					struct ib_uverbs_completion_event_file,
 					ev_queue) :
@@ -63,7 +62,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 {
 	struct ib_ucq_object *obj = container_of(
 		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
-		typeof(*obj), uobject);
+		typeof(*obj), uevent.uobject);
 	struct ib_device *ib_dev = attrs->context->device;
 	int ret;
 	u64 user_handle;
@@ -106,10 +105,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 		goto err_event_file;
 	}
 
-	obj->comp_events_reported  = 0;
-	obj->async_events_reported = 0;
 	INIT_LIST_HEAD(&obj->comp_list);
-	INIT_LIST_HEAD(&obj->async_list);
+	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
 	if (!cq) {
@@ -118,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	}
 
 	cq->device        = ib_dev;
-	cq->uobject       = &obj->uobject;
+	cq->uobject       = obj;
 	cq->comp_handler  = ib_uverbs_comp_handler;
 	cq->event_handler = ib_uverbs_cq_event_handler;
 	cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
@@ -129,8 +126,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	if (ret)
 		goto err_free;
 
-	obj->uobject.object = cq;
-	obj->uobject.user_handle = user_handle;
+	obj->uevent.uobject.object = cq;
+	obj->uevent.uobject.user_handle = user_handle;
 	rdma_restrack_uadd(&cq->res);
 
 	ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -182,10 +179,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
 	struct ib_uobject *uobj =
 		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
 	struct ib_ucq_object *obj =
-		container_of(uobj, struct ib_ucq_object, uobject);
+		container_of(uobj, struct ib_ucq_object, uevent.uobject);
 	struct ib_uverbs_destroy_cq_resp resp = {
 		.comp_events_reported = obj->comp_events_reported,
-		.async_events_reported = obj->async_events_reported
+		.async_events_reported = obj->uevent.events_reported
 	};
 
 	return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 2a3f2f01028d..ae4a59d6f9b1 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -200,6 +200,43 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
 					     &resp, sizeof(resp));
 }
 
+static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
+	struct uverbs_attr_bundle *attrs)
+{
+	u32 num_comp = attrs->ufile->device->num_comp_vectors;
+	u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+	int ret;
+
+	ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+			     &num_comp, sizeof(num_comp));
+	if (IS_UVERBS_COPY_ERR(ret))
+		return ret;
+
+	ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+			     &core_support, sizeof(core_support));
+	if (IS_UVERBS_COPY_ERR(ret))
+		return ret;
+
+	ret = ib_alloc_ucontext(attrs);
+	if (ret)
+		return ret;
+	ret = ib_init_ucontext(attrs);
+	if (ret) {
+		kfree(attrs->context);
+		attrs->context = NULL;
+		return ret;
+	}
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_GET_CONTEXT,
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+			    UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+			    UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+	UVERBS_ATTR_UHW());
+
 DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_METHOD_INFO_HANDLES,
 	/* Also includes any device specific object ids */
@@ -220,6 +257,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 		UA_MANDATORY));
 
 DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+			      &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
 			      &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
 			      &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
 			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 00c547887132..3f121ac31e0a 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -195,9 +195,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi,
 		 * disassociation, and the FD types require the driver to use
 		 * struct file_operations.owner to prevent the driver module
 		 * code from unloading while the file is open. This provides
-		 * enough safety that uverbs_close_fd() will continue to work.
-		 * Drivers using FD are responsible to handle disassociation of
-		 * the device on their own.
+		 * enough safety that uverbs_uobject_fd_release() will
+		 * continue to work.  Drivers using FD are responsible for
+		 * handling disassociation of the device on their own.
 		 */
 		if (WARN_ON(is_driver &&
 			    obj->type_attrs->type_class != &uverbs_idr_class &&
@@ -626,6 +626,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
 }
 
 static const struct uapi_definition uverbs_core_api[] = {
+	UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
 	UAPI_DEF_CHAIN(uverbs_def_obj_counters),
 	UAPI_DEF_CHAIN(uverbs_def_obj_cq),
 	UAPI_DEF_CHAIN(uverbs_def_obj_device),
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index d33bdc9b01cd..3ebae3b65c28 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -52,6 +52,7 @@
 #include <rdma/rw.h>
 
 #include "core_priv.h"
+#include <trace/events/rdma_core.h>
 
 static int ib_resolve_eth_dmac(struct ib_device *device,
 			       struct rdma_ah_attr *ah_attr);
@@ -1053,11 +1056,11 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
 	struct ib_qp *qp = context;
 	unsigned long flags;
 
-	spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+	spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
 	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
 		if (event->element.qp->event_handler)
 			event->element.qp->event_handler(event, event->element.qp->qp_context);
-	spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
+	spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
 }
 
 static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
@@ -1094,9 +1097,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
 	qp->qp_num = real_qp->qp_num;
 	qp->qp_type = real_qp->qp_type;
 
-	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+	spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
 	list_add(&qp->open_list, &real_qp->open_list);
-	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+	spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
 
 	return qp;
 }
@@ -1824,9 +1827,9 @@ int ib_close_qp(struct ib_qp *qp)
 	if (real_qp == qp)
 		return -EINVAL;
 
-	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+	spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
 	list_del(&qp->open_list);
-	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+	spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
 
 	atomic_dec(&real_qp->usecnt);
 	if (qp->qp_sec)
@@ -2038,6 +2041,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
 	struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
 	int ret;
 
+	trace_mr_dereg(mr);
 	rdma_restrack_del(&mr->res);
 	ret = mr->device->ops.dereg_mr(mr, udata);
 	if (!ret) {
@@ -2069,11 +2073,16 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
 {
 	struct ib_mr *mr;
 
-	if (!pd->device->ops.alloc_mr)
-		return ERR_PTR(-EOPNOTSUPP);
+	if (!pd->device->ops.alloc_mr) {
+		mr = ERR_PTR(-EOPNOTSUPP);
+		goto out;
+	}
 
-	if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY))
-		return ERR_PTR(-EINVAL);
+	if (mr_type == IB_MR_TYPE_INTEGRITY) {
+		WARN_ON_ONCE(1);
+		mr = ERR_PTR(-EINVAL);
+		goto out;
+	}
 
 	mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
 	if (!IS_ERR(mr)) {
@@ -2089,6 +2098,8 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
 		mr->sig_attrs = NULL;
 	}
 
+out:
+	trace_mr_alloc(pd, mr_type, max_num_sg, mr);
 	return mr;
 }
 EXPORT_SYMBOL(ib_alloc_mr_user);
@@ -2113,21 +2124,27 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
 	struct ib_sig_attrs *sig_attrs;
 
 	if (!pd->device->ops.alloc_mr_integrity ||
-	    !pd->device->ops.map_mr_sg_pi)
-		return ERR_PTR(-EOPNOTSUPP);
+	    !pd->device->ops.map_mr_sg_pi) {
+		mr = ERR_PTR(-EOPNOTSUPP);
+		goto out;
+	}
 
-	if (!max_num_meta_sg)
-		return ERR_PTR(-EINVAL);
+	if (!max_num_meta_sg) {
+		mr = ERR_PTR(-EINVAL);
+		goto out;
+	}
 
 	sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
-	if (!sig_attrs)
-		return ERR_PTR(-ENOMEM);
+	if (!sig_attrs) {
+		mr = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
 						max_num_meta_sg);
 	if (IS_ERR(mr)) {
 		kfree(sig_attrs);
-		return mr;
+		goto out;
 	}
 
 	mr->device = pd->device;
@@ -2141,6 +2158,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
 	mr->type = IB_MR_TYPE_INTEGRITY;
 	mr->sig_attrs = sig_attrs;
 
+out:
+	trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
 	return mr;
 }
 EXPORT_SYMBOL(ib_alloc_mr_integrity);
@@ -2785,6 +2804,7 @@ void ib_drain_sq(struct ib_qp *qp)
 		qp->device->ops.drain_sq(qp);
 	else
 		__ib_drain_sq(qp);
+	trace_cq_drain_complete(qp->send_cq);
 }
 EXPORT_SYMBOL(ib_drain_sq);
 
@@ -2813,6 +2833,7 @@ void ib_drain_rq(struct ib_qp *qp)
 		qp->device->ops.drain_rq(qp);
 	else
 		__ib_drain_rq(qp);
+	trace_cq_drain_complete(qp->recv_cq);
 }
 EXPORT_SYMBOL(ib_drain_rq);
 
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index e7e8a0f49464..793c97251588 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -677,7 +677,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 
 	bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
 
-	ibdev->num_comp_vectors	= 1;
+	ibdev->num_comp_vectors	= rdev->num_msix - 1;
 	ibdev->dev.parent = &rdev->en_dev->pdev->dev;
 	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
 
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index e96bcb16bd2b..74b787a90660 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -160,10 +160,16 @@ struct efa_admin_create_qp_resp {
 	/* Common Admin Queue completion descriptor */
 	struct efa_admin_acq_common_desc acq_common_desc;
 
-	/* Opaque handle to be used for consequent operations on the QP */
+	/*
+	 * Opaque handle to be used for subsequent admin operations on the
+	 * QP
+	 */
 	u32 qp_handle;
 
-	/* QP number in the given EFA virtual device */
+	/*
+	 * QP number in the given EFA virtual device. Least-significant bits
+	 *    (as needed according to max_qp) carry unique QP ID
+	 */
 	u16 qp_num;
 
 	/* MBZ */
@@ -286,6 +292,7 @@ struct efa_admin_create_ah_cmd {
 	/* PD number */
 	u16 pd;
 
+	/* MBZ */
 	u16 reserved;
 };
 
@@ -296,6 +303,7 @@ struct efa_admin_create_ah_resp {
 	/* Target interface address handle (opaque) */
 	u16 ah;
 
+	/* MBZ */
 	u16 reserved;
 };
 
@@ -372,6 +380,7 @@ struct efa_admin_reg_mr_cmd {
 	 */
 	u8 permissions;
 
+	/* MBZ */
 	u16 reserved16_w5;
 
 	/* number of pages in PBL (redundant, could be calculated) */
@@ -419,20 +428,20 @@ struct efa_admin_create_cq_cmd {
 	struct efa_admin_aq_common_desc aq_common_desc;
 
 	/*
-	 * 4:0 : reserved5
+	 * 4:0 : reserved5 - MBZ
 	 * 5 : interrupt_mode_enabled - if set, cq operates
 	 *    in interrupt mode (i.e. CQ events and MSI-X are
 	 *    generated), otherwise - polling
 	 * 6 : virt - If set, ring base address is virtual
 	 *    (IOVA returned by MR registration)
-	 * 7 : reserved6
+	 * 7 : reserved6 - MBZ
 	 */
 	u8 cq_caps_1;
 
 	/*
 	 * 4:0 : cq_entry_size_words - size of CQ entry in
 	 *    32-bit words, valid values: 4, 8.
-	 * 7:5 : reserved7
+	 * 7:5 : reserved7 - MBZ
 	 */
 	u8 cq_caps_2;
 
@@ -478,6 +487,7 @@ struct efa_admin_destroy_cq_cmd {
 
 	u16 cq_idx;
 
+	/* MBZ */
 	u16 reserved1;
 };
 
@@ -530,7 +540,7 @@ struct efa_admin_get_set_feature_common_desc {
 	/*
 	 * 1:0 : select - 0x1 - current value; 0x3 - default
 	 *    value
-	 * 7:3 : reserved3
+	 * 7:3 : reserved3 - MBZ
 	 */
 	u8 flags;
 
@@ -557,10 +567,10 @@ struct efa_admin_feature_device_attr_desc {
 	/* Bar used for SQ and RQ doorbells */
 	u16 db_bar;
 
-	/* Indicates how many bits are used physical address access */
+	/* Indicates how many bits are used on physical address access */
 	u8 phys_addr_width;
 
-	/* Indicates how many bits are used virtual address access */
+	/* Indicates how many bits are used on virtual address access */
 	u8 virt_addr_width;
 
 	/*
@@ -578,27 +588,28 @@ struct efa_admin_feature_queue_attr_desc {
 	/* The maximum number of queue pairs supported */
 	u32 max_qp;
 
+	/* Maximum number of WQEs per Send Queue */
 	u32 max_sq_depth;
 
-	/* max send wr used in inline-buf */
+	/* Maximum size of data that can be sent inline in a Send WQE */
 	u32 inline_buf_size;
 
+	/* Maximum number of buffer descriptors per Recv Queue */
 	u32 max_rq_depth;
 
 	/* The maximum number of completion queues supported per VF */
 	u32 max_cq;
 
+	/* Maximum number of CQEs per Completion Queue */
 	u32 max_cq_depth;
 
 	/* Number of sub-CQs to be created for each CQ */
 	u16 sub_cqs_per_cq;
 
+	/* MBZ */
 	u16 reserved;
 
-	/*
-	 * Maximum number of SGEs (buffs) allowed for a single send work
-	 *    queue element (WQE)
-	 */
+	/* Maximum number of SGEs (buffers) allowed for a single send WQE */
 	u16 max_wr_send_sges;
 
 	/* Maximum number of SGEs allowed for a single recv WQE */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 4822f5fa12be..ec5545870554 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -387,8 +387,7 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
 	return efa_com_destroy_qp(&dev->edev, &params);
 }
 
-static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx,
-					    struct efa_qp *qp)
+static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
 {
 	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
 	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
@@ -398,8 +397,6 @@ static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx,
 
 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
-	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
-		struct efa_ucontext, ibucontext);
 	struct efa_dev *dev = to_edev(ibqp->pd->device);
 	struct efa_qp *qp = to_eqp(ibqp);
 	int err;
@@ -418,7 +415,7 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 				 DMA_TO_DEVICE);
 	}
 
-	efa_qp_user_mmap_entries_remove(ucontext, qp);
+	efa_qp_user_mmap_entries_remove(qp);
 	kfree(qp);
 	return 0;
 }
@@ -510,7 +507,7 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
 	return 0;
 
 err_remove_mmap:
-	efa_qp_user_mmap_entries_remove(ucontext, qp);
+	efa_qp_user_mmap_entries_remove(qp);
 
 	return -ENOMEM;
 }
@@ -719,7 +716,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 	return &qp->ibqp;
 
 err_remove_mmap_entries:
-	efa_qp_user_mmap_entries_remove(ucontext, qp);
+	efa_qp_user_mmap_entries_remove(qp);
 err_destroy_qp:
 	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
 err_free_mapped:
@@ -1370,6 +1367,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 		IB_ACCESS_LOCAL_WRITE |
 		(is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
 
+	access_flags &= ~IB_ACCESS_OPTIONAL;
 	if (access_flags & ~supp_access_flags) {
 		ibdev_dbg(&dev->ibdev,
 			  "Unsupported access flags[%#x], supported[%#x]\n",
@@ -1608,13 +1606,12 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
 		err = -EINVAL;
 	}
 
-	if (err) {
+	if (err)
 		ibdev_dbg(
 			&dev->ibdev,
 			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
 			entry->address, rdma_entry->npages * PAGE_SIZE,
 			entry->mmap_flag, err);
-	}
 
 	rdma_user_mmap_entry_put(rdma_entry);
 	return err;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9b1fb84a3d45..e0b1238d31df 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
 	return dd->verbs_dev.n_piodrain;
 }
 
+static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
+				   void *context, int vl, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = context;
+
+	return dd->ctx0_seq_drop;
+}
+
 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
 			      void *context, int vl, int mode, u64 data)
 {
@@ -4106,6 +4114,7 @@ def_access_ibp_counter(rc_crwaits);
 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
 [C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
 [C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
 [C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
@@ -4249,6 +4258,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 			    access_sw_cpu_intr),
 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
 			    access_sw_cpu_rcv_limit),
+[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
+			    access_sw_ctx0_seq_drop),
 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
 			    access_sw_vtx_wait),
 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
@@ -6862,7 +6873,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 		}
 		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
 		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
-		rcvmask |= rcd->rcvhdrtail_kvaddr ?
+		rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		hfi1_rcvctrl(dd, rcvmask, rcd);
 		hfi1_rcd_put(rcd);
@@ -8394,20 +8405,62 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd)
 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 {
 	u32 tail;
-	int present;
 
-	if (!rcd->rcvhdrtail_kvaddr)
-		present = (rcd->seq_cnt ==
-				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
-	else /* is RDMA rtail */
-		present = (rcd->head != get_rcvhdrtail(rcd));
-
-	if (present)
+	if (hfi1_packet_present(rcd))
 		return 1;
 
 	/* fall back to a CSR read, correct indpendent of DMA_RTAIL */
 	tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
-	return rcd->head != tail;
+	return hfi1_rcd_head(rcd) != tail;
+}
+
+/**
+ * Common code for receive context interrupt handlers:
+ * update traces, increment the kernel IRQ counter and
+ * set up ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+	struct hfi1_devdata *dd = rcd->dd;
+
+	trace_hfi1_receive_interrupt(dd, rcd);
+	this_cpu_inc(*dd->int_counter);
+	aspm_ctx_disable(rcd);
+}
+
+/**
+ * __hfi1_rcd_eoi_intr() - Make the HW issue a receive interrupt
+ * when there are packets present in the queue. When calling
+ * with interrupts enabled, use hfi1_rcd_eoi_intr() instead.
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
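+	/* clear, then recheck; a packet may have raced the interrupt clear */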
+	clear_recv_intr(rcd);
+	if (check_packet_present(rcd))
+		force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ *  Hold IRQs so we can safely clear the interrupt and
+ *  recheck for a packet that may have arrived after the previous
+ *  check and the interrupt clear.  If a packet arrived, force another
+ *  interrupt. This routine can be called at the end of receive packet
+ *  processing in interrupt service routines, interrupt service threads
+ *  and softirqs.
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__hfi1_rcd_eoi_intr(rcd);
+	local_irq_restore(flags);
 }
 
 /*
@@ -8421,13 +8474,9 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 irqreturn_t receive_context_interrupt(int irq, void *data)
 {
 	struct hfi1_ctxtdata *rcd = data;
-	struct hfi1_devdata *dd = rcd->dd;
 	int disposition;
-	int present;
 
-	trace_hfi1_receive_interrupt(dd, rcd);
-	this_cpu_inc(*dd->int_counter);
-	aspm_ctx_disable(rcd);
+	receive_interrupt_common(rcd);
 
 	/* receive interrupt remains blocked while processing packets */
 	disposition = rcd->do_interrupt(rcd, 0);
@@ -8440,17 +8489,7 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
 	if (disposition == RCV_PKT_LIMIT)
 		return IRQ_WAKE_THREAD;
 
-	/*
-	 * The packet processor detected no more packets.  Clear the receive
-	 * interrupt and recheck for a packet packet that may have arrived
-	 * after the previous check and interrupt clear.  If a packet arrived,
-	 * force another interrupt.
-	 */
-	clear_recv_intr(rcd);
-	present = check_packet_present(rcd);
-	if (present)
-		force_recv_intr(rcd);
-
+	__hfi1_rcd_eoi_intr(rcd);
 	return IRQ_HANDLED;
 }
 
@@ -8461,24 +8500,11 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
 irqreturn_t receive_context_thread(int irq, void *data)
 {
 	struct hfi1_ctxtdata *rcd = data;
-	int present;
 
 	/* receive interrupt is still blocked from the IRQ handler */
 	(void)rcd->do_interrupt(rcd, 1);
 
-	/*
-	 * The packet processor will only return if it detected no more
-	 * packets.  Hold IRQs here so we can safely clear the interrupt and
-	 * recheck for a packet that may have arrived after the previous
-	 * check and the interrupt clear.  If a packet arrived, force another
-	 * interrupt.
-	 */
-	local_irq_disable();
-	clear_recv_intr(rcd);
-	present = check_packet_present(rcd);
-	if (present)
-		force_recv_intr(rcd);
-	local_irq_enable();
+	hfi1_rcd_eoi_intr(rcd);
 
 	return IRQ_HANDLED;
 }
@@ -10049,7 +10075,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
 	 * the first kernel context would have been allocated by now so
 	 * we are guaranteed a valid value.
 	 */
-	return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+	return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
 }
 
 /*
@@ -10094,7 +10120,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 		thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
 			    sc_mtu_to_threshold(dd->vld[i].sc,
 						dd->vld[i].mtu,
-						dd->rcd[0]->rcvhdrqentsize));
+						get_hdrqentsize(dd->rcd[0])));
 		for (j = 0; j < INIT_SC_PER_VL; j++)
 			sc_set_cr_threshold(
 					pio_select_send_context_vl(dd, j, i),
@@ -11821,7 +11847,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
 	head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
 		& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
 
-	if (rcd->rcvhdrtail_kvaddr)
+	if (hfi1_rcvhdrtail_kvaddr(rcd))
 		tail = get_rcvhdrtail(rcd);
 	else
 		tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11865,6 +11891,84 @@ static u32 encoded_size(u32 size)
 	return 0x1;	/* if invalid, go with the minimum size */
 }
 
+/**
+ * encode_rcv_header_entry_size - return the chip-specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size to the encoding used in the CSR.
+ *
+ * Return zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+	/* there are only 3 valid receive header entry sizes */
+	if (size == 2)
+		return 1;
+	if (size == 16)
+		return 2;
+	if (size == 32)
+		return 4;
+	return 0; /* invalid */
+}
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+		dd_dev_err(dd, "Receive header queue count too small\n");
+		return -EINVAL;
+	}
+
+	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		dd_dev_err(dd,
+			   "Receive header queue count cannot be greater than %u\n",
+			   HFI1_MAX_HDRQ_EGRBUF_CNT);
+		return -EINVAL;
+	}
+
+	if (thecnt % HDRQ_INCREMENT) {
+		dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+			   thecnt, HDRQ_INCREMENT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+	u64 reg;
+
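+	/* these CSRs are per-context: RcvHdrCnt, RcvHdrEntSize, RcvHdrSize */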
+	reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+	      RCV_HDR_CNT_CNT_SHIFT;
+	write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+	reg = ((u64)encode_rcv_header_entry_size(entsize) &
+	       RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+	      RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+	write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+	reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+	      RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+	write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+	/*
+	 * Program dummy tail address for every receive context
+	 * before enabling any receive context
+	 */
+	write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+			dd->rcvhdrtail_dummy_dma);
+}
+
 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 		  struct hfi1_ctxtdata *rcd)
 {
@@ -11886,13 +11990,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
 				rcd->rcvhdrq_dma);
-		if (rcd->rcvhdrtail_kvaddr)
+		if (hfi1_rcvhdrtail_kvaddr(rcd))
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
 					rcd->rcvhdrqtailaddr_dma);
-		rcd->seq_cnt = 1;
+		hfi1_set_seq_cnt(rcd, 1);
 
 		/* reset the cached receive header queue head value */
-		rcd->head = 0;
+		hfi1_set_rcd_head(rcd, 0);
 
 		/*
 		 * Zero the receive header queue so we don't get false
@@ -11972,7 +12076,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 			      IS_RCVAVAIL_START + rcd->ctxt, false);
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	}
-	if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
+	if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
 		/* See comment on RcvCtxtCtrl.TailUpd above */
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 4ca5ac8d7e9e..725509261016 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -358,6 +358,8 @@
 #define MAX_EAGER_BUFFER       (256 * 1024)
 #define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
 #define MAX_EXPECTED_BUFFER    (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 
 /*
  * Receive expected base and count and eager base and count increment -
@@ -699,6 +701,10 @@ static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
 	return read_csr(dd, RCV_ARRAY_CNT);
 }
 
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
 	       u32 dw_len);
 
@@ -859,6 +865,7 @@ static inline int idx_from_vl(int vl)
 enum {
 	C_RCV_OVF = 0,
 	C_RX_LEN_ERR,
+	C_RX_SHORT_ERR,
 	C_RX_ICRC_ERR,
 	C_RX_EBP,
 	C_RX_TID_FULL,
@@ -926,6 +933,7 @@ enum {
 	C_DC_PG_STS_TX_MBE_CNT,
 	C_SW_CPU_INTR,
 	C_SW_CPU_RCV_LIM,
+	C_SW_CTX0_SEQ_DROP,
 	C_SW_VTX_WAIT,
 	C_SW_PIO_WAIT,
 	C_SW_PIO_DRAIN,
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ab3589d17aee..fb3ec9bff7a2 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -381,6 +381,7 @@
 #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
 #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
 #define RCV_LENGTH_ERR_CNT 0
+#define RCV_SHORT_ERR_CNT 2
 #define RCV_ICRC_ERR_CNT 6
 #define RCV_EBP_CNT 9
 #define RCV_BUF_OVFL_CNT 10
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index d47da7b0438f..40a1ff0c8a8e 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -323,6 +323,9 @@ struct diag_pkt {
 /* RHF receive type error - bypass packet errors */
 #define RHF_RTE_BYPASS_NO_ERR		0x0
 
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
 /* IB - LRH header constants */
 #define HFI1_LRH_GRH 0x0003      /* 1. word of IB LRH - next header: GRH */
 #define HFI1_LRH_BTH 0x0002      /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index d268bf9c42ee..4633a0ce1a8c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -379,7 +379,7 @@ static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	struct hfi1_devdata *dd = dd_from_dev(ibd);
 
 	++*pos;
-	if (!dd->rcd || *pos >= dd->n_krcv_queues)
+	if (!dd->rcd || *pos >= dd->num_rcv_contexts)
 		return NULL;
 	return pos;
 }
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 01aa1f132f55..049d15befe58 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -411,14 +411,14 @@ drop:
 static inline void init_packet(struct hfi1_ctxtdata *rcd,
 			       struct hfi1_packet *packet)
 {
-	packet->rsize = rcd->rcvhdrqentsize; /* words */
-	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+	packet->rsize = get_hdrqentsize(rcd); /* words */
+	packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
 	packet->rcd = rcd;
 	packet->updegr = 0;
 	packet->etail = -1;
 	packet->rhf_addr = get_rhf_addr(rcd);
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
-	packet->rhqoff = rcd->head;
+	packet->rhqoff = hfi1_rcd_head(rcd);
 	packet->numpkt = 0;
 }
 
@@ -551,22 +551,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata,
 	mdata->maxcnt = packet->maxcnt;
 	mdata->ps_head = packet->rhqoff;
 
-	if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+	if (get_dma_rtail_setting(rcd)) {
 		mdata->ps_tail = get_rcvhdrtail(rcd);
 		if (rcd->ctxt == HFI1_CTRL_CTXT)
-			mdata->ps_seq = rcd->seq_cnt;
+			mdata->ps_seq = hfi1_seq_cnt(rcd);
 		else
 			mdata->ps_seq = 0; /* not used with DMA_RTAIL */
 	} else {
 		mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
-		mdata->ps_seq = rcd->seq_cnt;
+		mdata->ps_seq = hfi1_seq_cnt(rcd);
 	}
 }
 
 static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
 			  struct hfi1_ctxtdata *rcd)
 {
-	if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+	if (get_dma_rtail_setting(rcd))
 		return mdata->ps_head == mdata->ps_tail;
 	return mdata->ps_seq != rhf_rcv_seq(rhf);
 }
@@ -592,11 +592,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
 		mdata->ps_head = 0;
 
 	/* Control context must do seq counting */
-	if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
-	    (rcd->ctxt == HFI1_CTRL_CTXT)) {
-		if (++mdata->ps_seq > 13)
-			mdata->ps_seq = 1;
-	}
+	if (!get_dma_rtail_setting(rcd) ||
+	    rcd->ctxt == HFI1_CTRL_CTXT)
+		mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
 }
 
 /*
@@ -734,6 +732,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 {
 	int ret;
 
+	packet->rcd->dd->ctx0_seq_drop++;
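+	/* the drop above is reported through the SeqDrop0 device counter */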
 	/* Set up for the next packet */
 	packet->rhqoff += packet->rsize;
 	if (packet->rhqoff >= packet->maxcnt)
@@ -769,7 +768,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 		 * The +2 is the size of the RHF.
 		 */
 		prefetch_range(packet->ebuf,
-			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
+			       packet->tlen - ((get_hdrqentsize(packet->rcd) -
 					       (rhf_hdrq_offset(packet->rhf)
 						+ 2)) * 4));
 	}
@@ -823,7 +822,7 @@ static inline void finish_packet(struct hfi1_packet *packet)
 	 * The only thing we need to do is a final update and call for an
 	 * interrupt
 	 */
-	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+	update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
 		       packet->etail, rcv_intr_dynamic, packet->numpkt);
 }
 
@@ -832,13 +831,11 @@ static inline void finish_packet(struct hfi1_packet *packet)
  */
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 {
-	u32 seq;
 	int last = RCV_PKT_OK;
 	struct hfi1_packet packet;
 
 	init_packet(rcd, &packet);
-	seq = rhf_rcv_seq(packet.rhf);
-	if (seq != rcd->seq_cnt) {
+	if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
 		last = RCV_PKT_DONE;
 		goto bail;
 	}
@@ -847,15 +844,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
-		seq = rhf_rcv_seq(packet.rhf);
-		if (++rcd->seq_cnt > 13)
-			rcd->seq_cnt = 1;
-		if (seq != rcd->seq_cnt)
+		if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
 			last = RCV_PKT_DONE;
 		process_rcv_update(last, &packet);
 	}
 	process_rcv_qp_work(&packet);
-	rcd->head = packet.rhqoff;
+	hfi1_set_rcd_head(rcd, packet.rhqoff);
 bail:
 	finish_packet(&packet);
 	return last;
@@ -884,15 +878,14 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 		process_rcv_update(last, &packet);
 	}
 	process_rcv_qp_work(&packet);
-	rcd->head = packet.rhqoff;
+	hfi1_set_rcd_head(rcd, packet.rhqoff);
 bail:
 	finish_packet(&packet);
 	return last;
 }
 
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_ctxtdata *rcd;
 	u16 i;
 
 	/*
@@ -900,50 +893,17 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
 	 * interrupt handler only for that context. Otherwise, switch
 	 * interrupt handler for all statically allocated kernel contexts.
 	 */
-	if (ctxt >= dd->first_dyn_alloc_ctxt) {
-		rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
-		if (rcd) {
-			rcd->do_interrupt =
-				&handle_receive_interrupt_nodma_rtail;
-			hfi1_rcd_put(rcd);
-		}
-		return;
-	}
-
-	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
-		rcd = hfi1_rcd_get_by_index(dd, i);
-		if (rcd)
-			rcd->do_interrupt =
-				&handle_receive_interrupt_nodma_rtail;
+	if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+		hfi1_rcd_get(rcd);
+		hfi1_set_fast(rcd);
 		hfi1_rcd_put(rcd);
-	}
-}
-
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
-{
-	struct hfi1_ctxtdata *rcd;
-	u16 i;
-
-	/*
-	 * For dynamically allocated kernel contexts (like vnic) switch
-	 * interrupt handler only for that context. Otherwise, switch
-	 * interrupt handler for all statically allocated kernel contexts.
-	 */
-	if (ctxt >= dd->first_dyn_alloc_ctxt) {
-		rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
-		if (rcd) {
-			rcd->do_interrupt =
-				&handle_receive_interrupt_dma_rtail;
-			hfi1_rcd_put(rcd);
-		}
 		return;
 	}
 
-	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
+	for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
 		rcd = hfi1_rcd_get_by_index(dd, i);
-		if (rcd)
-			rcd->do_interrupt =
-				&handle_receive_interrupt_dma_rtail;
+		if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+			hfi1_set_fast(rcd);
 		hfi1_rcd_put(rcd);
 	}
 }
@@ -959,17 +919,14 @@ void set_all_slowpath(struct hfi1_devdata *dd)
 		if (!rcd)
 			continue;
 		if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
-			rcd->do_interrupt = &handle_receive_interrupt;
+			rcd->do_interrupt = rcd->slow_handler;
 
 		hfi1_rcd_put(rcd);
 	}
 }
 
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
-				      struct hfi1_packet *packet,
-				      struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
 {
-	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
 	u8 etype = rhf_rcv_type(packet->rhf);
 	u8 sc = SC15_PACKET;
 
@@ -984,19 +941,34 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
 		sc = hfi1_16B_get_sc(hdr);
 	}
 	if (sc != SC15_PACKET) {
-		int hwstate = driver_lstate(rcd->ppd);
+		int hwstate = driver_lstate(packet->rcd->ppd);
+		struct work_struct *lsaw =
+				&packet->rcd->ppd->linkstate_active_work;
 
 		if (hwstate != IB_PORT_ACTIVE) {
-			dd_dev_info(dd,
+			dd_dev_info(packet->rcd->dd,
 				    "Unexpected link state %s\n",
 				    opa_lstate_name(hwstate));
-			return 0;
+			return false;
 		}
 
-		queue_work(rcd->ppd->link_wq, lsaw);
-		return 1;
+		queue_work(packet->rcd->ppd->link_wq, lsaw);
+		return true;
 	}
-	return 0;
+	return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
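+	/* the link is normally already active; only handle the ARMED case */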
+	if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+		return false;
+	return __set_armed_to_active(packet);
 }
 
 /*
@@ -1019,10 +991,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 
 	init_packet(rcd, &packet);
 
-	if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
-		u32 seq = rhf_rcv_seq(packet.rhf);
-
-		if (seq != rcd->seq_cnt) {
+	if (!get_dma_rtail_setting(rcd)) {
+		if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
 			last = RCV_PKT_DONE;
 			goto bail;
 		}
@@ -1039,22 +1009,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 		 * Control context can potentially receive an invalid
 		 * rhf. Drop such packets.
 		 */
-		if (rcd->ctxt == HFI1_CTRL_CTXT) {
-			u32 seq = rhf_rcv_seq(packet.rhf);
-
-			if (seq != rcd->seq_cnt)
+		if (rcd->ctxt == HFI1_CTRL_CTXT)
+			if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
 				skip_pkt = 1;
-		}
 	}
 
 	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
-		if (unlikely(dd->do_drop &&
-			     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
-			     DROP_PACKET_ON)) {
-			dd->do_drop = 0;
-
+		if (hfi1_need_drop(dd)) {
 			/* On to the next packet */
 			packet.rhqoff += packet.rsize;
 			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
@@ -1066,26 +1029,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			last = skip_rcv_packet(&packet, thread);
 			skip_pkt = 0;
 		} else {
-			/* Auto activate link on non-SC15 packet receive */
-			if (unlikely(rcd->ppd->host_link_state ==
-				     HLS_UP_ARMED) &&
-			    set_armed_to_active(rcd, &packet, dd))
+			if (set_armed_to_active(&packet))
 				goto bail;
 			last = process_rcv_packet(&packet, thread);
 		}
 
-		if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
-			u32 seq = rhf_rcv_seq(packet.rhf);
-
-			if (++rcd->seq_cnt > 13)
-				rcd->seq_cnt = 1;
-			if (seq != rcd->seq_cnt)
+		if (!get_dma_rtail_setting(rcd)) {
+			if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
 				last = RCV_PKT_DONE;
-			if (needset) {
-				dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
-				set_nodma_rtail(dd, rcd->ctxt);
-				needset = 0;
-			}
 		} else {
 			if (packet.rhqoff == hdrqtail)
 				last = RCV_PKT_DONE;
@@ -1094,27 +1045,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			 * rhf. Drop such packets.
 			 */
 			if (rcd->ctxt == HFI1_CTRL_CTXT) {
-				u32 seq = rhf_rcv_seq(packet.rhf);
+				bool lseq;
 
-				if (++rcd->seq_cnt > 13)
-					rcd->seq_cnt = 1;
-				if (!last && (seq != rcd->seq_cnt))
+				lseq = hfi1_seq_incr(rcd,
+						     rhf_rcv_seq(packet.rhf));
+				if (!last && lseq)
 					skip_pkt = 1;
 			}
-
-			if (needset) {
-				dd_dev_info(dd,
-					    "Switching to DMA_RTAIL\n");
-				set_dma_rtail(dd, rcd->ctxt);
-				needset = 0;
-			}
 		}
 
+		if (needset) {
+			needset = false;
+			set_all_fastpath(dd, rcd);
+		}
 		process_rcv_update(last, &packet);
 	}
 
 	process_rcv_qp_work(&packet);
-	rcd->head = packet.rhqoff;
+	hfi1_set_rcd_head(rcd, packet.rhqoff);
 
 bail:
 	/*
@@ -1606,23 +1554,22 @@ void handle_eflags(struct hfi1_packet *packet)
  * The following functions are called by the interrupt handler. They are type
  * specific handlers for each packet type.
  */
-static int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
 {
 	if (hfi1_setup_9B_packet(packet))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	trace_hfi1_rcvhdr(packet);
 
 	if (unlikely(rhf_err_flags(packet->rhf))) {
 		handle_eflags(packet);
-		return RHF_RCV_CONTINUE;
+		return;
 	}
 
 	hfi1_ib_rcv(packet);
-	return RHF_RCV_CONTINUE;
 }
 
 static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
@@ -1638,23 +1585,23 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
 	return false;
 }
 
-static int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
 {
 	struct hfi1_devdata *dd = packet->rcd->dd;
 
 	if (hfi1_is_vnic_packet(packet)) {
 		hfi1_vnic_bypass_rcv(packet);
-		return RHF_RCV_CONTINUE;
+		return;
 	}
 
 	if (hfi1_setup_bypass_packet(packet))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	trace_hfi1_rcvhdr(packet);
 
 	if (unlikely(rhf_err_flags(packet->rhf))) {
 		handle_eflags(packet);
-		return RHF_RCV_CONTINUE;
+		return;
 	}
 
 	if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
@@ -1677,17 +1624,16 @@ static int process_receive_bypass(struct hfi1_packet *packet)
 				(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
 		}
 	}
-	return RHF_RCV_CONTINUE;
 }
 
-static int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
 {
 	/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
 	if (unlikely(
 		 hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
 		 (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
 		  packet->rhf & RHF_DC_ERR)))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	hfi1_setup_ib_header(packet);
 	handle_eflags(packet);
@@ -1695,32 +1641,29 @@ static int process_receive_error(struct hfi1_packet *packet)
 	if (unlikely(rhf_err_flags(packet->rhf)))
 		dd_dev_err(packet->rcd->dd,
 			   "Unhandled error packet received. Dropping.\n");
-
-	return RHF_RCV_CONTINUE;
 }
 
-static int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
 {
 	hfi1_setup_9B_packet(packet);
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	if (unlikely(rhf_err_flags(packet->rhf))) {
 		struct hfi1_ctxtdata *rcd = packet->rcd;
 
 		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
-			return RHF_RCV_CONTINUE;
+			return;
 	}
 
 	hfi1_kdeth_expected_rcv(packet);
-	return RHF_RCV_CONTINUE;
 }
 
-static int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
 {
 	hfi1_setup_9B_packet(packet);
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
-		return RHF_RCV_CONTINUE;
+		return;
 
 	trace_hfi1_rcvhdr(packet);
 	if (unlikely(rhf_err_flags(packet->rhf))) {
@@ -1728,37 +1671,41 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
 
 		show_eflags_errs(packet);
 		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
-			return RHF_RCV_CONTINUE;
+			return;
 	}
 
 	hfi1_kdeth_eager_rcv(packet);
-	return RHF_RCV_CONTINUE;
 }
 
-static int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
 {
 	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
 		   rhf_rcv_type(packet->rhf));
-	return RHF_RCV_CONTINUE;
 }
 
+#define HFI1_RCVHDR_DUMP_MAX	5
+
 void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_packet packet;
 	struct ps_mdata mdata;
+	int i;
 
-	seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
-		   rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
-		   HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+	seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu  sw head %u\n",
+		   rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+		   get_dma_rtail_setting(rcd) ?
 		   "dma_rtail" : "nodma_rtail",
+		   read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+		   read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
 		   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
 		   RCV_HDR_HEAD_HEAD_MASK,
-		   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
+		   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+		   rcd->head);
 
 	init_packet(rcd, &packet);
 	init_ps_mdata(&mdata, &packet);
 
-	while (1) {
+	for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
 		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 					 rcd->rhf_offset;
 		struct ib_header *hdr;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7c5e3fb22413..bef6946861b2 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -505,12 +505,12 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 			ret = -EINVAL;
 			goto done;
 		}
-		if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
+		if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
 			ret = -EPERM;
 			goto done;
 		}
 		memlen = PAGE_SIZE;
-		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+		memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
 		flags &= ~VM_MAYWRITE;
 		break;
 	case SUBCTXT_UREGS:
@@ -1090,7 +1090,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
 	 * don't have to wait to be sure the DMA update has happened
 	 * (chip resets head/tail to 0 on transition to enable).
 	 */
-	if (uctxt->rcvhdrtail_kvaddr)
+	if (hfi1_rcvhdrtail_kvaddr(uctxt))
 		clear_rcvhdrtail(uctxt);
 
 	/* Setup J_KEY before enabling the context */
@@ -1154,8 +1154,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
 	cinfo.send_ctxt = uctxt->sc->hw_context;
 
 	cinfo.egrtids = uctxt->egrbufs.alloced;
-	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
-	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+	cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+	cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
 	cinfo.sdma_ring_size = fd->cq->nentries;
 	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
 
@@ -1543,7 +1543,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
 		 * always resets it's tail register back to 0 on a
 		 * transition from disabled to enabled.
 		 */
-		if (uctxt->rcvhdrtail_kvaddr)
+		if (hfi1_rcvhdrtail_kvaddr(uctxt))
 			clear_rcvhdrtail(uctxt);
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
 	} else {
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index fc10d65fc3e1..6365e8ffed9d 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -197,7 +197,9 @@ struct exp_tid_set {
 	u32 count;
 };
 
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
 
 struct tid_queue {
 	struct list_head queue_head;
@@ -226,7 +228,11 @@ struct hfi1_ctxtdata {
 	 * be valid. Worst case is we process an extra interrupt and up to 64
 	 * packets with the wrong interrupt handler.
 	 */
-	int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+	intr_handler do_interrupt;
+	/** fast handler after autoactive */
+	intr_handler fast_handler;
+	/** slow handler */
+	intr_handler slow_handler;
 	/* verbs rx_stats per rcd */
 	struct hfi1_opcode_stats_perctx *opstats;
 	/* clear interrupt mask */
@@ -1153,6 +1159,8 @@ struct hfi1_devdata {
 
 	char *boardname; /* human readable board info */
 
+	u64 ctx0_seq_drop;
+
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
@@ -1310,7 +1318,7 @@ struct hfi1_devdata {
 	struct err_info_constraint err_info_xmit_constraint;
 
 	atomic_t drop_packet;
-	u8 do_drop;
+	bool do_drop;
 	u8 err_info_uncorrectable;
 	u8 err_info_fmconfig;
 
@@ -1507,12 +1515,148 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
 #define RCV_PKT_LIMIT   0x1 /* stop, hit limit, start thread */
 #define RCV_PKT_DONE    0x2 /* stop, no more packets detected */
 
+/**
+ * hfi1_rcd_head - return the cached rcd head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+	return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - set the cached rcd head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+	rcd->head = head;
+}
+
 /* calculate the current RHF address */
 static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
 {
 	return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
 }
 
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+	return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
+
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
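+	/* valid RcvSeq values run from 1 to RHF_MAX_SEQ (13), then wrap back to 1 */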
+	if (++seq > RHF_MAX_SEQ)
+		seq = 1;
+	return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+	return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - set seq_cnt member
+ * @rcd: the receive context
+ * @cnt: the new sequence count
+ *
+ * Set the seq_cnt member
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+	rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - detect the last packet
+ * @rcd: the receive context
+ * @seq: the RHF sequence number to check
+ *
+ * Return true if @seq does not match the expected sequence count,
+ * i.e. the last packet has been reached
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+	return seq != rcd->seq_cnt;
+}
+
+/**
+ * hfi1_seq_incr - increment context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+	rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+	return last_rcv_seq(rcd, seq);
+}
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+	return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
+{
+	return rcd->rcvhdrq_cnt;
+}
+
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+	return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+	if (rcd->ctxt == HFI1_CTRL_CTXT)
+		return false;
+
+	return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+	if (unlikely(!rcd))
+		return;
+	if (unlikely(!hfi1_is_fastpath(rcd)))
+		rcd->do_interrupt = rcd->fast_handler;
+}
+
 int hfi1_reset_device(int);
 
 void receive_interrupt_work(struct work_struct *work);
@@ -2015,9 +2159,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 			     size_t npages, bool dirty);
 
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd: the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+	return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
 static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 {
-	*((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+	u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
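+	/* some contexts have no tail buffer; nothing to clear in that case */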
+	if (kv)
+		*kv = 0ULL;
 }
 
 static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -2026,7 +2182,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 	 * volatile because it's a DMA target from the chip, routine is
 	 * inlined, and don't want register caching or reordering.
 	 */
-	return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+	return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
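+	/*
+	 * Without a DMA'd tail pointer, presence is inferred from the RHF
+	 * sequence number; otherwise compare the cached head with the tail.
+	 */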
+	if (likely(!rcd->rcvhdrtail_kvaddr)) {
+		u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+		return !last_rcv_seq(rcd, seq);
+	}
+	return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
 }
 
 /*
@@ -2298,6 +2464,25 @@ static inline bool is_integrated(struct hfi1_devdata *dd)
 	return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
 }
 
+/**
+ * hfi1_need_drop - detect need for drop
+ * @dd: the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true if the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
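+	/* atomically consume the drop request so only a single packet is dropped */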
+	if (unlikely(dd->do_drop &&
+		     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+		     DROP_PACKET_ON)) {
+		dd->do_drop = false;
+		return true;
+	}
+	return false;
+}
+
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 
 #define DD_DEV_ENTRY(dd)       __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 26b792bb1027..e3acda7a0800 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -78,8 +78,6 @@
  */
 #define HFI1_MIN_USER_CTXT_BUFCNT 7
 
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
 
@@ -122,8 +120,6 @@ unsigned int user_credit_return_threshold = 33;	/* default is 33% */
 module_param(user_credit_return_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
 
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
 DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 
 static int hfi1_create_kctxt(struct hfi1_devdata *dd,
@@ -154,7 +150,12 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
 	/* Control context must use DMA_RTAIL */
 	if (rcd->ctxt == HFI1_CTRL_CTXT)
 		rcd->flags |= HFI1_CAP_DMA_RTAIL;
-	rcd->seq_cnt = 1;
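+	/* select the fast-path handler matching the DMA_RTAIL setting */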
+	rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+				handle_receive_interrupt_dma_rtail :
+				handle_receive_interrupt_nodma_rtail;
+	rcd->slow_handler = handle_receive_interrupt;
+
+	hfi1_set_seq_cnt(rcd, 1);
 
 	rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
 	if (!rcd->sc) {
@@ -511,23 +512,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
 }
 
 /*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
-	/* there are only 3 valid receive header entry sizes */
-	if (size == 2)
-		return 1;
-	if (size == 16)
-		return 2;
-	else if (size == 32)
-		return 4;
-	return 0; /* invalid */
-}
-
-/*
  * Select the largest ccti value over all SLs to determine the intra-
  * packet gap for the link.
  *
@@ -892,10 +876,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 
 	if (is_ax(dd)) {
 		atomic_set(&dd->drop_packet, DROP_PACKET_ON);
-		dd->do_drop = 1;
+		dd->do_drop = true;
 	} else {
 		atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
-		dd->do_drop = 0;
+		dd->do_drop = false;
 	}
 
 	/* make sure the link is not "up" */
@@ -1149,9 +1133,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 		dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
 				  rcd->rcvhdrq, rcd->rcvhdrq_dma);
 		rcd->rcvhdrq = NULL;
-		if (rcd->rcvhdrtail_kvaddr) {
+		if (hfi1_rcvhdrtail_kvaddr(rcd)) {
 			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-					  (void *)rcd->rcvhdrtail_kvaddr,
+					  (void *)hfi1_rcvhdrtail_kvaddr(rcd),
 					  rcd->rcvhdrqtailaddr_dma);
 			rcd->rcvhdrtail_kvaddr = NULL;
 		}
@@ -1611,29 +1595,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 	hfi1_free_devdata(dd);
 }
 
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
-	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-		dd_dev_err(dd, "Receive header queue count too small\n");
-		return -EINVAL;
-	}
-
-	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-		dd_dev_err(dd,
-			   "Receive header queue count cannot be greater than %u\n",
-			   HFI1_MAX_HDRQ_EGRBUF_CNT);
-		return -EINVAL;
-	}
-
-	if (thecnt % HDRQ_INCREMENT) {
-		dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
-			   thecnt, HDRQ_INCREMENT);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
@@ -1661,7 +1622,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* Validate some global module parameters */
-	ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+	ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
 	if (ret)
 		goto bail;
 
@@ -1842,7 +1803,6 @@ static void shutdown_one(struct pci_dev *pdev)
 int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 {
 	unsigned amt;
-	u64 reg;
 
 	if (!rcd->rcvhdrq) {
 		gfp_t gfp_flags;
@@ -1874,30 +1834,9 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 				goto bail_free;
 		}
 	}
-	/*
-	 * These values are per-context:
-	 *	RcvHdrCnt
-	 *	RcvHdrEntSize
-	 *	RcvHdrSize
-	 */
-	reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
-			& RCV_HDR_CNT_CNT_MASK)
-		<< RCV_HDR_CNT_CNT_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
-	reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
-			& RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
-		<< RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
-	reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
-		<< RCV_HDR_SIZE_HDR_SIZE_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
 
-	/*
-	 * Program dummy tail address for every receive context
-	 * before enabling any receive context
-	 */
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
-			dd->rcvhdrtail_dummy_dma);
+	set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+		      rcd->rcvhdrq_cnt);
 
 	return 0;
 
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d920b165d696..db82db497b2c 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -115,13 +115,11 @@ int msix_initialize(struct hfi1_devdata *dd)
  */
 static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
 			    irq_handler_t handler, irq_handler_t thread,
-			    u32 idx, enum irq_type type)
+			    enum irq_type type, const char *name)
 {
 	unsigned long nr;
 	int irq;
 	int ret;
-	const char *err_info;
-	char name[MAX_NAME_SIZE];
 	struct hfi1_msix_entry *me;
 
 	/* Allocate an MSIx vector */
@@ -135,43 +133,15 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
 	if (nr == dd->msix_info.max_requested)
 		return -ENOSPC;
 
-	/* Specific verification and determine the name */
-	switch (type) {
-	case IRQ_GENERAL:
-		/* general interrupt must be MSIx vector 0 */
-		if (nr) {
-			spin_lock(&dd->msix_info.msix_lock);
-			__clear_bit(nr, dd->msix_info.in_use_msix);
-			spin_unlock(&dd->msix_info.msix_lock);
-			dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
-				   nr);
-			return -EINVAL;
-		}
-		snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
-		err_info = "general";
-		break;
-	case IRQ_SDMA:
-		snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
-			 dd->unit, idx);
-		err_info = "sdma";
-		break;
-	case IRQ_RCVCTXT:
-		snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
-			 dd->unit, idx);
-		err_info = "receive context";
-		break;
-	case IRQ_OTHER:
-	default:
+	if (type < IRQ_SDMA || type >= IRQ_OTHER)
 		return -EINVAL;
-	}
-	name[sizeof(name) - 1] = 0;
 
 	irq = pci_irq_vector(dd->pcidev, nr);
 	ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
 	if (ret) {
 		dd_dev_err(dd,
-			   "%s: request for IRQ %d failed, MSIx %d, err %d\n",
-			   err_info, irq, idx, ret);
+			   "%s: request for IRQ %d failed, MSIx %lu, err %d\n",
+			   name, irq, nr, ret);
 		spin_lock(&dd->msix_info.msix_lock);
 		__clear_bit(nr, dd->msix_info.in_use_msix);
 		spin_unlock(&dd->msix_info.msix_lock);
@@ -195,17 +165,13 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
 	return nr;
 }
 
-/**
- * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
- * @rcd: valid rcd context
- *
- */
-int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+				       irq_handler_t handler,
+				       irq_handler_t thread,
+				       const char *name)
 {
-	int nr;
-
-	nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
-			      receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+	int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+				  IRQ_RCVCTXT, name);
 	if (nr < 0)
 		return nr;
 
@@ -222,6 +188,22 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
 }
 
 /**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+	char name[MAX_NAME_SIZE];
+
+	snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+		 rcd->dd->unit, rcd->ctxt);
+
+	return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+					   receive_context_thread, name);
+}
+
+/**
  * msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
  * @sde: valid sdma engine
  *
@@ -229,9 +211,12 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
 int msix_request_sdma_irq(struct sdma_engine *sde)
 {
 	int nr;
+	char name[MAX_NAME_SIZE];
 
+	snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+		 sde->dd->unit, sde->this_idx);
 	nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
-			      sde->this_idx, IRQ_SDMA);
+			      IRQ_SDMA, name);
 	if (nr < 0)
 		return nr;
 	sde->msix_intr = nr;
@@ -241,6 +226,32 @@ int msix_request_sdma_irq(struct sdma_engine *sde)
 }
 
 /**
+ * msix_request_general_irq() - Helper for getting general IRQ resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+	int nr;
+	char name[MAX_NAME_SIZE];
+
+	snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+	nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+			      name);
+	if (nr < 0)
+		return nr;
+
+	/* general interrupt must be MSIx vector 0 */
+	if (nr) {
+		msix_free_irq(dd, (u8)nr);
+		dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
  * enable_sdma_src() - Helper to enable SDMA IRQ srcs
  * @dd: valid devdata structure
  * @i: index of SDMA engine
@@ -265,10 +276,9 @@ static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
 int msix_request_irqs(struct hfi1_devdata *dd)
 {
 	int i;
-	int ret;
+	int ret = msix_request_general_irq(dd);
 
-	ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
-	if (ret < 0)
+	if (ret)
 		return ret;
 
 	for (i = 0; i < dd->num_sdma; i++) {
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index a514881632a4..1a02ab7971c8 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -54,6 +54,7 @@
 int msix_initialize(struct hfi1_devdata *dd);
 int msix_request_irqs(struct hfi1_devdata *dd);
 void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
 int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
 int msix_request_sdma_irq(struct sdma_engine *sde);
 void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a3c647675a7..f1734e5e9ac4 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2599,7 +2599,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 	 * to be sent before sending this one.
 	 */
 	e = NULL;
-	old_req = 1;
+	old_req = true;
 	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index e00c8a7d559c..b5fc5c6cd52f 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -80,7 +80,7 @@ TRACE_EVENT(hfi1_uctxtdata,
 			   __entry->credits = uctxt->sc->credits;
 			   __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
 			   __entry->piobase = uctxt->sc->base_addr;
-			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+			   __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
 			   __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
 			   __entry->eager_cnt = uctxt->egrbufs.alloced;
 			   __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 3cec960e9674..168079ed122c 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -106,19 +106,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
 			     ),
 	    TP_fast_assign(DD_DEV_ASSIGN(dd);
 			__entry->ctxt = rcd->ctxt;
-			if (rcd->do_interrupt ==
-			    &handle_receive_interrupt) {
-				__entry->slow_path = 1;
-				__entry->dma_rtail = 0xFF;
-			} else if (rcd->do_interrupt ==
-					&handle_receive_interrupt_dma_rtail){
-				__entry->dma_rtail = 1;
-				__entry->slow_path = 0;
-			} else if (rcd->do_interrupt ==
-					&handle_receive_interrupt_nodma_rtail) {
-				__entry->dma_rtail = 0;
-				__entry->slow_path = 0;
-			}
+			__entry->slow_path = hfi1_is_slowpath(rcd);
+			__entry->dma_rtail = get_dma_rtail_setting(rcd);
 			),
 	    TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
 		      __get_str(dev),
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index b49e60e8397d..6b14581b9965 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -78,7 +78,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
 	if (ret)
 		goto done;
 
-	if (uctxt->rcvhdrtail_kvaddr)
+	if (hfi1_rcvhdrtail_kvaddr(uctxt))
 		clear_rcvhdrtail(uctxt);
 
 	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index a2d1e5331bf1..5ffe4c996ed3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -370,6 +370,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
 	hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
 	spin_lock_init(&hr_cq->lock);
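+	/* QPs attached to this CQ are tracked on these lists */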
+	INIT_LIST_HEAD(&hr_cq->sq_list);
+	INIT_LIST_HEAD(&hr_cq->rq_list);
 
 	if (udata) {
 		ret = create_user_cq(hr_dev, hr_cq, udata, &resp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 5617434cbfb4..a7c4ff975c28 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -45,8 +45,6 @@
 
 #define HNS_ROCE_MAX_MSG_LEN			0x80000000
 
-#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
-
 #define HNS_ROCE_IB_MIN_SQ_STRIDE		6
 
 #define HNS_ROCE_BA_SIZE			(32 * 4096)
@@ -107,11 +105,6 @@
 #define NODE_DESC_SIZE				64
 #define DB_REG_OFFSET				0x1000
 
-#define SERV_TYPE_RC				0
-#define SERV_TYPE_RD				1
-#define SERV_TYPE_UC				2
-#define SERV_TYPE_UD				3
-
 /* Configure to HW for PAGE_SIZE larger than 4KB */
 #define PG_SHIFT_OFFSET				(PAGE_SHIFT - 12)
 
@@ -131,6 +124,13 @@
 #define EQ_DEPTH_COEFF				2
 
 enum {
+	SERV_TYPE_RC,
+	SERV_TYPE_UC,
+	SERV_TYPE_RD,
+	SERV_TYPE_UD,
+};
+
+enum {
 	HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
 	HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
 };
@@ -423,7 +423,7 @@ struct hns_roce_mr_table {
 struct hns_roce_wq {
 	u64		*wrid;     /* Work request ID */
 	spinlock_t	lock;
-	int		wqe_cnt;  /* WQE num */
+	u32		wqe_cnt;  /* WQE num */
 	int		max_gs;
 	int		offset;
 	int		wqe_shift;	/* WQE size */
@@ -498,6 +498,10 @@ struct hns_roce_cq {
 	u32				vector;
 	atomic_t			refcount;
 	struct completion		free;
+	struct list_head		sq_list; /* all qps on this send cq */
+	struct list_head		rq_list; /* all qps on this recv cq */
+	int				is_armed; /* cq is armed */
+	struct list_head		node; /* all armed cqs are on a list */
 };
 
 struct hns_roce_idx_que {
@@ -647,7 +651,6 @@ struct hns_roce_qp {
 	u8			sdb_en;
 	u32			doorbell_qpn;
 	u32			sq_signal_bits;
-	u32			sq_next_wqe;
 	struct hns_roce_wq	sq;
 
 	struct ib_umem		*umem;
@@ -682,6 +685,9 @@ struct hns_roce_qp {
 	u32			next_sge;
 
 	struct hns_roce_rinl_buf rq_inl_buf;
+	struct list_head	node;		/* all qps are on a list */
+	struct list_head	rq_node;	/* all recv qps are on a list */
+	struct list_head	sq_node;	/* all send qps are on a list */
 };
 
 struct hns_roce_ib_iboe {
@@ -794,10 +800,8 @@ struct hns_roce_caps {
 	int             reserved_qps;
 	int		num_qpc_timer;
 	int		num_cqc_timer;
-	u32		max_srq_sg;
 	int		num_srqs;
 	u32		max_wqes;
-	u32		max_srqs;
 	u32		max_srq_wrs;
 	u32		max_srq_sges;
 	u32		max_sq_desc_sz;
@@ -811,7 +815,6 @@ struct hns_roce_caps {
 	u32		min_wqes;
 	int		reserved_cqs;
 	int		reserved_srqs;
-	u32		max_srqwqes;
 	int		num_aeq_vectors;
 	int		num_comp_vectors;
 	int		num_other_vectors;
@@ -895,6 +898,12 @@ struct hns_roce_caps {
 	u32		tpq_buf_pg_sz;
 	u32		chunk_sz;	/* chunk size in non multihop mode */
 	u64		flags;
+	u16		default_ceq_max_cnt;
+	u16		default_ceq_period;
+	u16		default_aeq_max_cnt;
+	u16		default_aeq_period;
+	u16		default_aeq_arm_st;
+	u16		default_ceq_arm_st;
 };
 
 struct hns_roce_work {
@@ -911,6 +920,12 @@ struct hns_roce_dfx_hw {
 			      int *buffer);
 };
 
+enum hns_roce_device_state {
+	HNS_ROCE_DEVICE_STATE_INITED,
+	HNS_ROCE_DEVICE_STATE_RST_DOWN,
+	HNS_ROCE_DEVICE_STATE_UNINIT,
+};
+
 struct hns_roce_hw {
 	int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
@@ -993,6 +1008,9 @@ struct hns_roce_dev {
 	bool			dis_db;
 	unsigned long		reset_cnt;
 	struct hns_roce_ib_iboe iboe;
+	enum hns_roce_device_state state;
+	struct list_head	qp_list; /* list of all qps on this dev */
+	spinlock_t		qp_list_lock; /* protect qp_list */
 
 	struct list_head        pgdir_list;
 	struct mutex            pgdir_mutex;
@@ -1133,7 +1151,6 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 
 int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
@@ -1257,6 +1274,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
 int hns_roce_init(struct hns_roce_dev *hr_dev);
 void hns_roce_exit(struct hns_roce_dev *hr_dev);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 2a2b2112f886..c6e66586e533 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 	unsigned long flags = 0;
 	void *wqe = NULL;
 	__le32 doorbell[2];
+	u32 wqe_idx = 0;
 	int nreq = 0;
-	u32 ind = 0;
 	int ret = 0;
 	u8 *smac;
 	int loopback;
@@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 	}
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
-	ind = qp->sq_next_wqe;
+
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 			ret = -ENOMEM;
@@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
+		wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
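+		/* wqe_cnt is a power of two, so the mask above wraps the slot index */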
+
 		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
 			dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
 				wr->num_sge, qp->sq.max_gs);
@@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
-								      wr->wr_id;
+		wqe = get_send_wqe(qp, wqe_idx);
+		qp->sq.wrid[wqe_idx] = wr->wr_id;
 
 		/* Corresponding to the RC and RD type wqe process separately */
 		if (ibqp->qp_type == IB_QPT_GSI) {
@@ -210,7 +211,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 				       cpu_to_le32((wr->sg_list[1].addr) >> 32);
 			ud_sq_wqe->l_key1 =
 				       cpu_to_le32(wr->sg_list[1].lkey);
-			ind++;
 		} else if (ibqp->qp_type == IB_QPT_RC) {
 			u32 tmp_len = 0;
 
@@ -308,7 +308,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 				ctrl->flag |= cpu_to_le32(wr->num_sge <<
 					      HNS_ROCE_WQE_SGE_NUM_BIT);
 			}
-			ind++;
 		}
 	}
 
@@ -336,7 +335,6 @@ out:
 		doorbell[1] = sq_db.u32_8;
 
 		hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
-		qp->sq_next_wqe = ind;
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -348,12 +346,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 				 const struct ib_recv_wr *wr,
 				 const struct ib_recv_wr **bad_wr)
 {
-	int ret = 0;
-	int nreq = 0;
-	int ind = 0;
-	int i = 0;
-	u32 reg_val;
-	unsigned long flags = 0;
 	struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
 	struct hns_roce_wqe_data_seg *scat = NULL;
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -361,9 +353,14 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 	struct device *dev = &hr_dev->pdev->dev;
 	struct hns_roce_rq_db rq_db;
 	__le32 doorbell[2] = {0};
+	unsigned long flags = 0;
+	unsigned int wqe_idx;
+	int ret = 0;
+	int nreq = 0;
+	int i = 0;
+	u32 reg_val;
 
 	spin_lock_irqsave(&hr_qp->rq.lock, flags);
-	ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
@@ -373,6 +370,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
+		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
 		if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
 			dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
 				wr->num_sge, hr_qp->rq.max_gs);
@@ -381,7 +380,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		ctrl = get_recv_wqe(hr_qp, ind);
+		ctrl = get_recv_wqe(hr_qp, wqe_idx);
 
 		roce_set_field(ctrl->rwqe_byte_12,
 			       RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
@@ -393,9 +392,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 		for (i = 0; i < wr->num_sge; i++)
 			set_data_seg(scat + i, wr->sg_list + i);
 
-		hr_qp->rq.wrid[ind] = wr->wr_id;
-
-		ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+		hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
 	}
 
 out:
@@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 		hr_qp->rq.tail = 0;
 		hr_qp->sq.head = 0;
 		hr_qp->sq.tail = 0;
-		hr_qp->sq_next_wqe = 0;
 	}
 
 	kfree(context);
@@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 		hr_qp->rq.tail = 0;
 		hr_qp->sq.head = 0;
 		hr_qp->sq.tail = 0;
-		hr_qp->sq_next_wqe = 0;
 	}
 out:
 	kfree(context);
@@ -3614,14 +3609,18 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	if (ret)
 		return ret;
 
-	send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
-	recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+	send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+	recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
 
 	hns_roce_lock_cqs(send_cq, recv_cq);
 	if (!udata) {
-		__hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
-				       to_hr_srq(hr_qp->ibqp.srq) : NULL);
-		if (send_cq != recv_cq)
+		if (recv_cq)
+			__hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn,
+					       (hr_qp->ibqp.srq ?
+						to_hr_srq(hr_qp->ibqp.srq) :
+						NULL));
+
+		if (send_cq && send_cq != recv_cq)
 			__hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
 	}
 	hns_roce_unlock_cqs(send_cq, recv_cq);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index cb8071a3e0d5..12c4cd8e9378 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -63,20 +63,15 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 	struct hns_roce_mr *mr = to_hr_mr(wr->mr);
 
 	/* use ib_access_flags */
-	roce_set_bit(rc_sq_wqe->byte_4,
-		     V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
 		     wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
-	roce_set_bit(rc_sq_wqe->byte_4,
-		     V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
 		     wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
-	roce_set_bit(rc_sq_wqe->byte_4,
-		     V2_RC_FRMR_WQE_BYTE_4_RR_S,
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S,
 		     wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
-	roce_set_bit(rc_sq_wqe->byte_4,
-		     V2_RC_FRMR_WQE_BYTE_4_RW_S,
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S,
 		     wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
-	roce_set_bit(rc_sq_wqe->byte_4,
-		     V2_RC_FRMR_WQE_BYTE_4_LW_S,
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S,
 		     wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
 
 	/* Data structure reuse may lead to confusion */
@@ -110,7 +105,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
 }
 
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
-			   unsigned int *sge_ind)
+			   unsigned int *sge_ind, int valid_num_sge)
 {
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct ib_sge *sg;
@@ -123,7 +118,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 
 	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
 		num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
-	extend_sge_num = wr->num_sge - num_in_wqe;
+	extend_sge_num = valid_num_sge - num_in_wqe;
 	sg = wr->sg_list + num_in_wqe;
 	shift = qp->hr_buf.page_shift;
 
@@ -159,14 +154,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
+			     int valid_num_sge,
 			     const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_v2_wqe_data_seg *dseg = wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
+	int j = 0;
 	int i;
 
-	if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
+	if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
 		if (le32_to_cpu(rc_sq_wqe->msg_len) >
 		    hr_dev->caps.max_sq_inline) {
 			*bad_wr = wr;
@@ -190,7 +187,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
 			     1);
 	} else {
-		if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
+		if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
 			for (i = 0; i < wr->num_sge; i++) {
 				if (likely(wr->sg_list[i].length)) {
 					set_data_seg_v2(dseg, wr->sg_list + i);
@@ -203,19 +200,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 				     V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
 				     (*sge_ind) & (qp->sge.sge_cnt - 1));
 
-			for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
+			for (i = 0; i < wr->num_sge &&
+			     j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
 				if (likely(wr->sg_list[i].length)) {
 					set_data_seg_v2(dseg, wr->sg_list + i);
 					dseg++;
+					j++;
 				}
 			}
 
-			set_extend_sge(qp, wr, sge_ind);
+			set_extend_sge(qp, wr, sge_ind, valid_num_sge);
 		}
 
 		roce_set_field(rc_sq_wqe->byte_16,
 			       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
-			       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
+			       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
 	}
 
 	return 0;
@@ -226,6 +225,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 				 int attr_mask, enum ib_qp_state cur_state,
 				 enum ib_qp_state new_state);
 
+static int check_send_valid(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_qp *hr_qp)
+{
+	struct ib_qp *ibqp = &hr_qp->ibqp;
+	struct device *dev = hr_dev->dev;
+
+	if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+		     ibqp->qp_type != IB_QPT_GSI &&
+		     ibqp->qp_type != IB_QPT_UD)) {
+		dev_err(dev, "Unsupported QP type 0x%x!\n", ibqp->qp_type);
+		return -EOPNOTSUPP;
+	} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
+		   hr_qp->state == IB_QPS_INIT ||
+		   hr_qp->state == IB_QPS_RTR)) {
+		dev_err(dev, "Post WQE fail, QP state %d!\n", hr_qp->state);
+		return -EINVAL;
+	} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
+		dev_err(dev, "Post WQE fail, dev state %d!\n", hr_dev->state);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				 const struct ib_send_wr *wr,
 				 const struct ib_send_wr **bad_wr)
@@ -239,38 +262,31 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
 	struct ib_qp_attr attr;
-	unsigned int sge_ind;
 	unsigned int owner_bit;
+	unsigned int sge_idx;
+	unsigned int wqe_idx;
 	unsigned long flags;
-	unsigned int ind;
+	int valid_num_sge;
 	void *wqe = NULL;
 	bool loopback;
 	int attr_mask;
 	u32 tmp_len;
-	int ret = 0;
 	u32 hr_op;
 	u8 *smac;
 	int nreq;
+	int ret;
 	int i;
 
-	if (unlikely(ibqp->qp_type != IB_QPT_RC &&
-		     ibqp->qp_type != IB_QPT_GSI &&
-		     ibqp->qp_type != IB_QPT_UD)) {
-		dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
-		*bad_wr = wr;
-		return -EOPNOTSUPP;
-	}
+	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT ||
-		     qp->state == IB_QPS_RTR)) {
-		dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state);
+	ret = check_send_valid(hr_dev, qp);
+	if (ret) {
 		*bad_wr = wr;
-		return -EINVAL;
+		nreq = 0;
+		goto out;
 	}
 
-	spin_lock_irqsave(&qp->sq.lock, flags);
-	ind = qp->sq_next_wqe;
-	sge_ind = qp->next_sge;
+	sge_idx = qp->next_sge;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -279,6 +295,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
+		wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
 		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
 			dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
 				wr->num_sge, qp->sq.max_gs);
@@ -287,14 +305,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
-								      wr->wr_id;
-
+		wqe = get_send_wqe(qp, wqe_idx);
+		qp->sq.wrid[wqe_idx] = wr->wr_id;
 		owner_bit =
 		       ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+		valid_num_sge = 0;
 		tmp_len = 0;
 
+		for (i = 0; i < wr->num_sge; i++) {
+			if (likely(wr->sg_list[i].length)) {
+				tmp_len += wr->sg_list[i].length;
+				valid_num_sge++;
+			}
+		}
+
 		/* Corresponding to the QP type, wqe process separately */
 		if (ibqp->qp_type == IB_QPT_GSI) {
 			ud_sq_wqe = wqe;
@@ -330,9 +354,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				       V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
 				       HNS_ROCE_V2_WQE_OP_SEND);
 
-			for (i = 0; i < wr->num_sge; i++)
-				tmp_len += wr->sg_list[i].length;
-
 			ud_sq_wqe->msg_len =
 			 cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
 
@@ -368,12 +389,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			roce_set_field(ud_sq_wqe->byte_16,
 				       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
 				       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
-				       wr->num_sge);
+				       valid_num_sge);
 
 			roce_set_field(ud_sq_wqe->byte_20,
 				     V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
 				     V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-				     sge_ind & (qp->sge.sge_cnt - 1));
+				     sge_idx & (qp->sge.sge_cnt - 1));
 
 			roce_set_field(ud_sq_wqe->byte_24,
 				       V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
@@ -423,13 +444,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
 			       GID_LEN_V2);
 
-			set_extend_sge(qp, wr, &sge_ind);
-			ind++;
+			set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
 		} else if (ibqp->qp_type == IB_QPT_RC) {
 			rc_sq_wqe = wqe;
 			memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
-			for (i = 0; i < wr->num_sge; i++)
-				tmp_len += wr->sg_list[i].length;
 
 			rc_sq_wqe->msg_len =
 			 cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
@@ -550,15 +568,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				roce_set_field(rc_sq_wqe->byte_16,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
-					       wr->num_sge);
+					       valid_num_sge);
 			} else if (wr->opcode != IB_WR_REG_MR) {
 				ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
-							wqe, &sge_ind, bad_wr);
+							wqe, &sge_idx,
+							valid_num_sge, bad_wr);
 				if (ret)
 					goto out;
 			}
-
-			ind++;
 		} else {
 			dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
 			spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -588,8 +605,7 @@ out:
 
 		hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
 
-		qp->sq_next_wqe = ind;
-		qp->next_sge = sge_ind;
+		qp->next_sge = sge_idx;
 
 		if (qp->state == IB_QPS_ERR) {
 			attr_mask = IB_QP_STATE;
@@ -610,6 +626,17 @@ out:
 	return ret;
 }
 
+static int check_recv_valid(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_qp *hr_qp)
+{
+	if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+		return -EIO;
+	else if (hr_qp->state == IB_QPS_RESET)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 				 const struct ib_recv_wr *wr,
 				 const struct ib_recv_wr **bad_wr)
@@ -623,18 +650,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 	unsigned long flags;
 	void *wqe = NULL;
 	int attr_mask;
-	int ret = 0;
+	u32 wqe_idx;
 	int nreq;
-	int ind;
+	int ret;
 	int i;
 
 	spin_lock_irqsave(&hr_qp->rq.lock, flags);
-	ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
 
-	if (hr_qp->state == IB_QPS_RESET) {
-		spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+	ret = check_recv_valid(hr_dev, hr_qp);
+	if (ret) {
 		*bad_wr = wr;
-		return -EINVAL;
+		nreq = 0;
+		goto out;
 	}
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -645,6 +672,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
+		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
 		if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
 			dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
 				wr->num_sge, hr_qp->rq.max_gs);
@@ -653,7 +682,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_recv_wqe(hr_qp, ind);
+		wqe = get_recv_wqe(hr_qp, wqe_idx);
 		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
 		for (i = 0; i < wr->num_sge; i++) {
 			if (!wr->sg_list[i].length)
@@ -669,8 +698,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 
 		/* rq support inline data */
 		if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
-			sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
-			hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+			sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+			hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
 							       (u32)wr->num_sge;
 			for (i = 0; i < wr->num_sge; i++) {
 				sge_list[i].addr =
@@ -679,9 +708,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 			}
 		}
 
-		hr_qp->rq.wrid[ind] = wr->wr_id;
-
-		ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+		hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
 	}
 
 out:
@@ -1254,7 +1281,6 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
 	}
 
 out:
-	dev_err(hr_dev->dev, "Func clear fail.\n");
 	hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag);
 }
 
@@ -1378,8 +1404,7 @@ static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev)
 	return 0;
 }
 
-static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
-						  int vf_id)
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, int vf_id)
 {
 	struct hns_roce_cmq_desc desc;
 	struct hns_roce_vf_switch *swt;
@@ -1388,13 +1413,12 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
 	swt = (struct hns_roce_vf_switch *)desc.data;
 	hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
 	swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
-	roce_set_field(swt->fun_id,
-			VF_SWITCH_DATA_FUN_ID_VF_ID_M,
-			VF_SWITCH_DATA_FUN_ID_VF_ID_S,
-			vf_id);
+	roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+		       VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
 	ret = hns_roce_cmq_send(hr_dev, &desc, 1);
 	if (ret)
 		return ret;
+
 	desc.flag =
 		cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
 	desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
@@ -1574,69 +1598,9 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
 	return hns_roce_cmq_send(hr_dev, &desc, 1);
 }
 
-static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+static void set_default_caps(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_caps *caps = &hr_dev->caps;
-	int ret;
-
-	ret = hns_roce_cmq_query_hw_info(hr_dev);
-	if (ret) {
-		dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	ret = hns_roce_query_fw_ver(hr_dev);
-	if (ret) {
-		dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	ret = hns_roce_config_global_param(hr_dev);
-	if (ret) {
-		dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	/* Get pf resource owned by every pf */
-	ret = hns_roce_query_pf_resource(hr_dev);
-	if (ret) {
-		dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	if (hr_dev->pci_dev->revision == 0x21) {
-		ret = hns_roce_query_pf_timer_resource(hr_dev);
-		if (ret) {
-			dev_err(hr_dev->dev,
-				"Query pf timer resource fail, ret = %d.\n",
-				ret);
-			return ret;
-		}
-	}
-
-	ret = hns_roce_alloc_vf_resource(hr_dev);
-	if (ret) {
-		dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
-			ret);
-		return ret;
-	}
-
-	if (hr_dev->pci_dev->revision == 0x21) {
-		ret = hns_roce_set_vf_switch_param(hr_dev, 0);
-		if (ret) {
-			dev_err(hr_dev->dev,
-				"Set function switch param fail, ret = %d.\n",
-				ret);
-			return ret;
-		}
-	}
-
-	hr_dev->vendor_part_id = hr_dev->pci_dev->device;
-	hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
 
 	caps->num_qps		= HNS_ROCE_V2_MAX_QP_NUM;
 	caps->max_wqes		= HNS_ROCE_V2_MAX_WQE_NUM;
@@ -1644,17 +1608,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->num_srqs		= HNS_ROCE_V2_MAX_SRQ_NUM;
 	caps->min_cqes		= HNS_ROCE_MIN_CQE_NUM;
 	caps->max_cqes		= HNS_ROCE_V2_MAX_CQE_NUM;
-	caps->max_srqwqes	= HNS_ROCE_V2_MAX_SRQWQE_NUM;
 	caps->max_sq_sg		= HNS_ROCE_V2_MAX_SQ_SGE_NUM;
 	caps->max_extend_sg	= HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
 	caps->max_rq_sg		= HNS_ROCE_V2_MAX_RQ_SGE_NUM;
 	caps->max_sq_inline	= HNS_ROCE_V2_MAX_SQ_INLINE;
-	caps->max_srq_sg	= HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
 	caps->num_uars		= HNS_ROCE_V2_UAR_NUM;
 	caps->phy_num_uars	= HNS_ROCE_V2_PHY_UAR_NUM;
 	caps->num_aeq_vectors	= HNS_ROCE_V2_AEQE_VEC_NUM;
 	caps->num_comp_vectors	= HNS_ROCE_V2_COMP_VEC_NUM;
-	caps->num_other_vectors	= HNS_ROCE_V2_ABNORMAL_VEC_NUM;
+	caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
 	caps->num_mtpts		= HNS_ROCE_V2_MAX_MTPT_NUM;
 	caps->num_mtt_segs	= HNS_ROCE_V2_MAX_MTT_SEGS;
 	caps->num_cqe_segs	= HNS_ROCE_V2_MAX_CQE_SEGS;
@@ -1668,12 +1630,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
 	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
 	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
-	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
+	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
 	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
 	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
 	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
 	caps->mtt_entry_sz	= HNS_ROCE_V2_MTT_ENTRY_SZ;
-	caps->idx_entry_sz	= 4;
+	caps->idx_entry_sz	= HNS_ROCE_V2_IDX_ENTRY_SZ;
 	caps->cq_entry_sz	= HNS_ROCE_V2_CQE_ENTRY_SIZE;
 	caps->page_size_cap	= HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
 	caps->reserved_lkey	= 0;
@@ -1696,16 +1658,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->mpt_ba_pg_sz	= 0;
 	caps->mpt_buf_pg_sz	= 0;
 	caps->mpt_hop_num	= HNS_ROCE_CONTEXT_HOP_NUM;
-	caps->pbl_ba_pg_sz	= 2;
-	caps->pbl_buf_pg_sz	= 0;
-	caps->pbl_hop_num	= HNS_ROCE_PBL_HOP_NUM;
 	caps->mtt_ba_pg_sz	= 0;
 	caps->mtt_buf_pg_sz	= 0;
 	caps->mtt_hop_num	= HNS_ROCE_MTT_HOP_NUM;
-	caps->wqe_sq_hop_num	= 2;
-	caps->wqe_sge_hop_num	= 1;
-	caps->wqe_rq_hop_num	= 2;
-	caps->cqe_ba_pg_sz	= 6;
+	caps->wqe_sq_hop_num	= HNS_ROCE_SQWQE_HOP_NUM;
+	caps->wqe_sge_hop_num	= HNS_ROCE_EXT_SGE_HOP_NUM;
+	caps->wqe_rq_hop_num	= HNS_ROCE_RQWQE_HOP_NUM;
+	caps->cqe_ba_pg_sz	= HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
 	caps->cqe_buf_pg_sz	= 0;
 	caps->cqe_hop_num	= HNS_ROCE_CQE_HOP_NUM;
 	caps->srqwqe_ba_pg_sz	= 0;
@@ -1714,10 +1673,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->idx_ba_pg_sz	= 0;
 	caps->idx_buf_pg_sz	= 0;
 	caps->idx_hop_num	= HNS_ROCE_IDX_HOP_NUM;
-	caps->eqe_ba_pg_sz	= 0;
-	caps->eqe_buf_pg_sz	= 0;
-	caps->eqe_hop_num	= HNS_ROCE_EQE_HOP_NUM;
-	caps->tsq_buf_pg_sz	= 0;
 	caps->chunk_sz		= HNS_ROCE_V2_TABLE_CHUNK_SIZE;
 
 	caps->flags		= HNS_ROCE_CAP_FLAG_REREG_MR |
@@ -1726,24 +1681,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 				  HNS_ROCE_CAP_FLAG_RECORD_DB |
 				  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
 
-	if (hr_dev->pci_dev->revision == 0x21)
-		caps->flags |= HNS_ROCE_CAP_FLAG_MW |
-			       HNS_ROCE_CAP_FLAG_FRMR;
-
 	caps->pkey_table_len[0] = 1;
-	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
+	caps->gid_table_len[0]	= HNS_ROCE_V2_GID_INDEX_NUM;
 	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
 	caps->aeqe_depth	= HNS_ROCE_V2_ASYNC_EQE_NUM;
 	caps->local_ca_ack_delay = 0;
 	caps->max_mtu = IB_MTU_4096;
 
-	caps->max_srqs		= HNS_ROCE_V2_MAX_SRQ;
 	caps->max_srq_wrs	= HNS_ROCE_V2_MAX_SRQ_WR;
 	caps->max_srq_sges	= HNS_ROCE_V2_MAX_SRQ_SGE;
 
-	if (hr_dev->pci_dev->revision == 0x21) {
-		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
-			       HNS_ROCE_CAP_FLAG_SRQ |
+	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
+			       HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
 			       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
 
 		caps->num_qpc_timer	  = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
@@ -1757,12 +1707,345 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 		caps->cqc_timer_buf_pg_sz = 0;
 		caps->cqc_timer_hop_num   = HNS_ROCE_HOP_NUM_0;
 
-		caps->sccc_entry_sz	= HNS_ROCE_V2_SCCC_ENTRY_SZ;
-		caps->sccc_ba_pg_sz	= 0;
-		caps->sccc_buf_pg_sz    = 0;
-		caps->sccc_hop_num	= HNS_ROCE_SCCC_HOP_NUM;
+		caps->sccc_entry_sz	  = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+		caps->sccc_ba_pg_sz	  = 0;
+		caps->sccc_buf_pg_sz	  = 0;
+		caps->sccc_hop_num	  = HNS_ROCE_SCCC_HOP_NUM;
+	}
+}
+
+static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
+		       int *buf_page_size, int *bt_page_size, u32 hem_type)
+{
+	u64 obj_per_chunk;
+	int bt_chunk_size = 1 << PAGE_SHIFT;
+	int buf_chunk_size = 1 << PAGE_SHIFT;
+	int obj_per_chunk_default = buf_chunk_size / obj_size;
+
+	*buf_page_size = 0;
+	*bt_page_size = 0;
+
+	switch (hop_num) {
+	case 3:
+		obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+				(bt_chunk_size / BA_BYTE_LEN) *
+				(bt_chunk_size / BA_BYTE_LEN) *
+				 obj_per_chunk_default;
+		break;
+	case 2:
+		obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+				(bt_chunk_size / BA_BYTE_LEN) *
+				 obj_per_chunk_default;
+		break;
+	case 1:
+		obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+				obj_per_chunk_default;
+		break;
+	case HNS_ROCE_HOP_NUM_0:
+		obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
+		break;
+	default:
+		pr_err("Table %d does not support hop_num = %d!\n", hem_type,
+			hop_num);
+		return;
+	}
+
+	if (hem_type >= HEM_TYPE_MTT)
+		*bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+	else
+		*buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+}
+
+static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM];
+	struct hns_roce_caps *caps = &hr_dev->caps;
+	struct hns_roce_query_pf_caps_a *resp_a;
+	struct hns_roce_query_pf_caps_b *resp_b;
+	struct hns_roce_query_pf_caps_c *resp_c;
+	struct hns_roce_query_pf_caps_d *resp_d;
+	struct hns_roce_query_pf_caps_e *resp_e;
+	int ctx_hop_num;
+	int pbl_hop_num;
+	int ret;
+	int i;
+
+	for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) {
+		hns_roce_cmq_setup_basic_desc(&desc[i],
+					      HNS_ROCE_OPC_QUERY_PF_CAPS_NUM,
+					      true);
+		if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1))
+			desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+		else
+			desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+	}
+
+	ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM);
+	if (ret)
+		return ret;
+
+	resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data;
+	resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data;
+	resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
+	resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
+	resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+
+	caps->local_ca_ack_delay     = resp_a->local_ca_ack_delay;
+	caps->max_sq_sg		     = le16_to_cpu(resp_a->max_sq_sg);
+	caps->max_sq_inline	     = le16_to_cpu(resp_a->max_sq_inline);
+	caps->max_rq_sg		     = le16_to_cpu(resp_a->max_rq_sg);
+	caps->max_extend_sg	     = le32_to_cpu(resp_a->max_extend_sg);
+	caps->num_qpc_timer	     = le16_to_cpu(resp_a->num_qpc_timer);
+	caps->num_cqc_timer	     = le16_to_cpu(resp_a->num_cqc_timer);
+	caps->max_srq_sges	     = le16_to_cpu(resp_a->max_srq_sges);
+	caps->num_aeq_vectors	     = resp_a->num_aeq_vectors;
+	caps->num_other_vectors	     = resp_a->num_other_vectors;
+	caps->max_sq_desc_sz	     = resp_a->max_sq_desc_sz;
+	caps->max_rq_desc_sz	     = resp_a->max_rq_desc_sz;
+	caps->max_srq_desc_sz	     = resp_a->max_srq_desc_sz;
+	caps->cq_entry_sz	     = resp_a->cq_entry_sz;
+
+	caps->mtpt_entry_sz	     = resp_b->mtpt_entry_sz;
+	caps->irrl_entry_sz	     = resp_b->irrl_entry_sz;
+	caps->trrl_entry_sz	     = resp_b->trrl_entry_sz;
+	caps->cqc_entry_sz	     = resp_b->cqc_entry_sz;
+	caps->srqc_entry_sz	     = resp_b->srqc_entry_sz;
+	caps->idx_entry_sz	     = resp_b->idx_entry_sz;
+	caps->sccc_entry_sz	     = resp_b->scc_ctx_entry_sz;
+	caps->max_mtu		     = resp_b->max_mtu;
+	caps->qpc_entry_sz	     = le16_to_cpu(resp_b->qpc_entry_sz);
+	caps->min_cqes		     = resp_b->min_cqes;
+	caps->min_wqes		     = resp_b->min_wqes;
+	caps->page_size_cap	     = le32_to_cpu(resp_b->page_size_cap);
+	caps->pkey_table_len[0]	     = resp_b->pkey_table_len;
+	caps->phy_num_uars	     = resp_b->phy_num_uars;
+	ctx_hop_num		     = resp_b->ctx_hop_num;
+	pbl_hop_num		     = resp_b->pbl_hop_num;
+
+	caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds,
+					    V2_QUERY_PF_CAPS_C_NUM_PDS_M,
+					    V2_QUERY_PF_CAPS_C_NUM_PDS_S);
+	caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
+				     V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
+				     V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
+	caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
+					    V2_QUERY_PF_CAPS_C_NUM_CQS_M,
+					    V2_QUERY_PF_CAPS_C_NUM_CQS_S);
+	caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs,
+						V2_QUERY_PF_CAPS_C_MAX_GID_M,
+						V2_QUERY_PF_CAPS_C_MAX_GID_S);
+	caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth,
+					     V2_QUERY_PF_CAPS_C_CQ_DEPTH_M,
+					     V2_QUERY_PF_CAPS_C_CQ_DEPTH_S);
+	caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws,
+					      V2_QUERY_PF_CAPS_C_NUM_MRWS_M,
+					      V2_QUERY_PF_CAPS_C_NUM_MRWS_S);
+	caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps,
+					    V2_QUERY_PF_CAPS_C_NUM_QPS_M,
+					    V2_QUERY_PF_CAPS_C_NUM_QPS_S);
+	caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps,
+						V2_QUERY_PF_CAPS_C_MAX_ORD_M,
+						V2_QUERY_PF_CAPS_C_MAX_ORD_S);
+	caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
+	caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
+	caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs,
+					     V2_QUERY_PF_CAPS_D_NUM_SRQS_M,
+					     V2_QUERY_PF_CAPS_D_NUM_SRQS_S);
+	caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+	caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth,
+					       V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M,
+					       V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S);
+	caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth,
+						V2_QUERY_PF_CAPS_D_NUM_CEQS_M,
+						V2_QUERY_PF_CAPS_D_NUM_CEQS_S);
+	caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth,
+					       V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M,
+					       V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S);
+	caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
+					    V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M,
+					    V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S);
+	caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
+					    V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M,
+					    V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S);
+	caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds,
+					    V2_QUERY_PF_CAPS_D_RSV_PDS_M,
+					    V2_QUERY_PF_CAPS_D_RSV_PDS_S);
+	caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds,
+					     V2_QUERY_PF_CAPS_D_NUM_UARS_M,
+					     V2_QUERY_PF_CAPS_D_NUM_UARS_S);
+	caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps,
+					    V2_QUERY_PF_CAPS_D_RSV_QPS_M,
+					    V2_QUERY_PF_CAPS_D_RSV_QPS_S);
+	caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps,
+					     V2_QUERY_PF_CAPS_D_RSV_UARS_M,
+					     V2_QUERY_PF_CAPS_D_RSV_UARS_S);
+	caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
+					     V2_QUERY_PF_CAPS_E_RSV_MRWS_M,
+					     V2_QUERY_PF_CAPS_E_RSV_MRWS_S);
+	caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
+					 V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M,
+					 V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S);
+	caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs,
+					    V2_QUERY_PF_CAPS_E_RSV_CQS_M,
+					    V2_QUERY_PF_CAPS_E_RSV_CQS_S);
+	caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs,
+					     V2_QUERY_PF_CAPS_E_RSV_SRQS_M,
+					     V2_QUERY_PF_CAPS_E_RSV_SRQS_S);
+	caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey,
+					     V2_QUERY_PF_CAPS_E_RSV_LKEYS_M,
+					     V2_QUERY_PF_CAPS_E_RSV_LKEYS_S);
+	caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
+	caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
+	caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
+	caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period);
+
+	caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+	caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+	caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+	caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
+	caps->mtt_ba_pg_sz = 0;
+	caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+	caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+	caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+	caps->qpc_hop_num = ctx_hop_num;
+	caps->srqc_hop_num = ctx_hop_num;
+	caps->cqc_hop_num = ctx_hop_num;
+	caps->mpt_hop_num = ctx_hop_num;
+	caps->mtt_hop_num = pbl_hop_num;
+	caps->cqe_hop_num = pbl_hop_num;
+	caps->srqwqe_hop_num = pbl_hop_num;
+	caps->idx_hop_num = pbl_hop_num;
+	caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+					  V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M,
+					  V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S);
+	caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+					  V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M,
+					  V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S);
+	caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
+					  V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
+					  V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
+
+	calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
+		   caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
+		   HEM_TYPE_QPC);
+	calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
+		   caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz,
+		   HEM_TYPE_MTPT);
+	calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
+		   caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
+		   HEM_TYPE_CQC);
+	calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz, caps->srqc_hop_num,
+		   caps->srqc_bt_num, &caps->srqc_buf_pg_sz,
+		   &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC);
+
+	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) {
+		caps->sccc_hop_num = ctx_hop_num;
+		caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+		caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+
+		calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+			   caps->sccc_hop_num, caps->sccc_bt_num,
+			   &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
+			   HEM_TYPE_SCCC);
+		calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
+			   caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
+			   &caps->cqc_timer_buf_pg_sz,
+			   &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
+	}
+
+	calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num,
+		   1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
+	calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz,
+		   caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz,
+		   &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE);
+	calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz, caps->idx_hop_num,
+		   1, &caps->idx_buf_pg_sz, &caps->idx_ba_pg_sz, HEM_TYPE_IDX);
+
+	return 0;
+}
+
+static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_caps *caps = &hr_dev->caps;
+	int ret;
+
+	ret = hns_roce_cmq_query_hw_info(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	ret = hns_roce_query_fw_ver(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	ret = hns_roce_config_global_param(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	/* Get pf resource owned by every pf */
+	ret = hns_roce_query_pf_resource(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	if (hr_dev->pci_dev->revision == 0x21) {
+		ret = hns_roce_query_pf_timer_resource(hr_dev);
+		if (ret) {
+			dev_err(hr_dev->dev,
+				"Query pf timer resource fail, ret = %d.\n",
+				ret);
+			return ret;
+		}
+	}
+
+	ret = hns_roce_alloc_vf_resource(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	if (hr_dev->pci_dev->revision == 0x21) {
+		ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+		if (ret) {
+			dev_err(hr_dev->dev,
+				"Set function switch param fail, ret = %d.\n",
+				ret);
+			return ret;
+		}
 	}
 
+	hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+	hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
+
+	caps->num_mtt_segs	= HNS_ROCE_V2_MAX_MTT_SEGS;
+	caps->num_cqe_segs	= HNS_ROCE_V2_MAX_CQE_SEGS;
+	caps->num_srqwqe_segs	= HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+	caps->num_idx_segs	= HNS_ROCE_V2_MAX_IDX_SEGS;
+
+	caps->pbl_ba_pg_sz	= HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
+	caps->pbl_buf_pg_sz	= 0;
+	caps->pbl_hop_num	= HNS_ROCE_PBL_HOP_NUM;
+	caps->eqe_ba_pg_sz	= 0;
+	caps->eqe_buf_pg_sz	= 0;
+	caps->eqe_hop_num	= HNS_ROCE_EQE_HOP_NUM;
+	caps->tsq_buf_pg_sz	= 0;
+
+	ret = hns_roce_query_pf_caps(hr_dev);
+	if (ret)
+		set_default_caps(hr_dev);
+
 	ret = hns_roce_v2_set_bt(hr_dev);
 	if (ret)
 		dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1818,37 +2101,32 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
 			req_a->base_addr_h =
 				cpu_to_le32(link_tbl->table.map >> 32);
 			roce_set_field(req_a->depth_pgsz_init_en,
-				       CFG_LLM_QUE_DEPTH_M,
-				       CFG_LLM_QUE_DEPTH_S,
+				       CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S,
 				       link_tbl->npages);
 			roce_set_field(req_a->depth_pgsz_init_en,
-				       CFG_LLM_QUE_PGSZ_M,
-				       CFG_LLM_QUE_PGSZ_S,
+				       CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S,
 				       link_tbl->pg_sz);
 			req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0);
 			req_a->head_ba_h_nxtptr =
 				cpu_to_le32(entry[0].blk_ba1_nxt_ptr);
-			roce_set_field(req_a->head_ptr,
-				       CFG_LLM_HEAD_PTR_M,
+			roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M,
 				       CFG_LLM_HEAD_PTR_S, 0);
 		} else {
 			req_b->tail_ba_l =
 				cpu_to_le32(entry[page_num - 1].blk_ba0);
-			roce_set_field(req_b->tail_ba_h,
-				       CFG_LLM_TAIL_BA_H_M,
+			roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M,
 				       CFG_LLM_TAIL_BA_H_S,
 				       entry[page_num - 1].blk_ba1_nxt_ptr &
-				       HNS_ROCE_LINK_TABLE_BA1_M);
-			roce_set_field(req_b->tail_ptr,
-				       CFG_LLM_TAIL_PTR_M,
+					       HNS_ROCE_LINK_TABLE_BA1_M);
+			roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M,
 				       CFG_LLM_TAIL_PTR_S,
 				       (entry[page_num - 2].blk_ba1_nxt_ptr &
-				       HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
-				       HNS_ROCE_LINK_TABLE_NXT_PTR_S);
+					HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
+					       HNS_ROCE_LINK_TABLE_NXT_PTR_S);
 		}
 	}
-	roce_set_field(req_a->depth_pgsz_init_en,
-		       CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
+	roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M,
+		       CFG_LLM_INIT_EN_S, 1);
 
 	return hns_roce_cmq_send(hr_dev, desc, 2);
 }
@@ -2141,11 +2419,9 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
 
 	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
 
-	roce_set_field(sgid_tb->table_idx_rsv,
-		       CFG_SGID_TB_TABLE_IDX_M,
+	roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
 		       CFG_SGID_TB_TABLE_IDX_S, gid_index);
-	roce_set_field(sgid_tb->vf_sgid_type_rsv,
-		       CFG_SGID_TB_VF_SGID_TYPE_M,
+	roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
 		       CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
 
 	p = (u32 *)&gid->raw[0];
@@ -2416,11 +2692,10 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 		       V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
 	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
 		       V2_MPT_BYTE_4_PD_S, mw->pdn);
-	roce_set_field(mpt_entry->byte_4_pd_hop_st,
-		       V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
 		       V2_MPT_BYTE_4_PBL_HOP_NUM_S,
-		       mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
-		       0 : mw->pbl_hop_num);
+		       mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+							       mw->pbl_hop_num);
 	roce_set_field(mpt_entry->byte_4_pd_hop_st,
 		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
 		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
@@ -2561,8 +2836,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
 		       V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
-		       V2_CQC_BYTE_4_SHIFT_S,
-		       ilog2(hr_cq->cq_depth));
+		       V2_CQC_BYTE_4_SHIFT_S, ilog2(hr_cq->cq_depth));
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
 		       V2_CQC_BYTE_4_CEQN_S, hr_cq->vector);
 
@@ -2687,6 +2961,55 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 	return 0;
 }
 
+static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
+		   int num_entries, struct ib_wc *wc)
+{
+	unsigned int left;
+	int npolled = 0;
+
+	left = wq->head - wq->tail;
+	if (left == 0)
+		return 0;
+
+	left = min_t(unsigned int, (unsigned int)num_entries, left);
+	while (npolled < left) {
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->vendor_err = 0;
+		wc->qp = &hr_qp->ibqp;
+
+		wq->tail++;
+		wc++;
+		npolled++;
+	}
+
+	return npolled;
+}
+
+static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
+				  struct ib_wc *wc)
+{
+	struct hns_roce_qp *hr_qp;
+	int npolled = 0;
+
+	list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+		npolled += sw_comp(hr_qp, &hr_qp->sq,
+				   num_entries - npolled, wc + npolled);
+		if (npolled >= num_entries)
+			goto out;
+	}
+
+	list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+		npolled += sw_comp(hr_qp, &hr_qp->rq,
+				   num_entries - npolled, wc + npolled);
+		if (npolled >= num_entries)
+			goto out;
+	}
+
+out:
+	return npolled;
+}
+
 static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
 {
@@ -2967,6 +3290,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
 			       struct ib_wc *wc)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
 	struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
 	struct hns_roce_qp *cur_qp = NULL;
 	unsigned long flags;
@@ -2974,6 +3298,18 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
 
 	spin_lock_irqsave(&hr_cq->lock, flags);
 
+	/*
+	 * When the device starts to reset, the state is RST_DOWN. At this
+	 * time there may still be some valid CQEs in the hardware that have
+	 * not been polled, so it is not allowed to switch to software polling
+	 * immediately. Once the state changes to UNINIT, no CQEs remain in
+	 * the hardware and the CQ can be polled in software.
+	 */
+	if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) {
+		npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc);
+		goto out;
+	}
+
 	for (npolled = 0; npolled < num_entries; ++npolled) {
 		if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
 			break;
@@ -2985,6 +3321,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
 		hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
 	}
 
+out:
 	spin_unlock_irqrestore(&hr_cq->lock, flags);
 
 	return npolled;
@@ -3159,8 +3496,6 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
 }
 
 static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
-				 enum ib_qp_state cur_state,
-				 enum ib_qp_state new_state,
 				 struct hns_roce_v2_qp_context *context,
 				 struct hns_roce_qp *hr_qp)
 {
@@ -3210,6 +3545,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
 	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
 }
 
 static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
@@ -3577,6 +3915,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(attr->qp_access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	} else {
 		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3592,6 +3936,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	}
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
@@ -3838,13 +4187,11 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 	/* Configure GID index */
 	port_num = rdma_ah_get_port_num(&attr->ah_attr);
 	roce_set_field(context->byte_20_smac_sgid_idx,
-		       V2_QPC_BYTE_20_SGID_IDX_M,
-		       V2_QPC_BYTE_20_SGID_IDX_S,
+		       V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
 		       hns_get_gid_index(hr_dev, port_num - 1,
 					 grh->sgid_index));
 	roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
-		       V2_QPC_BYTE_20_SGID_IDX_M,
-		       V2_QPC_BYTE_20_SGID_IDX_S, 0);
+		       V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
 	memcpy(&(context->dmac), dmac, sizeof(u32));
 	roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
 		       V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
@@ -4234,8 +4581,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
 
 		roce_set_field(context->byte_212_lsn,
 			       V2_QPC_BYTE_212_RETRY_CNT_M,
-			       V2_QPC_BYTE_212_RETRY_CNT_S,
-			       attr->retry_cnt);
+			       V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt);
 		roce_set_field(qpc_mask->byte_212_lsn,
 			       V2_QPC_BYTE_212_RETRY_CNT_M,
 			       V2_QPC_BYTE_212_RETRY_CNT_S, 0);
@@ -4443,7 +4789,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		       V2_QPC_BYTE_60_QP_ST_S, 0);
 
 	/* SW pass context to HW */
-	ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, ctx, hr_qp);
+	ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp);
 	if (ret) {
 		dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
 		goto out;
@@ -4464,7 +4810,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		hr_qp->rq.tail = 0;
 		hr_qp->sq.head = 0;
 		hr_qp->sq.tail = 0;
-		hr_qp->sq_next_wqe = 0;
 		hr_qp->next_sge = 0;
 		if (hr_qp->rq.wqe_cnt)
 			*hr_qp->rdb.db_record = 0;
@@ -4649,6 +4994,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 {
 	struct hns_roce_cq *send_cq, *recv_cq;
 	struct ib_device *ibdev = &hr_dev->ib_dev;
+	unsigned long flags;
 	int ret = 0;
 
 	if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
@@ -4659,21 +5005,32 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 			ibdev_err(ibdev, "modify QP to Reset failed.\n");
 	}
 
-	send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
-	recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+	send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+	recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
 
+	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
 	hns_roce_lock_cqs(send_cq, recv_cq);
 
+	list_del(&hr_qp->node);
+	list_del(&hr_qp->sq_node);
+	list_del(&hr_qp->rq_node);
+
 	if (!udata) {
-		__hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
-				       to_hr_srq(hr_qp->ibqp.srq) : NULL);
-		if (send_cq != recv_cq)
+		if (recv_cq)
+			__hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
+					       (hr_qp->ibqp.srq ?
+						to_hr_srq(hr_qp->ibqp.srq) :
+						NULL));
+
+		if (send_cq && send_cq != recv_cq)
 			__hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
+
 	}
 
 	hns_roce_qp_remove(hr_dev, hr_qp);
 
 	hns_roce_unlock_cqs(send_cq, recv_cq);
+	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
 
 	hns_roce_qp_free(hr_dev, hr_qp);
 
@@ -5155,8 +5512,7 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
 		 */
 		dma_rmb();
 
-		cqn = roce_get_field(ceqe->comp,
-				     HNS_ROCE_V2_CEQE_COMP_CQN_M,
+		cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M,
 				     HNS_ROCE_V2_CEQE_COMP_CQN_S);
 
 		hns_roce_cq_completion(hr_dev, cqn);
@@ -5409,126 +5765,98 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
 		eq->eqe_ba = eq->l0_dma;
 
 	/* set eqc state */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_EQ_ST_M,
-		       HNS_ROCE_EQC_EQ_ST_S,
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
 		       HNS_ROCE_V2_EQ_STATE_VALID);
 
 	/* set eqe hop num */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_HOP_NUM_M,
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_HOP_NUM_M,
 		       HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num);
 
 	/* set eqc over_ignore */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_OVER_IGNORE_M,
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_OVER_IGNORE_M,
 		       HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore);
 
 	/* set eqc coalesce */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_COALESCE_M,
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_COALESCE_M,
 		       HNS_ROCE_EQC_COALESCE_S, eq->coalesce);
 
 	/* set eqc arm_state */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_ARM_ST_M,
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_ARM_ST_M,
 		       HNS_ROCE_EQC_ARM_ST_S, eq->arm_st);
 
 	/* set eqn */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_EQN_M,
-		       HNS_ROCE_EQC_EQN_S, eq->eqn);
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQN_M, HNS_ROCE_EQC_EQN_S,
+		       eq->eqn);
 
 	/* set eqe_cnt */
-	roce_set_field(eqc->byte_4,
-		       HNS_ROCE_EQC_EQE_CNT_M,
-		       HNS_ROCE_EQC_EQE_CNT_S,
-		       HNS_ROCE_EQ_INIT_EQE_CNT);
+	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQE_CNT_M,
+		       HNS_ROCE_EQC_EQE_CNT_S, HNS_ROCE_EQ_INIT_EQE_CNT);
 
 	/* set eqe_ba_pg_sz */
-	roce_set_field(eqc->byte_8,
-		       HNS_ROCE_EQC_BA_PG_SZ_M,
+	roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M,
 		       HNS_ROCE_EQC_BA_PG_SZ_S,
 		       eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET);
 
 	/* set eqe_buf_pg_sz */
-	roce_set_field(eqc->byte_8,
-		       HNS_ROCE_EQC_BUF_PG_SZ_M,
+	roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M,
 		       HNS_ROCE_EQC_BUF_PG_SZ_S,
 		       eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET);
 
 	/* set eq_producer_idx */
-	roce_set_field(eqc->byte_8,
-		       HNS_ROCE_EQC_PROD_INDX_M,
-		       HNS_ROCE_EQC_PROD_INDX_S,
-		       HNS_ROCE_EQ_INIT_PROD_IDX);
+	roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M,
+		       HNS_ROCE_EQC_PROD_INDX_S, HNS_ROCE_EQ_INIT_PROD_IDX);
 
 	/* set eq_max_cnt */
-	roce_set_field(eqc->byte_12,
-		       HNS_ROCE_EQC_MAX_CNT_M,
+	roce_set_field(eqc->byte_12, HNS_ROCE_EQC_MAX_CNT_M,
 		       HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt);
 
 	/* set eq_period */
-	roce_set_field(eqc->byte_12,
-		       HNS_ROCE_EQC_PERIOD_M,
+	roce_set_field(eqc->byte_12, HNS_ROCE_EQC_PERIOD_M,
 		       HNS_ROCE_EQC_PERIOD_S, eq->eq_period);
 
 	/* set eqe_report_timer */
-	roce_set_field(eqc->eqe_report_timer,
-		       HNS_ROCE_EQC_REPORT_TIMER_M,
+	roce_set_field(eqc->eqe_report_timer, HNS_ROCE_EQC_REPORT_TIMER_M,
 		       HNS_ROCE_EQC_REPORT_TIMER_S,
 		       HNS_ROCE_EQ_INIT_REPORT_TIMER);
 
 	/* set eqe_ba [34:3] */
-	roce_set_field(eqc->eqe_ba0,
-		       HNS_ROCE_EQC_EQE_BA_L_M,
+	roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M,
 		       HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
 
 	/* set eqe_ba [64:35] */
-	roce_set_field(eqc->eqe_ba1,
-		       HNS_ROCE_EQC_EQE_BA_H_M,
+	roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M,
 		       HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
 
 	/* set eq shift */
-	roce_set_field(eqc->byte_28,
-		       HNS_ROCE_EQC_SHIFT_M,
-		       HNS_ROCE_EQC_SHIFT_S, eq->shift);
+	roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S,
+		       eq->shift);
 
 	/* set eq MSI_IDX */
-	roce_set_field(eqc->byte_28,
-		       HNS_ROCE_EQC_MSI_INDX_M,
-		       HNS_ROCE_EQC_MSI_INDX_S,
-		       HNS_ROCE_EQ_INIT_MSI_IDX);
+	roce_set_field(eqc->byte_28, HNS_ROCE_EQC_MSI_INDX_M,
+		       HNS_ROCE_EQC_MSI_INDX_S, HNS_ROCE_EQ_INIT_MSI_IDX);
 
 	/* set cur_eqe_ba [27:12] */
-	roce_set_field(eqc->byte_28,
-		       HNS_ROCE_EQC_CUR_EQE_BA_L_M,
+	roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M,
 		       HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
 
 	/* set cur_eqe_ba [59:28] */
-	roce_set_field(eqc->byte_32,
-		       HNS_ROCE_EQC_CUR_EQE_BA_M_M,
+	roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M,
 		       HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
 
 	/* set cur_eqe_ba [63:60] */
-	roce_set_field(eqc->byte_36,
-		       HNS_ROCE_EQC_CUR_EQE_BA_H_M,
+	roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M,
 		       HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
 
 	/* set eq consumer idx */
-	roce_set_field(eqc->byte_36,
-		       HNS_ROCE_EQC_CONS_INDX_M,
-		       HNS_ROCE_EQC_CONS_INDX_S,
-		       HNS_ROCE_EQ_INIT_CONS_IDX);
+	roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M,
+		       HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX);
 
 	/* set nex_eqe_ba[43:12] */
-	roce_set_field(eqc->nxt_eqe_ba0,
-		       HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+	roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M,
 		       HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
 
 	/* set nex_eqe_ba[63:44] */
-	roce_set_field(eqc->nxt_eqe_ba1,
-		       HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+	roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M,
 		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
 }
 
@@ -5828,18 +6156,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
 
 	/* irq contains: abnormal + AEQ + CEQ */
 	for (j = 0; j < other_num; j++)
-		snprintf((char *)hr_dev->irq_names[j],
-			 HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j);
+		snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+			 "hns-abn-%d", j);
 
 	for (j = other_num; j < (other_num + aeq_num); j++)
-		snprintf((char *)hr_dev->irq_names[j],
-			 HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
-			 j - other_num);
+		snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+			 "hns-aeq-%d", j - other_num);
 
 	for (j = (other_num + aeq_num); j < irq_num; j++)
-		snprintf((char *)hr_dev->irq_names[j],
-			 HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
-			 j - other_num - aeq_num);
+		snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+			 "hns-ceq-%d", j - other_num - aeq_num);
 
 	for (j = 0; j < irq_num; j++) {
 		if (j < other_num)
@@ -6448,6 +6774,10 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 		return;
 
 	handle->priv = NULL;
+
+	hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
+	hns_roce_handle_device_err(hr_dev);
+
 	hns_roce_exit(hr_dev);
 	kfree(hr_dev->priv);
 	ib_dealloc_device(&hr_dev->ib_dev);
@@ -6509,7 +6839,6 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
 	struct hns_roce_dev *hr_dev;
-	struct ib_event event;
 
 	if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
 		set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
@@ -6527,10 +6856,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 	hr_dev->active = false;
 	hr_dev->dis_db = true;
 
-	event.event = IB_EVENT_DEVICE_FATAL;
-	event.device = &hr_dev->ib_dev;
-	event.element.port_num = 1;
-	ib_dispatch_event(&event);
+	hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 76a14db7028d..2a117ff6a6be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,10 +81,12 @@
 #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
 #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTT_ENTRY_SZ		64
+#define HNS_ROCE_V2_IDX_ENTRY_SZ		4
 #define HNS_ROCE_V2_CQE_ENTRY_SIZE		32
 #define HNS_ROCE_V2_SCCC_ENTRY_SZ		32
 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ		PAGE_SIZE
@@ -109,7 +111,12 @@
 #define HNS_ROCE_PBL_HOP_NUM			2
 #define HNS_ROCE_EQE_HOP_NUM			2
 #define HNS_ROCE_IDX_HOP_NUM			1
+#define HNS_ROCE_SQWQE_HOP_NUM			2
+#define HNS_ROCE_EXT_SGE_HOP_NUM		1
+#define HNS_ROCE_RQWQE_HOP_NUM			2
 
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K	6
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K		2
 #define HNS_ROCE_V2_GID_INDEX_NUM		256
 
 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE		(1 << 18)
@@ -237,6 +244,7 @@ enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_CFG_EXT_LLM			= 0x8403,
 	HNS_ROCE_OPC_CFG_TMOUT_LLM			= 0x8404,
 	HNS_ROCE_OPC_QUERY_PF_TIMER_RES			= 0x8406,
+	HNS_ROCE_OPC_QUERY_PF_CAPS_NUM                  = 0x8408,
 	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
 	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
 	HNS_ROCE_OPC_POST_MB				= 0x8504,
@@ -643,7 +651,7 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_76_ATE_S 27
 
 #define	V2_QPC_BYTE_76_RQIE_S 28
-
+#define	V2_QPC_BYTE_76_EXT_ATE_S 29
 #define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 #define	V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@@ -1569,6 +1577,154 @@ struct hns_roce_cfg_smac_tb {
 #define CFG_SMAC_TB_VF_SMAC_H_S 0
 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
 
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+struct hns_roce_query_pf_caps_a {
+	u8 number_ports;
+	u8 local_ca_ack_delay;
+	__le16 max_sq_sg;
+	__le16 max_sq_inline;
+	__le16 max_rq_sg;
+	__le32 max_extend_sg;
+	__le16 num_qpc_timer;
+	__le16 num_cqc_timer;
+	__le16 max_srq_sges;
+	u8 num_aeq_vectors;
+	u8 num_other_vectors;
+	u8 max_sq_desc_sz;
+	u8 max_rq_desc_sz;
+	u8 max_srq_desc_sz;
+	u8 cq_entry_sz;
+};
+
+struct hns_roce_query_pf_caps_b {
+	u8 mtpt_entry_sz;
+	u8 irrl_entry_sz;
+	u8 trrl_entry_sz;
+	u8 cqc_entry_sz;
+	u8 srqc_entry_sz;
+	u8 idx_entry_sz;
+	u8 scc_ctx_entry_sz;
+	u8 max_mtu;
+	__le16 qpc_entry_sz;
+	__le16 qpc_timer_entry_sz;
+	__le16 cqc_timer_entry_sz;
+	u8 min_cqes;
+	u8 min_wqes;
+	__le32 page_size_cap;
+	u8 pkey_table_len;
+	u8 phy_num_uars;
+	u8 ctx_hop_num;
+	u8 pbl_hop_num;
+};
+
+struct hns_roce_query_pf_caps_c {
+	__le32 cap_flags_num_pds;
+	__le32 max_gid_num_cqs;
+	__le32 cq_depth;
+	__le32 num_mrws;
+	__le32 ord_num_qps;
+	__le16 sq_depth;
+	__le16 rq_depth;
+};
+
+#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20
+#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20)
+
+#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20
+#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20)
+
+#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0)
+
+#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0
+#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20
+#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20)
+
+struct hns_roce_query_pf_caps_d {
+	__le32 wq_hop_num_max_srqs;
+	__le16 srq_depth;
+	__le16 rsv;
+	__le32 num_ceqs_ceq_depth;
+	__le32 arm_st_aeq_depth;
+	__le32 num_uars_rsv_pds;
+	__le32 rsv_uars_rsv_qps;
+};
+#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
+#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(20, 0)
+
+#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
+#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
+
+#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22
+#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22)
+
+#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24
+#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24)
+
+#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0)
+
+#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22
+#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22)
+
+#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0
+#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0)
+
+#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22
+#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22)
+
+#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24
+#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24)
+
+#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0
+#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20
+#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20)
+
+#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0
+#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20
+#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20)
+
+struct hns_roce_query_pf_caps_e {
+	__le32 chunk_size_shift_rsv_mrws;
+	__le32 rsv_cqs;
+	__le32 rsv_srqs;
+	__le32 rsv_lkey;
+	__le16 ceq_max_cnt;
+	__le16 ceq_period;
+	__le16 aeq_max_cnt;
+	__le16 aeq_period;
+};
+
+#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20
+#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20)
+
+#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0)
+
+#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0
+#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0)
+
 struct hns_roce_cmq_desc {
 	__le16 opcode;
 	__le16 flag;
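The paired _S/_M macros added above follow the driver's usual convention: _M is a GENMASK() over the packed dword and _S is the shift of the field's low bit. A minimal sketch of how a consumer extracts them (the helper and the resp_c pointer are illustrative, mirroring the existing roce_get_field() pattern rather than quoting new driver code):

static inline u32 caps_get_field(__le32 origin, u32 mask, u32 shift)
{
	return (le32_to_cpu(origin) & mask) >> shift;
}

/* e.g. PD count and capability flags share one dword in caps page C */
u32 num_pds   = caps_get_field(resp_c->cap_flags_num_pds,
			       V2_QUERY_PF_CAPS_C_NUM_PDS_M,
			       V2_QUERY_PF_CAPS_C_NUM_PDS_S);
u32 cap_flags = caps_get_field(resp_c->cap_flags_num_pds,
			       V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
			       V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);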
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 854ef6e74788..d0031d559213 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -90,7 +90,7 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
 static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
-	struct ib_gid_attr zattr = { };
+	struct ib_gid_attr zattr = {};
 	u8 port = attr->port_num - 1;
 	int ret;
 
@@ -210,7 +210,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 	props->max_pkeys = 1;
 	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
-		props->max_srq = hr_dev->caps.max_srqs;
+		props->max_srq = hr_dev->caps.num_srqs;
 		props->max_srq_wr = hr_dev->caps.max_srq_wrs;
 		props->max_srq_sge = hr_dev->caps.max_srq_sges;
 	}
@@ -259,11 +259,12 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
 
 	mtu = iboe_get_mtu(net_dev->mtu);
 	props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
-	props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
-			IB_PORT_ACTIVE : IB_PORT_DOWN;
-	props->phys_state = (props->state == IB_PORT_ACTIVE) ?
-			     IB_PORT_PHYS_STATE_LINK_UP :
-			     IB_PORT_PHYS_STATE_DISABLED;
+	props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
+			       IB_PORT_ACTIVE :
+			       IB_PORT_DOWN;
+	props->phys_state = props->state == IB_PORT_ACTIVE ?
+				    IB_PORT_PHYS_STATE_LINK_UP :
+				    IB_PORT_PHYS_STATE_DISABLED;
 
 	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 
@@ -481,13 +482,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 
 	ib_dev = &hr_dev->ib_dev;
 
-	ib_dev->node_type		= RDMA_NODE_IB_CA;
-	ib_dev->dev.parent		= dev;
+	ib_dev->node_type = RDMA_NODE_IB_CA;
+	ib_dev->dev.parent = dev;
 
-	ib_dev->phys_port_cnt		= hr_dev->caps.num_ports;
-	ib_dev->local_dma_lkey		= hr_dev->caps.reserved_lkey;
-	ib_dev->num_comp_vectors	= hr_dev->caps.num_comp_vectors;
-	ib_dev->uverbs_cmd_mask		=
+	ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
+	ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
+	ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
+	ib_dev->uverbs_cmd_mask =
 		(1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
 		(1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
 		(1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
@@ -503,8 +504,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 		(1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
 		(1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
 
-	ib_dev->uverbs_ex_cmd_mask |=
-		(1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+	ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
 		ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
@@ -589,11 +589,13 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
 
 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
 		ret = hns_roce_init_hem_table(hr_dev,
-				      &hr_dev->mr_table.mtt_cqe_table,
-				      HEM_TYPE_CQE, hr_dev->caps.mtt_entry_sz,
-				      hr_dev->caps.num_cqe_segs, 1);
+					      &hr_dev->mr_table.mtt_cqe_table,
+					      HEM_TYPE_CQE,
+					      hr_dev->caps.mtt_entry_sz,
+					      hr_dev->caps.num_cqe_segs, 1);
 		if (ret) {
-			dev_err(dev, "Failed to init MTT CQE context memory, aborting.\n");
+			dev_err(dev,
+				"Failed to init CQE context memory, aborting.\n");
 			goto err_unmap_cqe;
 		}
 	}
@@ -633,7 +635,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
 					      hr_dev->caps.num_qps, 1);
 		if (ret) {
 			dev_err(dev,
-			       "Failed to init trrl_table memory, aborting.\n");
+				"Failed to init trrl_table memory, aborting.\n");
 			goto err_unmap_irrl;
 		}
 	}
@@ -653,7 +655,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
 					      hr_dev->caps.num_srqs, 1);
 		if (ret) {
 			dev_err(dev,
-			      "Failed to init SRQ context memory, aborting.\n");
+				"Failed to init SRQ context memory, aborting.\n");
 			goto err_unmap_cq;
 		}
 	}
@@ -692,33 +694,31 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
 					      hr_dev->caps.num_qps, 1);
 		if (ret) {
 			dev_err(dev,
-			      "Failed to init SCC context memory, aborting.\n");
+				"Failed to init SCC context memory, aborting.\n");
 			goto err_unmap_idx;
 		}
 	}
 
 	if (hr_dev->caps.qpc_timer_entry_sz) {
-		ret = hns_roce_init_hem_table(hr_dev,
-					      &hr_dev->qpc_timer_table,
+		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
 					      HEM_TYPE_QPC_TIMER,
 					      hr_dev->caps.qpc_timer_entry_sz,
 					      hr_dev->caps.num_qpc_timer, 1);
 		if (ret) {
 			dev_err(dev,
-			      "Failed to init QPC timer memory, aborting.\n");
+				"Failed to init QPC timer memory, aborting.\n");
 			goto err_unmap_ctx;
 		}
 	}
 
 	if (hr_dev->caps.cqc_timer_entry_sz) {
-		ret = hns_roce_init_hem_table(hr_dev,
-					      &hr_dev->cqc_timer_table,
+		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
 					      HEM_TYPE_CQC_TIMER,
 					      hr_dev->caps.cqc_timer_entry_sz,
 					      hr_dev->caps.num_cqc_timer, 1);
 		if (ret) {
 			dev_err(dev,
-			      "Failed to init CQC timer memory, aborting.\n");
+				"Failed to init CQC timer memory, aborting.\n");
 			goto err_unmap_qpc_timer;
 		}
 	}
@@ -727,8 +727,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
 
 err_unmap_qpc_timer:
 	if (hr_dev->caps.qpc_timer_entry_sz)
-		hns_roce_cleanup_hem_table(hr_dev,
-					   &hr_dev->qpc_timer_table);
+		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
 
 err_unmap_ctx:
 	if (hr_dev->caps.sccc_entry_sz)
@@ -863,6 +862,50 @@ err_uar_table_free:
 	return ret;
 }
 
+static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
+{
+	struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hr_cq->lock, flags);
+	if (cq->comp_handler) {
+		if (!hr_cq->is_armed) {
+			hr_cq->is_armed = 1;
+			list_add_tail(&hr_cq->node, cq_list);
+		}
+	}
+	spin_unlock_irqrestore(&hr_cq->lock, flags);
+}
+
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_qp *hr_qp;
+	struct hns_roce_cq *hr_cq;
+	struct list_head cq_list;
+	unsigned long flags_qp;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&cq_list);
+
+	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+	list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
+		spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
+		if (hr_qp->sq.tail != hr_qp->sq.head)
+			check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
+		spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);
+
+		spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
+		if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
+			check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
+		spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
+	}
+
+	list_for_each_entry(hr_cq, &cq_list, node)
+		hns_roce_cq_completion(hr_dev, hr_cq->cqn);
+
+	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
 int hns_roce_init(struct hns_roce_dev *hr_dev)
 {
 	int ret;
@@ -933,6 +976,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 		}
 	}
 
+	INIT_LIST_HEAD(&hr_dev->qp_list);
+	spin_lock_init(&hr_dev->qp_list_lock);
+
 	ret = hns_roce_register_device(hr_dev);
 	if (ret)
 		goto error_failed_register_device;
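The new error handler and add_qp_to_list() rely on one consistent lock order, worth spelling out since both paths nest three levels. A sketch of the hierarchy as implied by the hunks above (annotation only, not code from the patch):

/*
 *   hr_dev->qp_list_lock               outermost, guards qp_list
 *     hr_qp->sq.lock / hr_qp->rq.lock  sample the ring head/tail
 *       hr_cq->lock                    taken in check_and_get_armed_cq()
 *
 * add_qp_to_list() nests hns_roce_lock_cqs() inside qp_list_lock in the
 * same order, so QP creation and the device-error flush cannot deadlock.
 */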
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 3ff610549c74..b9898e71655a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1064,8 +1064,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
 		if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
 			if (page_addr & ((1 << mtt->page_shift) - 1)) {
 				dev_err(dev,
-					"page_addr 0x%llx is not page_shift %d alignment!\n",
-					page_addr, mtt->page_shift);
+					"page_addr is not page_shift %d alignment!\n",
+					mtt->page_shift);
 				ret = -EINVAL;
 				goto out;
 			}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index eb2ee6a581aa..3257ad11be48 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -393,40 +393,38 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 
 	/* Get buf size, SQ and RQ are aligned to page_size */
 	if (hr_dev->caps.max_sq_sg <= 2) {
-		hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt <<
+		hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
 					     hr_qp->rq.wqe_shift), PAGE_SIZE) +
-				   HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
+				   round_up((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 
 		hr_qp->sq.offset = 0;
-		hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
+		hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 	} else {
 		page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 		hr_qp->sge.sge_cnt = ex_sge_num ?
 		   max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
-		hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt <<
+		hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt <<
 					     hr_qp->rq.wqe_shift), page_size) +
-				   HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt <<
+				   round_up((hr_qp->sge.sge_cnt <<
 					     hr_qp->sge.sge_shift), page_size) +
-				   HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
+				   round_up((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), page_size);
 
 		hr_qp->sq.offset = 0;
 		if (ex_sge_num) {
-			hr_qp->sge.offset = HNS_ROCE_ALIGN_UP(
-							(hr_qp->sq.wqe_cnt <<
-							hr_qp->sq.wqe_shift),
-							page_size);
+			hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt <<
+						      hr_qp->sq.wqe_shift),
+						     page_size);
 			hr_qp->rq.offset = hr_qp->sge.offset +
-					HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt <<
-						hr_qp->sge.sge_shift),
-						page_size);
+					   round_up((hr_qp->sge.sge_cnt <<
+						     hr_qp->sge.sge_shift),
+						    page_size);
 		} else {
-			hr_qp->rq.offset = HNS_ROCE_ALIGN_UP(
-							(hr_qp->sq.wqe_cnt <<
-							hr_qp->sq.wqe_shift),
-							page_size);
+			hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt <<
+						     hr_qp->sq.wqe_shift),
+						    page_size);
 		}
 	}
 
@@ -593,20 +591,18 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
 	page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 	hr_qp->sq.offset = 0;
-	size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
-				 page_size);
+	size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
 
 	if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
 		hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
-					(u32)hr_qp->sge.sge_cnt);
+					 (u32)hr_qp->sge.sge_cnt);
 		hr_qp->sge.offset = size;
-		size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt <<
-					  hr_qp->sge.sge_shift, page_size);
+		size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift,
+				 page_size);
 	}
 
 	hr_qp->rq.offset = size;
-	size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
-				  page_size);
+	size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size);
 	hr_qp->buff_size = size;
 
 	/* Get the number of send WRs and SGEs */
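The switch from HNS_ROCE_ALIGN_UP to the kernel's round_up() is safe here because every alignment passed in is a power of two (PAGE_SIZE, or page_size = 1 << (mtt_buf_pg_sz + PAGE_SHIFT)), and round_up() only works for power-of-two alignments. An equivalent open-coded form under that assumption (the macro name is illustrative; the kernel's ALIGN() expands to the same expression):

/* equivalent to round_up(x, a) when 'a' is a power of two */
#define ALIGN_UP_POW2(x, a)	(((x) + (a) - 1) & ~((a) - 1))

size_t sq_bytes = ALIGN_UP_POW2(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
				PAGE_SIZE);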
@@ -681,6 +677,29 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
 	kfree(hr_qp->rq_inl_buf.wqe_list);
 }
 
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+			   struct hns_roce_qp *hr_qp,
+			   struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+	struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+	unsigned long flags;
+
+	hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+	hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+	hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+	list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+	if (hr_send_cq)
+		list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+	if (hr_recv_cq)
+		list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+	hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
 static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 				     struct ib_pd *ib_pd,
 				     struct ib_qp_init_attr *init_attr,
@@ -950,6 +969,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	}
 
 	hr_qp->event = hns_roce_ib_qp_event;
+
+	add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq);
+
 	hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
 
 	return 0;
@@ -1232,7 +1254,16 @@ out:
 void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
 		       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
-	if (send_cq == recv_cq) {
+	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+		__acquire(&send_cq->lock);
+		__acquire(&recv_cq->lock);
+	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+		spin_lock_irq(&send_cq->lock);
+		__acquire(&recv_cq->lock);
+	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+		spin_lock_irq(&recv_cq->lock);
+		__acquire(&send_cq->lock);
+	} else if (send_cq == recv_cq) {
 		spin_lock_irq(&send_cq->lock);
 		__acquire(&recv_cq->lock);
 	} else if (send_cq->cqn < recv_cq->cqn) {
@@ -1248,7 +1279,16 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
 			 struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
 			 __releases(&recv_cq->lock)
 {
-	if (send_cq == recv_cq) {
+	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+		__release(&recv_cq->lock);
+		__release(&send_cq->lock);
+	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+		__release(&recv_cq->lock);
+		spin_unlock(&send_cq->lock);
+	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+		__release(&send_cq->lock);
+		spin_unlock(&recv_cq->lock);
+	} else if (send_cq == recv_cq) {
 		__release(&recv_cq->lock);
 		spin_unlock_irq(&send_cq->lock);
 	} else if (send_cq->cqn < recv_cq->cqn) {
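The extra branches let the CQ lock helpers tolerate one or both CQ pointers being NULL, which add_qp_to_list() above can now pass. The __acquire()/__release() calls are sparse-only annotations (no-ops in a normal build) that keep the static lock-balance check consistent across all branches. A minimal usage sketch for a QP created with only a send CQ (illustrative):

hns_roce_lock_cqs(hr_send_cq, NULL);	/* takes only &hr_send_cq->lock */
list_add_tail(&hr_qp->node, &hr_dev->qp_list);
list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
hns_roce_unlock_cqs(hr_send_cq, NULL);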
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index d44cf33df81a..238614370927 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1225,6 +1225,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
 			const struct in_ifaddr *ifa;
 
 			idev = in_dev_get(dev);
+			if (!idev)
+				continue;
 			in_dev_for_each_ifa_rtnl(ifa, idev) {
 				i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
 					    "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index ecd6cadd529a..b591861934b3 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -186,23 +186,6 @@ out:
 	kfree(ent);
 }
 
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
-	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
-	struct rb_root *sl_id_map = &sriov->sl_id_map;
-	struct id_map_entry *ent, *found_ent;
-
-	spin_lock(&sriov->id_map_lock);
-	ent = xa_erase(&sriov->pv_id_table, pv_cm_id);
-	if (!ent)
-		goto out;
-	found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
-	if (found_ent && found_ent == ent)
-		rb_erase(&found_ent->node, sl_id_map);
-out:
-	spin_unlock(&sriov->id_map_lock);
-}
-
 static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
 {
 	struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
@@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
 	spin_lock(&sriov->id_map_lock);
 	spin_lock_irqsave(&sriov->going_down_lock, flags);
 	/* Make sure that there is no schedule inside the scheduled work. */
-	if (!sriov->is_going_down) {
+	if (!sriov->is_going_down && !id->scheduled_delete) {
 		id->scheduled_delete = 1;
 		schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
 	}
@@ -341,9 +324,6 @@ cont:
 
 	if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
 		schedule_delayed(ibdev, id);
-	else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
-		id_map_find_del(ibdev, pv_cm_id);
-
 	return 0;
 }
 
@@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
 		*slave = id->slave_id;
 	set_remote_comm_id(mad, id->sl_cm_id);
 
-	if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+	if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
+	    mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
 		schedule_delayed(ibdev, id);
-	else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
-			mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
-		id_map_find_del(ibdev, (int) pv_cm_id);
-	}
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index a57033d4b0e5..f8b936b76dcd 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -568,18 +568,13 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
 	wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
-static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum)
 {
-	return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4      |
-				      MLX4_CQE_STATUS_IPV4F     |
-				      MLX4_CQE_STATUS_IPV4OPT   |
-				      MLX4_CQE_STATUS_IPV6      |
-				      MLX4_CQE_STATUS_IPOK)) ==
-		cpu_to_be16(MLX4_CQE_STATUS_IPV4        |
-			    MLX4_CQE_STATUS_IPOK))              &&
-		(status & cpu_to_be16(MLX4_CQE_STATUS_UDP       |
-				      MLX4_CQE_STATUS_TCP))     &&
-		checksum == cpu_to_be16(0xffff);
+	return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) ||
+		((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+		 (status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+				       MLX4_CQE_STATUS_UDP)) &&
+		 (checksum == cpu_to_be16(0xffff))));
 }
 
 static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
@@ -855,6 +850,7 @@ repoll:
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
 		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
 		wc->wc_flags	  |= mlx4_ib_ipoib_csum_ok(cqe->status,
+					cqe->badfcs_enc,
 					cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
 		if (is_eth) {
 			wc->slid = 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 34055cbab38c..2f5d9b181848 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
 	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 }
 
+static void free_gid_entry(struct gid_entry *entry)
+{
+	memset(&entry->gid, 0, sizeof(entry->gid));
+	kfree(entry->ctx);
+	entry->ctx = NULL;
+}
+
 static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
@@ -313,6 +320,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 				     GFP_ATOMIC);
 		if (!gids) {
 			ret = -ENOMEM;
+			*context = NULL;
+			free_gid_entry(&port_gid_table->gids[free]);
 		} else {
 			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
 				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
@@ -324,6 +333,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 
 	if (!ret && hw_update) {
 		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+		if (ret) {
+			spin_lock_bh(&iboe->lock);
+			*context = NULL;
+			free_gid_entry(&port_gid_table->gids[free]);
+			spin_unlock_bh(&iboe->lock);
+		}
 		kfree(gids);
 	}
 
@@ -353,10 +368,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
 		if (!ctx->refcount) {
 			unsigned int real_index = ctx->real_index;
 
-			memset(&port_gid_table->gids[real_index].gid, 0,
-			       sizeof(port_gid_table->gids[real_index].gid));
-			kfree(port_gid_table->gids[real_index].ctx);
-			port_gid_table->gids[real_index].ctx = NULL;
+			free_gid_entry(&port_gid_table->gids[real_index]);
 			hw_update = 1;
 		}
 	}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index cb23cdb9389a..26425dd2d960 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -849,7 +849,7 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
 	 * reused for further WQN allocations.
 	 * The next created WQ will allocate a new range.
 	 */
-		range->dirty = 1;
+		range->dirty = true;
 	}
 
 	mutex_unlock(&context->wqn_ranges_mutex);
@@ -3085,7 +3085,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
 		}
 		if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
 			vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
-			is_vlan = 1;
+			is_vlan = true;
 		}
 	}
 	err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 685b8ed96b4e..d7efc9f6daf0 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -30,7 +30,7 @@ enum devx_obj_flags {
 struct devx_async_data {
 	struct mlx5_ib_dev *mdev;
 	struct list_head list;
-	struct ib_uobject *fd_uobj;
+	struct devx_async_cmd_event_file *ev_file;
 	struct mlx5_async_work cb_work;
 	u16 cmd_out_len;
 	/* must be last field in this structure */
@@ -72,7 +72,6 @@ struct devx_event_subscription {
 	struct rcu_head	rcu;
 	u64 cookie;
 	struct devx_async_event_file *ev_file;
-	struct file *filp; /* Upon hot unplug we need a direct access to */
 	struct eventfd_ctx *eventfd;
 };
 
@@ -1674,21 +1673,20 @@ static void devx_query_callback(int status, struct mlx5_async_work *context)
 {
 	struct devx_async_data *async_data =
 		container_of(context, struct devx_async_data, cb_work);
-	struct ib_uobject *fd_uobj = async_data->fd_uobj;
-	struct devx_async_cmd_event_file *ev_file;
-	struct devx_async_event_queue *ev_queue;
+	struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+	struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
 	unsigned long flags;
 
-	ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
-			       uobj);
-	ev_queue = &ev_file->ev_queue;
-
+	/*
+	 * Note that if the struct devx_async_cmd_event_file uobj begins to be
+	 * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+	 * routine returns, ensuring that it always remains valid here.
+	 */
 	spin_lock_irqsave(&ev_queue->lock, flags);
 	list_add_tail(&async_data->list, &ev_queue->event_list);
 	spin_unlock_irqrestore(&ev_queue->lock, flags);
 
 	wake_up_interruptible(&ev_queue->poll_wait);
-	fput(fd_uobj->object);
 }
 
 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
@@ -1757,9 +1755,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
 
 	async_data->cmd_out_len = cmd_out_len;
 	async_data->mdev = mdev;
-	async_data->fd_uobj = fd_uobj;
+	async_data->ev_file = ev_file;
 
-	get_file(fd_uobj->object);
 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
 	err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
 		    uverbs_attr_get_len(attrs,
@@ -1769,12 +1766,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
 		    devx_query_callback, &async_data->cb_work);
 
 	if (err)
-		goto cb_err;
+		goto free_async;
 
 	return 0;
 
-cb_err:
-	fput(fd_uobj->object);
 free_async:
 	kvfree(async_data);
 sub_bytes:
@@ -2032,6 +2027,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 			goto err;
 
 		list_add_tail(&event_sub->event_list, &sub_list);
+		uverbs_uobject_get(&ev_file->uobj);
 		if (use_eventfd) {
 			event_sub->eventfd =
 				eventfd_ctx_fdget(redirect_fd);
@@ -2045,7 +2041,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 
 		event_sub->cookie = cookie;
 		event_sub->ev_file = ev_file;
-		event_sub->filp = fd_uobj->object;
 		/* May be needed upon cleanup the devx object/subscription */
 		event_sub->xa_key_level1 = key_level1;
 		event_sub->xa_key_level2 = obj_id;
@@ -2099,7 +2094,7 @@ err:
 
 		if (event_sub->eventfd)
 			eventfd_ctx_put(event_sub->eventfd);
-
+		uverbs_uobject_put(&event_sub->ev_file->uobj);
 		kfree(event_sub);
 	}
 
@@ -2329,6 +2324,9 @@ static int deliver_event(struct devx_event_subscription *event_sub,
 			return 0;
 		}
 
+		/* is_destroyed is ignored here because we don't have any memory
+		 * allocation to clean up for the omit_data case
+		 */
 		list_add_tail(&event_sub->event_list, &ev_file->event_list);
 		spin_unlock_irqrestore(&ev_file->lock, flags);
 		wake_up_interruptible(&ev_file->poll_wait);
@@ -2348,7 +2346,10 @@ static int deliver_event(struct devx_event_subscription *event_sub,
 	memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
 
 	spin_lock_irqsave(&ev_file->lock, flags);
-	list_add_tail(&event_data->list, &ev_file->event_list);
+	if (!ev_file->is_destroyed)
+		list_add_tail(&event_data->list, &ev_file->event_list);
+	else
+		kfree(event_data);
 	spin_unlock_irqrestore(&ev_file->lock, flags);
 	wake_up_interruptible(&ev_file->poll_wait);
 
@@ -2361,17 +2362,10 @@ static void dispatch_event_fd(struct list_head *fd_list,
 	struct devx_event_subscription *item;
 
 	list_for_each_entry_rcu(item, fd_list, xa_list) {
-		if (!get_file_rcu(item->filp))
-			continue;
-
-		if (item->eventfd) {
+		if (item->eventfd)
 			eventfd_signal(item->eventfd, 1);
-			fput(item->filp);
-			continue;
-		}
-
-		deliver_event(item, data);
-		fput(item->filp);
+		else
+			deliver_event(item, data);
 	}
 }
 
@@ -2509,23 +2503,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
 	return ret;
 }
 
-static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
-{
-	struct ib_uobject *uobj = filp->private_data;
-	struct devx_async_cmd_event_file *comp_ev_file = container_of(
-		uobj, struct devx_async_cmd_event_file, uobj);
-	struct devx_async_data *entry, *tmp;
-
-	spin_lock_irq(&comp_ev_file->ev_queue.lock);
-	list_for_each_entry_safe(entry, tmp,
-				 &comp_ev_file->ev_queue.event_list, list)
-		kvfree(entry);
-	spin_unlock_irq(&comp_ev_file->ev_queue.lock);
-
-	uverbs_close_fd(filp);
-	return 0;
-}
-
 static __poll_t devx_async_cmd_event_poll(struct file *filp,
 					      struct poll_table_struct *wait)
 {
@@ -2549,7 +2526,7 @@ static const struct file_operations devx_async_cmd_event_fops = {
 	.owner	 = THIS_MODULE,
 	.read	 = devx_async_cmd_event_read,
 	.poll    = devx_async_cmd_event_poll,
-	.release = devx_async_cmd_event_close,
+	.release = uverbs_uobject_fd_release,
 	.llseek	 = no_llseek,
 };
 
@@ -2653,81 +2630,85 @@ static __poll_t devx_async_event_poll(struct file *filp,
 	return pollflags;
 }
 
-static int devx_async_event_close(struct inode *inode, struct file *filp)
+static void devx_free_subscription(struct rcu_head *rcu)
 {
-	struct devx_async_event_file *ev_file = filp->private_data;
-	struct devx_event_subscription *event_sub, *event_sub_tmp;
-	struct devx_async_event_data *entry, *tmp;
-	struct mlx5_ib_dev *dev = ev_file->dev;
-
-	mutex_lock(&dev->devx_event_table.event_xa_lock);
-	/* delete the subscriptions which are related to this FD */
-	list_for_each_entry_safe(event_sub, event_sub_tmp,
-				 &ev_file->subscribed_events_list, file_list) {
-		devx_cleanup_subscription(dev, event_sub);
-		if (event_sub->eventfd)
-			eventfd_ctx_put(event_sub->eventfd);
-
-		list_del_rcu(&event_sub->file_list);
-		/* subscription may not be used by the read API any more */
-		kfree_rcu(event_sub, rcu);
-	}
-
-	mutex_unlock(&dev->devx_event_table.event_xa_lock);
+	struct devx_event_subscription *event_sub =
+		container_of(rcu, struct devx_event_subscription, rcu);
 
-	/* free the pending events allocation */
-	if (!ev_file->omit_data) {
-		spin_lock_irq(&ev_file->lock);
-		list_for_each_entry_safe(entry, tmp,
-					 &ev_file->event_list, list)
-			kfree(entry); /* read can't come any more */
-		spin_unlock_irq(&ev_file->lock);
-	}
-
-	uverbs_close_fd(filp);
-	put_device(&dev->ib_dev.dev);
-	return 0;
+	if (event_sub->eventfd)
+		eventfd_ctx_put(event_sub->eventfd);
+	uverbs_uobject_put(&event_sub->ev_file->uobj);
+	kfree(event_sub);
 }
 
 static const struct file_operations devx_async_event_fops = {
 	.owner	 = THIS_MODULE,
 	.read	 = devx_async_event_read,
 	.poll    = devx_async_event_poll,
-	.release = devx_async_event_close,
+	.release = uverbs_uobject_fd_release,
 	.llseek	 = no_llseek,
 };
 
-static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
-						   enum rdma_remove_reason why)
+static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+					     enum rdma_remove_reason why)
 {
 	struct devx_async_cmd_event_file *comp_ev_file =
 		container_of(uobj, struct devx_async_cmd_event_file,
 			     uobj);
 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+	struct devx_async_data *entry, *tmp;
 
 	spin_lock_irq(&ev_queue->lock);
 	ev_queue->is_destroyed = 1;
 	spin_unlock_irq(&ev_queue->lock);
-
-	if (why == RDMA_REMOVE_DRIVER_REMOVE)
-		wake_up_interruptible(&ev_queue->poll_wait);
+	wake_up_interruptible(&ev_queue->poll_wait);
 
 	mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
+
+	spin_lock_irq(&comp_ev_file->ev_queue.lock);
+	list_for_each_entry_safe(entry, tmp,
+				 &comp_ev_file->ev_queue.event_list, list)
+		kvfree(entry);
+	spin_unlock_irq(&comp_ev_file->ev_queue.lock);
 	return 0;
 };
 
-static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
-					    enum rdma_remove_reason why)
+static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+					 enum rdma_remove_reason why)
 {
 	struct devx_async_event_file *ev_file =
 		container_of(uobj, struct devx_async_event_file,
 			     uobj);
+	struct devx_event_subscription *event_sub, *event_sub_tmp;
+	struct devx_async_event_data *entry, *tmp;
+	struct mlx5_ib_dev *dev = ev_file->dev;
 
 	spin_lock_irq(&ev_file->lock);
 	ev_file->is_destroyed = 1;
 	spin_unlock_irq(&ev_file->lock);
-
 	wake_up_interruptible(&ev_file->poll_wait);
+
+	mutex_lock(&dev->devx_event_table.event_xa_lock);
+	/* delete the subscriptions which are related to this FD */
+	list_for_each_entry_safe(event_sub, event_sub_tmp,
+				 &ev_file->subscribed_events_list, file_list) {
+		devx_cleanup_subscription(dev, event_sub);
+		list_del_rcu(&event_sub->file_list);
+		/* subscription may not be used by the read API any more */
+		call_rcu(&event_sub->rcu, devx_free_subscription);
+	}
+	mutex_unlock(&dev->devx_event_table.event_xa_lock);
+
+	/* free the pending events allocation */
+	if (!ev_file->omit_data) {
+		spin_lock_irq(&ev_file->lock);
+		list_for_each_entry_safe(entry, tmp,
+					 &ev_file->event_list, list)
+			kfree(entry); /* read can't come any more */
+		spin_unlock_irq(&ev_file->lock);
+	}
+
+	put_device(&dev->ib_dev.dev);
 	return 0;
 };
 
@@ -2913,7 +2894,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 DECLARE_UVERBS_NAMED_OBJECT(
 	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
-			     devx_hot_unplug_async_cmd_event_file,
+			     devx_async_cmd_event_destroy_uobj,
 			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
 			     O_RDONLY),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
@@ -2931,7 +2912,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 DECLARE_UVERBS_NAMED_OBJECT(
 	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
-			     devx_hot_unplug_async_event_file,
+			     devx_async_event_destroy_uobj,
 			     &devx_async_event_fops, "[devx_async_event]",
 			     O_RDONLY),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
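The lifetime model after this change: each subscription pins its event-file uobject (uverbs_uobject_get() at subscribe time), the FD release path becomes the generic uverbs_uobject_fd_release(), and teardown moves into the destroy_uobj handlers, with subscriptions freed only after an RCU grace period because readers walk the lists locklessly. A condensed sketch of the reader/updater pairing (the rcu_read_lock() is assumed to be held by the notifier that calls dispatch_event_fd(); it sits outside these hunks):

/* reader side */
rcu_read_lock();
list_for_each_entry_rcu(item, fd_list, xa_list)
	deliver_event(item, data);
rcu_read_unlock();

/* updater side, as in devx_async_event_destroy_uobj() */
list_del_rcu(&event_sub->file_list);
call_rcu(&event_sub->rcu, devx_free_subscription);	/* kfree after a grace period */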
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index ac4d8d1b9a07..1ae6fd95acaa 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
 		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
 		if (ret) {
 			/* Undo the effect of adding the outstanding wr */
-			gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
-					      gsi->cap.max_send_wr;
+			gsi->outstanding_pi--;
 			goto err;
 		}
 		spin_unlock_irqrestore(&gsi->lock, flags);
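Dropping the modulo is correct only if outstanding_pi is a free-running counter that is reduced just when it indexes the ring, with fullness detected by comparing it against an equally free-running outstanding_ci. That is the convention assumed here (a sketch of the surrounding driver logic, not a quote from this hunk):

/* post: wrap only at the array access, then advance */
gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi % gsi->cap.max_send_wr];
gsi->outstanding_pi++;

/* full check: valid only while neither counter is wrapped early */
if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr)
	return -ENOMEM;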
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index 4f0edd4832bd..b61165359954 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -164,8 +164,10 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
 	in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
 	in->node_guid = guid;
 	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
-	if (!err)
+	if (!err) {
 		vfs_ctx[vf].node_guid = guid;
+		vfs_ctx[vf].node_guid_valid = 1;
+	}
 	kfree(in);
 	return err;
 }
@@ -185,8 +187,10 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
 	in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
 	in->port_guid = guid;
 	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
-	if (!err)
+	if (!err) {
 		vfs_ctx[vf].port_guid = guid;
+		vfs_ctx[vf].port_guid_valid = 1;
+	}
 	kfree(in);
 	return err;
 }
@@ -208,20 +212,12 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
 {
 	struct mlx5_ib_dev *dev = to_mdev(device);
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_hca_vport_context *rep;
-	int err;
-
-	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
-	if (!rep)
-		return -ENOMEM;
+	struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
 
-	err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep);
-	if (err)
-		goto ex;
+	node_guid->guid =
+		vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0;
+	port_guid->guid =
+		vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0;
 
-	port_guid->guid = rep->port_guid;
-	node_guid->guid = rep->node_guid;
-ex:
-	kfree(rep);
-	return err;
+	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ab4ec33409b8..e874d688d040 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2094,6 +2094,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
 {
 	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
 	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+	struct mlx5_var_table *var_table = &dev->var_table;
 	struct mlx5_ib_dm *mdm;
 
 	switch (mentry->mmap_flag) {
@@ -2103,6 +2104,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
 				       mdm->size);
 		kfree(mdm);
 		break;
+	case MLX5_IB_MMAP_TYPE_VAR:
+		mutex_lock(&var_table->bitmap_lock);
+		clear_bit(mentry->page_idx, var_table->bitmap);
+		mutex_unlock(&var_table->bitmap_lock);
+		kfree(mentry);
+		break;
 	default:
 		WARN_ON(true);
 	}
@@ -2262,7 +2269,10 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
 
 	mentry = to_mmmap(entry);
 	pfn = (mentry->address >> PAGE_SHIFT);
-	prot = pgprot_writecombine(vma->vm_page_prot);
+	if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR)
+		prot = pgprot_noncached(vma->vm_page_prot);
+	else
+		prot = pgprot_writecombine(vma->vm_page_prot);
 	ret = rdma_user_mmap_io(ucontext, vma, pfn,
 				entry->npages * PAGE_SIZE,
 				prot,
@@ -2271,6 +2281,15 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
 	return ret;
 }
 
+static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
+{
+	u16 cmd = entry->rdma_entry.start_pgoff >> 16;
+	u16 index = entry->rdma_entry.start_pgoff & 0xFFFF;
+
+	return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
+		(index & 0xFF)) << PAGE_SHIFT;
+}
+
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 {
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
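mlx5_entry_to_mmap_offset() above reverses the rdma_entry encoding: start_pgoff carries the command in its high 16 bits and a 16-bit index in its low 16 bits, and the user-visible offset re-packs them into the layout the driver's mmap entry points decode, with the command at MLX5_IB_MMAP_CMD_SHIFT and the index split around it. Written out as a standalone helper (same arithmetic, hypothetical name):

/*
 * offset (in pages):  [ index 15:8 ] [  cmd  ] [ index 7:0 ]
 *                        bits 23:16   bits 15:8   bits 7:0
 */
static u64 pack_mmap_offset(u16 cmd, u16 index)
{
	u64 pgoff = ((u64)(index >> 8) << 16) |
		    ((u64)cmd << MLX5_IB_MMAP_CMD_SHIFT) |
		    (index & 0xFF);

	return pgoff << PAGE_SHIFT;
}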
@@ -5368,6 +5387,14 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
 	INIT_Q_COUNTER(req_cqe_flush_error),
 };
 
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+	INIT_Q_COUNTER(roce_adp_retrans),
+	INIT_Q_COUNTER(roce_adp_retrans_to),
+	INIT_Q_COUNTER(roce_slow_restart),
+	INIT_Q_COUNTER(roce_slow_restart_cnps),
+	INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
 #define INIT_EXT_PPCNT_COUNTER(_name)		\
 	{ .name = #_name, .offset =	\
 	MLX5_BYTE_OFF(ppcnt_reg, \
@@ -5416,6 +5443,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
 		num_counters += ARRAY_SIZE(extended_err_cnts);
 
+	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+		num_counters += ARRAY_SIZE(roce_accl_cnts);
+
 	cnts->num_q_counters = num_counters;
 
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -5476,6 +5506,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 		}
 	}
 
+	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+			names[j] = roce_accl_cnts[i].name;
+			offsets[j] = roce_accl_cnts[i].offset;
+		}
+	}
+
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
 			names[j] = cong_cnts[i].name;
@@ -6051,6 +6088,145 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
+static int var_obj_cleanup(struct ib_uobject *uobject,
+			   enum rdma_remove_reason why,
+			   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_user_mmap_entry *obj = uobject->object;
+
+	rdma_user_mmap_entry_remove(&obj->rdma_entry);
+	return 0;
+}
+
+static struct mlx5_user_mmap_entry *
+alloc_var_entry(struct mlx5_ib_ucontext *c)
+{
+	struct mlx5_user_mmap_entry *entry;
+	struct mlx5_var_table *var_table;
+	u32 page_idx;
+	int err;
+
+	var_table = &to_mdev(c->ibucontext.device)->var_table;
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&var_table->bitmap_lock);
+	page_idx = find_first_zero_bit(var_table->bitmap,
+				       var_table->num_var_hw_entries);
+	if (page_idx >= var_table->num_var_hw_entries) {
+		err = -ENOSPC;
+		mutex_unlock(&var_table->bitmap_lock);
+		goto end;
+	}
+
+	set_bit(page_idx, var_table->bitmap);
+	mutex_unlock(&var_table->bitmap_lock);
+
+	entry->address = var_table->hw_start_addr +
+				(page_idx * var_table->stride_size);
+	entry->page_idx = page_idx;
+	entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
+
+	err = rdma_user_mmap_entry_insert_range(
+		&c->ibucontext, &entry->rdma_entry, var_table->stride_size,
+		MLX5_IB_MMAP_OFFSET_START << 16,
+		(MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1);
+	if (err)
+		goto err_insert;
+
+	return entry;
+
+err_insert:
+	mutex_lock(&var_table->bitmap_lock);
+	clear_bit(page_idx, var_table->bitmap);
+	mutex_unlock(&var_table->bitmap_lock);
+end:
+	kfree(entry);
+	return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_user_mmap_entry *entry;
+	u64 mmap_offset;
+	u32 length;
+	int err;
+
+	c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+
+	entry = alloc_var_entry(c);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	mmap_offset = mlx5_entry_to_mmap_offset(entry);
+	length = entry->rdma_entry.npages * PAGE_SIZE;
+	uobj->object = entry;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+			     &mmap_offset, sizeof(mmap_offset));
+	if (err)
+		goto err;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+			     &entry->page_idx, sizeof(entry->page_idx));
+	if (err)
+		goto err;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+			     &length, sizeof(length));
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	rdma_user_mmap_entry_remove(&entry->rdma_entry);
+	return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_VAR_OBJ_ALLOC,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE,
+			MLX5_IB_OBJECT_VAR,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+			    UVERBS_ATTR_TYPE(u64),
+			    UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE,
+			MLX5_IB_OBJECT_VAR,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
+			    UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
+
+static bool var_is_supported(struct ib_device *device)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+
+	return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+			MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
+}
+
 ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	mlx5_ib_dm,
 	UVERBS_OBJECT_DM,
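From user space, the three output attributes of MLX5_IB_METHOD_VAR_OBJ_ALLOC are consumed by feeding the returned offset and length to mmap(2) on the verbs command FD; the mapping comes back non-cached because of the MLX5_IB_MMAP_TYPE_VAR branch added in mlx5_ib_mmap_offset(). A rough sketch (the rdma-core plumbing around the ioctl is assumed, not shown by this patch):

/* mmap_offset and mmap_length were returned by the ALLOC method;
 * cmd_fd is the ibv_context command FD. */
void *var = mmap(NULL, mmap_length, PROT_READ | PROT_WRITE, MAP_SHARED,
		 cmd_fd, mmap_offset);
if (var == MAP_FAILED)
	/* handle failure */;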
@@ -6073,14 +6249,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 			     enum mlx5_ib_uapi_flow_action_flags));
 
 static const struct uapi_definition mlx5_ib_defs[] = {
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 	UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
 	UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
-#endif
 
 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
 				&mlx5_ib_flow_action),
 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
+				UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
 	{}
 };
 
@@ -6352,6 +6528,35 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
 	.reg_dm_mr = mlx5_ib_reg_dm_mr,
 };
 
+static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_var_table *var_table = &dev->var_table;
+	u8 log_doorbell_bar_size;
+	u8 log_doorbell_stride;
+	u64 bar_size;
+
+	log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+					log_doorbell_bar_size);
+	log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+					log_doorbell_stride);
+	var_table->hw_start_addr = dev->mdev->bar_addr +
+				MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
+					doorbell_bar_offset);
+	bar_size = (1ULL << log_doorbell_bar_size) * 4096;
+	var_table->stride_size = 1ULL << log_doorbell_stride;
+	var_table->num_var_hw_entries = bar_size / var_table->stride_size;
+	mutex_init(&var_table->bitmap_lock);
+	var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
+					  GFP_KERNEL);
+	return (var_table->bitmap) ? 0 : -ENOMEM;
+}
+
+static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
+{
+	bitmap_free(dev->var_table.bitmap);
+}
+
 static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
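To make the sizing arithmetic in mlx5_ib_init_var_table() concrete, a worked example with made-up VDPA-emulation capability values:

/*
 * log_doorbell_bar_size = 4, log_doorbell_stride = 12:
 *   bar_size            = (1ULL << 4) * 4096 = 64 KiB
 *   stride_size         = 1ULL << 12         = 4 KiB
 *   num_var_hw_entries  = 65536 / 4096       = 16 VAR doorbell slots
 */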
@@ -6439,6 +6644,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	     MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		mutex_init(&dev->lb.mutex);
 
+	if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+			MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
+		err = mlx5_ib_init_var_table(dev);
+		if (err)
+			return err;
+	}
+
 	dev->ib_dev.use_cq_dim = true;
 
 	return 0;
@@ -6742,6 +6954,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 		      const struct mlx5_ib_profile *profile,
 		      int stage)
 {
+	dev->ib_active = false;
+
 	/* Number of stages to cleanup */
 	while (stage) {
 		stage--;
@@ -6787,7 +7001,7 @@ static const struct mlx5_ib_profile pf_profile = {
 		     mlx5_ib_stage_flow_db_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 		     mlx5_ib_stage_caps_init,
-		     NULL),
+		     mlx5_ib_stage_caps_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
 		     mlx5_ib_stage_non_default_cb,
 		     NULL),
@@ -6844,7 +7058,7 @@ const struct mlx5_ib_profile raw_eth_profile = {
 		     mlx5_ib_stage_flow_db_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 		     mlx5_ib_stage_caps_init,
-		     NULL),
+		     mlx5_ib_stage_caps_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
 		     mlx5_ib_stage_raw_eth_non_default_cb,
 		     NULL),
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 048f4e974a61..b90a3649e7d1 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -101,18 +101,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	*count = i;
 }
 
-static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
-{
-	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
-
-	if (umem_dma & ODP_READ_ALLOWED_BIT)
-		mtt_entry |= MLX5_IB_MTT_READ;
-	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
-		mtt_entry |= MLX5_IB_MTT_WRITE;
-
-	return mtt_entry;
-}
-
 /*
  * Populate the given array with bus addresses from the umem.
  *
@@ -139,19 +127,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	struct scatterlist *sg;
 	int entry;
 
-	if (umem->is_odp) {
-		WARN_ON(shift != 0);
-		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
-
-		for (i = 0; i < num_pages; ++i) {
-			dma_addr_t pa =
-				to_ib_umem_odp(umem)->dma_list[offset + i];
-
-			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
-		}
-		return;
-	}
-
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 77d495b2032d..d9bffcc93587 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -72,6 +72,11 @@
 #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
 
 enum {
+	MLX5_IB_MMAP_OFFSET_START = 9,
+	MLX5_IB_MMAP_OFFSET_END = 255,
+};
+
+enum {
 	MLX5_IB_MMAP_CMD_SHIFT	= 8,
 	MLX5_IB_MMAP_CMD_MASK	= 0xff,
 };
@@ -120,6 +125,7 @@ enum {
 
 enum mlx5_ib_mmap_type {
 	MLX5_IB_MMAP_TYPE_MEMIC = 1,
+	MLX5_IB_MMAP_TYPE_VAR = 2,
 };
 
 #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)                                        \
@@ -563,6 +569,7 @@ struct mlx5_user_mmap_entry {
 	struct rdma_user_mmap_entry rdma_entry;
 	u8 mmap_flag;
 	u64 address;
+	u32 page_idx;
 };
 
 struct mlx5_ib_dm {
@@ -959,6 +966,15 @@ struct mlx5_devx_event_table {
 	struct xarray event_xa;
 };
 
+struct mlx5_var_table {
+	/* serialize updating the bitmap */
+	struct mutex bitmap_lock;
+	unsigned long *bitmap;
+	u64 hw_start_addr;
+	u32 stride_size;
+	u64 num_var_hw_entries;
+};
+
 struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
 	struct mlx5_core_dev		*mdev;
@@ -1013,6 +1029,7 @@ struct mlx5_ib_dev {
 	struct mlx5_srq_table   srq_table;
 	struct mlx5_async_ctx   async_ctx;
 	struct mlx5_devx_event_table devx_event_table;
+	struct mlx5_var_table var_table;
 
 	struct xarray sig_mrs;
 };
@@ -1276,8 +1293,8 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
-			   size_t nentries, struct mlx5_ib_mr *mr, int flags);
+void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+			   struct mlx5_ib_mr *mr, int flags);
 
 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 			       enum ib_uverbs_advise_mr_advice advice,
@@ -1293,9 +1310,8 @@ static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void)				    {}
 static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
-static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
-					 size_t nentries, struct mlx5_ib_mr *mr,
-					 int flags) {}
+static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+					 struct mlx5_ib_mr *mr, int flags) {}
 
 static inline int
 mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1363,14 +1379,14 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg,
 int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
 			    struct rdma_restrack_entry *res);
 
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
+
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
 void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
 void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
-extern const struct uapi_definition mlx5_ib_devx_defs[];
-extern const struct uapi_definition mlx5_ib_flow_defs[];
 struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
 	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
 	struct mlx5_flow_context *flow_context,
@@ -1378,7 +1394,6 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
 	void *cmd_in, int inlen, int dest_id, int dest_type);
 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
 bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
 void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
 #else
 static inline int
@@ -1507,7 +1522,7 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
 
 static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
-				       bool do_modify_atomic)
+				       bool do_modify_atomic, int access_flags)
 {
 	if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
 		return false;
@@ -1517,6 +1532,9 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
 	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
 		return false;
 
+	if (access_flags & IB_ACCESS_RELAXED_ORDERING)
+		return false;
+
 	return true;
 }
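The new access_flags argument lets the helper veto the UMR path for relaxed-ordering registrations: within this patch the relaxed_ordering_read/write bits are programmed only when a fresh MKC is built (set_mkc_access_pd_addr_fields() and reg_create() in mr.c below), so an MR that asks for them has to avoid the cached/UMR route. Seen from callers such as mlx5_ib_reg_user_mr(), the added check reduces to:

if (access_flags & IB_ACCESS_RELAXED_ORDERING)
	use_umr = false;	/* cached/UMR mkeys keep their original MKC bits */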
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 44a0ee6bd9f1..6fa0a83c19de 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -147,7 +147,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 			break;
 		}
 		mr->order = ent->order;
-		mr->allocated_from_cache = 1;
+		mr->allocated_from_cache = true;
 		mr->dev = dev;
 
 		MLX5_SET(mkc, mkc, free, 1);
@@ -661,12 +661,21 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
 					  struct ib_pd *pd)
 {
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
 	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
 	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
 	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
 	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
 	MLX5_SET(mkc, mkc, lr, 1);
 
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+		MLX5_SET(mkc, mkc, relaxed_ordering_write,
+			 !!(acc & IB_ACCESS_RELAXED_ORDERING));
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+		MLX5_SET(mkc, mkc, relaxed_ordering_read,
+			 !!(acc & IB_ACCESS_RELAXED_ORDERING));
+
 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 	MLX5_SET64(mkc, mkc, start_addr, start_addr);
@@ -867,36 +876,6 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(
 	return mr;
 }
 
-static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
-			       void *xlt, int page_shift, size_t size,
-			       int flags)
-{
-	struct mlx5_ib_dev *dev = mr->dev;
-	struct ib_umem *umem = mr->umem;
-
-	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
-		if (!umr_can_use_indirect_mkey(dev))
-			return -EPERM;
-		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
-		return npages;
-	}
-
-	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
-
-	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
-		__mlx5_ib_populate_pas(dev, umem, page_shift,
-				       idx, npages, xlt,
-				       MLX5_IB_MTT_PRESENT);
-		/* Clear padding after the pages
-		 * brought from the umem.
-		 */
-		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
-		       size - npages * sizeof(struct mlx5_mtt));
-	}
-
-	return npages;
-}
-
 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
 			    MLX5_UMR_MTT_ALIGNMENT)
 #define MLX5_SPARE_UMR_CHUNK 0x10000
@@ -920,6 +899,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	size_t pages_mapped = 0;
 	size_t pages_to_map = 0;
 	size_t pages_iter = 0;
+	size_t size_to_map = 0;
 	gfp_t gfp;
 	bool use_emergency_page = false;
 
@@ -966,6 +946,15 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		goto free_xlt;
 	}
 
+	if (mr->umem->is_odp) {
+		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+
+			pages_to_map = min_t(size_t, pages_to_map, max_pages);
+		}
+	}
+
 	sg.addr = dma;
 	sg.lkey = dev->umrc.pd->local_dma_lkey;
 
@@ -988,14 +977,22 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	     pages_mapped < pages_to_map && !err;
 	     pages_mapped += pages_iter, idx += pages_iter) {
 		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
+		size_to_map = npages * desc_size;
 		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
-		npages = populate_xlt(mr, idx, npages, xlt,
-				      page_shift, size, flags);
-
+		if (mr->umem->is_odp) {
+			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+		} else {
+			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
+					       npages, xlt,
+					       MLX5_IB_MTT_PRESENT);
+			/* Clear padding after the pages
+			 * brought from the umem.
+			 */
+			memset(xlt + size_to_map, 0, size - size_to_map);
+		}
 		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 
-		sg.length = ALIGN(npages * desc_size,
-				  MLX5_UMR_MTT_ALIGNMENT);
+		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
 
 		if (pages_mapped + pages_iter >= pages_to_map) {
 			if (flags & MLX5_IB_UPD_XLT_ENABLE)
@@ -1074,6 +1071,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	MLX5_SET(mkc, mkc, free, !populate);
 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+		MLX5_SET(mkc, mkc, relaxed_ordering_write,
+			 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
+		MLX5_SET(mkc, mkc, relaxed_ordering_read,
+			 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
 	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
 	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
@@ -1264,7 +1267,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (err < 0)
 		return ERR_PTR(err);
 
-	use_umr = mlx5_ib_can_use_umr(dev, true);
+	use_umr = mlx5_ib_can_use_umr(dev, true, access_flags);
 
 	if (order <= mr_cache_max_order(dev) && use_umr) {
 		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
@@ -1431,7 +1434,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			goto err;
 	}
 
-	if (!mlx5_ib_can_use_umr(dev, true) ||
+	if (!mlx5_ib_can_use_umr(dev, true, access_flags) ||
 	    (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
 		/*
 		 * UMR can't be used - MKey needs to be replaced.
@@ -1452,7 +1455,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			goto err;
 		}
 
-		mr->allocated_from_cache = 0;
+		mr->allocated_from_cache = false;
 	} else {
 		/*
 		 * Send a UMR WQE
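The two mr.c hunks above follow the same pattern: a relaxed-ordering bit is written into the mkey context only when the HCA advertises the capability and the caller asked for IB_ACCESS_RELAXED_ORDERING. A minimal sketch of that pattern, factored into a hypothetical helper (the helper name is illustrative, not part of this patch):

static void set_mkc_relaxed_ordering(struct mlx5_ib_dev *dev, void *mkc,
                                     int access_flags)
{
        bool ro = access_flags & IB_ACCESS_RELAXED_ORDERING;

        /* Only touch the bits the device actually implements */
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
                MLX5_SET(mkc, mkc, relaxed_ordering_write, ro);
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
                MLX5_SET(mkc, mkc, relaxed_ordering_read, ro);
}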
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 0afb0042bd53..4216814ba871 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -93,8 +93,8 @@ struct mlx5_pagefault {
 
 static u64 mlx5_imr_ksm_entries;
 
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
-			   struct mlx5_ib_mr *imr, int flags)
+static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
+			struct mlx5_ib_mr *imr, int flags)
 {
 	struct mlx5_klm *end = pklm + nentries;
 
@@ -144,6 +144,44 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
 	}
 }
 
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
+{
+	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
+
+	if (umem_dma & ODP_READ_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_READ;
+	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_WRITE;
+
+	return mtt_entry;
+}
+
+static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
+			 struct mlx5_ib_mr *mr, int flags)
+{
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+	dma_addr_t pa;
+	size_t i;
+
+	if (flags & MLX5_IB_UPD_XLT_ZAP)
+		return;
+
+	for (i = 0; i < nentries; i++) {
+		pa = odp->dma_list[idx + i];
+		pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+	}
+}
+
+void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+			   struct mlx5_ib_mr *mr, int flags)
+{
+	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+		populate_klm(xlt, idx, nentries, mr, flags);
+	} else {
+		populate_mtt(xlt, idx, nentries, mr, flags);
+	}
+}
+
 static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
 {
 	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
@@ -342,7 +380,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	memset(caps, 0, sizeof(*caps));
 
 	if (!MLX5_CAP_GEN(dev->mdev, pg) ||
-	    !mlx5_ib_can_use_umr(dev, true))
+	    !mlx5_ib_can_use_umr(dev, true, 0))
 		return;
 
 	caps->general_caps = IB_ODP_SUPPORT;
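The populate_mtt()/umem_dma_to_mtt() pair added above converts ODP dma_list entries, whose low bits carry the page's read/write permission flags, into device MTT entries. A small illustration of that translation with a made-up page address (umem_dma_to_mtt() is static to odp.c, so this is conceptual only):

static __be64 example_mtt_entry(void)
{
        /* Sample ODP dma_list entry: page address plus permission bits */
        dma_addr_t umem_dma = 0x7f0000d000ULL | ODP_READ_ALLOWED_BIT |
                              ODP_WRITE_ALLOWED_BIT;

        /* Encodes the same address with MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE */
        return cpu_to_be64(umem_dma_to_mtt(umem_dma));
}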
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ae7cbd9c9bca..a4f8e7030787 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1918,7 +1918,7 @@ static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
 {
 	enum ib_qp_type qpt = init_attr->qp_type;
 	int scqe_sz;
-	bool allow_scat_cqe = 0;
+	bool allow_scat_cqe = false;
 
 	if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
 		return;
@@ -4870,7 +4870,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 	bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
 	u8 flags = 0;
 
-	if (!mlx5_ib_can_use_umr(dev, atomic)) {
+	if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) {
 		mlx5_ib_warn(to_mdev(qp->ibqp.device),
 			     "Fast update of %s for MR is disabled\n",
 			     (MLX5_CAP_GEN(dev->mdev,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 920f35e28cfc..484b555150e0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -312,7 +312,18 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 	}
 	ctx->db_mmap_entry = &entry->rdma_entry;
 
-	uresp.dpm_enabled = dev->user_dpm_enabled;
+	if (!dev->user_dpm_enabled)
+		uresp.dpm_flags = 0;
+	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
+		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
+	else
+		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
+				  QEDR_DPM_TYPE_ROCE_LEGACY;
+
+	uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
+	uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
+	uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+
 	uresp.wids_enabled = 1;
 	uresp.wid_count = oparams.wid_count;
 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 890d7b760d2e..977906cc0d11 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -195,7 +195,14 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
 }
 EXPORT_SYMBOL(rvt_get_credit);
 
-/* rvt_restart_sge - rewind the sge state for a wqe */
+/**
+ * rvt_restart_sge - rewind the sge state for a wqe
+ * @ss: the sge state pointer
+ * @wqe: the wqe to rewind
+ * @len: the data length from the start of the wqe in bytes
+ *
+ * Returns the remaining data length.
+ */
 u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
 {
 	ss->sge = wqe->sg_list[0];
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 353c6668249e..f59616b02477 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -34,6 +34,8 @@
 #ifndef RXE_PARAM_H
 #define RXE_PARAM_H
 
+#include <uapi/rdma/rdma_user_rxe.h>
+
 static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
 {
 	if (mtu < 256)
@@ -64,7 +66,6 @@ enum rxe_device_param {
 	RXE_PAGE_SIZE_CAP		= 0xfffff000,
 	RXE_MAX_QP			= 0x10000,
 	RXE_MAX_QP_WR			= 0x4000,
-	RXE_MAX_INLINE_DATA		= 400,
 	RXE_DEVICE_CAP_FLAGS		= IB_DEVICE_BAD_PKEY_CNTR
 					| IB_DEVICE_BAD_QKEY_CNTR
 					| IB_DEVICE_AUTO_PATH_MIG
@@ -77,6 +78,10 @@ enum rxe_device_param {
 					| IB_DEVICE_MEM_MGT_EXTENSIONS
 					| IB_DEVICE_ALLOW_USER_UNREG,
 	RXE_MAX_SGE			= 32,
+	RXE_MAX_WQE_SIZE		= sizeof(struct rxe_send_wqe) +
+					  sizeof(struct ib_sge) * RXE_MAX_SGE,
+	RXE_MAX_INLINE_DATA		= RXE_MAX_WQE_SIZE -
+					  sizeof(struct rxe_send_wqe),
 	RXE_MAX_SGE_RD			= 32,
 	RXE_MAX_CQ			= 16384,
 	RXE_MAX_LOG_CQE			= 15,
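With the hunk above, the rxe inline-data limit is no longer an arbitrary 400 bytes: the inline payload simply reuses the space the SGE array would otherwise occupy in the send WQE. Assuming the usual 16-byte struct ib_sge and RXE_MAX_SGE == 32, that gives 512 bytes of inline data. A compile-time sketch of the relationship (the static_assert is illustrative, not part of the patch, and assumes rxe_param.h is already included):

#include <linux/build_bug.h>    /* static_assert() */
#include <rdma/ib_verbs.h>      /* struct ib_sge */

/* Inline data reuses exactly the SGE-array portion of the WQE */
static_assert(RXE_MAX_INLINE_DATA == RXE_MAX_SGE * sizeof(struct ib_sge),
              "inline data limit must match the SGE array size");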
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index e2c6d1cedf41..ec21f616ac98 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -237,19 +237,17 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 	 */
 	qp->src_port = RXE_ROCE_V2_SPORT +
 		(hash_32_generic(qp_num(qp), 14) & 0x3fff);
-
 	qp->sq.max_wr		= init->cap.max_send_wr;
-	qp->sq.max_sge		= init->cap.max_send_sge;
-	qp->sq.max_inline	= init->cap.max_inline_data;
 
-	wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +
-			 qp->sq.max_sge * sizeof(struct ib_sge),
-			 sizeof(struct rxe_send_wqe) +
-			 qp->sq.max_inline);
+	/* These caps are limited by rxe_qp_chk_cap() done by the caller */
+	wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
+			 init->cap.max_inline_data);
+	qp->sq.max_sge = init->cap.max_send_sge =
+		wqe_size / sizeof(struct ib_sge);
+	qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
+	wqe_size += sizeof(struct rxe_send_wqe);
 
-	qp->sq.queue = rxe_queue_init(rxe,
-				      &qp->sq.max_wr,
-				      wqe_size);
+	qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size);
 	if (!qp->sq.queue)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 95834206c80c..92de39c4a7c1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -408,7 +408,7 @@ struct rxe_dev {
 	struct list_head	pending_mmaps;
 
 	spinlock_t		mmap_offset_lock; /* guard mmap_offset */
-	int			mmap_offset;
+	u64			mmap_offset;
 
 	atomic64_t		stats_counters[RXE_NUM_OF_COUNTERS];
 
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index b939f489cd46..af5e9f8c0fcd 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -7,6 +7,7 @@
 #define _SIW_H
 
 #include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <crypto/hash.h>
@@ -209,7 +210,6 @@ struct siw_cq {
 	u32 cq_put;
 	u32 cq_get;
 	u32 num_cqe;
-	bool kernel_verbs;
 	struct rdma_user_mmap_entry *cq_entry; /* mmap info for CQE array */
 	u32 id; /* For debugging only */
 };
@@ -254,8 +254,8 @@ struct siw_srq {
 	u32 rq_get;
 	u32 num_rqe; /* max # of wqe's allowed */
 	struct rdma_user_mmap_entry *srq_entry; /* mmap info for SRQ array */
-	char armed; /* inform user if limit hit */
-	char kernel_verbs; /* '1' if kernel client */
+	bool armed:1; /* inform user if limit hit */
+	bool is_kernel_res:1; /* true if kernel client */
 };
 
 struct siw_qp_attrs {
@@ -418,13 +418,11 @@ struct siw_iwarp_tx {
 };
 
 struct siw_qp {
+	struct ib_qp base_qp;
 	struct siw_device *sdev;
-	struct ib_qp *ib_qp;
 	struct kref ref;
-	u32 qp_num;
 	struct list_head devq;
 	int tx_cpu;
-	bool kernel_verbs;
 	struct siw_qp_attrs attrs;
 
 	struct siw_cep *cep;
@@ -472,11 +470,6 @@ struct siw_qp {
 	struct rcu_head rcu;
 };
 
-struct siw_base_qp {
-	struct ib_qp base_qp;
-	struct siw_qp *qp;
-};
-
 /* helper macros */
 #define rx_qp(rx) container_of(rx, struct siw_qp, rx_stream)
 #define tx_qp(tx) container_of(tx, struct siw_qp, tx_ctx)
@@ -572,14 +565,9 @@ static inline struct siw_ucontext *to_siw_ctx(struct ib_ucontext *base_ctx)
 	return container_of(base_ctx, struct siw_ucontext, base_ucontext);
 }
 
-static inline struct siw_base_qp *to_siw_base_qp(struct ib_qp *base_qp)
-{
-	return container_of(base_qp, struct siw_base_qp, base_qp);
-}
-
 static inline struct siw_qp *to_siw_qp(struct ib_qp *base_qp)
 {
-	return to_siw_base_qp(base_qp)->qp;
+	return container_of(base_qp, struct siw_qp, base_qp);
 }
 
 static inline struct siw_cq *to_siw_cq(struct ib_cq *base_cq)
@@ -624,7 +612,7 @@ static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id)
 
 static inline u32 qp_id(struct siw_qp *qp)
 {
-	return qp->qp_num;
+	return qp->base_qp.qp_num;
 }
 
 static inline void siw_qp_get(struct siw_qp *qp)
@@ -735,7 +723,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
 		  "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__)
 
 #define siw_dbg_cep(cep, fmt, ...)                                             \
-	ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt,                  \
+	ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt,                 \
 		  cep, __func__, ##__VA_ARGS__)
 
 void siw_cq_flush(struct siw_cq *cq);
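The siw.h hunks above drop the separate struct siw_base_qp wrapper: struct ib_qp is now embedded in struct siw_qp, the QP number is taken from the core object, and to_siw_qp() becomes a plain container_of(). The general embed-and-convert shape, with made-up names (not siw code):

#include <linux/kernel.h>       /* container_of() */

struct core_obj { unsigned int id; };

struct drv_obj {
        struct core_obj base;   /* embedded core object, like base_qp above */
        int private_state;      /* driver-private fields live alongside it */
};

/* One allocation, no back-pointer: conversion is a constant-offset cast */
static inline struct drv_obj *to_drv_obj(struct core_obj *base)
{
        return container_of(base, struct drv_obj, base);
}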
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 3bccfef40e7e..0c3f0588346e 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -29,7 +29,7 @@
  * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR
  */
 static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR;
-static const bool relaxed_ird_negotiation = 1;
+static const bool relaxed_ird_negotiation = true;
 
 static void siw_cm_llp_state_change(struct sock *s);
 static void siw_cm_llp_data_ready(struct sock *s);
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index d8db3bee9da7..d68e37859e73 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -65,7 +65,7 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
 		 * reaped here, which do not hold a QP reference
 		 * and do not qualify for memory extension verbs.
 		 */
-		if (likely(cq->kernel_verbs)) {
+		if (likely(rdma_is_kernel_res(&cq->base_cq.res))) {
 			if (cqe->flags & SIW_WQE_REM_INVAL) {
 				wc->ex.invalidate_rkey = cqe->inval_stag;
 				wc->wc_flags = IB_WC_WITH_INVALIDATE;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index c147f0613d95..96ed349c0939 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -244,7 +244,7 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
 		 * siw_qp_id2obj() increments object reference count
 		 */
 		siw_qp_put(qp);
-		return qp->ib_qp;
+		return &qp->base_qp;
 	}
 	return NULL;
 }
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index b4317480cee7..875d36d4b1c6 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1070,8 +1070,8 @@ int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
 			cqe->imm_data = 0;
 			cqe->bytes = bytes;
 
-			if (cq->kernel_verbs)
-				cqe->base_qp = qp->ib_qp;
+			if (rdma_is_kernel_res(&cq->base_cq.res))
+				cqe->base_qp = &qp->base_qp;
 			else
 				cqe->qp_id = qp_id(qp);
 
@@ -1128,8 +1128,8 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
 			cqe->imm_data = 0;
 			cqe->bytes = bytes;
 
-			if (cq->kernel_verbs) {
-				cqe->base_qp = qp->ib_qp;
+			if (rdma_is_kernel_res(&cq->base_cq.res)) {
+				cqe->base_qp = &qp->base_qp;
 				if (inval_stag) {
 					cqe_flags |= SIW_WQE_REM_INVAL;
 					cqe->inval_stag = inval_stag;
@@ -1297,13 +1297,12 @@ void siw_rq_flush(struct siw_qp *qp)
 
 int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
 {
-	int rv = xa_alloc(&sdev->qp_xa, &qp->ib_qp->qp_num, qp, xa_limit_32b,
+	int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
 			  GFP_KERNEL);
 
 	if (!rv) {
 		kref_init(&qp->ref);
 		qp->sdev = sdev;
-		qp->qp_num = qp->ib_qp->qp_num;
 		siw_dbg_qp(qp, "new QP\n");
 	}
 	return rv;
@@ -1312,7 +1311,6 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
 void siw_free_qp(struct kref *ref)
 {
 	struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
-	struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp);
 	struct siw_device *sdev = qp->sdev;
 	unsigned long flags;
 
@@ -1335,5 +1333,4 @@ void siw_free_qp(struct kref *ref)
 	atomic_dec(&sdev->num_qp);
 	siw_dbg_qp(qp, "free QP\n");
 	kfree_rcu(qp, rcu);
-	kfree(siw_base_qp);
 }
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index c0a887240325..9ccce2909ac4 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -68,7 +68,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
 			return -EFAULT;
 		}
 		if (srx->mpa_crc_hd) {
-			if (rx_qp(srx)->kernel_verbs) {
+			if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
 				crypto_shash_update(srx->mpa_crc_hd,
 					(u8 *)(dest + pg_off), bytes);
 				kunmap_atomic(dest);
@@ -388,7 +388,7 @@ static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
 				struct siw_rqe *rqe2 = &srq->recvq[off];
 
 				if (!(rqe2->flags & SIW_WQE_VALID)) {
-					srq->armed = 0;
+					srq->armed = false;
 					srq_event = true;
 				}
 			}
@@ -1264,7 +1264,7 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error)
 
 			if (wc_status == SIW_WC_SUCCESS)
 				wc_status = SIW_WC_GENERAL_ERR;
-		} else if (qp->kernel_verbs &&
+		} else if (rdma_is_kernel_res(&qp->base_qp.res) &&
 			   rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
 			/*
 			 * Handle any STag invalidation request
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 5d97bba0ce6d..ae92c8080967 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -817,7 +817,7 @@ static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe)
 			}
 		} else {
 			wqe->bytes = wqe->sqe.sge[0].length;
-			if (!qp->kernel_verbs) {
+			if (!rdma_is_kernel_res(&qp->base_qp.res)) {
 				if (wqe->bytes > SIW_MAX_INLINE) {
 					rv = -EINVAL;
 					goto tx_error;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 5fd6d6499b3d..07e30138aaa1 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -303,7 +303,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 			    struct ib_udata *udata)
 {
 	struct siw_qp *qp = NULL;
-	struct siw_base_qp *siw_base_qp = NULL;
 	struct ib_device *base_dev = pd->device;
 	struct siw_device *sdev = to_siw_dev(base_dev);
 	struct siw_ucontext *uctx =
@@ -357,26 +356,16 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		rv = -EINVAL;
 		goto err_out;
 	}
-	siw_base_qp = kzalloc(sizeof(*siw_base_qp), GFP_KERNEL);
-	if (!siw_base_qp) {
-		rv = -ENOMEM;
-		goto err_out;
-	}
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp) {
 		rv = -ENOMEM;
 		goto err_out;
 	}
-	siw_base_qp->qp = qp;
-	qp->ib_qp = &siw_base_qp->base_qp;
-
 	init_rwsem(&qp->state_lock);
 	spin_lock_init(&qp->sq_lock);
 	spin_lock_init(&qp->rq_lock);
 	spin_lock_init(&qp->orq_lock);
 
-	qp->kernel_verbs = !udata;
-
 	rv = siw_qp_add(sdev, qp);
 	if (rv)
 		goto err_out;
@@ -389,10 +378,10 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
 	num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
 
-	if (qp->kernel_verbs)
-		qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
-	else
+	if (udata)
 		qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
+	else
+		qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
 
 	if (qp->sendq == NULL) {
 		siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
@@ -419,13 +408,14 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		 */
 		qp->srq = to_siw_srq(attrs->srq);
 		qp->attrs.rq_size = 0;
-		siw_dbg(base_dev, "QP [%u]: SRQ attached\n", qp->qp_num);
+		siw_dbg(base_dev, "QP [%u]: SRQ attached\n",
+			qp->base_qp.qp_num);
 	} else if (num_rqe) {
-		if (qp->kernel_verbs)
-			qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
-		else
+		if (udata)
 			qp->recvq =
 				vmalloc_user(num_rqe * sizeof(struct siw_rqe));
+		else
+			qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
 
 		if (qp->recvq == NULL) {
 			siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
@@ -492,13 +482,11 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	list_add_tail(&qp->devq, &sdev->qp_list);
 	spin_unlock_irqrestore(&sdev->lock, flags);
 
-	return qp->ib_qp;
+	return &qp->base_qp;
 
 err_out_xa:
 	xa_erase(&sdev->qp_xa, qp_id(qp));
 err_out:
-	kfree(siw_base_qp);
-
 	if (qp) {
 		if (uctx) {
 			rdma_user_mmap_entry_remove(qp->sq_entry);
@@ -742,7 +730,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
 	unsigned long flags;
 	int rv = 0;
 
-	if (wr && !qp->kernel_verbs) {
+	if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) {
 		siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
 		*bad_wr = wr;
 		return -EINVAL;
@@ -939,7 +927,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
 	if (rv <= 0)
 		goto skip_direct_sending;
 
-	if (qp->kernel_verbs) {
+	if (rdma_is_kernel_res(&qp->base_qp.res)) {
 		rv = siw_sq_start(qp);
 	} else {
 		qp->tx_ctx.in_syscall = 1;
@@ -984,8 +972,8 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
 		*bad_wr = wr;
 		return -EOPNOTSUPP; /* what else from errno.h? */
 	}
-	if (!qp->kernel_verbs) {
-		siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
+	if (!rdma_is_kernel_res(&qp->base_qp.res)) {
+		siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n");
 		*bad_wr = wr;
 		return -EINVAL;
 	}
@@ -1127,14 +1115,13 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	cq->base_cq.cqe = size;
 	cq->num_cqe = size;
 
-	if (!udata) {
-		cq->kernel_verbs = 1;
-		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
-				    sizeof(struct siw_cq_ctrl));
-	} else {
+	if (udata)
 		cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
 					 sizeof(struct siw_cq_ctrl));
-	}
+	else
+		cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
+				    sizeof(struct siw_cq_ctrl));
+
 	if (cq->queue == NULL) {
 		rv = -ENOMEM;
 		goto err_out;
@@ -1589,9 +1576,9 @@ int siw_create_srq(struct ib_srq *base_srq,
 	srq->num_rqe = roundup_pow_of_two(attrs->max_wr);
 	srq->limit = attrs->srq_limit;
 	if (srq->limit)
-		srq->armed = 1;
+		srq->armed = true;
 
-	srq->kernel_verbs = !udata;
+	srq->is_kernel_res = !udata;
 
 	if (udata)
 		srq->recvq =
@@ -1671,9 +1658,9 @@ int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs,
 				rv = -EINVAL;
 				goto out;
 			}
-			srq->armed = 1;
+			srq->armed = true;
 		} else {
-			srq->armed = 0;
+			srq->armed = false;
 		}
 		srq->limit = attrs->srq_limit;
 	}
@@ -1745,7 +1732,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
 	unsigned long flags;
 	int rv = 0;
 
-	if (unlikely(!srq->kernel_verbs)) {
+	if (unlikely(!srq->is_kernel_res)) {
 		siw_dbg_pd(base_srq->pd,
 			   "[SRQ]: no kernel post_recv for mapped srq\n");
 		rv = -EINVAL;
@@ -1797,7 +1784,7 @@ out:
 void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype)
 {
 	struct ib_event event;
-	struct ib_qp *base_qp = qp->ib_qp;
+	struct ib_qp *base_qp = &qp->base_qp;
 
 	/*
 	 * Do not report asynchronous errors on QP which gets
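The create_qp/create_cq/create_srq hunks above replace the stored kernel_verbs flag with a direct udata check when allocating queue memory: queues that user space will mmap come from vmalloc_user(), kernel consumers get plain vzalloc(). A hedged sketch of that selection, with a hypothetical helper name:

#include <linux/vmalloc.h>

static void *alloc_queue(size_t nelem, size_t elem_size, bool has_udata)
{
        size_t bytes = nelem * elem_size;

        /* vmalloc_user() returns zeroed, page-aligned memory fit for mmap */
        return has_udata ? vmalloc_user(bytes) : vzalloc(bytes);
}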
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 0f74dc6d12fa..7a8f24de3631 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -527,7 +527,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
 		if (unlikely(err))
 			goto err_reg;
 
-		desc->sig_protected = 1;
+		desc->sig_protected = true;
 	}
 
 	return 0;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 1f4a37a3c2b3..127887c6c03f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1093,7 +1093,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 	int ret;
 
 	if (desc && desc->sig_protected) {
-		desc->sig_protected = 0;
+		desc->sig_protected = false;
 		ret = ib_check_mr_status(desc->rsc.sig_mr,
 					 IB_MR_CHECK_SIG_STATUS, &mr_status);
 		if (ret) {
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index e4c9bf2ef7e2..4480092c68e0 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -358,7 +358,7 @@ struct opa_veswport_summary_counters {
  * @rx_drop_state: received packets in non-forwarding port state
  * @rx_logic: other receive errors
  *
- * All the above are counters of corresponding erorr conditions.
+ * All the above are counters of corresponding error conditions.
  */
 struct opa_veswport_error_counters {
 	__be16  vp_instance;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b7f7a5f7bd98..cd1181c39ed2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2546,7 +2546,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 	if (lrsp->opcode == SRP_LOGIN_RSP) {
 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
-		ch->use_imm_data  = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+		ch->use_imm_data  = srp_use_imm_data &&
+			(lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
 		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
 						      ch->use_imm_data,
 						      target->max_it_iu_size);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 23c782e3d49a..98552749d71c 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2810,8 +2810,6 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	int resp_len, ret, i;
 	u8 srp_tm_status;
 
-	BUG_ON(!ch);
-
 	state = ioctx->state;
 	switch (state) {
 	case SRPT_STATE_NEW:
diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 508e8cc5ee86..653d2a0aa44c 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h
@@ -130,6 +130,11 @@ enum {
 	MLX4_CQE_STATUS_IPOK		= 1 << 12,
 };
 
+/* L4_CSUM is logically part of status, but has to be checked against badfcs_enc */
+enum {
+	MLX4_CQE_STATUS_L4_CSUM		= 1 << 2,
+};
+
 enum {
 	MLX4_CQE_LLC                     = 1,
 	MLX4_CQE_SNAP                    = 1 << 1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 22bd0d5024c8..277a51d3ec40 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -463,6 +463,11 @@ struct mlx5_vf_context {
 	int	enabled;
 	u64	port_guid;
 	u64	node_guid;
+	/* Valid bits are used to validate administrative guid only.
+	 * Enabled after ndo_set_vf_guid
+	 */
+	u8	port_guid_valid:1;
+	u8	node_guid_valid:1;
 	enum port_state_policy	policy;
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ee0a34d66c7c..032cd6630720 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1188,7 +1188,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_cq[0x5];
 
 	u8         log_max_eq_sz[0x8];
-	u8         reserved_at_e8[0x2];
+	u8         relaxed_ordering_write[0x1];
+	u8         relaxed_ordering_read[0x1];
 	u8         log_max_mkey[0x6];
 	u8         reserved_at_f0[0x8];
 	u8         dump_fill_mkey[0x1];
@@ -1211,7 +1212,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_130[0xa];
 	u8         log_max_ra_res_dc[0x6];
 
-	u8         reserved_at_140[0xa];
+	u8         reserved_at_140[0x9];
+	u8         roce_accl[0x1];
 	u8         log_max_ra_req_qp[0x6];
 	u8         reserved_at_150[0xa];
 	u8         log_max_ra_res_qp[0x6];
@@ -3428,7 +3430,9 @@ struct mlx5_ifc_mkc_bits {
 
 	u8         translations_octword_size[0x20];
 
-	u8         reserved_at_1c0[0x1b];
+	u8         reserved_at_1c0[0x19];
+	u8         relaxed_ordering_read[0x1];
+	u8         reserved_at_1d9[0x1];
 	u8         log_page_size[0x5];
 
 	u8         reserved_at_1e0[0x20];
@@ -4889,7 +4893,19 @@ struct mlx5_ifc_query_q_counter_out_bits {
 
 	u8         req_cqe_flush_error[0x20];
 
-	u8         reserved_at_620[0x1e0];
+	u8         reserved_at_620[0x20];
+
+	u8         roce_adp_retrans[0x20];
+
+	u8         roce_adp_retrans_to[0x20];
+
+	u8         roce_slow_restart[0x20];
+
+	u8         roce_slow_restart_cnps[0x20];
+
+	u8         roce_slow_restart_trans[0x20];
+
+	u8         reserved_at_6e0[0x120];
 };
 
 struct mlx5_ifc_query_q_counter_in_bits {
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index b01a8a8d4de9..8ec482e391aa 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -500,21 +500,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 		   u8 private_data_len);
 
 /**
- * ib_send_cm_lap - Sends a load alternate path request.
- * @cm_id: Connection identifier associated with the load alternate path
- *   message.
- * @alternate_path: A path record that identifies the alternate path to
- *   load.
- * @private_data: Optional user-defined private data sent with the
- *   load alternate path message.
- * @private_data_len: Size of the private data buffer, in bytes.
- */
-int ib_send_cm_lap(struct ib_cm_id *cm_id,
-		   struct sa_path_rec *alternate_path,
-		   const void *private_data,
-		   u8 private_data_len);
-
-/**
  * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
  *   to a specified QP state.
  * @cm_id: Communication identifier associated with the QP attributes to
@@ -534,25 +519,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 		       struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask);
 
-/**
- * ib_send_cm_apr - Sends an alternate path response message in response to
- *   a load alternate path request.
- * @cm_id: Connection identifier associated with the alternate path response.
- * @status: Reply status sent with the alternate path response.
- * @info: Optional additional information sent with the alternate path
- *   response.
- * @info_length: Size of the additional information, in bytes.
- * @private_data: Optional user-defined private data sent with the
- *   alternate path response message.
- * @private_data_len: Size of the private data buffer, in bytes.
- */
-int ib_send_cm_apr(struct ib_cm_id *cm_id,
-		   enum ib_cm_apr_status status,
-		   void *info,
-		   u8 info_length,
-		   const void *private_data,
-		   u8 private_data_len);
-
 struct ib_cm_sidr_req_param {
 	struct sa_path_rec	*path;
 	const struct ib_gid_attr *sgid_attr;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e2cc62217cc2..1f779fad3a1e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -72,11 +72,16 @@
 #define IB_FW_VERSION_NAME_MAX	ETHTOOL_FWVERS_LEN
 
 struct ib_umem_odp;
+struct ib_uqp_object;
+struct ib_usrq_object;
+struct ib_uwq_object;
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
 extern struct workqueue_struct *ib_comp_unbound_wq;
 
+struct ib_ucq_object;
+
 __printf(3, 4) __cold
 void ibdev_printk(const char *level, const struct ib_device *ibdev,
 		  const char *format, ...);
@@ -1413,8 +1418,11 @@ enum ib_access_flags {
 	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
 	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
 	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
 
-	IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
+	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
+	IB_ACCESS_SUPPORTED =
+		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
 };
 
 /*
@@ -1544,7 +1552,7 @@ enum ib_poll_context {
 
 struct ib_cq {
 	struct ib_device       *device;
-	struct ib_uobject      *uobject;
+	struct ib_ucq_object   *uobject;
 	ib_comp_handler   	comp_handler;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void                   *cq_context;
@@ -1558,6 +1566,11 @@ struct ib_cq {
 	};
 	struct workqueue_struct *comp_wq;
 	struct dim *dim;
+
+	/* updated only by trace points */
+	ktime_t timestamp;
+	bool interrupt;
+
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
@@ -1567,7 +1580,7 @@ struct ib_cq {
 struct ib_srq {
 	struct ib_device       *device;
 	struct ib_pd	       *pd;
-	struct ib_uobject      *uobject;
+	struct ib_usrq_object  *uobject;
 	void		      (*event_handler)(struct ib_event *, void *);
 	void		       *srq_context;
 	enum ib_srq_type	srq_type;
@@ -1612,7 +1625,7 @@ enum ib_wq_state {
 
 struct ib_wq {
 	struct ib_device       *device;
-	struct ib_uobject      *uobject;
+	struct ib_uwq_object   *uobject;
 	void		    *wq_context;
 	void		    (*event_handler)(struct ib_event *, void *);
 	struct ib_pd	       *pd;
@@ -1728,7 +1741,7 @@ struct ib_qp {
 	atomic_t		usecnt;
 	struct list_head	open_list;
 	struct ib_qp           *real_qp;
-	struct ib_uobject      *uobject;
+	struct ib_uqp_object   *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
 	/* sgid_attrs associated with the AV's */
@@ -2147,11 +2160,6 @@ struct ib_port_cache {
 	enum ib_port_state     port_state;
 };
 
-struct ib_cache {
-	rwlock_t                lock;
-	struct ib_event_handler event_handler;
-};
-
 struct ib_port_immutable {
 	int                           pkey_tbl_len;
 	int                           gid_tbl_len;
@@ -2627,13 +2635,18 @@ struct ib_device {
 	struct rcu_head rcu_head;
 
 	struct list_head              event_handler_list;
-	spinlock_t                    event_handler_lock;
+	/* Protects event_handler_list */
+	struct rw_semaphore event_handler_rwsem;
+
+	/* Protects QP's event_handler calls and open_qp list */
+	spinlock_t qp_open_list_lock;
 
 	struct rw_semaphore	      client_data_rwsem;
 	struct xarray                 client_data;
 	struct mutex                  unregistration_lock;
 
-	struct ib_cache               cache;
+	/* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
+	rwlock_t cache_lock;
 	/**
 	 * port_data is indexed by port number
 	 */
@@ -2942,7 +2955,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 
 void ib_register_event_handler(struct ib_event_handler *event_handler);
 void ib_unregister_event_handler(struct ib_event_handler *event_handler);
-void ib_dispatch_event(struct ib_event *event);
+void ib_dispatch_event(const struct ib_event *event);
 
 int ib_query_port(struct ib_device *device,
 		  u8 port_num, struct ib_port_attr *port_attr);
@@ -4309,6 +4322,9 @@ static inline int ib_check_mr_access(int flags)
 	    !(flags & IB_ACCESS_LOCAL_WRITE))
 		return -EINVAL;
 
+	if (flags & ~IB_ACCESS_SUPPORTED)
+		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/include/rdma/iba.h b/include/rdma/iba.h
new file mode 100644
index 000000000000..6a1115b02a0d
--- /dev/null
+++ b/include/rdma/iba.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc.  All rights reserved.
+ */
+#ifndef _IBA_DEFS_H_
+#define _IBA_DEFS_H_
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <asm/unaligned.h>
+
+static inline u32 _iba_get8(const u8 *ptr)
+{
+	return *ptr;
+}
+
+static inline void _iba_set8(u8 *ptr, u32 mask, u32 prep_value)
+{
+	*ptr = (*ptr & ~mask) | prep_value;
+}
+
+static inline u16 _iba_get16(const __be16 *ptr)
+{
+	return be16_to_cpu(*ptr);
+}
+
+static inline void _iba_set16(__be16 *ptr, u16 mask, u16 prep_value)
+{
+	*ptr = cpu_to_be16((be16_to_cpu(*ptr) & ~mask) | prep_value);
+}
+
+static inline u32 _iba_get32(const __be32 *ptr)
+{
+	return be32_to_cpu(*ptr);
+}
+
+static inline void _iba_set32(__be32 *ptr, u32 mask, u32 prep_value)
+{
+	*ptr = cpu_to_be32((be32_to_cpu(*ptr) & ~mask) | prep_value);
+}
+
+static inline u64 _iba_get64(const __be64 *ptr)
+{
+	/*
+	 * The MADs are constructed so that 32 bit and smaller fields are
+	 * naturally aligned; everything larger has a max alignment of 4 bytes.
+	 */
+	return be64_to_cpu(get_unaligned(ptr));
+}
+
+static inline void _iba_set64(__be64 *ptr, u64 mask, u64 prep_value)
+{
+	put_unaligned(cpu_to_be64((_iba_get64(ptr) & ~mask) | prep_value), ptr);
+}
+
+#define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \
+	({                                                                     \
+		field_struct *_ptr = ptr;                                      \
+		_iba_set##num_bits((void *)_ptr + (field_offset), field_mask,  \
+				   FIELD_PREP(field_mask, value));             \
+	})
+#define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value)
+
+#define _IBA_GET_MEM_PTR(field_struct, field_offset, type, num_bits, ptr)      \
+	({                                                                     \
+		field_struct *_ptr = ptr;                                      \
+		(type *)((void *)_ptr + (field_offset));                       \
+	})
+#define IBA_GET_MEM_PTR(field, ptr) _IBA_GET_MEM_PTR(field, ptr)
+
+/* FIXME: A set should always set the entire field, meaning we should zero the trailing bytes */
+#define _IBA_SET_MEM(field_struct, field_offset, type, num_bits, ptr, in,      \
+		     bytes)                                                    \
+	({                                                                     \
+		const type *_in_ptr = in;                                      \
+		WARN_ON(bytes * 8 > num_bits);                                 \
+		if (in && bytes)                                               \
+			memcpy(_IBA_GET_MEM_PTR(field_struct, field_offset,    \
+						type, num_bits, ptr),          \
+			       _in_ptr, bytes);                                \
+	})
+#define IBA_SET_MEM(field, ptr, in, bytes) _IBA_SET_MEM(field, ptr, in, bytes)
+
+#define _IBA_GET(field_struct, field_offset, field_mask, num_bits, ptr)        \
+	({                                                                     \
+		const field_struct *_ptr = ptr;                                \
+		(u##num_bits) FIELD_GET(                                       \
+			field_mask, _iba_get##num_bits((const void *)_ptr +    \
+						       (field_offset)));       \
+	})
+#define IBA_GET(field, ptr) _IBA_GET(field, ptr)
+
+#define _IBA_GET_MEM(field_struct, field_offset, type, num_bits, ptr, out,     \
+		     bytes)                                                    \
+	({                                                                     \
+		type *_out_ptr = out;                                          \
+		WARN_ON(bytes * 8 > num_bits);                                 \
+		if (out && bytes)                                              \
+			memcpy(_out_ptr,                                       \
+			       _IBA_GET_MEM_PTR(field_struct, field_offset,    \
+						type, num_bits, ptr),          \
+			       bytes);                                         \
+	})
+#define IBA_GET_MEM(field, ptr, out, bytes) _IBA_GET_MEM(field, ptr, out, bytes)
+
+/*
+ * The generated list becomes the parameters to the macros, the order is:
+ *  - struct this applies to
+ *  - starting offset of the mask
+ *  - GENMASK or GENMASK_ULL in CPU order
+ *  - The width of data the mask operations should work on, in bits
+ */
+
+/*
+ * Extraction using a tabular description like table 106. bit_offset is from
+ * the Byte[Bit] notation.
+ */
+#define IBA_FIELD_BLOC(field_struct, byte_offset, bit_offset, num_bits)        \
+	field_struct, byte_offset,                                             \
+		GENMASK(7 - (bit_offset), 7 - (bit_offset) - (num_bits - 1)),  \
+		8
+#define IBA_FIELD8_LOC(field_struct, byte_offset, num_bits)                    \
+	IBA_FIELD_BLOC(field_struct, byte_offset, 0, num_bits)
+
+#define IBA_FIELD16_LOC(field_struct, byte_offset, num_bits)                   \
+	field_struct, (byte_offset)&0xFFFE,                                    \
+		GENMASK(15 - (((byte_offset) % 2) * 8),                        \
+			15 - (((byte_offset) % 2) * 8) - (num_bits - 1)),      \
+		16
+
+#define IBA_FIELD32_LOC(field_struct, byte_offset, num_bits)                   \
+	field_struct, (byte_offset)&0xFFFC,                                    \
+		GENMASK(31 - (((byte_offset) % 4) * 8),                        \
+			31 - (((byte_offset) % 4) * 8) - (num_bits - 1)),      \
+		32
+
+#define IBA_FIELD64_LOC(field_struct, byte_offset)                             \
+	field_struct, byte_offset, GENMASK_ULL(63, 0), 64
+/*
+ * In the IBTA spec, everything larger than 64 bits is a whole number of
+ * bytes with no leftover bits.
+ */
+#define IBA_FIELD_MLOC(field_struct, byte_offset, num_bits, type)              \
+	field_struct, byte_offset, type, num_bits
+
+#endif /* _IBA_DEFS_H_ */
diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h
new file mode 100644
index 000000000000..269904425d3f
--- /dev/null
+++ b/include/rdma/ibta_vol1_c12.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ *
+ * This file is IBTA volume 1, chapter 12 declarations:
+ *  CHAPTER 12: COMMUNICATION MANAGEMENT
+ */
+#ifndef _IBTA_VOL1_C12_H_
+#define _IBTA_VOL1_C12_H_
+
+#include <rdma/iba.h>
+
+#define CM_FIELD_BLOC(field_struct, byte_offset, bits_offset, width)           \
+	IBA_FIELD_BLOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), bits_offset, \
+		       width)
+#define CM_FIELD8_LOC(field_struct, byte_offset, width)                        \
+	IBA_FIELD8_LOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD16_LOC(field_struct, byte_offset, width)                       \
+	IBA_FIELD16_LOC(field_struct,                                          \
+			(byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD32_LOC(field_struct, byte_offset, width)                       \
+	IBA_FIELD32_LOC(field_struct,                                          \
+			(byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD64_LOC(field_struct, byte_offset)                              \
+	IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr)))
+#define CM_FIELD_MLOC(field_struct, byte_offset, width, type)                  \
+	IBA_FIELD_MLOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), width, type)
+#define CM_STRUCT(field_struct, total_len)                                     \
+	field_struct                                                           \
+	{                                                                      \
+		struct ib_mad_hdr hdr;                                         \
+		u32 _data[(total_len) / 32 +                                   \
+			  BUILD_BUG_ON_ZERO((total_len) % 32 != 0)];           \
+	}
+
+/* Table 106 REQ Message Contents */
+#define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32)
+#define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8)
+#define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16)
+#define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32)
+#define CM_REQ_LOCAL_QPN CM_FIELD32_LOC(struct cm_req_msg, 32, 24)
+#define CM_REQ_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_req_msg, 35, 8)
+#define CM_REQ_LOCAL_EECN CM_FIELD32_LOC(struct cm_req_msg, 36, 24)
+#define CM_REQ_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_req_msg, 39, 8)
+#define CM_REQ_REMOTE_EECN CM_FIELD32_LOC(struct cm_req_msg, 40, 24)
+#define CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT                                      \
+	CM_FIELD8_LOC(struct cm_req_msg, 43, 5)
+#define CM_REQ_TRANSPORT_SERVICE_TYPE CM_FIELD_BLOC(struct cm_req_msg, 43, 5, 2)
+#define CM_REQ_END_TO_END_FLOW_CONTROL                                         \
+	CM_FIELD_BLOC(struct cm_req_msg, 43, 7, 1)
+#define CM_REQ_STARTING_PSN CM_FIELD32_LOC(struct cm_req_msg, 44, 24)
+#define CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 47, 5)
+#define CM_REQ_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 47, 5, 3)
+#define CM_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_req_msg, 48, 16)
+#define CM_REQ_PATH_PACKET_PAYLOAD_MTU CM_FIELD8_LOC(struct cm_req_msg, 50, 4)
+#define CM_REQ_RDC_EXISTS CM_FIELD_BLOC(struct cm_req_msg, 50, 4, 1)
+#define CM_REQ_RNR_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 50, 5, 3)
+#define CM_REQ_MAX_CM_RETRIES CM_FIELD8_LOC(struct cm_req_msg, 51, 4)
+#define CM_REQ_SRQ CM_FIELD_BLOC(struct cm_req_msg, 51, 4, 1)
+#define CM_REQ_EXTENDED_TRANSPORT_TYPE                                         \
+	CM_FIELD_BLOC(struct cm_req_msg, 51, 5, 3)
+#define CM_REQ_PRIMARY_LOCAL_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 52, 16)
+#define CM_REQ_PRIMARY_REMOTE_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 54, 16)
+#define CM_REQ_PRIMARY_LOCAL_PORT_GID                                          \
+	CM_FIELD_MLOC(struct cm_req_msg, 56, 128, union ib_gid)
+#define CM_REQ_PRIMARY_REMOTE_PORT_GID                                         \
+	CM_FIELD_MLOC(struct cm_req_msg, 72, 128, union ib_gid)
+#define CM_REQ_PRIMARY_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 88, 20)
+#define CM_REQ_PRIMARY_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 91, 2, 6)
+#define CM_REQ_PRIMARY_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 92, 8)
+#define CM_REQ_PRIMARY_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 93, 8)
+#define CM_REQ_PRIMARY_SL CM_FIELD8_LOC(struct cm_req_msg, 94, 4)
+#define CM_REQ_PRIMARY_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_req_msg, 94, 4, 1)
+#define CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 95, 5)
+#define CM_REQ_ALTERNATE_LOCAL_PORT_LID                                        \
+	CM_FIELD16_LOC(struct cm_req_msg, 96, 16)
+#define CM_REQ_ALTERNATE_REMOTE_PORT_LID                                       \
+	CM_FIELD16_LOC(struct cm_req_msg, 98, 16)
+#define CM_REQ_ALTERNATE_LOCAL_PORT_GID                                        \
+	CM_FIELD_MLOC(struct cm_req_msg, 100, 128, union ib_gid)
+#define CM_REQ_ALTERNATE_REMOTE_PORT_GID                                       \
+	CM_FIELD_MLOC(struct cm_req_msg, 116, 128, union ib_gid)
+#define CM_REQ_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 132, 20)
+#define CM_REQ_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 135, 2, 6)
+#define CM_REQ_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 136, 8)
+#define CM_REQ_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 137, 8)
+#define CM_REQ_ALTERNATE_SL CM_FIELD8_LOC(struct cm_req_msg, 138, 4)
+#define CM_REQ_ALTERNATE_SUBNET_LOCAL                                          \
+	CM_FIELD_BLOC(struct cm_req_msg, 138, 4, 1)
+#define CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT                                     \
+	CM_FIELD8_LOC(struct cm_req_msg, 139, 5)
+#define CM_REQ_SAP_SUPPORTED CM_FIELD_BLOC(struct cm_req_msg, 139, 5, 1)
+#define CM_REQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_req_msg, 140, 736, void)
+CM_STRUCT(struct cm_req_msg, 140 * 8 + 736);
+
+/* Table 107 MRA Message Contents */
+#define CM_MRA_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 0, 32)
+#define CM_MRA_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 4, 32)
+#define CM_MRA_MESSAGE_MRAED CM_FIELD8_LOC(struct cm_mra_msg, 8, 2)
+#define CM_MRA_SERVICE_TIMEOUT CM_FIELD8_LOC(struct cm_mra_msg, 9, 5)
+#define CM_MRA_PRIVATE_DATA CM_FIELD_MLOC(struct cm_mra_msg, 10, 1776, void)
+CM_STRUCT(struct cm_mra_msg, 10 * 8 + 1776);
+
+/* Table 108 REJ Message Contents */
+#define CM_REJ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 0, 32)
+#define CM_REJ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 4, 32)
+#define CM_REJ_MESSAGE_REJECTED CM_FIELD8_LOC(struct cm_rej_msg, 8, 2)
+#define CM_REJ_REJECTED_INFO_LENGTH CM_FIELD8_LOC(struct cm_rej_msg, 9, 7)
+#define CM_REJ_REASON CM_FIELD16_LOC(struct cm_rej_msg, 10, 16)
+#define CM_REJ_ARI CM_FIELD_MLOC(struct cm_rej_msg, 12, 576, void)
+#define CM_REJ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rej_msg, 84, 1184, void)
+CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184);
+
+/* Table 110 REP Message Contents */
+#define CM_REP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 0, 32)
+#define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32)
+#define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32)
+#define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24)
+#define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24)
+#define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24)
+#define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8)
+#define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8)
+#define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5)
+#define CM_REP_FAILOVER_ACCEPTED CM_FIELD_BLOC(struct cm_rep_msg, 26, 5, 2)
+#define CM_REP_END_TO_END_FLOW_CONTROL                                         \
+	CM_FIELD_BLOC(struct cm_rep_msg, 26, 7, 1)
+#define CM_REP_RNR_RETRY_COUNT CM_FIELD8_LOC(struct cm_rep_msg, 27, 3)
+#define CM_REP_SRQ CM_FIELD_BLOC(struct cm_rep_msg, 27, 3, 1)
+#define CM_REP_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_rep_msg, 28)
+#define CM_REP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rep_msg, 36, 1568, void)
+CM_STRUCT(struct cm_rep_msg, 36 * 8 + 1568);
+
+/* Table 111 RTU Message Contents */
+#define CM_RTU_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 0, 32)
+#define CM_RTU_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 4, 32)
+#define CM_RTU_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rtu_msg, 8, 1792, void)
+CM_STRUCT(struct cm_rtu_msg, 8 * 8 + 1792);
+
+/* Table 112 DREQ Message Contents */
+#define CM_DREQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 0, 32)
+#define CM_DREQ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 4, 32)
+#define CM_DREQ_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_dreq_msg, 8, 24)
+#define CM_DREQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_dreq_msg, 12, 1760, void)
+CM_STRUCT(struct cm_dreq_msg, 12 * 8 + 1760);
+
+/* Table 113 DREP Message Contents */
+#define CM_DREP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 0, 32)
+#define CM_DREP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 4, 32)
+#define CM_DREP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_drep_msg, 8, 1792, void)
+CM_STRUCT(struct cm_drep_msg, 8 * 8 + 1792);
+
+/* Table 115 LAP Message Contents */
+#define CM_LAP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 0, 32)
+#define CM_LAP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 4, 32)
+#define CM_LAP_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_lap_msg, 12, 24)
+#define CM_LAP_REMOTE_CM_RESPONSE_TIMEOUT                                      \
+	CM_FIELD8_LOC(struct cm_lap_msg, 15, 5)
+#define CM_LAP_ALTERNATE_LOCAL_PORT_LID                                        \
+	CM_FIELD16_LOC(struct cm_lap_msg, 20, 16)
+#define CM_LAP_ALTERNATE_REMOTE_PORT_LID                                       \
+	CM_FIELD16_LOC(struct cm_lap_msg, 22, 16)
+#define CM_LAP_ALTERNATE_LOCAL_PORT_GID                                        \
+	CM_FIELD_MLOC(struct cm_lap_msg, 24, 128, union ib_gid)
+#define CM_LAP_ALTERNATE_REMOTE_PORT_GID                                       \
+	CM_FIELD_MLOC(struct cm_lap_msg, 40, 128, union ib_gid)
+#define CM_LAP_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_lap_msg, 56, 20)
+#define CM_LAP_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_lap_msg, 59, 8)
+#define CM_LAP_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_lap_msg, 60, 8)
+#define CM_LAP_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_lap_msg, 61, 2, 6)
+#define CM_LAP_ALTERNATE_SL CM_FIELD8_LOC(struct cm_lap_msg, 62, 4)
+#define CM_LAP_ALTERNATE_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_lap_msg, 62, 4, 1)
+#define CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT                                     \
+	CM_FIELD8_LOC(struct cm_lap_msg, 63, 5)
+#define CM_LAP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_lap_msg, 64, 1344, void)
+CM_STRUCT(struct cm_lap_msg, 64 * 8 + 1344);
+
+/* Table 116 APR Message Contents */
+#define CM_APR_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 0, 32)
+#define CM_APR_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 4, 32)
+#define CM_APR_ADDITIONAL_INFORMATION_LENGTH                                   \
+	CM_FIELD8_LOC(struct cm_apr_msg, 8, 8)
+#define CM_APR_AR_STATUS CM_FIELD8_LOC(struct cm_apr_msg, 9, 8)
+#define CM_APR_ADDITIONAL_INFORMATION                                          \
+	CM_FIELD_MLOC(struct cm_apr_msg, 12, 576, void)
+#define CM_APR_PRIVATE_DATA CM_FIELD_MLOC(struct cm_apr_msg, 84, 1184, void)
+CM_STRUCT(struct cm_apr_msg, 84 * 8 + 1184);
+
+/* Table 119 SIDR_REQ Message Contents */
+#define CM_SIDR_REQ_REQUESTID CM_FIELD32_LOC(struct cm_sidr_req_msg, 0, 32)
+#define CM_SIDR_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_sidr_req_msg, 4, 16)
+#define CM_SIDR_REQ_SERVICEID CM_FIELD64_LOC(struct cm_sidr_req_msg, 8)
+#define CM_SIDR_REQ_PRIVATE_DATA                                               \
+	CM_FIELD_MLOC(struct cm_sidr_req_msg, 16, 1728, void)
+CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728);
+
+/* Table 120 SIDR_REP Message Contents */
+#define CM_SIDR_REP_REQUESTID CM_FIELD32_LOC(struct cm_sidr_rep_msg, 0, 32)
+#define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8)
+#define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH                              \
+	CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8)
+#define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24)
+#define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12)
+#define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32)
+#define CM_SIDR_REP_ADDITIONAL_INFORMATION                                     \
+	CM_FIELD_MLOC(struct cm_sidr_rep_msg, 24, 576, void)
+#define CM_SIDR_REP_PRIVATE_DATA                                               \
+	CM_FIELD_MLOC(struct cm_sidr_rep_msg, 96, 1088, void)
+CM_STRUCT(struct cm_sidr_rep_msg, 96 * 8 + 1088);
+
+#endif /* _IBTA_VOL1_C12_H_ */
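The field definitions above are meant to be used through the IBA_GET()/IBA_SET() accessors from iba.h, so CM code can reference spec fields by name rather than by hand-built masks and shifts. A short usage sketch (the function and the values are illustrative only):

#include <rdma/ib_mad.h>
#include <rdma/iba.h>
#include <rdma/ibta_vol1_c12.h>

static void example_req_fields(struct cm_req_msg *req)
{
        IBA_SET(CM_REQ_LOCAL_COMM_ID, req, 0x1234);
        IBA_SET(CM_REQ_RETRY_COUNT, req, 7);            /* 3-bit field */

        if (IBA_GET(CM_REQ_SRQ, req))
                pr_debug("peer uses SRQ, QPN %u\n",
                         IBA_GET(CM_REQ_LOCAL_QPN, req));
}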
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index b550ae89bf85..0d5c70e2d8ab 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -640,34 +640,14 @@ static inline int rvt_cmp_msn(u32 a, u32 b)
 	return (((int)a) - ((int)b)) << 8;
 }
 
-/**
- * rvt_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
 __be32 rvt_compute_aeth(struct rvt_qp *qp);
 
-/**
- * rvt_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
 void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
 
-/**
- * rvt_restart_sge - rewind the sge state for a wqe
- * @ss: the sge state pointer
- * @wqe: the wqe to rewind
- * @len: the data length from the start of the wqe in bytes
- *
- * Returns the remaining data length.
- */
 u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
 
 /**
+ * rvt_div_round_up_mtu - round up divide
  * @qp - the qp pair
  * @len - the length
  *
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index 3447bfe356d6..6ae6cf8e4c2e 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -76,7 +76,7 @@
 #define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...)              \
 	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
 		_object_id)[] = { __VA_ARGS__ };                               \
-	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+	static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {    \
 		.id = _object_id,                                              \
 		.type_attrs = &_type_attrs,                                    \
 		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
@@ -88,10 +88,10 @@
  * identify all uapi methods with a (object,method) tuple. However, they have
  * no type pointer.
  */
-#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...)	\
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...)                         \
 	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
 		_object_id)[] = { __VA_ARGS__ };                               \
-	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+	static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {    \
 		.id = _object_id,                                              \
 		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
 		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 05eabfd5d0d3..1b28ce1aba07 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -104,16 +104,6 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
 	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
-static inline int __must_check
-uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
-{
-	int ret = rdma_alloc_commit_uobject(uobj, attrs);
-
-	if (ret)
-		return ret;
-	return 0;
-}
-
 static inline void uobj_alloc_abort(struct ib_uobject *uobj,
 				    struct uverbs_attr_bundle *attrs)
 {
@@ -124,8 +114,7 @@ static inline struct ib_uobject *
 __uobj_alloc(const struct uverbs_api_object *obj,
 	     struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
 {
-	struct ib_uobject *uobj =
-		rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
+	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs);
 
 	if (!IS_ERR(uobj))
 		*ib_dev = attrs->context->device;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index d57a5ba00c74..f1cbdae67250 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -83,9 +83,9 @@ enum rdma_lookup_mode {
  */
 struct uverbs_obj_type_class {
 	struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
-					  struct ib_uverbs_file *ufile);
+					  struct uverbs_attr_bundle *attrs);
 	/* This consumes the kref on uobj */
-	int (*alloc_commit)(struct ib_uobject *uobj);
+	void (*alloc_commit)(struct ib_uobject *uobj);
 	/* This does not consume the kref on uobj */
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
@@ -98,7 +98,6 @@ struct uverbs_obj_type_class {
 				       enum rdma_remove_reason why,
 				       struct uverbs_attr_bundle *attrs);
 	void (*remove_handle)(struct ib_uobject *uobj);
-	u8    needs_kfree_rcu;
 };
 
 struct uverbs_obj_type {
@@ -138,23 +137,34 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 			     enum rdma_lookup_mode mode);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
-					    struct ib_uverbs_file *ufile,
 					    struct uverbs_attr_bundle *attrs);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
 			      struct uverbs_attr_bundle *attrs);
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
-					   struct uverbs_attr_bundle *attrs);
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+			       struct uverbs_attr_bundle *attrs);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless of whether this uobject is still alive in the context's
+ * objects repository. References are dropped via uverbs_uobject_put.
+ */
+static inline void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+	kref_get(&uobject->ref);
+}
+void uverbs_uobject_put(struct ib_uobject *uobject);
 
 struct uverbs_obj_fd_type {
 	/*
 	 * In fd based objects, uverbs_obj_type_ops points to generic
 	 * fd operations. In order to specialize the underlying types (e.g.
 	 * completion_channel), we use fops, name and flags for fd creation.
-	 * context_closed is called when the context is closed either when
-	 * the driver is removed or the process terminated.
+	 * destroy_object is called when the uobject is to be destroyed,
+	 * either because the driver is being removed or because the FD is closed.
 	 */
 	struct uverbs_obj_type  type;
-	int (*context_closed)(struct ib_uobject *uobj,
+	int (*destroy_object)(struct ib_uobject *uobj,
 			      enum rdma_remove_reason why);
 	const struct file_operations	*fops;
 	const char			*name;
@@ -163,11 +173,11 @@ struct uverbs_obj_fd_type {
 
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 extern const struct uverbs_obj_type_class uverbs_fd_class;
-void uverbs_close_fd(struct file *f);
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))
-#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \
 	((&((const struct uverbs_obj_fd_type)				\
 	 {.type = {							\
 		.type_class = &uverbs_fd_class,				\
@@ -175,7 +185,7 @@ void uverbs_close_fd(struct file *f);
 			UVERBS_BUILD_BUG_ON((_obj_size) <               \
 					    sizeof(struct ib_uobject)), \
 	 },								\
-	 .context_closed = _context_closed,				\
+	 .destroy_object = _destroy_object,				\
 	 .fops = _fops,							\
 	 .name = _name,							\
 	 .flags = _flags}))->type)
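
The comment above spells out the get/put contract: an extra reference keeps the uobject's memory valid but says nothing about whether the object is still live in the uapi. A small sketch of that pattern for a handler that hands the uobject to asynchronous work; all example_* names are invented:

#include <rdma/uverbs_types.h>

struct example_async_ctx {
	struct ib_uobject *uobj;
};

/* Pin the uobject's memory before publishing it to async context. */
static void example_start_async(struct ib_uobject *uobj,
				struct example_async_ctx *ctx)
{
	uverbs_uobject_get(uobj);
	ctx->uobj = uobj;
}

/* Drop the extra reference once the async user is done with it. */
static void example_finish_async(struct example_async_ctx *ctx)
{
	uverbs_uobject_put(ctx->uobj);
}
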
diff --git a/include/trace/events/rdma_core.h b/include/trace/events/rdma_core.h
new file mode 100644
index 000000000000..17642aa54437
--- /dev/null
+++ b/include/trace/events/rdma_core.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_core
+
+#if !defined(_TRACE_RDMA_CORE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RDMA_CORE_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_verbs.h>
+
+/*
+ * enum ib_poll_context, from include/rdma/ib_verbs.h
+ */
+#define IB_POLL_CTX_LIST			\
+	ib_poll_ctx(DIRECT)			\
+	ib_poll_ctx(SOFTIRQ)			\
+	ib_poll_ctx(WORKQUEUE)			\
+	ib_poll_ctx_end(UNBOUND_WORKQUEUE)
+
+#undef ib_poll_ctx
+#undef ib_poll_ctx_end
+
+#define ib_poll_ctx(x)		TRACE_DEFINE_ENUM(IB_POLL_##x);
+#define ib_poll_ctx_end(x)	TRACE_DEFINE_ENUM(IB_POLL_##x);
+
+IB_POLL_CTX_LIST
+
+#undef ib_poll_ctx
+#undef ib_poll_ctx_end
+
+#define ib_poll_ctx(x)		{ IB_POLL_##x, #x },
+#define ib_poll_ctx_end(x)	{ IB_POLL_##x, #x }
+
+#define rdma_show_ib_poll_ctx(x) \
+		__print_symbolic(x, IB_POLL_CTX_LIST)
+
+/**
+ ** Completion Queue events
+ **/
+
+TRACE_EVENT(cq_schedule,
+	TP_PROTO(
+		struct ib_cq *cq
+	),
+
+	TP_ARGS(cq),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+	),
+
+	TP_fast_assign(
+		cq->timestamp = ktime_get();
+		cq->interrupt = true;
+
+		__entry->cq_id = cq->res.id;
+	),
+
+	TP_printk("cq.id=%u", __entry->cq_id)
+);
+
+TRACE_EVENT(cq_reschedule,
+	TP_PROTO(
+		struct ib_cq *cq
+	),
+
+	TP_ARGS(cq),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+	),
+
+	TP_fast_assign(
+		cq->timestamp = ktime_get();
+		cq->interrupt = false;
+
+		__entry->cq_id = cq->res.id;
+	),
+
+	TP_printk("cq.id=%u", __entry->cq_id)
+);
+
+TRACE_EVENT(cq_process,
+	TP_PROTO(
+		const struct ib_cq *cq
+	),
+
+	TP_ARGS(cq),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(bool, interrupt)
+		__field(s64, latency)
+	),
+
+	TP_fast_assign(
+		ktime_t latency = ktime_sub(ktime_get(), cq->timestamp);
+
+		__entry->cq_id = cq->res.id;
+		__entry->latency = ktime_to_us(latency);
+		__entry->interrupt = cq->interrupt;
+	),
+
+	TP_printk("cq.id=%u wake-up took %lld [us] from %s",
+		__entry->cq_id, __entry->latency,
+		__entry->interrupt ? "interrupt" : "reschedule"
+	)
+);
+
+TRACE_EVENT(cq_poll,
+	TP_PROTO(
+		const struct ib_cq *cq,
+		int requested,
+		int rc
+	),
+
+	TP_ARGS(cq, requested, rc),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(int, requested)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cq->res.id;
+		__entry->requested = requested;
+		__entry->rc = rc;
+	),
+
+	TP_printk("cq.id=%u requested %d, returned %d",
+		__entry->cq_id, __entry->requested, __entry->rc
+	)
+);
+
+TRACE_EVENT(cq_drain_complete,
+	TP_PROTO(
+		const struct ib_cq *cq
+	),
+
+	TP_ARGS(cq),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cq->res.id;
+	),
+
+	TP_printk("cq.id=%u",
+		__entry->cq_id
+	)
+);
+
+
+TRACE_EVENT(cq_modify,
+	TP_PROTO(
+		const struct ib_cq *cq,
+		u16 comps,
+		u16 usec
+	),
+
+	TP_ARGS(cq, comps, usec),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(unsigned int, comps)
+		__field(unsigned int, usec)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cq->res.id;
+		__entry->comps = comps;
+		__entry->usec = usec;
+	),
+
+	TP_printk("cq.id=%u comps=%u usec=%u",
+		__entry->cq_id, __entry->comps, __entry->usec
+	)
+);
+
+TRACE_EVENT(cq_alloc,
+	TP_PROTO(
+		const struct ib_cq *cq,
+		int nr_cqe,
+		int comp_vector,
+		enum ib_poll_context poll_ctx
+	),
+
+	TP_ARGS(cq, nr_cqe, comp_vector, poll_ctx),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(int, nr_cqe)
+		__field(int, comp_vector)
+		__field(unsigned long, poll_ctx)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cq->res.id;
+		__entry->nr_cqe = nr_cqe;
+		__entry->comp_vector = comp_vector;
+		__entry->poll_ctx = poll_ctx;
+	),
+
+	TP_printk("cq.id=%u nr_cqe=%d comp_vector=%d poll_ctx=%s",
+		__entry->cq_id, __entry->nr_cqe, __entry->comp_vector,
+		rdma_show_ib_poll_ctx(__entry->poll_ctx)
+	)
+);
+
+TRACE_EVENT(cq_alloc_error,
+	TP_PROTO(
+		int nr_cqe,
+		int comp_vector,
+		enum ib_poll_context poll_ctx,
+		int rc
+	),
+
+	TP_ARGS(nr_cqe, comp_vector, poll_ctx, rc),
+
+	TP_STRUCT__entry(
+		__field(int, rc)
+		__field(int, nr_cqe)
+		__field(int, comp_vector)
+		__field(unsigned long, poll_ctx)
+	),
+
+	TP_fast_assign(
+		__entry->rc = rc;
+		__entry->nr_cqe = nr_cqe;
+		__entry->comp_vector = comp_vector;
+		__entry->poll_ctx = poll_ctx;
+	),
+
+	TP_printk("nr_cqe=%d comp_vector=%d poll_ctx=%s rc=%d",
+		__entry->nr_cqe, __entry->comp_vector,
+		rdma_show_ib_poll_ctx(__entry->poll_ctx), __entry->rc
+	)
+);
+
+TRACE_EVENT(cq_free,
+	TP_PROTO(
+		const struct ib_cq *cq
+	),
+
+	TP_ARGS(cq),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cq->res.id;
+	),
+
+	TP_printk("cq.id=%u", __entry->cq_id)
+);
+
+/**
+ ** Memory Region events
+ **/
+
+/*
+ * enum ib_mr_type, from include/rdma/ib_verbs.h
+ */
+#define IB_MR_TYPE_LIST				\
+	ib_mr_type_item(MEM_REG)		\
+	ib_mr_type_item(SG_GAPS)		\
+	ib_mr_type_item(DM)			\
+	ib_mr_type_item(USER)			\
+	ib_mr_type_item(DMA)			\
+	ib_mr_type_end(INTEGRITY)
+
+#undef ib_mr_type_item
+#undef ib_mr_type_end
+
+#define ib_mr_type_item(x)	TRACE_DEFINE_ENUM(IB_MR_TYPE_##x);
+#define ib_mr_type_end(x)	TRACE_DEFINE_ENUM(IB_MR_TYPE_##x);
+
+IB_MR_TYPE_LIST
+
+#undef ib_mr_type_item
+#undef ib_mr_type_end
+
+#define ib_mr_type_item(x)	{ IB_MR_TYPE_##x, #x },
+#define ib_mr_type_end(x)	{ IB_MR_TYPE_##x, #x }
+
+#define rdma_show_ib_mr_type(x) \
+		__print_symbolic(x, IB_MR_TYPE_LIST)
+
+TRACE_EVENT(mr_alloc,
+	TP_PROTO(
+		const struct ib_pd *pd,
+		enum ib_mr_type mr_type,
+		u32 max_num_sg,
+		const struct ib_mr *mr
+	),
+
+	TP_ARGS(pd, mr_type, max_num_sg, mr),
+
+	TP_STRUCT__entry(
+		__field(u32, pd_id)
+		__field(u32, mr_id)
+		__field(u32, max_num_sg)
+		__field(int, rc)
+		__field(unsigned long, mr_type)
+	),
+
+	TP_fast_assign(
+		__entry->pd_id = pd->res.id;
+		if (IS_ERR(mr)) {
+			__entry->mr_id = 0;
+			__entry->rc = PTR_ERR(mr);
+		} else {
+			__entry->mr_id = mr->res.id;
+			__entry->rc = 0;
+		}
+		__entry->max_num_sg = max_num_sg;
+		__entry->mr_type = mr_type;
+	),
+
+	TP_printk("pd.id=%u mr.id=%u type=%s max_num_sg=%u rc=%d",
+		__entry->pd_id, __entry->mr_id,
+		rdma_show_ib_mr_type(__entry->mr_type),
+		__entry->max_num_sg, __entry->rc)
+);
+
+TRACE_EVENT(mr_integ_alloc,
+	TP_PROTO(
+		const struct ib_pd *pd,
+		u32 max_num_data_sg,
+		u32 max_num_meta_sg,
+		const struct ib_mr *mr
+	),
+
+	TP_ARGS(pd, max_num_data_sg, max_num_meta_sg, mr),
+
+	TP_STRUCT__entry(
+		__field(u32, pd_id)
+		__field(u32, mr_id)
+		__field(u32, max_num_data_sg)
+		__field(u32, max_num_meta_sg)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->pd_id = pd->res.id;
+		if (IS_ERR(mr)) {
+			__entry->mr_id = 0;
+			__entry->rc = PTR_ERR(mr);
+		} else {
+			__entry->mr_id = mr->res.id;
+			__entry->rc = 0;
+		}
+		__entry->max_num_data_sg = max_num_data_sg;
+		__entry->max_num_meta_sg = max_num_meta_sg;
+	),
+
+	TP_printk("pd.id=%u mr.id=%u max_num_data_sg=%u max_num_meta_sg=%u rc=%d",
+		__entry->pd_id, __entry->mr_id, __entry->max_num_data_sg,
+		__entry->max_num_meta_sg, __entry->rc)
+);
+
+TRACE_EVENT(mr_dereg,
+	TP_PROTO(
+		const struct ib_mr *mr
+	),
+
+	TP_ARGS(mr),
+
+	TP_STRUCT__entry(
+		__field(u32, id)
+	),
+
+	TP_fast_assign(
+		__entry->id = mr->res.id;
+	),
+
+	TP_printk("mr.id=%u", __entry->id)
+);
+
+#endif /* _TRACE_RDMA_CORE_H */
+
+#include <trace/define_trace.h>
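
The header above only declares the tracepoints; exactly one core compilation unit must define them with CREATE_TRACE_POINTS, and every other caller simply includes the header and invokes the generated trace_*() stubs. A sketch of both sides follows (the example_report_cq_alloc() wrapper is invented); once built in, the events can typically be enabled at runtime under events/rdma_core/ in tracefs:

/* In the single file that instantiates the tracepoints: */
#define CREATE_TRACE_POINTS
#include <trace/events/rdma_core.h>

/* In any caller (header included without CREATE_TRACE_POINTS): */
static void example_report_cq_alloc(const struct ib_cq *cq, int nr_cqe,
				    int comp_vector,
				    enum ib_poll_context poll_ctx, int rc)
{
	if (rc)
		trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, rc);
	else
		trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
}
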
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 64f0e3aacd3f..d4ddbe4e696c 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -56,6 +56,7 @@ enum uverbs_default_objects {
 	UVERBS_OBJECT_FLOW_ACTION,
 	UVERBS_OBJECT_DM,
 	UVERBS_OBJECT_COUNTERS,
+	UVERBS_OBJECT_ASYNC_EVENT,
 };
 
 enum {
@@ -67,6 +68,7 @@ enum uverbs_methods_device {
 	UVERBS_METHOD_INVOKE_WRITE,
 	UVERBS_METHOD_INFO_HANDLES,
 	UVERBS_METHOD_QUERY_PORT,
+	UVERBS_METHOD_GET_CONTEXT,
 };
 
 enum uverbs_attrs_invoke_write_cmd_attr_ids {
@@ -80,6 +82,11 @@ enum uverbs_attrs_query_port_cmd_attr_ids {
 	UVERBS_ATTR_QUERY_PORT_RESP,
 };
 
+enum uverbs_attrs_get_context_attr_ids {
+	UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+	UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+};
+
 enum uverbs_attrs_create_cq_cmd_attr_ids {
 	UVERBS_ATTR_CREATE_CQ_HANDLE,
 	UVERBS_ATTR_CREATE_CQ_CQE,
@@ -241,4 +248,12 @@ enum uverbs_attrs_flow_destroy_ids {
 	UVERBS_ATTR_DESTROY_FLOW_HANDLE,
 };
 
+enum uverbs_method_async_event {
+	UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+};
+
+enum uverbs_attrs_async_event_create {
+	UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+};
+
 #endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 9019b2d906ea..a640bb814be0 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -41,6 +41,13 @@
 #define RDMA_UAPI_PTR(_type, _name)	__aligned_u64 _name
 #endif
 
+#define IB_UVERBS_ACCESS_OPTIONAL_FIRST (1 << 20)
+#define IB_UVERBS_ACCESS_OPTIONAL_LAST (1 << 29)
+
+enum ib_uverbs_core_support {
+	IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS = 1 << 0,
+};
+
 enum ib_uverbs_access_flags {
 	IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
 	IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
@@ -50,6 +57,11 @@ enum ib_uverbs_access_flags {
 	IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
 	IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
 	IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+
+	IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST,
+	IB_UVERBS_ACCESS_OPTIONAL_RANGE =
+		((IB_UVERBS_ACCESS_OPTIONAL_LAST << 1) - 1) &
+		~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1)
 };
 
 enum ib_uverbs_query_port_cap_flags {
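
IB_UVERBS_ACCESS_OPTIONAL_RANGE works out to bits 20 through 29 (0x3ff00000): ((1 << 29) << 1) - 1 sets bits 0..29 and ~((1 << 20) - 1) clears bits 0..19. Flags in this window, such as IB_UVERBS_ACCESS_RELAXED_ORDERING, are defined so that a consumer that does not recognize them can drop them rather than fail the request. A sketch of that masking, with the example_* name invented:

#include <rdma/ib_user_ioctl_verbs.h>	/* installed uapi header */

/* Keep mandatory bits as-is; keep only the optional bits we support. */
static __u32 example_filter_access(__u32 access, __u32 supported_optional)
{
	__u32 optional = access & IB_UVERBS_ACCESS_OPTIONAL_RANGE;

	return (access & ~IB_UVERBS_ACCESS_OPTIONAL_RANGE) |
	       (optional & supported_optional);
}
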
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 20d88307f75f..afe7da6f2b8e 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -115,6 +115,22 @@ enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
 };
 
+enum mlx5_ib_var_alloc_attrs {
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_var_obj_destroy_attrs {
+	MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_var_obj_methods {
+	MLX5_IB_METHOD_VAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+};
+
 enum mlx5_ib_devx_umem_reg_attrs {
 	MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
@@ -156,6 +172,7 @@ enum mlx5_ib_objects {
 	MLX5_IB_OBJECT_FLOW_MATCHER,
 	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
 	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+	MLX5_IB_OBJECT_VAR,
 };
 
 enum mlx5_ib_flow_matcher_create_attrs {
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index c022ee26089b..a0b83c9d4498 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -48,6 +48,18 @@ struct qedr_alloc_ucontext_req {
 	__u32 reserved;
 };
 
+#define QEDR_LDPM_MAX_SIZE	(8192)
+#define QEDR_EDPM_TRANS_SIZE	(64)
+
+enum qedr_rdma_dpm_type {
+	QEDR_DPM_TYPE_NONE		= 0,
+	QEDR_DPM_TYPE_ROCE_ENHANCED	= 1 << 0,
+	QEDR_DPM_TYPE_ROCE_LEGACY	= 1 << 1,
+	QEDR_DPM_TYPE_IWARP_LEGACY	= 1 << 2,
+	QEDR_DPM_TYPE_RESERVED		= 1 << 3,
+	QEDR_DPM_SIZES_SET		= 1 << 4,
+};
+
 struct qedr_alloc_ucontext_resp {
 	__aligned_u64 db_pa;
 	__u32 db_size;
@@ -59,10 +71,12 @@ struct qedr_alloc_ucontext_resp {
 	__u32 sges_per_recv_wr;
 	__u32 sges_per_srq_wr;
 	__u32 max_cqes;
-	__u8 dpm_enabled;
+	__u8 dpm_flags;
 	__u8 wids_enabled;
 	__u16 wid_count;
-	__u32 reserved;
+	__u16 ldpm_limit_size;
+	__u8 edpm_trans_size;
+	__u8 reserved;
 };
 
 struct qedr_alloc_pd_ureq {