summary refs log tree commit diff
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-27 10:17:28 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-27 10:17:28 -0800
commitd76886972823ce456c0c61cd2284e85668e2131e (patch)
tree2171359a7aeb2539c327f6d2604b1ad3aa21f588 /drivers/infiniband
parent0e45384cecccaa950783e67e7a29ed470133f19d (diff)
parentf295e4cece5cb4c60715fed539abcd62468f9ef1 (diff)
downloadlinux-d76886972823ce456c0c61cd2284e85668e2131e.tar.gz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "Again another fairly quiet cycle with few notable core code changes
  and the usual variety of driver bug fixes and small improvements.

   - Various driver updates and bug fixes for siw, bnxt_re, hns, qedr,
     iw_cxgb4, vmw_pvrdma, mlx5

   - Improvements in SRPT from working with iWarp

   - SRIOV VF support for bnxt_re

   - Skeleton kernel-doc files for drivers/infiniband

   - User visible counters for events related to ODP

   - Common code for tracking of mmap lifetimes so that drivers can link
     HW object liftime to a VMA

   - ODP bug fixes and rework

   - RDMA READ support for efa

   - Removal of the very old cxgb3 driver"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (168 commits)
  RDMA/hns: Delete unnecessary callback functions for cq
  RDMA/hns: Rename the functions used inside creating cq
  RDMA/hns: Redefine the member of hns_roce_cq struct
  RDMA/hns: Redefine interfaces used in creating cq
  RDMA/efa: Expose RDMA read related attributes
  RDMA/efa: Support remote read access in MR registration
  RDMA/efa: Store network attributes in device attributes
  IB/hfi1: remove redundant assignment to variable ret
  RDMA/bnxt_re: Fix missing le16_to_cpu
  RDMA/bnxt_re: Fix stat push into dma buffer on gen p5 devices
  RDMA/bnxt_re: Fix chip number validation Broadcom's Gen P5 series
  RDMA/bnxt_re: Fix Kconfig indentation
  IB/mlx5: Implement callbacks for getting VFs GUID attributes
  IB/ipoib: Add ndo operation for getting VFs GUID attributes
  IB/core: Add interfaces to get VF node and port GUIDs
  net/core: Add support for getting VF GUIDs
  RDMA/qedr: Fix null-pointer dereference when calling rdma_user_mmap_get_offset
  RDMA/cm: Use refcount_t type for refcount variable
  IB/mlx5: Support extended number of strides for Striding RQ
  IB/mlx4: Update HW GID table while adding vlan GID
  ...
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/cache.c8
-rw-r--r--drivers/infiniband/core/cm.c66
-rw-r--r--drivers/infiniband/core/cm_msgs.h32
-rw-r--r--drivers/infiniband/core/cma.c107
-rw-r--r--drivers/infiniband/core/core_priv.h11
-rw-r--r--drivers/infiniband/core/counters.c40
-rw-r--r--drivers/infiniband/core/device.c50
-rw-r--r--drivers/infiniband/core/ib_core_uverbs.c335
-rw-r--r--drivers/infiniband/core/iwpm_util.h5
-rw-r--r--drivers/infiniband/core/mad.c31
-rw-r--r--drivers/infiniband/core/nldev.c141
-rw-r--r--drivers/infiniband/core/rdma_core.c1
-rw-r--r--drivers/infiniband/core/restrack.c20
-rw-r--r--drivers/infiniband/core/restrack.h1
-rw-r--r--drivers/infiniband/core/rw.c25
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/core/sysfs.c12
-rw-r--r--drivers/infiniband/core/umem.c12
-rw-r--r--drivers/infiniband/core/umem_odp.c38
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c3
-rw-r--r--drivers/infiniband/core/uverbs_main.c84
-rw-r--r--drivers/infiniband/core/verbs.c12
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/bnxt_re/Kconfig12
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c28
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c143
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c5
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h8
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig19
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile7
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c1312
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h204
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c344
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.h69
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h802
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c282
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h155
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c2258
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h233
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c230
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c232
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c101
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c1321
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h347
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c1082
-rw-r--r--drivers/infiniband/hw/cxgb3/tcb.h632
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c7
-rw-r--r--drivers/infiniband/hw/efa/efa.h13
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h29
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c5
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c40
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h19
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c17
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c370
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c17
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c2
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h5
-rw-r--r--drivers/infiniband/hw/hns/Kconfig17
-rw-r--r--drivers/infiniband/hw/hns/Makefile8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c300
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h55
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c38
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c76
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c21
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c69
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c54
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c86
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c2
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c30
-rw-r--r--drivers/infiniband/hw/mlx4/main.c18
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h8
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c5
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c37
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c25
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c2
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c29
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c2
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c24
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c124
-rw-r--r--drivers/infiniband/hw/mlx5/main.c75
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c199
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h64
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c177
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c989
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c60
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c90
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c74
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c33
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h11
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c9
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.h3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h2
-rw-r--r--drivers/infiniband/hw/qedr/main.c5
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h72
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c150
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c643
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h12
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c38
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c119
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c1
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c30
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h1
-rw-r--r--drivers/infiniband/sw/siw/siw.h31
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c45
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c35
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c338
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c10
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c5
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h34
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c5
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c72
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h8
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c47
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h4
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c247
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h58
158 files changed, 3934 insertions, 12467 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b44b1c322ec8..ade86388434f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -83,7 +83,6 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
 if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
-source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/efa/Kconfig"
 source "drivers/infiniband/hw/i40iw/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 09881bd5f12d..9a8871e21545 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,7 +11,7 @@ ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o \
 				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
 				multicast.o mad.o smi.o agent.o mad_rmpp.o \
-				nldev.o restrack.o counters.o
+				nldev.o restrack.o counters.o ib_core_uverbs.o
 
 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
 ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 00fb3eacda19..d535995711c3 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -819,22 +819,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 				   struct ib_gid_table *table)
 {
 	int i;
-	bool deleted = false;
 
 	if (!table)
 		return;
 
 	mutex_lock(&table->lock);
 	for (i = 0; i < table->sz; ++i) {
-		if (is_gid_entry_valid(table->data_vec[i])) {
+		if (is_gid_entry_valid(table->data_vec[i]))
 			del_gid(ib_dev, port, table, i);
-			deleted = true;
-		}
 	}
 	mutex_unlock(&table->lock);
-
-	if (deleted)
-		dispatch_gid_change_event(ib_dev, port);
 }
 
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 5920c0085d35..455b3659d84b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,36 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
  */
 
 #include <linux/completion.h>
@@ -246,7 +220,7 @@ struct cm_work {
 };
 
 struct cm_timewait_info {
-	struct cm_work work;			/* Must be first. */
+	struct cm_work work;
 	struct list_head list;
 	struct rb_node remote_qp_node;
 	struct rb_node remote_id_node;
@@ -263,7 +237,7 @@ struct cm_id_private {
 	struct rb_node sidr_id_node;
 	spinlock_t lock;	/* Do not acquire inside cm.lock */
 	struct completion comp;
-	atomic_t refcount;
+	refcount_t refcount;
 	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
 	 * Protected by the cm.lock spinlock. */
 	int listen_sharecount;
@@ -308,7 +282,7 @@ static void cm_work_handler(struct work_struct *work);
 
 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
 {
-	if (atomic_dec_and_test(&cm_id_priv->refcount))
+	if (refcount_dec_and_test(&cm_id_priv->refcount))
 		complete(&cm_id_priv->comp);
 }
 
@@ -365,7 +339,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	m->ah = ah;
 	m->retries = cm_id_priv->max_cm_retries;
 
-	atomic_inc(&cm_id_priv->refcount);
+	refcount_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
 	*msg = m;
 
@@ -626,7 +600,7 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
 	if (cm_id_priv) {
 		if (cm_id_priv->id.remote_id == remote_id)
-			atomic_inc(&cm_id_priv->refcount);
+			refcount_inc(&cm_id_priv->refcount);
 		else
 			cm_id_priv = NULL;
 	}
@@ -883,7 +857,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
 	INIT_LIST_HEAD(&cm_id_priv->prim_list);
 	INIT_LIST_HEAD(&cm_id_priv->altr_list);
 	atomic_set(&cm_id_priv->work_count, -1);
-	atomic_set(&cm_id_priv->refcount, 1);
+	refcount_set(&cm_id_priv->refcount, 1);
 	return &cm_id_priv->id;
 
 error:
@@ -1230,7 +1204,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
 			spin_unlock_irqrestore(&cm.lock, flags);
 			return ERR_PTR(-EINVAL);
 		}
-		atomic_inc(&cm_id_priv->refcount);
+		refcount_inc(&cm_id_priv->refcount);
 		++cm_id_priv->listen_sharecount;
 		spin_unlock_irqrestore(&cm.lock, flags);
 
@@ -1525,14 +1499,6 @@ static int cm_issue_rej(struct cm_port *port,
 	return ret;
 }
 
-static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
-				    __be32 local_qpn, __be32 remote_qpn)
-{
-	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
-		((local_ca_guid == remote_ca_guid) &&
-		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
-}
-
 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
 {
 	return ((req_msg->alt_local_lid) ||
@@ -1895,8 +1861,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 			     NULL, 0);
 		goto out;
 	}
-	atomic_inc(&listen_cm_id_priv->refcount);
-	atomic_inc(&cm_id_priv->refcount);
+	refcount_inc(&listen_cm_id_priv->refcount);
+	refcount_inc(&cm_id_priv->refcount);
 	cm_id_priv->id.state = IB_CM_REQ_RCVD;
 	atomic_inc(&cm_id_priv->work_count);
 	spin_unlock_irq(&cm.lock);
@@ -2052,7 +2018,7 @@ static int cm_req_handler(struct cm_work *work)
 	return 0;
 
 rejected:
-	atomic_dec(&cm_id_priv->refcount);
+	refcount_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
 free_timeinfo:
 	kfree(cm_id_priv->timewait_info);
@@ -2826,7 +2792,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 				cm_local_id(timewait_info->work.local_id));
 		if (cm_id_priv) {
 			if (cm_id_priv->id.remote_id == remote_id)
-				atomic_inc(&cm_id_priv->refcount);
+				refcount_inc(&cm_id_priv->refcount);
 			else
 				cm_id_priv = NULL;
 		}
@@ -3434,7 +3400,7 @@ static int cm_timewait_handler(struct cm_work *work)
 	struct cm_id_private *cm_id_priv;
 	int ret;
 
-	timewait_info = (struct cm_timewait_info *)work;
+	timewait_info = container_of(work, struct cm_timewait_info, work);
 	spin_lock_irq(&cm.lock);
 	list_del(&timewait_info->list);
 	spin_unlock_irq(&cm.lock);
@@ -3596,8 +3562,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
 		goto out; /* No match. */
 	}
-	atomic_inc(&cur_cm_id_priv->refcount);
-	atomic_inc(&cm_id_priv->refcount);
+	refcount_inc(&cur_cm_id_priv->refcount);
+	refcount_inc(&cm_id_priv->refcount);
 	spin_unlock_irq(&cm.lock);
 
 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 3d16d614aff6..92d7260ac913 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,37 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2004, 2011 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING the madirectory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use source and binary forms, with or
- *     withmodification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retathe above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
  */
-#if !defined(CM_MSGS_H)
+#ifndef CM_MSGS_H
 #define CM_MSGS_H
 
 #include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d78f67623f24..25f2b70fd8ef 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
  */
 
 #include <linux/completion.h>
@@ -2531,7 +2504,9 @@ EXPORT_SYMBOL(rdma_set_service_type);
  * This function should be called before rdma_connect() on active side,
  * and on passive side before rdma_accept(). It is applicable to primary
  * path only. The timeout will affect the local side of the QP, it is not
- * negotiated with remote side and zero disables the timer.
+ * negotiated with remote side and zero disables the timer. In case it is
+ * set before rdma_resolve_route, the value will also be used to determine
+ * PacketLifeTime for RoCE.
  *
  * Return: 0 for success
  */
@@ -2828,22 +2803,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
 	return 0;
 }
 
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
 {
-	int prio;
 	struct net_device *dev;
 
-	prio = rt_tos2priority(tos);
-	dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+	dev = vlan_dev_real_dev(vlan_ndev);
 	if (dev->num_tc)
 		return netdev_get_prio_tc_map(dev, prio);
 
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+	int input_prio;
+	int output_tc;
+	bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+	struct iboe_prio_tc_map *map = data;
+
+	if (is_vlan_dev(dev))
+		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+	else if (dev->num_tc)
+		map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+	else
+		map->output_tc = 0;
+	/* We are interested only in first level VLAN device, so always
+	 * return 1 to stop iterating over next level devices.
+	 */
+	map->found = true;
+	return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+	struct iboe_prio_tc_map prio_tc_map = {};
+	int prio = rt_tos2priority(tos);
+
+	/* If VLAN device, get it directly from the VLAN netdev */
 	if (is_vlan_dev(ndev))
-		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
-			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
-	return 0;
+		return get_vlan_ndev_tc(ndev, prio);
+
+	prio_tc_map.input_prio = prio;
+	rcu_read_lock();
+	netdev_walk_all_lower_dev_rcu(ndev,
+				      get_lower_vlan_dev_tc,
+				      &prio_tc_map);
+	rcu_read_unlock();
+	/* If map is found from lower device, use it; Otherwise
+	 * continue with the current netdevice to get priority to tc map.
+	 */
+	if (prio_tc_map.found)
+		return prio_tc_map.output_tc;
+	else if (ndev->num_tc)
+		return netdev_get_prio_tc_map(ndev, prio);
+	else
+		return 0;
 }
 
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
@@ -2897,7 +2915,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	route->path_rec->rate = iboe_get_rate(ndev);
 	dev_put(ndev);
 	route->path_rec->packet_life_time_selector = IB_SA_EQ;
-	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+	/* In case ACK timeout is set, use this value to calculate
+	 * PacketLifeTime.  As per IBTA 12.7.34,
+	 * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
+	 * Assuming a negligible local ACK delay, we can use
+	 * PacketLifeTime = local ACK timeout/2
+	 * as a reasonable approximation for RoCE networks.
+	 */
+	route->path_rec->packet_life_time = id_priv->timeout_set ?
+		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
+
 	if (!route->path_rec->mtu) {
 		ret = -EINVAL;
 		goto err2;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 9d07378b5b42..3645e092e1c7 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -388,4 +388,15 @@ int ib_device_set_netns_put(struct sk_buff *skb,
 
 int rdma_nl_net_init(struct rdma_dev_net *rnet);
 void rdma_nl_net_exit(struct rdma_dev_net *rnet);
+
+struct rdma_umap_priv {
+	struct vm_area_struct *vma;
+	struct list_head list;
+	struct rdma_user_mmap_entry *entry;
+};
+
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+			 struct vm_area_struct *vma,
+			 struct rdma_user_mmap_entry *entry);
+
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 680ad27f497d..8434ec082c3a 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -149,11 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
 	struct auto_mode_param *param = &counter->mode.param;
 	bool match = true;
 
-	if (!rdma_is_visible_in_pid_ns(&qp->res))
-		return false;
-
-	/* Ensure that counter belongs to the right PID */
-	if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task))
+	/*
+	 * Ensure that counter belongs to the right PID.  This operation can
+	 * race with user space which kills the process and leaves QP and
+	 * counters orphans.
+	 *
+	 * It is not a big deal because exitted task will leave both QP and
+	 * counter in the same bucket of zombie process. Just ensure that
+	 * process is still alive before procedding.
+	 *
+	 */
+	if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) ||
+	    !task_pid_nr(qp->res.task))
 		return false;
 
 	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
@@ -229,9 +236,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
 	rt = &dev->res[RDMA_RESTRACK_COUNTER];
 	xa_lock(&rt->xa);
 	xa_for_each(&rt->xa, id, res) {
-		if (!rdma_is_visible_in_pid_ns(res))
-			continue;
-
 		counter = container_of(res, struct rdma_counter, res);
 		if ((counter->device != qp->device) || (counter->port != port))
 			goto next;
@@ -412,9 +416,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
 	if (IS_ERR(res))
 		return NULL;
 
-	if (!rdma_is_visible_in_pid_ns(res))
-		goto err;
-
 	qp = container_of(res, struct ib_qp, res);
 	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 		goto err;
@@ -445,11 +446,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
 	if (IS_ERR(res))
 		return NULL;
 
-	if (!rdma_is_visible_in_pid_ns(res)) {
-		rdma_restrack_put(res);
-		return NULL;
-	}
-
 	counter = container_of(res, struct rdma_counter, res);
 	kref_get(&counter->kref);
 	rdma_restrack_put(res);
@@ -463,10 +459,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
 int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
 			  u32 qp_num, u32 counter_id)
 {
+	struct rdma_port_counter *port_counter;
 	struct rdma_counter *counter;
 	struct ib_qp *qp;
 	int ret;
 
+	port_counter = &dev->port_data[port].port_counter;
+	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+		return -EINVAL;
+
 	qp = rdma_counter_get_qp(dev, qp_num);
 	if (!qp)
 		return -ENOENT;
@@ -503,6 +504,7 @@ err:
 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
 				u32 qp_num, u32 *counter_id)
 {
+	struct rdma_port_counter *port_counter;
 	struct rdma_counter *counter;
 	struct ib_qp *qp;
 	int ret;
@@ -510,9 +512,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
 	if (!rdma_is_port_valid(dev, port))
 		return -EINVAL;
 
-	if (!dev->port_data[port].port_counter.hstats)
+	port_counter = &dev->port_data[port].port_counter;
+	if (!port_counter->hstats)
 		return -EOPNOTSUPP;
 
+	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+		return -EINVAL;
+
 	qp = rdma_counter_get_qp(dev, qp_num);
 	if (!qp)
 		return -ENOENT;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 50a92442c4f7..9ebb09a75049 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -128,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
 MODULE_PARM_DESC(netns_mode,
 		 "Share device among net namespaces; default=1 (shared)");
 /**
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
+ * rdma_dev_access_netns() - Return whether an rdma device can be accessed
  *			     from a specified net namespace or not.
- * @device:	Pointer to rdma device which needs to be checked
+ * @dev:	Pointer to rdma device which needs to be checked
  * @net:	Pointer to net namesapce for which access to be checked
  *
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
- *			     from a specified net namespace or not. When
- *			     rdma device is in shared mode, it ignores the
- *			     net namespace. When rdma device is exclusive
- *			     to a net namespace, rdma device net namespace is
- *			     checked against the specified one.
+ * When the rdma device is in shared mode, it ignores the net namespace.
+ * When the rdma device is exclusive to a net namespace, rdma device net
+ * namespace is checked against the specified one.
  */
 bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
 {
@@ -1199,9 +1196,21 @@ static void setup_dma_device(struct ib_device *device)
 		WARN_ON_ONCE(!parent);
 		device->dma_device = parent;
 	}
-	/* Setup default max segment size for all IB devices */
-	dma_set_max_seg_size(device->dma_device, SZ_2G);
 
+	if (!device->dev.dma_parms) {
+		if (parent) {
+			/*
+			 * The caller did not provide DMA parameters, so
+			 * 'parent' probably represents a PCI device. The PCI
+			 * core sets the maximum segment size to 64
+			 * KB. Increase this parameter to 2 GB.
+			 */
+			device->dev.dma_parms = parent->dma_parms;
+			dma_set_max_seg_size(device->dma_device, SZ_2G);
+		} else {
+			WARN_ON_ONCE(true);
+		}
+	}
 }
 
 /*
@@ -1317,7 +1326,9 @@ out:
 
 /**
  * ib_register_device - Register an IB device with IB core
- * @device:Device to register
+ * @device: Device to register
+ * @name: unique string device name. This may include a '%' which will
+ * cause a unique index to be added to the passed device name.
  *
  * Low-level drivers use ib_register_device() to register their
  * devices with the IB core.  All registered clients will receive a
@@ -1444,7 +1455,7 @@ out:
 
 /**
  * ib_unregister_device - Unregister an IB device
- * @device: The device to unregister
+ * @ib_dev: The device to unregister
  *
  * Unregister an IB device.  All clients will receive a remove callback.
  *
@@ -1466,7 +1477,7 @@ EXPORT_SYMBOL(ib_unregister_device);
 
 /**
  * ib_unregister_device_and_put - Unregister a device while holding a 'get'
- * device: The device to unregister
+ * @ib_dev: The device to unregister
  *
  * This is the same as ib_unregister_device(), except it includes an internal
  * ib_device_put() that should match a 'get' obtained by the caller.
@@ -1536,7 +1547,7 @@ static void ib_unregister_work(struct work_struct *work)
 
 /**
  * ib_unregister_device_queued - Unregister a device using a work queue
- * device: The device to unregister
+ * @ib_dev: The device to unregister
  *
  * This schedules an asynchronous unregistration using a WQ for the device. A
  * driver should use this to avoid holding locks while doing unregistration,
@@ -2366,7 +2377,7 @@ int ib_modify_device(struct ib_device *device,
 		     struct ib_device_modify *device_modify)
 {
 	if (!device->ops.modify_device)
-		return -ENOSYS;
+		return -EOPNOTSUPP;
 
 	return device->ops.modify_device(device, device_modify_mask,
 					 device_modify);
@@ -2397,8 +2408,12 @@ int ib_modify_port(struct ib_device *device,
 		rc = device->ops.modify_port(device, port_num,
 					     port_modify_mask,
 					     port_modify);
+	else if (rdma_protocol_roce(device, port_num) &&
+		 ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
+		  (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
+		rc = 0;
 	else
-		rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
+		rc = -EOPNOTSUPP;
 	return rc;
 }
 EXPORT_SYMBOL(ib_modify_port);
@@ -2607,6 +2622,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, drain_sq);
 	SET_DEVICE_OP(dev_ops, enable_driver);
 	SET_DEVICE_OP(dev_ops, fill_res_entry);
+	SET_DEVICE_OP(dev_ops, fill_stat_entry);
 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
 	SET_DEVICE_OP(dev_ops, get_dma_mr);
 	SET_DEVICE_OP(dev_ops, get_hw_stats);
@@ -2615,6 +2631,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, get_port_immutable);
 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
 	SET_DEVICE_OP(dev_ops, get_vf_config);
+	SET_DEVICE_OP(dev_ops, get_vf_guid);
 	SET_DEVICE_OP(dev_ops, get_vf_stats);
 	SET_DEVICE_OP(dev_ops, init_port);
 	SET_DEVICE_OP(dev_ops, invalidate_range);
@@ -2630,6 +2647,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
 	SET_DEVICE_OP(dev_ops, map_phys_fmr);
 	SET_DEVICE_OP(dev_ops, mmap);
+	SET_DEVICE_OP(dev_ops, mmap_free);
 	SET_DEVICE_OP(dev_ops, modify_ah);
 	SET_DEVICE_OP(dev_ops, modify_cq);
 	SET_DEVICE_OP(dev_ops, modify_device);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
new file mode 100644
index 000000000000..f509c478b469
--- /dev/null
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2019 Marvell. All rights reserved.
+ */
+#include <linux/xarray.h>
+#include "uverbs.h"
+#include "core_priv.h"
+
+/**
+ * rdma_umap_priv_init() - Initialize the private data of a vma
+ *
+ * @priv: The already allocated private data
+ * @vma: The vm area struct that needs private data
+ * @entry: entry into the mmap_xa that needs to be linked with
+ *       this vma
+ *
+ * Each time we map IO memory into user space this keeps track of the
+ * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
+ * to point to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+			 struct vm_area_struct *vma,
+			 struct rdma_user_mmap_entry *entry)
+{
+	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+	priv->vma = vma;
+	if (entry) {
+		kref_get(&entry->ref);
+		priv->entry = entry;
+	}
+	vma->vm_private_data = priv;
+	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
+
+	mutex_lock(&ufile->umap_lock);
+	list_add(&priv->list, &ufile->umaps);
+	mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ *         if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
+ * success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot,
+		      struct rdma_user_mmap_entry *entry)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	struct rdma_umap_priv *priv;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	if (vma->vm_end - vma->vm_start != size)
+		return -EINVAL;
+
+	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
+	if (WARN_ON(!vma->vm_file ||
+		    vma->vm_file->private_data != ufile))
+		return -EINVAL;
+	lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	vma->vm_page_prot = prot;
+	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+		kfree(priv);
+		return -EAGAIN;
+	}
+
+	rdma_umap_priv_init(priv, vma, entry);
+	return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return an reference to an entry if exists or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+			       unsigned long pgoff)
+{
+	struct rdma_user_mmap_entry *entry;
+
+	if (pgoff > U32_MAX)
+		return NULL;
+
+	xa_lock(&ucontext->mmap_xa);
+
+	entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+	/*
+	 * If refcount is zero, entry is already being deleted, driver_removed
+	 * indicates that the no further mmaps are possible and we waiting for
+	 * the active VMAs to be closed.
+	 */
+	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+	    !kref_get_unless_zero(&entry->ref))
+		goto err;
+
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
+		  pgoff, entry->npages);
+
+	return entry;
+
+err:
+	xa_unlock(&ucontext->mmap_xa);
+	return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+			 struct vm_area_struct *vma)
+{
+	struct rdma_user_mmap_entry *entry;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return NULL;
+	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+	if (!entry)
+		return NULL;
+	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+		rdma_user_mmap_entry_put(entry);
+		return NULL;
+	}
+	return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+	struct rdma_user_mmap_entry *entry =
+		container_of(kref, struct rdma_user_mmap_entry, ref);
+	struct ib_ucontext *ucontext = entry->ucontext;
+	unsigned long i;
+
+	/*
+	 * Erase all entries occupied by this single entry, this is deferred
+	 * until all VMA are closed so that the mmap offsets remain unique.
+	 */
+	xa_lock(&ucontext->mmap_xa);
+	for (i = 0; i < entry->npages; i++)
+		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
+		  entry->start_pgoff, entry->npages);
+
+	if (ucontext->device->ops.mmap_free)
+		ucontext->device->ops.mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ *				   mark it as unmmapable
+ *
+ * @entry: the entry to insert into the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry, however existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+	if (!entry)
+		return;
+
+	entry->driver_removed = true;
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall A database of mmap offsets is
+ * handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset.  The function allocates a unique page offset that should be
+ * provided to user, the user will use the offset to retrieve information such
+ * as address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				struct rdma_user_mmap_entry *entry,
+				size_t length)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	XA_STATE(xas, &ucontext->mmap_xa, 0);
+	u32 xa_first, xa_last, npages;
+	int err;
+	u32 i;
+
+	if (!entry)
+		return -EINVAL;
+
+	kref_init(&entry->ref);
+	entry->ucontext = ucontext;
+
+	/*
+	 * We want the whole allocation to be done without interruption from a
+	 * different thread. The allocation requires finding a free range and
+	 * storing. During the xa_insert the lock could be released, possibly
+	 * allowing another thread to choose the same range.
+	 */
+	mutex_lock(&ufile->umap_lock);
+
+	xa_lock(&ucontext->mmap_xa);
+
+	/* We want to find an empty range */
+	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+	entry->npages = npages;
+	while (true) {
+		/* First find an empty index */
+		xas_find_marked(&xas, U32_MAX, XA_FREE_MARK);
+		if (xas.xa_node == XAS_RESTART)
+			goto err_unlock;
+
+		xa_first = xas.xa_index;
+
+		/* Is there enough room to have the range? */
+		if (check_add_overflow(xa_first, npages, &xa_last))
+			goto err_unlock;
+
+		/*
+		 * Now look for the next present entry. If an entry doesn't
+		 * exist, we found an empty range and can proceed.
+		 */
+		xas_next_entry(&xas, xa_last - 1);
+		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
+			break;
+	}
+
+	for (i = xa_first; i < xa_last; i++) {
+		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+		if (err)
+			goto err_undo;
+	}
+
+	/*
+	 * Internally the kernel uses a page offset, in libc this is a byte
+	 * offset. Drivers should not return pgoff to userspace.
+	 */
+	entry->start_pgoff = xa_first;
+	xa_unlock(&ucontext->mmap_xa);
+	mutex_unlock(&ufile->umap_lock);
+
+	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
+		  entry->start_pgoff, npages);
+
+	return 0;
+
+err_undo:
+	for (; i > xa_first; i--)
+		__xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+	xa_unlock(&ucontext->mmap_xa);
+	mutex_unlock(&ufile->umap_lock);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 7e2bcc72f66c..1bf87d9fd0bd 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void);
 
 /**
  * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ * @a_sockaddr: first sockaddr to compare
+ * @b_sockaddr: second sockaddr to compare
  *
- * Returns 0 if they are holding the same ip/tcp address info,
+ * Return: 0 if they are holding the same ip/tcp address info,
  * otherwise returns 1
  */
 int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
@@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
  * iwpm_send_hello - Send hello response to iwpmd
  *
  * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
  * @abi_version: The kernel's abi_version
  *
  * Returns 0 on success or a negative error code
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9947d16edef2..c54db13fa9b0 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* No GRH for DR SMP */
 	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
-				      (const struct ib_mad_hdr *)smp, mad_size,
-				      (struct ib_mad_hdr *)mad_priv->mad,
-				      &mad_size, &out_mad_pkey_index);
+				      (const struct ib_mad *)smp,
+				      (struct ib_mad *)mad_priv->mad, &mad_size,
+				      &out_mad_pkey_index);
 	switch (ret)
 	{
 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
@@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
 }
 EXPORT_SYMBOL(ib_free_recv_mad);
 
-struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
-					u8 rmpp_version,
-					ib_mad_send_handler send_handler,
-					ib_mad_recv_handler recv_handler,
-					void *context)
-{
-	return ERR_PTR(-EINVAL);	/* XXX: for now */
-}
-EXPORT_SYMBOL(ib_redirect_mad_qp);
-
-int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
-		      struct ib_wc *wc)
-{
-	dev_err(&mad_agent->device->dev,
-		"ib_process_mad_wc() not implemented yet\n");
-	return 0;
-}
-EXPORT_SYMBOL(ib_process_mad_wc);
-
 static int method_in_use(struct ib_mad_mgmt_method_table **method,
 			 struct ib_mad_reg_req *mad_reg_req)
 {
@@ -2340,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 	if (port_priv->device->ops.process_mad) {
 		ret = port_priv->device->ops.process_mad(
 			port_priv->device, 0, port_priv->port_num, wc,
-			&recv->grh, (const struct ib_mad_hdr *)recv->mad,
-			recv->mad_size, (struct ib_mad_hdr *)response->mad,
-			&mad_size, &resp_mad_pkey_index);
+			&recv->grh, (const struct ib_mad *)recv->mad,
+			(struct ib_mad *)response->mad, &mad_size,
+			&resp_mad_pkey_index);
 
 		if (opa)
 			wc->pkey_index = resp_mad_pkey_index;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index c03af08b80e7..cbf6041a5d4a 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -42,6 +42,9 @@
 #include "cma_priv.h"
 #include "restrack.h"
 
+typedef int (*res_fill_func_t)(struct sk_buff*, bool,
+			       struct rdma_restrack_entry*, uint32_t);
+
 /*
  * Sort array elements by the netlink attribute name
  */
@@ -180,6 +183,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
 	return 0;
 }
 
+int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
+			      const char *str)
+{
+	if (put_driver_name_print_type(msg, name,
+				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
+		return -EMSGSIZE;
+	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
+		return -EMSGSIZE;
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_string);
+
 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
 {
 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
@@ -399,20 +415,34 @@ err:
 static int fill_res_name_pid(struct sk_buff *msg,
 			     struct rdma_restrack_entry *res)
 {
+	int err = 0;
+
 	/*
 	 * For user resources, user is should read /proc/PID/comm to get the
 	 * name of the task file.
 	 */
 	if (rdma_is_kernel_res(res)) {
-		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
-		    res->kern_name))
-			return -EMSGSIZE;
+		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				     res->kern_name);
 	} else {
-		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
-		    task_pid_vnr(res->task)))
-			return -EMSGSIZE;
+		pid_t pid;
+
+		pid = task_pid_vnr(res->task);
+		/*
+		 * Task is dead and in zombie state.
+		 * There is no need to print PID anymore.
+		 */
+		if (pid)
+			/*
+			 * This part is racy, task can be killed and PID will
+			 * be zero right here but it is ok, next query won't
+			 * return PID. We don't promise real-time reflection
+			 * of SW objects.
+			 */
+			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
 	}
-	return 0;
+
+	return err ? -EMSGSIZE : 0;
 }
 
 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
@@ -423,6 +453,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
 	return dev->ops.fill_res_entry(msg, res);
 }
 
+static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
+			    struct rdma_restrack_entry *res)
+{
+	if (!dev->ops.fill_stat_entry)
+		return false;
+	return dev->ops.fill_stat_entry(msg, res);
+}
+
 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			     struct rdma_restrack_entry *res, uint32_t port)
 {
@@ -698,9 +736,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
 	rt = &counter->device->res[RDMA_RESTRACK_QP];
 	xa_lock(&rt->xa);
 	xa_for_each(&rt->xa, id, res) {
-		if (!rdma_is_visible_in_pid_ns(res))
-			continue;
-
 		qp = container_of(res, struct ib_qp, res);
 		if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 			continue;
@@ -723,8 +758,8 @@ err:
 	return ret;
 }
 
-static int fill_stat_hwcounter_entry(struct sk_buff *msg,
-				     const char *name, u64 value)
+int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
+				 u64 value)
 {
 	struct nlattr *entry_attr;
 
@@ -746,6 +781,25 @@ err:
 	nla_nest_cancel(msg, entry_attr);
 	return -EMSGSIZE;
 }
+EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
+
+static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
+			      struct rdma_restrack_entry *res, uint32_t port)
+{
+	struct ib_mr *mr = container_of(res, struct ib_mr, res);
+	struct ib_device *dev = mr->pd->device;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+		goto err;
+
+	if (fill_stat_entry(dev, msg, res))
+		goto err;
+
+	return 0;
+
+err:
+	return -EMSGSIZE;
+}
 
 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
 					struct rdma_counter *counter)
@@ -759,7 +813,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg,
 		return -EMSGSIZE;
 
 	for (i = 0; i < st->num_counters; i++)
-		if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+		if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
 			goto err;
 
 	nla_nest_end(msg, table_attr);
@@ -1117,8 +1171,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
 }
 
 struct nldev_fill_res_entry {
-	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
-			     struct rdma_restrack_entry *res, u32 port);
 	enum rdma_nldev_attr nldev_attr;
 	enum rdma_nldev_command nldev_cmd;
 	u8 flags;
@@ -1132,21 +1184,18 @@ enum nldev_res_flags {
 
 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 	[RDMA_RESTRACK_QP] = {
-		.fill_res_func = fill_res_qp_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
 		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
 		.id = RDMA_NLDEV_ATTR_RES_LQPN,
 	},
 	[RDMA_RESTRACK_CM_ID] = {
-		.fill_res_func = fill_res_cm_id_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
 		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
 	},
 	[RDMA_RESTRACK_CQ] = {
-		.fill_res_func = fill_res_cq_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
 		.flags = NLDEV_PER_DEV,
@@ -1154,7 +1203,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.id = RDMA_NLDEV_ATTR_RES_CQN,
 	},
 	[RDMA_RESTRACK_MR] = {
-		.fill_res_func = fill_res_mr_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
 		.flags = NLDEV_PER_DEV,
@@ -1162,7 +1210,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.id = RDMA_NLDEV_ATTR_RES_MRN,
 	},
 	[RDMA_RESTRACK_PD] = {
-		.fill_res_func = fill_res_pd_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
 		.flags = NLDEV_PER_DEV,
@@ -1170,7 +1217,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.id = RDMA_NLDEV_ATTR_RES_PDN,
 	},
 	[RDMA_RESTRACK_COUNTER] = {
-		.fill_res_func = fill_res_counter_entry,
 		.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
 		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
@@ -1180,7 +1226,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 
 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack,
-			       enum rdma_restrack_type res_type)
+			       enum rdma_restrack_type res_type,
+			       res_fill_func_t fill_func)
 {
 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -1222,11 +1269,6 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto err;
 	}
 
-	if (!rdma_is_visible_in_pid_ns(res)) {
-		ret = -ENOENT;
-		goto err_get;
-	}
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
@@ -1243,7 +1285,9 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
-	ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
+
+	ret = fill_func(msg, has_cap_net_admin, res, port);
+
 	rdma_restrack_put(res);
 	if (ret)
 		goto err_free;
@@ -1263,7 +1307,8 @@ err:
 
 static int res_get_common_dumpit(struct sk_buff *skb,
 				 struct netlink_callback *cb,
-				 enum rdma_restrack_type res_type)
+				 enum rdma_restrack_type res_type,
+				 res_fill_func_t fill_func)
 {
 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -1334,9 +1379,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	 * objects.
 	 */
 	xa_for_each(&rt->xa, id, res) {
-		if (!rdma_is_visible_in_pid_ns(res))
-			continue;
-
 		if (idx < start || !rdma_restrack_get(res))
 			goto next;
 
@@ -1351,7 +1393,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 			goto msg_full;
 		}
 
-		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
+		ret = fill_func(skb, has_cap_net_admin, res, port);
+
 		rdma_restrack_put(res);
 
 		if (ret) {
@@ -1394,17 +1437,19 @@ err_index:
 	return ret;
 }
 
-#define RES_GET_FUNCS(name, type)                                              \
-	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
+#define RES_GET_FUNCS(name, type)					       \
+	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,	       \
 						 struct netlink_callback *cb)  \
-	{                                                                      \
-		return res_get_common_dumpit(skb, cb, type);                   \
-	}                                                                      \
-	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
-					       struct nlmsghdr *nlh,           \
+	{								       \
+		return res_get_common_dumpit(skb, cb, type,		       \
+					     fill_res_##name##_entry);	       \
+	}								       \
+	static int nldev_res_get_##name##_doit(struct sk_buff *skb,	       \
+					       struct nlmsghdr *nlh,	       \
 					       struct netlink_ext_ack *extack) \
-	{                                                                      \
-		return res_get_common_doit(skb, nlh, extack, type);            \
+	{								       \
+		return res_get_common_doit(skb, nlh, extack, type,	       \
+					   fill_res_##name##_entry);	       \
 	}
 
 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
@@ -1880,7 +1925,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
 	for (i = 0; i < num_cnts; i++) {
 		v = stats->value[i] +
 			rdma_counter_get_hwstat_value(device, port, i);
-		if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
+		if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
 			ret = -EMSGSIZE;
 			goto err_table;
 		}
@@ -1989,7 +2034,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	case RDMA_NLDEV_ATTR_RES_QP:
 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
 		break;
-
+	case RDMA_NLDEV_ATTR_RES_MR:
+		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
+					  fill_stat_mr_entry);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -2013,7 +2061,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb,
 	case RDMA_NLDEV_ATTR_RES_QP:
 		ret = nldev_res_get_counter_dumpit(skb, cb);
 		break;
-
+	case RDMA_NLDEV_ATTR_RES_MR:
+		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
+					    fill_stat_mr_entry);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..6c72773faf29 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 	rdma_restrack_del(&ucontext->res);
 
 	ib_dev->ops.dealloc_ucontext(ucontext);
+	WARN_ON(!xa_empty(&ucontext->mmap_xa));
 	kfree(ucontext);
 
 	ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index a07665f7ef8c..62fbb0ae9cb4 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -116,11 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
 	u32 cnt = 0;
 
 	xa_lock(&rt->xa);
-	xas_for_each(&xas, e, U32_MAX) {
-		if (!rdma_is_visible_in_pid_ns(e))
-			continue;
+	xas_for_each(&xas, e, U32_MAX)
 		cnt++;
-	}
 	xa_unlock(&rt->xa);
 	return cnt;
 }
@@ -346,18 +343,3 @@ out:
 	}
 }
 EXPORT_SYMBOL(rdma_restrack_del);
-
-bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res)
-{
-	/*
-	 * 1. Kern resources should be visible in init
-	 *    namespace only
-	 * 2. Present only resources visible in the current
-	 *     namespace
-	 */
-	if (rdma_is_kernel_res(res))
-		return task_active_pid_ns(current) == &init_pid_ns;
-
-	/* PID 0 means that resource is not found in current namespace */
-	return task_pid_vnr(res->task);
-}
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index 7bd177cc0a61..d084e5f89849 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev);
 void rdma_restrack_clean(struct ib_device *dev);
 void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
 			       struct task_struct *task);
-bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res);
 #endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 5337393d4dfe..4fad732f9b3c 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
 MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
 
 /*
- * Check if the device might use memory registration.  This is currently only
- * true for iWarp devices. In the future we can hopefully fine tune this based
- * on HCA driver input.
+ * Report whether memory registration should be used. Memory registration must
+ * be used for iWarp devices because of iWARP-specific limitations. Memory
+ * registration is also enabled if registering memory might yield better
+ * performance than using multiple SGE entries, see rdma_rw_io_needs_mr()
  */
 static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
 {
 	if (rdma_protocol_iwarp(dev, port_num))
 		return true;
+	if (dev->attrs.max_sgl_rd)
+		return true;
 	if (unlikely(rdma_rw_force_mr))
 		return true;
 	return false;
@@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
 
 /*
  * Check if the device will use memory registration for this RW operation.
- * We currently always use memory registrations for iWarp RDMA READs, and
- * have a debug option to force usage of MRs.
- *
- * XXX: In the future we can hopefully fine tune this based on HCA driver
- * input.
+ * For RDMA READs we must use MRs on iWarp and can optionally use them as an
+ * optimization otherwise.  Additionally we have a debug option to force usage
+ * of MRs to help testing this code path.
  */
 static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
 		enum dma_data_direction dir, int dma_nents)
 {
-	if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
-		return true;
+	if (dir == DMA_FROM_DEVICE) {
+		if (rdma_protocol_iwarp(dev, port_num))
+			return true;
+		if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
+			return true;
+	}
 	if (unlikely(rdma_rw_force_mr))
 		return true;
 	return false;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 17fc2936c077..8917125ea16d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
  * @port_num: Port on the specified device.
  * @rec: path record entry to use for ah attributes initialization.
  * @ah_attr: address handle attributes to initialization from path record.
- * @sgid_attr: SGID attribute to consider during initialization.
+ * @gid_attr: SGID attribute to consider during initialization.
  *
  * When ib_init_ah_attr_from_path() returns success,
  * (a) for IB link layer it optionally contains a reference to SGID attribute
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7a50cedcef1f..087682e6969e 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -481,8 +481,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
 	if (!dev->ops.process_mad)
 		return -ENOSYS;
 
-	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
 	if (!in_mad || !out_mad) {
 		ret = -ENOMEM;
 		goto out;
@@ -497,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
 	if (attr != IB_PMA_CLASS_PORT_INFO)
 		in_mad->data[41] = port_num;	/* PortSelect field */
 
-	if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY,
-				  port_num, NULL, NULL,
-				  (const struct ib_mad_hdr *)in_mad, mad_size,
-				  (struct ib_mad_hdr *)out_mad, &mad_size,
+	if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
+				  in_mad, out_mad, &mad_size,
 				  &out_mad_pkey_index) &
 	     (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
 	    (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
@@ -1268,7 +1266,7 @@ static ssize_t node_desc_store(struct device *device,
 	int ret;
 
 	if (!dev->ops.modify_device)
-		return -EIO;
+		return -EOPNOTSUPP;
 
 	memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
 	ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 24244a2f68cc..7a3b99597ead 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -185,10 +185,9 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
  * @access: IB_ACCESS_xxx flags for memory being pinned
- * @dmasync: flush in-flight DMA when the memory region is written
  */
 struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
-			    size_t size, int access, int dmasync)
+			    size_t size, int access)
 {
 	struct ib_ucontext *context;
 	struct ib_umem *umem;
@@ -199,7 +198,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
-	unsigned long dma_attrs = 0;
 	struct scatterlist *sg;
 	unsigned int gup_flags = FOLL_WRITE;
 
@@ -211,9 +209,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	if (!context)
 		return ERR_PTR(-EIO);
 
-	if (dmasync)
-		dma_attrs |= DMA_ATTR_WRITE_BARRIER;
-
 	/*
 	 * If the combination of the addr and size requested for this memory
 	 * region causes an integer overflow, return error.
@@ -294,11 +289,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 
 	sg_mark_end(sg);
 
-	umem->nmap = ib_dma_map_sg_attrs(context->device,
+	umem->nmap = ib_dma_map_sg(context->device,
 				  umem->sg_head.sgl,
 				  umem->sg_nents,
-				  DMA_BIDIRECTIONAL,
-				  dma_attrs);
+				  DMA_BIDIRECTIONAL);
 
 	if (!umem->nmap) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 163ff7ba92b7..d7d5fadf0899 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -508,7 +508,6 @@ static int ib_umem_odp_map_dma_single_page(
 {
 	struct ib_device *dev = umem_odp->umem.ibdev;
 	dma_addr_t dma_addr;
-	int remove_existing_mapping = 0;
 	int ret = 0;
 
 	/*
@@ -534,28 +533,29 @@ static int ib_umem_odp_map_dma_single_page(
 	} else if (umem_odp->page_list[page_index] == page) {
 		umem_odp->dma_list[page_index] |= access_mask;
 	} else {
-		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
-		       umem_odp->page_list[page_index], page);
-		/* Better remove the mapping now, to prevent any further
-		 * damage. */
-		remove_existing_mapping = 1;
+		/*
+		 * This is a race here where we could have done:
+		 *
+		 *         CPU0                             CPU1
+		 *   get_user_pages()
+		 *                                       invalidate()
+		 *                                       page_fault()
+		 *   mutex_lock(umem_mutex)
+		 *    page from GUP != page in ODP
+		 *
+		 * It should be prevented by the retry test above as reading
+		 * the seq number should be reliable under the
+		 * umem_mutex. Thus something is really not working right if
+		 * things get here.
+		 */
+		WARN(true,
+		     "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
+		     umem_odp->page_list[page_index], page);
+		ret = -EAGAIN;
 	}
 
 out:
 	put_user_page(page);
-
-	if (remove_existing_mapping) {
-		ib_umem_notifier_start_account(umem_odp);
-		dev->ops.invalidate_range(
-			umem_odp,
-			ib_umem_start(umem_odp) +
-				(page_index << umem_odp->page_shift),
-			ib_umem_start(umem_odp) +
-				((page_index + 1) << umem_odp->page_shift));
-		ib_umem_notifier_end_account(umem_odp);
-		ret = -EAGAIN;
-	}
-
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 14a80fd9f464..06ed32c8662f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -252,6 +252,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	ucontext->closing = false;
 	ucontext->cleanup_retryable = false;
 
+	xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
 	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
 		goto err_free;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 61758201d9b2..269938f59d3f 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -795,6 +795,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
 {
 	const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
 
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
 	if (size < attr->ptr_attr.len) {
 		if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size,
 			       attr->ptr_attr.len - size))
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index db98111b47f4..9aa7ffc1d12a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -772,6 +772,8 @@ out_unlock:
 	return (ret) ? : count;
 }
 
+static const struct vm_operations_struct rdma_umap_ops;
+
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct ib_uverbs_file *file = filp->private_data;
@@ -785,7 +787,7 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 		ret = PTR_ERR(ucontext);
 		goto out;
 	}
-
+	vma->vm_ops = &rdma_umap_ops;
 	ret = ucontext->device->ops.mmap(ucontext, vma);
 out:
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
@@ -793,38 +795,6 @@ out:
 }
 
 /*
- * Each time we map IO memory into user space this keeps track of the mapping.
- * When the device is hot-unplugged we 'zap' the mmaps in user space to point
- * to the zero page and allow the hot unplug to proceed.
- *
- * This is necessary for cases like PCI physical hot unplug as the actual BAR
- * memory may vanish after this and access to it from userspace could MCE.
- *
- * RDMA drivers supporting disassociation must have their user space designed
- * to cope in some way with their IO pages going to the zero page.
- */
-struct rdma_umap_priv {
-	struct vm_area_struct *vma;
-	struct list_head list;
-};
-
-static const struct vm_operations_struct rdma_umap_ops;
-
-static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
-				struct vm_area_struct *vma)
-{
-	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
-
-	priv->vma = vma;
-	vma->vm_private_data = priv;
-	vma->vm_ops = &rdma_umap_ops;
-
-	mutex_lock(&ufile->umap_lock);
-	list_add(&priv->list, &ufile->umaps);
-	mutex_unlock(&ufile->umap_lock);
-}
-
-/*
  * The VMA has been dup'd, initialize the vm_private_data with a new tracking
  * struct
  */
@@ -849,7 +819,7 @@ static void rdma_umap_open(struct vm_area_struct *vma)
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		goto out_unlock;
-	rdma_umap_priv_init(priv, vma);
+	rdma_umap_priv_init(priv, vma, opriv->entry);
 
 	up_read(&ufile->hw_destroy_rwsem);
 	return;
@@ -880,6 +850,9 @@ static void rdma_umap_close(struct vm_area_struct *vma)
 	 * this point.
 	 */
 	mutex_lock(&ufile->umap_lock);
+	if (priv->entry)
+		rdma_user_mmap_entry_put(priv->entry);
+
 	list_del(&priv->list);
 	mutex_unlock(&ufile->umap_lock);
 	kfree(priv);
@@ -931,44 +904,6 @@ static const struct vm_operations_struct rdma_umap_ops = {
 	.fault = rdma_umap_fault,
 };
 
-/*
- * Map IO memory into a process. This is to be called by drivers as part of
- * their mmap() functions if they wish to send something like PCI-E BAR memory
- * to userspace.
- */
-int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
-		      unsigned long pfn, unsigned long size, pgprot_t prot)
-{
-	struct ib_uverbs_file *ufile = ucontext->ufile;
-	struct rdma_umap_priv *priv;
-
-	if (!(vma->vm_flags & VM_SHARED))
-		return -EINVAL;
-
-	if (vma->vm_end - vma->vm_start != size)
-		return -EINVAL;
-
-	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
-	if (WARN_ON(!vma->vm_file ||
-		    vma->vm_file->private_data != ufile))
-		return -EINVAL;
-	lockdep_assert_held(&ufile->device->disassociate_srcu);
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	vma->vm_page_prot = prot;
-	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
-		kfree(priv);
-		return -EAGAIN;
-	}
-
-	rdma_umap_priv_init(priv, vma);
-	return 0;
-}
-EXPORT_SYMBOL(rdma_user_mmap_io);
-
 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 {
 	struct rdma_umap_priv *priv, *next_priv;
@@ -1018,6 +953,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 
 			zap_vma_ptes(vma, vma->vm_start,
 				     vma->vm_end - vma->vm_start);
+
+			if (priv->entry) {
+				rdma_user_mmap_entry_put(priv->entry);
+				priv->entry = NULL;
+			}
 		}
 		mutex_unlock(&ufile->umap_lock);
 	skip_mm:
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 35c2841a569e..dd765e176cdd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -244,6 +244,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
 /**
  * ib_alloc_pd - Allocates an unused protection domain.
  * @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ * @caller: caller's build-time module name
  *
  * A protection domain object provides an association between QPs, shared
  * receive queues, address handles, memory regions, and memory windows.
@@ -2459,6 +2461,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
 }
 EXPORT_SYMBOL(ib_set_vf_guid);
 
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+		   struct ifla_vf_guid *node_guid,
+		   struct ifla_vf_guid *port_guid)
+{
+	if (!device->ops.get_vf_guid)
+		return -EOPNOTSUPP;
+
+	return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
+}
+EXPORT_SYMBOL(ib_get_vf_guid);
 /**
  * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
  *     information) and set an appropriate memory region for registration.
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 433fca59febd..0aeccd984889 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= mthca/
 obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
-obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
 obj-$(CONFIG_INFINIBAND_EFA)		+= efa/
 obj-$(CONFIG_INFINIBAND_I40IW)		+= i40iw/
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index ab8779d23382..b83f1cc38c52 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config INFINIBAND_BNXT_RE
-        tristate "Broadcom Netxtreme HCA support"
-        depends on 64BIT
-        depends on ETHERNET && NETDEVICES && PCI && INET && DCB
-        select NET_VENDOR_BROADCOM
-        select BNXT
-        ---help---
+	tristate "Broadcom Netxtreme HCA support"
+	depends on 64BIT
+	depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+	select NET_VENDOR_BROADCOM
+	select BNXT
+	---help---
 	  This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
 	  RoCE HCAs.  To compile this driver as a module, choose M here:
 	  the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index e55a1666c0cd..725b2350e349 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries {
 #define BNXT_RE_MAX_MSIX		9
 #define BNXT_RE_AEQ_IDX			0
 #define BNXT_RE_NQ_IDX			1
+#define BNXT_RE_GEN_P5_MAX_VF		64
 
 struct bnxt_re_dev {
 	struct ib_device		ibdev;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index b4149dc9e824..9b6ca15a183c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	return 0;
 }
 
-int bnxt_re_modify_device(struct ib_device *ibdev,
-			  int device_modify_mask,
-			  struct ib_device_modify *device_modify)
-{
-	switch (device_modify_mask) {
-	case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
-		/* Modify the GUID requires the modification of the GID table */
-		/* GUID should be made as READ-ONLY */
-		break;
-	case IB_DEVICE_MODIFY_NODE_DESC:
-		/* Node Desc should be made as READ-ONLY */
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
 /* Port */
 int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 		       struct ib_port_attr *port_attr)
@@ -855,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 		bytes += (qplib_qp->sq.max_wqe * psn_sz);
 	}
 	bytes = PAGE_ALIGN(bytes);
-	umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+	umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem))
 		return PTR_ERR(umem);
 
@@ -869,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 		bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
 		bytes = PAGE_ALIGN(bytes);
 		umem = ib_umem_get(udata, ureq.qprva, bytes,
-				   IB_ACCESS_LOCAL_WRITE, 1);
+				   IB_ACCESS_LOCAL_WRITE);
 		if (IS_ERR(umem))
 			goto rqfail;
 		qp->rumem = umem;
@@ -1322,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
 
 	bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
 	bytes = PAGE_ALIGN(bytes);
-	umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+	umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem))
 		return PTR_ERR(umem);
 
@@ -2565,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 		cq->umem = ib_umem_get(udata, req.cq_va,
 				       entries * sizeof(struct cq_base),
-				       IB_ACCESS_LOCAL_WRITE, 1);
+				       IB_ACCESS_LOCAL_WRITE);
 		if (IS_ERR(cq->umem)) {
 			rc = PTR_ERR(cq->umem);
 			goto fail;
@@ -3530,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 	/* The fixed portion of the rkey is the same as the lkey */
 	mr->ib_mr.rkey = mr->qplib_mr.rkey;
 
-	umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+	umem = ib_umem_get(udata, start, length, mr_access_flags);
 	if (IS_ERR(umem)) {
 		dev_err(rdev_to_dev(rdev), "Failed to get umem");
 		rc = -EFAULT;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 31662b1ee35a..23d972da5652 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -145,9 +145,6 @@ struct bnxt_re_ucontext {
 int bnxt_re_query_device(struct ib_device *ibdev,
 			 struct ib_device_attr *ib_attr,
 			 struct ib_udata *udata);
-int bnxt_re_modify_device(struct ib_device *ibdev,
-			  int device_modify_mask,
-			  struct ib_device_modify *device_modify);
 int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 		       struct ib_port_attr *port_attr);
 int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 30a54f8aa42c..e7e8a0f49464 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
  * reserved for the function. The driver may choose to allocate fewer
  * resources than the firmware maximum.
  */
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
 {
-	u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
-	u32 i;
-	u32 vf_pct;
-	u32 num_vfs;
-	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	struct bnxt_qplib_dev_attr *attr;
+	struct bnxt_qplib_ctx *ctx;
+	int i;
 
-	rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
-					  dev_attr->max_qp);
+	attr = &rdev->dev_attr;
+	ctx = &rdev->qplib_ctx;
 
-	rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+	ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+			       attr->max_qp);
+	ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
 	/* Use max_mr from fw since max_mrw does not get set */
-	rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
-					  dev_attr->max_mr);
-	rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
-					   dev_attr->max_srq);
-	rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
-					 dev_attr->max_cq);
-
-	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
-		rdev->qplib_ctx.tqm_count[i] =
-		rdev->dev_attr.tqm_alloc_reqs[i];
-
-	if (rdev->num_vfs) {
-		/*
-		 * Reserve a set of resources for the PF. Divide the remaining
-		 * resources among the VFs
-		 */
-		vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
-		num_vfs = 100 * rdev->num_vfs;
-		vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
-		vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
-		vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
-		/*
-		 * The driver allows many more MRs than other resources. If the
-		 * firmware does also, then reserve a fixed amount for the PF
-		 * and divide the rest among VFs. VFs may use many MRs for NFS
-		 * mounts, ISER, NVME applications, etc. If the firmware
-		 * severely restricts the number of MRs, then let PF have
-		 * half and divide the rest among VFs, as for the other
-		 * resource types.
-		 */
-		if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
-			vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
-		else
-			vf_mrws = (rdev->qplib_ctx.mrw_count -
-				   BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
-		vf_gids = BNXT_RE_MAX_GID_PER_VF;
+	ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
+	ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+				attr->max_srq);
+	ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
+	if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
+		for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+			rdev->qplib_ctx.tqm_count[i] =
+			rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
+static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
+{
+	struct bnxt_qplib_vf_res *vf_res;
+	u32 mrws = 0;
+	u32 vf_pct;
+	u32 nvfs;
+
+	vf_res = &qplib_ctx->vf_res;
+	/*
+	 * Reserve a set of resources for the PF. Divide the remaining
+	 * resources among the VFs
+	 */
+	vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+	nvfs = num_vf;
+	num_vf = 100 * num_vf;
+	vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
+	vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
+	vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
+	/*
+	 * The driver allows many more MRs than other resources. If the
+	 * firmware does also, then reserve a fixed amount for the PF and
+	 * divide the rest among VFs. VFs may use many MRs for NFS
+	 * mounts, ISER, NVME applications, etc. If the firmware severely
+	 * restricts the number of MRs, then let PF have half and divide
+	 * the rest among VFs, as for the other resource types.
+	 */
+	if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
+		mrws = qplib_ctx->mrw_count * vf_pct;
+		nvfs = num_vf;
+	} else {
+		mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
 	}
-	rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
-	rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
-	rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
-	rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
-	rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+	vf_res->max_mrw_per_vf = (mrws / nvfs);
+	vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+	u32 num_vfs;
+
+	memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
+	bnxt_re_limit_pf_res(rdev);
+
+	num_vfs =  bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
+			BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
+	if (num_vfs)
+		bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
 }
 
 /* for handling bnxt_en callbacks later */
@@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs)
 		return;
 
 	rdev->num_vfs = num_vfs;
-	bnxt_re_set_resource_limits(rdev);
-	bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
-				      &rdev->qplib_ctx);
+	if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) {
+		bnxt_re_set_resource_limits(rdev);
+		bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+					      &rdev->qplib_ctx);
+	}
 }
 
 static void bnxt_re_shutdown(void *p)
@@ -477,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
 	req.update_period_ms = cpu_to_le32(1000);
 	req.stats_dma_addr = cpu_to_le64(dma_map);
+	req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
 	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@@ -625,7 +643,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	.map_mr_sg = bnxt_re_map_mr_sg,
 	.mmap = bnxt_re_mmap,
 	.modify_ah = bnxt_re_modify_ah,
-	.modify_device = bnxt_re_modify_device,
 	.modify_qp = bnxt_re_modify_qp,
 	.modify_srq = bnxt_re_modify_srq,
 	.poll_cq = bnxt_re_poll_cq,
@@ -895,10 +912,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
 	return 0;
 }
 
+#define BNXT_RE_GEN_P5_PF_NQ_DB		0x10000
+#define BNXT_RE_GEN_P5_VF_NQ_DB		0x4000
 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
 {
 	return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
-				0x10000 : rdev->msix_entries[indx].db_offset;
+		(rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
+				   BNXT_RE_GEN_P5_PF_NQ_DB) :
+				   rdev->msix_entries[indx].db_offset;
 }
 
 static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
@@ -1270,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
 		return;
 	}
 	rdev->qplib_ctx.hwrm_intf_ver =
-		(u64)resp.hwrm_intf_major << 48 |
-		(u64)resp.hwrm_intf_minor << 32 |
-		(u64)resp.hwrm_intf_build << 16 |
-		resp.hwrm_intf_patch;
+		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
+		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
+		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
+		le16_to_cpu(resp.hwrm_intf_patch);
 }
 
 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
@@ -1408,8 +1429,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 				     rdev->is_virtfn);
 	if (rc)
 		goto disable_rcfw;
-	if (!rdev->is_virtfn)
-		bnxt_re_set_resource_limits(rdev);
+
+	bnxt_re_set_resource_limits(rdev);
 
 	rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
 				  bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 60c8f76aab33..5cdfa84faf85 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 	 * shall setup this area for VF. Skipping the
 	 * HW programming
 	 */
-	if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+	if (is_virtfn)
 		goto skip_ctx_setup;
+	if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+		goto config_vf_res;
 
 	level = ctx->qpc_tbl.level;
 	req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
@@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 	req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
 	req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
 
+config_vf_res:
 	req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
 	req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
 	req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index fbda11a7ab1a..aaa76d792185 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx {
 	u8	chip_metal;
 };
 
-#define CHIP_NUM_57500          0x1750
+#define CHIP_NUM_57508		0x1750
+#define CHIP_NUM_57504		0x1751
+#define CHIP_NUM_57502		0x1752
 
 struct bnxt_qplib_res {
 	struct pci_dev			*pdev;
@@ -203,7 +205,9 @@ struct bnxt_qplib_res {
 
 static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
 {
-	return (cctx->chip_num == CHIP_NUM_57500);
+	return (cctx->chip_num == CHIP_NUM_57508 ||
+		cctx->chip_num == CHIP_NUM_57504 ||
+		cctx->chip_num == CHIP_NUM_57502);
 }
 
 static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
deleted file mode 100644
index 8c1a72bff447..000000000000
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config INFINIBAND_CXGB3
-	tristate "Chelsio RDMA Driver"
-	depends on CHELSIO_T3
-	select GENERIC_ALLOCATOR
-	---help---
-	  This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
-	  10GbE adapters.
-
-	  For general information about Chelsio and our products, visit
-	  our website at <http://www.chelsio.com>.
-
-	  For customer support, please visit our customer support page at
-	  <http://www.chelsio.com/support.html>.
-
-	  Please send feedback to <linux-bugs@chelsio.com>.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called iw_cxgb3.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
deleted file mode 100644
index 34bb86a6ae3a..000000000000
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3
-
-obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
-
-iw_cxgb3-y :=  iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
-	       iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
deleted file mode 100644
index 95b22a651673..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <asm/delay.h>
-
-#include <linux/mutex.h>
-#include <linux/netdevice.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-#include "sge_defs.h"
-
-static LIST_HEAD(rdev_list);
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
-{
-	struct cxio_rdev *rdev;
-
-	list_for_each_entry(rdev, &rdev_list, entry)
-		if (!strcmp(rdev->dev_name, dev_name))
-			return rdev;
-	return NULL;
-}
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
-	struct cxio_rdev *rdev;
-
-	list_for_each_entry(rdev, &rdev_list, entry)
-		if (rdev->t3cdev_p == tdev)
-			return rdev;
-	return NULL;
-}
-
-int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
-		   enum t3_cq_opcode op, u32 credit)
-{
-	int ret;
-	struct t3_cqe *cqe;
-	u32 rptr;
-
-	struct rdma_cq_op setup;
-	setup.id = cq->cqid;
-	setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
-	setup.op = op;
-	ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
-
-	if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
-		return ret;
-
-	/*
-	 * If the rearm returned an index other than our current index,
-	 * then there might be CQE's in flight (being DMA'd).  We must wait
-	 * here for them to complete or the consumer can miss a notification.
-	 */
-	if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
-		int i=0;
-
-		rptr = cq->rptr;
-
-		/*
-		 * Keep the generation correct by bumping rptr until it
-		 * matches the index returned by the rearm - 1.
-		 */
-		while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
-			rptr++;
-
-		/*
-		 * Now rptr is the index for the (last) cqe that was
-		 * in-flight at the time the HW rearmed the CQ.  We
-		 * spin until that CQE is valid.
-		 */
-		cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
-		while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
-			udelay(1);
-			if (i++ > 1000000) {
-				pr_err("%s: stalled rnic\n", rdev_p->dev_name);
-				return -EIO;
-			}
-		}
-
-		return 1;
-	}
-
-	return 0;
-}
-
-static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
-{
-	struct rdma_cq_setup setup;
-	setup.id = cqid;
-	setup.base_addr = 0;	/* NULL address */
-	setup.size = 0;		/* disaable the CQ */
-	setup.credits = 0;
-	setup.credit_thres = 0;
-	setup.ovfl_mode = 0;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
-{
-	u64 sge_cmd;
-	struct t3_modify_qp_wr *wqe;
-	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
-	if (!skb) {
-		pr_debug("%s alloc_skb failed\n", __func__);
-		return -ENOMEM;
-	}
-	wqe = skb_put_zero(skb, sizeof(*wqe));
-	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
-		       T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
-		       T3_SOPEOP);
-	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
-	sge_cmd = qpid << 8 | 3;
-	wqe->sge_cmd = cpu_to_be64(sge_cmd);
-	skb->priority = CPL_PRIORITY_CONTROL;
-	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
-{
-	struct rdma_cq_setup setup;
-	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
-
-	size += 1; /* one extra page for storing cq-in-err state */
-	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
-	if (!cq->cqid)
-		return -ENOMEM;
-	if (kernel) {
-		cq->sw_queue = kzalloc(size, GFP_KERNEL);
-		if (!cq->sw_queue)
-			return -ENOMEM;
-	}
-	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
-					     &(cq->dma_addr), GFP_KERNEL);
-	if (!cq->queue) {
-		kfree(cq->sw_queue);
-		return -ENOMEM;
-	}
-	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
-	setup.id = cq->cqid;
-	setup.base_addr = (u64) (cq->dma_addr);
-	setup.size = 1UL << cq->size_log2;
-	setup.credits = 65535;
-	setup.credit_thres = 1;
-	if (rdev_p->t3cdev_p->type != T3A)
-		setup.ovfl_mode = 0;
-	else
-		setup.ovfl_mode = 1;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
-	struct cxio_qpid_list *entry;
-	u32 qpid;
-	int i;
-
-	mutex_lock(&uctx->lock);
-	if (!list_empty(&uctx->qpids)) {
-		entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
-				   entry);
-		list_del(&entry->entry);
-		qpid = entry->qpid;
-		kfree(entry);
-	} else {
-		qpid = cxio_hal_get_qpid(rdev_p->rscp);
-		if (!qpid)
-			goto out;
-		for (i = qpid+1; i & rdev_p->qpmask; i++) {
-			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-			if (!entry)
-				break;
-			entry->qpid = i;
-			list_add_tail(&entry->entry, &uctx->qpids);
-		}
-	}
-out:
-	mutex_unlock(&uctx->lock);
-	pr_debug("%s qpid 0x%x\n", __func__, qpid);
-	return qpid;
-}
-
-static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
-		     struct cxio_ucontext *uctx)
-{
-	struct cxio_qpid_list *entry;
-
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return;
-	pr_debug("%s qpid 0x%x\n", __func__, qpid);
-	entry->qpid = qpid;
-	mutex_lock(&uctx->lock);
-	list_add_tail(&entry->entry, &uctx->qpids);
-	mutex_unlock(&uctx->lock);
-}
-
-void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
-	struct list_head *pos, *nxt;
-	struct cxio_qpid_list *entry;
-
-	mutex_lock(&uctx->lock);
-	list_for_each_safe(pos, nxt, &uctx->qpids) {
-		entry = list_entry(pos, struct cxio_qpid_list, entry);
-		list_del_init(&entry->entry);
-		if (!(entry->qpid & rdev_p->qpmask))
-			cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
-		kfree(entry);
-	}
-	mutex_unlock(&uctx->lock);
-}
-
-void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
-	INIT_LIST_HEAD(&uctx->qpids);
-	mutex_init(&uctx->lock);
-}
-
-int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
-		   struct t3_wq *wq, struct cxio_ucontext *uctx)
-{
-	int depth = 1UL << wq->size_log2;
-	int rqsize = 1UL << wq->rq_size_log2;
-
-	wq->qpid = get_qpid(rdev_p, uctx);
-	if (!wq->qpid)
-		return -ENOMEM;
-
-	wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL);
-	if (!wq->rq)
-		goto err1;
-
-	wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
-	if (!wq->rq_addr)
-		goto err2;
-
-	wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL);
-	if (!wq->sq)
-		goto err3;
-
-	wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
-				       depth * sizeof(union t3_wr),
-				       &(wq->dma_addr), GFP_KERNEL);
-	if (!wq->queue)
-		goto err4;
-
-	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
-	wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
-	if (!kernel_domain)
-		wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
-					(wq->qpid << rdev_p->qpshift);
-	wq->rdev = rdev_p;
-	pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
-		 __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
-	return 0;
-err4:
-	kfree(wq->sq);
-err3:
-	cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
-err2:
-	kfree(wq->rq);
-err1:
-	put_qpid(rdev_p, wq->qpid, uctx);
-	return -ENOMEM;
-}
-
-void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
-	cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
-	kfree(cq->sw_queue);
-	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
-			  (1UL << (cq->size_log2))
-			  * sizeof(struct t3_cqe) + 1, cq->queue,
-			  dma_unmap_addr(cq, mapping));
-	cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
-}
-
-int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
-		    struct cxio_ucontext *uctx)
-{
-	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
-			  (1UL << (wq->size_log2))
-			  * sizeof(union t3_wr), wq->queue,
-			  dma_unmap_addr(wq, mapping));
-	kfree(wq->sq);
-	cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
-	kfree(wq->rq);
-	put_qpid(rdev_p, wq->qpid, uctx);
-	return 0;
-}
-
-static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
-{
-	struct t3_cqe cqe;
-
-	pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
-		 wq, cq, cq->sw_rptr, cq->sw_wptr);
-	memset(&cqe, 0, sizeof(cqe));
-	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
-			         V_CQE_OPCODE(T3_SEND) |
-				 V_CQE_TYPE(0) |
-				 V_CQE_SWCQE(1) |
-				 V_CQE_QPID(wq->qpid) |
-				 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
-						       cq->size_log2)));
-	*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
-	cq->sw_wptr++;
-}
-
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
-	u32 ptr;
-	int flushed = 0;
-
-	pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
-
-	/* flush RQ */
-	pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
-		 wq->rq_rptr, wq->rq_wptr, count);
-	ptr = wq->rq_rptr + count;
-	while (ptr++ != wq->rq_wptr) {
-		insert_recv_cqe(wq, cq);
-		flushed++;
-	}
-	return flushed;
-}
-
-static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
-		          struct t3_swsq *sqp)
-{
-	struct t3_cqe cqe;
-
-	pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
-		 wq, cq, cq->sw_rptr, cq->sw_wptr);
-	memset(&cqe, 0, sizeof(cqe));
-	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
-			         V_CQE_OPCODE(sqp->opcode) |
-			         V_CQE_TYPE(1) |
-			         V_CQE_SWCQE(1) |
-			         V_CQE_QPID(wq->qpid) |
-			         V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
-						       cq->size_log2)));
-	cqe.u.scqe.wrid_hi = sqp->sq_wptr;
-
-	*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
-	cq->sw_wptr++;
-}
-
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
-	__u32 ptr = wq->sq_rptr + count;
-	int flushed = 0;
-	struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
-
-	while (ptr != wq->sq_wptr) {
-		sqp->signaled = 0;
-		insert_sq_cqe(wq, cq, sqp);
-		ptr++;
-		sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
-		flushed++;
-	}
-	return flushed;
-}
-
-/*
- * Move all CQEs from the HWCQ into the SWCQ.
- */
-void cxio_flush_hw_cq(struct t3_cq *cq)
-{
-	struct t3_cqe *cqe, *swcqe;
-
-	pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
-	cqe = cxio_next_hw_cqe(cq);
-	while (cqe) {
-		pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
-			 __func__, cq->rptr, cq->sw_wptr);
-		swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
-		*swcqe = *cqe;
-		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
-		cq->sw_wptr++;
-		cq->rptr++;
-		cqe = cxio_next_hw_cqe(cq);
-	}
-}
-
-static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
-{
-	if (CQE_OPCODE(*cqe) == T3_TERMINATE)
-		return 0;
-
-	if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
-		return 0;
-
-	if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
-		return 0;
-
-	if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
-	    Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
-		return 0;
-
-	return 1;
-}
-
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
-	struct t3_cqe *cqe;
-	u32 ptr;
-
-	*count = 0;
-	ptr = cq->sw_rptr;
-	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
-		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
-		if ((SQ_TYPE(*cqe) ||
-		     ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
-		    (CQE_QPID(*cqe) == wq->qpid))
-			(*count)++;
-		ptr++;
-	}
-	pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
-	struct t3_cqe *cqe;
-	u32 ptr;
-
-	*count = 0;
-	pr_debug("%s count zero %d\n", __func__, *count);
-	ptr = cq->sw_rptr;
-	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
-		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
-		if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
-		    (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
-			(*count)++;
-		ptr++;
-	}
-	pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
-{
-	struct rdma_cq_setup setup;
-	setup.id = 0;
-	setup.base_addr = 0;	/* NULL address */
-	setup.size = 1;		/* enable the CQ */
-	setup.credits = 0;
-
-	/* force SGE to redirect to RspQ and interrupt */
-	setup.credit_thres = 0;
-	setup.ovfl_mode = 1;
-	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
-{
-	int err;
-	u64 sge_cmd, ctx0, ctx1;
-	u64 base_addr;
-	struct t3_modify_qp_wr *wqe;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
-	if (!skb) {
-		pr_debug("%s alloc_skb failed\n", __func__);
-		return -ENOMEM;
-	}
-	err = cxio_hal_init_ctrl_cq(rdev_p);
-	if (err) {
-		pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
-		goto err;
-	}
-	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
-					&(rdev_p->rnic_info.pdev->dev),
-					(1 << T3_CTRL_QP_SIZE_LOG2) *
-					sizeof(union t3_wr),
-					&(rdev_p->ctrl_qp.dma_addr),
-					GFP_KERNEL);
-	if (!rdev_p->ctrl_qp.workq) {
-		pr_debug("%s dma_alloc_coherent failed\n", __func__);
-		err = -ENOMEM;
-		goto err;
-	}
-	dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
-			   rdev_p->ctrl_qp.dma_addr);
-	rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
-
-	mutex_init(&rdev_p->ctrl_qp.lock);
-	init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
-
-	/* update HW Ctrl QP context */
-	base_addr = rdev_p->ctrl_qp.dma_addr;
-	base_addr >>= 12;
-	ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
-		V_EC_BASE_LO((u32) base_addr & 0xffff));
-	ctx0 <<= 32;
-	ctx0 |= V_EC_CREDITS(FW_WR_NUM);
-	base_addr >>= 16;
-	ctx1 = (u32) base_addr;
-	base_addr >>= 32;
-	ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
-			V_EC_TYPE(0) | V_EC_GEN(1) |
-			V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
-	wqe = skb_put_zero(skb, sizeof(*wqe));
-	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
-		       T3_CTL_QP_TID, 7, T3_SOPEOP);
-	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
-	sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
-	wqe->sge_cmd = cpu_to_be64(sge_cmd);
-	wqe->ctx1 = cpu_to_be64(ctx1);
-	wqe->ctx0 = cpu_to_be64(ctx0);
-	pr_debug("CtrlQP dma_addr %pad workq %p size %d\n",
-		 &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq,
-		 1 << T3_CTRL_QP_SIZE_LOG2);
-	skb->priority = CPL_PRIORITY_CONTROL;
-	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-err:
-	kfree_skb(skb);
-	return err;
-}
-
-static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
-{
-	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
-			  (1UL << T3_CTRL_QP_SIZE_LOG2)
-			  * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
-			  dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
-	return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
-}
-
-/* write len bytes of data into addr (32B aligned address)
- * If data is NULL, clear len byte of memory to zero.
- * caller acquires the ctrl_qp lock before the call
- */
-static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
-				      u32 len, void *data)
-{
-	u32 i, nr_wqe, copy_len;
-	u8 *copy_data;
-	u8 wr_len, utx_len;	/* length in 8 byte flit */
-	enum t3_wr_flags flag;
-	__be64 *wqe;
-	u64 utx_cmd;
-	addr &= 0x7FFFFFF;
-	nr_wqe = len % 96 ? len / 96 + 1 : len / 96;	/* 96B max per WQE */
-	pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
-		 __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
-		 nr_wqe, data, addr);
-	utx_len = 3;		/* in 32B unit */
-	for (i = 0; i < nr_wqe; i++) {
-		if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
-		           T3_CTRL_QP_SIZE_LOG2)) {
-			pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n",
-				 __func__,
-				 rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
-			if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
-					     !Q_FULL(rdev_p->ctrl_qp.rptr,
-						     rdev_p->ctrl_qp.wptr,
-						     T3_CTRL_QP_SIZE_LOG2))) {
-				pr_debug("%s ctrl_qp workq interrupted\n",
-					 __func__);
-				return -ERESTARTSYS;
-			}
-			pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
-				 __func__, i);
-		}
-		wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
-						(1 << T3_CTRL_QP_SIZE_LOG2)));
-		flag = 0;
-		if (i == (nr_wqe - 1)) {
-			/* last WQE */
-			flag = T3_COMPLETION_FLAG;
-			if (len % 32)
-				utx_len = len / 32 + 1;
-			else
-				utx_len = len / 32;
-		}
-
-		/*
-		 * Force a CQE to return the credit to the workq in case
-		 * we posted more than half the max QP size of WRs
-		 */
-		if ((i != 0) &&
-		    (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
-			flag = T3_COMPLETION_FLAG;
-			pr_debug("%s force completion at i %d\n", __func__, i);
-		}
-
-		/* build the utx mem command */
-		wqe += (sizeof(struct t3_bypass_wr) >> 3);
-		utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
-		utx_cmd <<= 32;
-		utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
-		*wqe = cpu_to_be64(utx_cmd);
-		wqe++;
-		copy_data = (u8 *) data + i * 96;
-		copy_len = len > 96 ? 96 : len;
-
-		/* clear memory content if data is NULL */
-		if (data)
-			memcpy(wqe, copy_data, copy_len);
-		else
-			memset(wqe, 0, copy_len);
-		if (copy_len % 32)
-			memset(((u8 *) wqe) + copy_len, 0,
-			       32 - (copy_len % 32));
-		wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
-			 (utx_len << 2);
-		wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
-			      (1 << T3_CTRL_QP_SIZE_LOG2)));
-
-		/* wptr in the WRID[31:0] */
-		((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
-
-		/*
-		 * This must be the last write with a memory barrier
-		 * for the genbit
-		 */
-		build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
-			       Q_GENBIT(rdev_p->ctrl_qp.wptr,
-					T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
-			       wr_len, T3_SOPEOP);
-		if (flag == T3_COMPLETION_FLAG)
-			ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
-		len -= 96;
-		rdev_p->ctrl_qp.wptr++;
-	}
-	return 0;
-}
-
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
- * OUT: stag index
- * TBD: shared memory region support
- */
-static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
-			 u32 *stag, u8 stag_state, u32 pdid,
-			 enum tpt_mem_type type, enum tpt_mem_perm perm,
-			 u32 zbva, u64 to, u32 len, u8 page_size,
-			 u32 pbl_size, u32 pbl_addr)
-{
-	int err;
-	struct tpt_entry tpt;
-	u32 stag_idx;
-	u32 wptr;
-
-	if (cxio_fatal_error(rdev_p))
-		return -EIO;
-
-	stag_state = stag_state > 0;
-	stag_idx = (*stag) >> 8;
-
-	if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
-		stag_idx = cxio_hal_get_stag(rdev_p->rscp);
-		if (!stag_idx)
-			return -ENOMEM;
-		*stag = (stag_idx << 8) | ((*stag) & 0xFF);
-	}
-	pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
-		 __func__, stag_state, type, pdid, stag_idx);
-
-	mutex_lock(&rdev_p->ctrl_qp.lock);
-
-	/* write TPT entry */
-	if (reset_tpt_entry)
-		memset(&tpt, 0, sizeof(tpt));
-	else {
-		tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
-				V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
-				V_TPT_STAG_STATE(stag_state) |
-				V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
-		BUG_ON(page_size >= 28);
-		tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
-			((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
-			V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
-			V_TPT_PAGE_SIZE(page_size));
-		tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
-		tpt.len = cpu_to_be32(len);
-		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
-		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
-		tpt.rsvd_bind_cnt_or_pstag = 0;
-		tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
-	}
-	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
-				       stag_idx +
-				       (rdev_p->rnic_info.tpt_base >> 5),
-				       sizeof(tpt), &tpt);
-
-	/* release the stag index to free pool */
-	if (reset_tpt_entry)
-		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-
-	wptr = rdev_p->ctrl_qp.wptr;
-	mutex_unlock(&rdev_p->ctrl_qp.lock);
-	if (!err)
-		if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
-					     SEQ32_GE(rdev_p->ctrl_qp.rptr,
-						      wptr)))
-			return -ERESTARTSYS;
-	return err;
-}
-
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
-		   u32 pbl_addr, u32 pbl_size)
-{
-	u32 wptr;
-	int err;
-
-	pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
-		 __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
-		 pbl_size);
-
-	mutex_lock(&rdev_p->ctrl_qp.lock);
-	err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
-					 pbl);
-	wptr = rdev_p->ctrl_qp.wptr;
-	mutex_unlock(&rdev_p->ctrl_qp.lock);
-	if (err)
-		return err;
-
-	if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
-				     SEQ32_GE(rdev_p->ctrl_qp.rptr,
-					      wptr)))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
-int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
-			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
-	*stag = T3_STAG_UNSET;
-	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
-			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
-	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
-			     zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
-		   u32 pbl_addr)
-{
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
-			     pbl_size, pbl_addr);
-}
-
-int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
-{
-	*stag = T3_STAG_UNSET;
-	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
-			     0, 0);
-}
-
-int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
-{
-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
-			     0, 0);
-}
-
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
-{
-	*stag = T3_STAG_UNSET;
-	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
-			     0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
-}
-
-int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
-{
-	struct t3_rdma_init_wr *wqe;
-	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
-	pr_debug("%s rdev_p %p\n", __func__, rdev_p);
-	wqe = __skb_put(skb, sizeof(*wqe));
-	wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
-	wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
-					   V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
-	wqe->wrid.id1 = 0;
-	wqe->qpid = cpu_to_be32(attr->qpid);
-	wqe->pdid = cpu_to_be32(attr->pdid);
-	wqe->scqid = cpu_to_be32(attr->scqid);
-	wqe->rcqid = cpu_to_be32(attr->rcqid);
-	wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
-	wqe->rq_size = cpu_to_be32(attr->rq_size);
-	wqe->mpaattrs = attr->mpaattrs;
-	wqe->qpcaps = attr->qpcaps;
-	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
-	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
-	wqe->flags_rtr_type = cpu_to_be16(attr->flags |
-					  V_RTR_TYPE(attr->rtr_type) |
-					  V_CHAN(attr->chan));
-	wqe->ord = cpu_to_be32(attr->ord);
-	wqe->ird = cpu_to_be32(attr->ird);
-	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
-	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
-	wqe->irs = cpu_to_be32(attr->irs);
-	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
-	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
-	cxio_ev_cb = ev_cb;
-}
-
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
-	cxio_ev_cb = NULL;
-}
-
-static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
-{
-	static int cnt;
-	struct cxio_rdev *rdev_p = NULL;
-	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
-	pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
-		 cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
-		 RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
-		 RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
-		 RSPQ_CREDIT_THRESH(rsp_msg));
-	pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
-		 CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
-		 CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
-		 CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
-		 CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-	rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
-	if (!rdev_p) {
-		pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
-			 t3cdev_p);
-		return 0;
-	}
-	if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
-		rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
-		wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
-		dev_kfree_skb_irq(skb);
-	} else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
-		dev_kfree_skb_irq(skb);
-	else if (cxio_ev_cb)
-		(*cxio_ev_cb) (rdev_p, skb);
-	else
-		dev_kfree_skb_irq(skb);
-	cnt++;
-	return 0;
-}
-
-/* Caller takes care of locking if needed */
-int cxio_rdev_open(struct cxio_rdev *rdev_p)
-{
-	struct net_device *netdev_p = NULL;
-	int err = 0;
-	if (strlen(rdev_p->dev_name)) {
-		if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
-			return -EBUSY;
-		}
-		netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name);
-		if (!netdev_p) {
-			return -EINVAL;
-		}
-		dev_put(netdev_p);
-	} else if (rdev_p->t3cdev_p) {
-		if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
-			return -EBUSY;
-		}
-		netdev_p = rdev_p->t3cdev_p->lldev;
-		strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
-			T3_MAX_DEV_NAME_LEN);
-	} else {
-		pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
-		return -EINVAL;
-	}
-
-	list_add_tail(&rdev_p->entry, &rdev_list);
-
-	pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
-	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
-	if (!rdev_p->t3cdev_p)
-		rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
-	rdev_p->t3cdev_p->ulp = (void *) rdev_p;
-
-	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
-					 &(rdev_p->fw_info));
-	if (err) {
-		pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
-		       __func__, rdev_p->t3cdev_p, err);
-		goto err1;
-	}
-	if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
-		pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
-		       CXIO_FW_MAJ,
-		       G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
-		err = -EINVAL;
-		goto err1;
-	}
-
-	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
-					 &(rdev_p->rnic_info));
-	if (err) {
-		pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
-		       __func__, rdev_p->t3cdev_p, err);
-		goto err1;
-	}
-	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
-				    &(rdev_p->port_info));
-	if (err) {
-		pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
-		       __func__, rdev_p->t3cdev_p, err);
-		goto err1;
-	}
-
-	/*
-	 * qpshift is the number of bits to shift the qpid left in order
-	 * to get the correct address of the doorbell for that qp.
-	 */
-	cxio_init_ucontext(rdev_p, &rdev_p->uctx);
-	rdev_p->qpshift = PAGE_SHIFT -
-			  ilog2(65536 >>
-			            ilog2(rdev_p->rnic_info.udbell_len >>
-					      PAGE_SHIFT));
-	rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
-	rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
-	pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
-		 __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
-		 rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
-		 rdev_p->rnic_info.pbl_base,
-		 rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
-		 rdev_p->rnic_info.rqt_top);
-	pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
-		 rdev_p->rnic_info.udbell_len,
-		 rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
-		 rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
-
-	err = cxio_hal_init_ctrl_qp(rdev_p);
-	if (err) {
-		pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
-		goto err1;
-	}
-	err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
-				     0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
-				     T3_MAX_NUM_PD);
-	if (err) {
-		pr_err("%s error %d initializing hal resources\n",
-		       __func__, err);
-		goto err2;
-	}
-	err = cxio_hal_pblpool_create(rdev_p);
-	if (err) {
-		pr_err("%s error %d initializing pbl mem pool\n",
-		       __func__, err);
-		goto err3;
-	}
-	err = cxio_hal_rqtpool_create(rdev_p);
-	if (err) {
-		pr_err("%s error %d initializing rqt mem pool\n",
-		       __func__, err);
-		goto err4;
-	}
-	return 0;
-err4:
-	cxio_hal_pblpool_destroy(rdev_p);
-err3:
-	cxio_hal_destroy_resource(rdev_p->rscp);
-err2:
-	cxio_hal_destroy_ctrl_qp(rdev_p);
-err1:
-	rdev_p->t3cdev_p->ulp = NULL;
-	list_del(&rdev_p->entry);
-	return err;
-}
-
-void cxio_rdev_close(struct cxio_rdev *rdev_p)
-{
-	if (rdev_p) {
-		cxio_hal_pblpool_destroy(rdev_p);
-		cxio_hal_rqtpool_destroy(rdev_p);
-		list_del(&rdev_p->entry);
-		cxio_hal_destroy_ctrl_qp(rdev_p);
-		cxio_hal_destroy_resource(rdev_p->rscp);
-		rdev_p->t3cdev_p->ulp = NULL;
-	}
-}
-
-int __init cxio_hal_init(void)
-{
-	if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
-		return -ENOMEM;
-	t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
-	return 0;
-}
-
-void __exit cxio_hal_exit(void)
-{
-	struct cxio_rdev *rdev, *tmp;
-
-	t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
-	list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
-		cxio_rdev_close(rdev);
-	cxio_hal_destroy_rhdl_resource();
-}
-
-static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
-{
-	struct t3_swsq *sqp;
-	__u32 ptr = wq->sq_rptr;
-	int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
-
-	sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
-	while (count--)
-		if (!sqp->signaled) {
-			ptr++;
-			sqp = wq->sq + Q_PTR2IDX(ptr,  wq->sq_size_log2);
-		} else if (sqp->complete) {
-
-			/*
-			 * Insert this completed cqe into the swcq.
-			 */
-			pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
-				 __func__, Q_PTR2IDX(ptr,  wq->sq_size_log2),
-				 Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
-			sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
-			*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
-				= sqp->cqe;
-			cq->sw_wptr++;
-			sqp->signaled = 0;
-			break;
-		} else
-			break;
-}
-
-static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
-				struct t3_cqe *read_cqe)
-{
-	read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
-	read_cqe->len = wq->oldest_read->read_len;
-	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
-				 V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
-				 V_CQE_OPCODE(T3_READ_REQ) |
-				 V_CQE_TYPE(1));
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t3_wq *wq)
-{
-
-	u32 rptr = wq->oldest_read - wq->sq + 1;
-	u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
-
-	while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
-		wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
-
-		if (wq->oldest_read->opcode == T3_READ_REQ)
-			return;
-		rptr++;
-	}
-	wq->oldest_read = NULL;
-}
-
-/*
- * cxio_poll_cq
- *
- * Caller must:
- *     check the validity of the first CQE,
- *     supply the wq assicated with the qpid.
- *
- * credit: cq credit to return to sge.
- * cqe_flushed: 1 iff the CQE is flushed.
- * cqe: copy of the polled CQE.
- *
- * return value:
- *     0       CQE returned,
- *    -1       CQE skipped, try again.
- */
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
-		     u8 *cqe_flushed, u64 *cookie, u32 *credit)
-{
-	int ret = 0;
-	struct t3_cqe *hw_cqe, read_cqe;
-
-	*cqe_flushed = 0;
-	*credit = 0;
-	hw_cqe = cxio_next_cqe(cq);
-
-	pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
-		 __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
-		 CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
-		 CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
-		 CQE_WRID_LOW(*hw_cqe));
-
-	/*
-	 * skip cqe's not affiliated with a QP.
-	 */
-	if (wq == NULL) {
-		ret = -1;
-		goto skip_cqe;
-	}
-
-	/*
-	 * Gotta tweak READ completions:
-	 *	1) the cqe doesn't contain the sq_wptr from the wr.
-	 *	2) opcode not reflected from the wr.
-	 *	3) read_len not reflected from the wr.
-	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
-	 */
-	if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
-
-		/*
-		 * If this is an unsolicited read response, then the read
-		 * was generated by the kernel driver as part of peer-2-peer
-		 * connection setup.  So ignore the completion.
-		 */
-		if (!wq->oldest_read) {
-			if (CQE_STATUS(*hw_cqe))
-				wq->error = 1;
-			ret = -1;
-			goto skip_cqe;
-		}
-
-		/*
-		 * Don't write to the HWCQ, so create a new read req CQE
-		 * in local memory.
-		 */
-		create_read_req_cqe(wq, hw_cqe, &read_cqe);
-		hw_cqe = &read_cqe;
-		advance_oldest_read(wq);
-	}
-
-	/*
-	 * T3A: Discard TERMINATE CQEs.
-	 */
-	if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
-		ret = -1;
-		wq->error = 1;
-		goto skip_cqe;
-	}
-
-	if (CQE_STATUS(*hw_cqe) || wq->error) {
-		*cqe_flushed = wq->error;
-		wq->error = 1;
-
-		/*
-		 * T3A inserts errors into the CQE.  We cannot return
-		 * these as work completions.
-		 */
-		/* incoming write failures */
-		if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
-		     && RQ_TYPE(*hw_cqe)) {
-			ret = -1;
-			goto skip_cqe;
-		}
-		/* incoming read request failures */
-		if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
-			ret = -1;
-			goto skip_cqe;
-		}
-
-		/* incoming SEND with no receive posted failures */
-		if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
-		    Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
-			ret = -1;
-			goto skip_cqe;
-		}
-		BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
-		goto proc_cqe;
-	}
-
-	/*
-	 * RECV completion.
-	 */
-	if (RQ_TYPE(*hw_cqe)) {
-
-		/*
-		 * HW only validates 4 bits of MSN.  So we must validate that
-		 * the MSN in the SEND is the next expected MSN.  If its not,
-		 * then we complete this with TPT_ERR_MSN and mark the wq in
-		 * error.
-		 */
-
-		if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
-			wq->error = 1;
-			ret = -1;
-			goto skip_cqe;
-		}
-
-		if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
-			wq->error = 1;
-			hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
-			goto proc_cqe;
-		}
-		goto proc_cqe;
-	}
-
-	/*
-	 * If we get here its a send completion.
-	 *
-	 * Handle out of order completion. These get stuffed
-	 * in the SW SQ. Then the SW SQ is walked to move any
-	 * now in-order completions into the SW CQ.  This handles
-	 * 2 cases:
-	 *	1) reaping unsignaled WRs when the first subsequent
-	 *	   signaled WR is completed.
-	 *	2) out of order read completions.
-	 */
-	if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
-		struct t3_swsq *sqp;
-
-		pr_debug("%s out of order completion going in swsq at idx %ld\n",
-			 __func__,
-			 Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
-				   wq->sq_size_log2));
-		sqp = wq->sq +
-		      Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
-		sqp->cqe = *hw_cqe;
-		sqp->complete = 1;
-		ret = -1;
-		goto flush_wq;
-	}
-
-proc_cqe:
-	*cqe = *hw_cqe;
-
-	/*
-	 * Reap the associated WR(s) that are freed up with this
-	 * completion.
-	 */
-	if (SQ_TYPE(*hw_cqe)) {
-		wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
-		pr_debug("%s completing sq idx %ld\n", __func__,
-			 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
-		*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
-		wq->sq_rptr++;
-	} else {
-		pr_debug("%s completing rq idx %ld\n", __func__,
-			 Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
-		*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
-		if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
-			cxio_hal_pblpool_free(wq->rdev,
-				wq->rq[Q_PTR2IDX(wq->rq_rptr,
-				wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
-		BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
-		wq->rq_rptr++;
-	}
-
-flush_wq:
-	/*
-	 * Flush any completed cqes that are now in-order.
-	 */
-	flush_completed_wrs(wq, cq);
-
-skip_cqe:
-	if (SW_CQE(*hw_cqe)) {
-		pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
-			 __func__, cq, cq->cqid, cq->sw_rptr);
-		++cq->sw_rptr;
-	} else {
-		pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
-			 __func__, cq, cq->cqid, cq->rptr);
-		++cq->rptr;
-
-		/*
-		 * T3A: compute credits.
-		 */
-		if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
-		    || ((cq->rptr - cq->wptr) >= 128)) {
-			*credit = cq->rptr - cq->wptr;
-			cq->wptr = cq->rptr;
-		}
-	}
-	return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
deleted file mode 100644
index 40c029ffa425..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef  __CXIO_HAL_H__
-#define  __CXIO_HAL_H__
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/kfifo.h>
-
-#include "t3_cpl.h"
-#include "t3cdev.h"
-#include "cxgb3_ctl_defs.h"
-#include "cxio_wr.h"
-
-#define T3_CTRL_QP_ID    FW_RI_SGEEC_START
-#define T3_CTL_QP_TID	 FW_RI_TID_START
-#define T3_CTRL_QP_SIZE_LOG2  8
-#define T3_CTRL_CQ_ID    0
-
-#define T3_MAX_NUM_RI (1<<15)
-#define T3_MAX_NUM_QP (1<<15)
-#define T3_MAX_NUM_CQ (1<<15)
-#define T3_MAX_NUM_PD (1<<15)
-#define T3_MAX_PBL_SIZE 256
-#define T3_MAX_RQ_SIZE 1024
-#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 65536
-#define T3_MAX_NUM_STAG (1<<15)
-#define T3_MAX_MR_SIZE 0x100000000ULL
-#define T3_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
-
-#define T3_STAG_UNSET 0xffffffff
-
-#define T3_MAX_DEV_NAME_LEN 32
-
-#define CXIO_FW_MAJ 7
-
-struct cxio_hal_ctrl_qp {
-	u32 wptr;
-	u32 rptr;
-	struct mutex lock;	/* for the wtpr, can sleep */
-	wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
-	union t3_wr *workq;	/* the work request queue */
-	dma_addr_t dma_addr;	/* pci bus address of the workq */
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	void __iomem *doorbell;
-};
-
-struct cxio_hal_resource {
-	struct kfifo tpt_fifo;
-	spinlock_t tpt_fifo_lock;
-	struct kfifo qpid_fifo;
-	spinlock_t qpid_fifo_lock;
-	struct kfifo cqid_fifo;
-	spinlock_t cqid_fifo_lock;
-	struct kfifo pdid_fifo;
-	spinlock_t pdid_fifo_lock;
-};
-
-struct cxio_qpid_list {
-	struct list_head entry;
-	u32 qpid;
-};
-
-struct cxio_ucontext {
-	struct list_head qpids;
-	struct mutex lock;
-};
-
-struct cxio_rdev {
-	char dev_name[T3_MAX_DEV_NAME_LEN];
-	struct t3cdev *t3cdev_p;
-	struct rdma_info rnic_info;
-	struct adap_ports port_info;
-	struct cxio_hal_resource *rscp;
-	struct cxio_hal_ctrl_qp ctrl_qp;
-	void *ulp;
-	unsigned long qpshift;
-	u32 qpnr;
-	u32 qpmask;
-	struct cxio_ucontext uctx;
-	struct gen_pool *pbl_pool;
-	struct gen_pool *rqt_pool;
-	struct list_head entry;
-	struct ch_embedded_info fw_info;
-	u32	flags;
-#define	CXIO_ERROR_FATAL	1
-};
-
-static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
-{
-	return rdev_p->flags & CXIO_ERROR_FATAL;
-}
-
-static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
-{
-	return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
-}
-
-typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
-					     struct sk_buff * skb);
-
-#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
-#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
-#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
-#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
-#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
-#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
-#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
-#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
-#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
-
-struct respQ_msg_t {
-	__be32 flags;		/* flit 0 */
-	__be32 cq_ptrid;
-	__be64 rsvd;		/* flit 1 */
-	struct t3_cqe cqe;	/* flits 2-3 */
-};
-
-enum t3_cq_opcode {
-	CQ_ARM_AN = 0x2,
-	CQ_ARM_SE = 0x6,
-	CQ_FORCE_AN = 0x3,
-	CQ_CREDIT_UPDATE = 0x7
-};
-
-int cxio_rdev_open(struct cxio_rdev *rdev);
-void cxio_rdev_close(struct cxio_rdev *rdev);
-int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
-		   enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
-void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
-		   struct cxio_ucontext *uctx);
-int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
-		    struct cxio_ucontext *uctx);
-int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
-		   u32 pbl_addr, u32 pbl_size);
-int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
-			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
-			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
-			   u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
-		   u32 pbl_addr);
-int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
-int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int __init cxio_hal_init(void);
-void __exit cxio_hal_exit(void);
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_flush_hw_cq(struct t3_cq *cq);
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
-		     u8 *cqe_flushed, u64 *cookie, u32 *credit);
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
deleted file mode 100644
index c6e7bc4420b6..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/* Crude resource management */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-
-static struct kfifo rhdl_fifo;
-static spinlock_t rhdl_fifo_lock;
-
-#define RANDOM_SIZE 16
-
-static int __cxio_init_resource_fifo(struct kfifo *fifo,
-				   spinlock_t *fifo_lock,
-				   u32 nr, u32 skip_low,
-				   u32 skip_high,
-				   int random)
-{
-	u32 i, j, entry = 0, idx;
-	u32 random_bytes;
-	u32 rarray[16];
-	spin_lock_init(fifo_lock);
-
-	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
-		return -ENOMEM;
-
-	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
-	if (random) {
-		j = 0;
-		random_bytes = prandom_u32();
-		for (i = 0; i < RANDOM_SIZE; i++)
-			rarray[i] = i + skip_low;
-		for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
-			if (j >= RANDOM_SIZE) {
-				j = 0;
-				random_bytes = prandom_u32();
-			}
-			idx = (random_bytes >> (j * 2)) & 0xF;
-			kfifo_in(fifo,
-				(unsigned char *) &rarray[idx],
-				sizeof(u32));
-			rarray[idx] = i;
-			j++;
-		}
-		for (i = 0; i < RANDOM_SIZE; i++)
-			kfifo_in(fifo,
-				(unsigned char *) &rarray[i],
-				sizeof(u32));
-	} else
-		for (i = skip_low; i < nr - skip_high; i++)
-			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
-
-	for (i = 0; i < skip_low + skip_high; i++)
-		if (kfifo_out_locked(fifo, (unsigned char *) &entry,
-				sizeof(u32), fifo_lock) != sizeof(u32))
-					break;
-	return 0;
-}
-
-static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
-				   u32 nr, u32 skip_low, u32 skip_high)
-{
-	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
-					  skip_high, 0));
-}
-
-static int cxio_init_resource_fifo_random(struct kfifo *fifo,
-				   spinlock_t * fifo_lock,
-				   u32 nr, u32 skip_low, u32 skip_high)
-{
-
-	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
-					  skip_high, 1));
-}
-
-static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
-{
-	u32 i;
-
-	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
-
-	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
-					      GFP_KERNEL))
-		return -ENOMEM;
-
-	for (i = 16; i < T3_MAX_NUM_QP; i++)
-		if (!(i & rdev_p->qpmask))
-			kfifo_in(&rdev_p->rscp->qpid_fifo,
-				    (unsigned char *) &i, sizeof(u32));
-	return 0;
-}
-
-int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
-{
-	return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
-				       0);
-}
-
-void cxio_hal_destroy_rhdl_resource(void)
-{
-	kfifo_free(&rhdl_fifo);
-}
-
-/* nr_* must be power of 2 */
-int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
-			   u32 nr_tpt, u32 nr_pbl,
-			   u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
-{
-	int err = 0;
-	struct cxio_hal_resource *rscp;
-
-	rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
-	if (!rscp)
-		return -ENOMEM;
-	rdev_p->rscp = rscp;
-	err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
-				      &rscp->tpt_fifo_lock,
-				      nr_tpt, 1, 0);
-	if (err)
-		goto tpt_err;
-	err = cxio_init_qpid_fifo(rdev_p);
-	if (err)
-		goto qpid_err;
-	err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
-				      nr_cqid, 1, 0);
-	if (err)
-		goto cqid_err;
-	err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
-				      nr_pdid, 1, 0);
-	if (err)
-		goto pdid_err;
-	return 0;
-pdid_err:
-	kfifo_free(&rscp->cqid_fifo);
-cqid_err:
-	kfifo_free(&rscp->qpid_fifo);
-qpid_err:
-	kfifo_free(&rscp->tpt_fifo);
-tpt_err:
-	return -ENOMEM;
-}
-
-/*
- * returns 0 if no resource available
- */
-static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
-{
-	u32 entry;
-	if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
-		return entry;
-	else
-		return 0;	/* fifo emptry */
-}
-
-static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
-		u32 entry)
-{
-	BUG_ON(
-	kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
-	== 0);
-}
-
-u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
-{
-	return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
-}
-
-void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
-{
-	cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
-}
-
-u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
-{
-	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
-			&rscp->qpid_fifo_lock);
-	pr_debug("%s qpid 0x%x\n", __func__, qpid);
-	return qpid;
-}
-
-void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
-{
-	pr_debug("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
-}
-
-u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
-{
-	return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
-}
-
-void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
-{
-	cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
-}
-
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
-{
-	return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
-}
-
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
-{
-	cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
-}
-
-void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
-{
-	kfifo_free(&rscp->tpt_fifo);
-	kfifo_free(&rscp->cqid_fifo);
-	kfifo_free(&rscp->qpid_fifo);
-	kfifo_free(&rscp->pdid_fifo);
-	kfree(rscp);
-}
-
-/*
- * PBL Memory Manager.  Uses Linux generic allocator.
- */
-
-#define MIN_PBL_SHIFT 8			/* 256B == min PBL size (32 entries) */
-
-u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
-	unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
-	pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
-	return (u32)addr;
-}
-
-void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
-	pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
-	gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
-}
-
-int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
-{
-	unsigned pbl_start, pbl_chunk;
-
-	rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
-	if (!rdev_p->pbl_pool)
-		return -ENOMEM;
-
-	pbl_start = rdev_p->rnic_info.pbl_base;
-	pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
-
-	while (pbl_start < rdev_p->rnic_info.pbl_top) {
-		pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
-				pbl_chunk);
-		if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
-			pr_debug("%s failed to add PBL chunk (%x/%x)\n",
-				 __func__, pbl_start, pbl_chunk);
-			if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
-				pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
-					__func__, pbl_start,
-					rdev_p->rnic_info.pbl_top - pbl_start);
-				return 0;
-			}
-			pbl_chunk >>= 1;
-		} else {
-			pr_debug("%s added PBL chunk (%x/%x)\n",
-				 __func__, pbl_start, pbl_chunk);
-			pbl_start += pbl_chunk;
-		}
-	}
-
-	return 0;
-}
-
-void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
-{
-	gen_pool_destroy(rdev_p->pbl_pool);
-}
-
-/*
- * RQT Memory Manager.  Uses Linux generic allocator.
- */
-
-#define MIN_RQT_SHIFT 10	/* 1KB == mini RQT size (16 entries) */
-#define RQT_CHUNK 2*1024*1024
-
-u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
-	unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
-	pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
-	return (u32)addr;
-}
-
-void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
-	pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
-	gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
-}
-
-int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
-{
-	unsigned long i;
-	rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
-	if (rdev_p->rqt_pool)
-		for (i = rdev_p->rnic_info.rqt_base;
-		     i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
-		     i += RQT_CHUNK)
-			gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
-	return rdev_p->rqt_pool ? 0 : -ENOMEM;
-}
-
-void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
-{
-	gen_pool_destroy(rdev_p->rqt_pool);
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
deleted file mode 100644
index a2703a3d882d..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_RESOURCE_H__
-#define __CXIO_RESOURCE_H__
-
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/genalloc.h>
-#include "cxio_hal.h"
-
-extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
-extern void cxio_hal_destroy_rhdl_resource(void);
-extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
-				  u32 nr_tpt, u32 nr_pbl,
-				  u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
-				  u32 nr_pdid);
-extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
-extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
-extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
-extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
-
-#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
-extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-
-#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
-extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
deleted file mode 100644
index 53aa5c36247a..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_WR_H__
-#define __CXIO_WR_H__
-
-#include <asm/io.h>
-#include <linux/pci.h>
-#include <linux/timer.h>
-#include "firmware_exports.h"
-
-#define T3_MAX_SGE      4
-#define T3_MAX_INLINE	64
-#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
-#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
-#define T3_STAG0_PAGE_SHIFT 15
-
-#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
-#define Q_FULL(rptr,wptr,size_log2)  ( (((wptr)-(rptr))>>(size_log2)) && \
-				       ((rptr)!=(wptr)) )
-#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
-#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
-#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
-#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
-
-static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
-{
-	writel(((1<<31) | qpid), doorbell);
-}
-
-#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
-
-enum t3_wr_flags {
-	T3_COMPLETION_FLAG = 0x01,
-	T3_NOTIFY_FLAG = 0x02,
-	T3_SOLICITED_EVENT_FLAG = 0x04,
-	T3_READ_FENCE_FLAG = 0x08,
-	T3_LOCAL_FENCE_FLAG = 0x10
-} __packed;
-
-enum t3_wr_opcode {
-	T3_WR_BP = FW_WROPCODE_RI_BYPASS,
-	T3_WR_SEND = FW_WROPCODE_RI_SEND,
-	T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
-	T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
-	T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
-	T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
-	T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
-	T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
-	T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
-	T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
-} __packed;
-
-enum t3_rdma_opcode {
-	T3_RDMA_WRITE,		/* IETF RDMAP v1.0 ... */
-	T3_READ_REQ,
-	T3_READ_RESP,
-	T3_SEND,
-	T3_SEND_WITH_INV,
-	T3_SEND_WITH_SE,
-	T3_SEND_WITH_SE_INV,
-	T3_TERMINATE,
-	T3_RDMA_INIT,		/* CHELSIO RI specific ... */
-	T3_BIND_MW,
-	T3_FAST_REGISTER,
-	T3_LOCAL_INV,
-	T3_QP_MOD,
-	T3_BYPASS,
-	T3_RDMA_READ_REQ_WITH_INV,
-} __packed;
-
-static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
-{
-	switch (wrop) {
-		case T3_WR_BP: return T3_BYPASS;
-		case T3_WR_SEND: return T3_SEND;
-		case T3_WR_WRITE: return T3_RDMA_WRITE;
-		case T3_WR_READ: return T3_READ_REQ;
-		case T3_WR_INV_STAG: return T3_LOCAL_INV;
-		case T3_WR_BIND: return T3_BIND_MW;
-		case T3_WR_INIT: return T3_RDMA_INIT;
-		case T3_WR_QP_MOD: return T3_QP_MOD;
-		case T3_WR_FASTREG: return T3_FAST_REGISTER;
-		default: break;
-	}
-	return -1;
-}
-
-
-/* Work request id */
-union t3_wrid {
-	struct {
-		u32 hi;
-		u32 low;
-	} id0;
-	u64 id1;
-};
-
-#define WRID(wrid)		(wrid.id1)
-#define WRID_GEN(wrid)		(wrid.id0.wr_gen)
-#define WRID_IDX(wrid)		(wrid.id0.wr_idx)
-#define WRID_LO(wrid)		(wrid.id0.wr_lo)
-
-struct fw_riwrh {
-	__be32 op_seop_flags;
-	__be32 gen_tid_len;
-};
-
-#define S_FW_RIWR_OP		24
-#define M_FW_RIWR_OP		0xff
-#define V_FW_RIWR_OP(x)		((x) << S_FW_RIWR_OP)
-#define G_FW_RIWR_OP(x)	((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
-
-#define S_FW_RIWR_SOPEOP	22
-#define M_FW_RIWR_SOPEOP	0x3
-#define V_FW_RIWR_SOPEOP(x)	((x) << S_FW_RIWR_SOPEOP)
-
-#define S_FW_RIWR_FLAGS		8
-#define M_FW_RIWR_FLAGS		0x3fffff
-#define V_FW_RIWR_FLAGS(x)	((x) << S_FW_RIWR_FLAGS)
-#define G_FW_RIWR_FLAGS(x)	((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
-
-#define S_FW_RIWR_TID		8
-#define V_FW_RIWR_TID(x)	((x) << S_FW_RIWR_TID)
-
-#define S_FW_RIWR_LEN		0
-#define V_FW_RIWR_LEN(x)	((x) << S_FW_RIWR_LEN)
-
-#define S_FW_RIWR_GEN           31
-#define V_FW_RIWR_GEN(x)        ((x)  << S_FW_RIWR_GEN)
-
-struct t3_sge {
-	__be32 stag;
-	__be32 len;
-	__be64 to;
-};
-
-/* If num_sgle is zero, flit 5+ contains immediate data.*/
-struct t3_send_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-
-	u8 rdmaop;		/* 2 */
-	u8 reserved[3];
-	__be32 rem_stag;
-	__be32 plen;		/* 3 */
-	__be32 num_sgle;
-	struct t3_sge sgl[T3_MAX_SGE];	/* 4+ */
-};
-
-#define T3_MAX_FASTREG_DEPTH 10
-#define T3_MAX_FASTREG_FRAG 10
-
-struct t3_fastreg_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	__be32 stag;		/* 2 */
-	__be32 len;
-	__be32 va_base_hi;	/* 3 */
-	__be32 va_base_lo_fbo;
-	__be32 page_type_perms; /* 4 */
-	__be32 reserved1;
-	__be64 pbl_addrs[0];	/* 5+ */
-};
-
-/*
- * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this.
- */
-struct t3_pbl_frag {
-	struct fw_riwrh wrh;	/* 0 */
-	__be64 pbl_addrs[14];	/* 1..14 */
-};
-
-#define S_FR_PAGE_COUNT		24
-#define M_FR_PAGE_COUNT		0xff
-#define V_FR_PAGE_COUNT(x)	((x) << S_FR_PAGE_COUNT)
-#define G_FR_PAGE_COUNT(x)	((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
-
-#define S_FR_PAGE_SIZE		16
-#define M_FR_PAGE_SIZE		0x1f
-#define V_FR_PAGE_SIZE(x)	((x) << S_FR_PAGE_SIZE)
-#define G_FR_PAGE_SIZE(x)	((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
-
-#define S_FR_TYPE		8
-#define M_FR_TYPE		0x1
-#define V_FR_TYPE(x)		((x) << S_FR_TYPE)
-#define G_FR_TYPE(x)		((((x) >> S_FR_TYPE)) & M_FR_TYPE)
-
-#define S_FR_PERMS		0
-#define M_FR_PERMS		0xff
-#define V_FR_PERMS(x)		((x) << S_FR_PERMS)
-#define G_FR_PERMS(x)		((((x) >> S_FR_PERMS)) & M_FR_PERMS)
-
-struct t3_local_inv_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	__be32 stag;		/* 2 */
-	__be32 reserved;
-};
-
-struct t3_rdma_write_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	u8 rdmaop;		/* 2 */
-	u8 reserved[3];
-	__be32 stag_sink;
-	__be64 to_sink;		/* 3 */
-	__be32 plen;		/* 4 */
-	__be32 num_sgle;
-	struct t3_sge sgl[T3_MAX_SGE];	/* 5+ */
-};
-
-struct t3_rdma_read_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	u8 rdmaop;		/* 2 */
-	u8 local_inv;
-	u8 reserved[2];
-	__be32 rem_stag;
-	__be64 rem_to;		/* 3 */
-	__be32 local_stag;	/* 4 */
-	__be32 local_len;
-	__be64 local_to;	/* 5 */
-};
-
-struct t3_bind_mw_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	u16 reserved;		/* 2 */
-	u8 type;
-	u8 perms;
-	__be32 mr_stag;
-	__be32 mw_stag;		/* 3 */
-	__be32 mw_len;
-	__be64 mw_va;		/* 4 */
-	__be32 mr_pbl_addr;	/* 5 */
-	u8 reserved2[3];
-	u8 mr_pagesz;
-};
-
-struct t3_receive_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	u8 pagesz[T3_MAX_SGE];
-	__be32 num_sgle;		/* 2 */
-	struct t3_sge sgl[T3_MAX_SGE];	/* 3+ */
-	__be32 pbl_addr[T3_MAX_SGE];
-};
-
-struct t3_bypass_wr {
-	struct fw_riwrh wrh;
-	union t3_wrid wrid;	/* 1 */
-};
-
-struct t3_modify_qp_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	__be32 flags;		/* 2 */
-	__be32 quiesce;		/* 2 */
-	__be32 max_ird;		/* 3 */
-	__be32 max_ord;		/* 3 */
-	__be64 sge_cmd;		/* 4 */
-	__be64 ctx1;		/* 5 */
-	__be64 ctx0;		/* 6 */
-};
-
-enum t3_modify_qp_flags {
-	MODQP_QUIESCE  = 0x01,
-	MODQP_MAX_IRD  = 0x02,
-	MODQP_MAX_ORD  = 0x04,
-	MODQP_WRITE_EC = 0x08,
-	MODQP_READ_EC  = 0x10,
-};
-
-
-enum t3_mpa_attrs {
-	uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
-	uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
-	uP_RI_MPA_CRC_ENABLE = 0x4,
-	uP_RI_MPA_IETF_ENABLE = 0x8
-} __packed;
-
-enum t3_qp_caps {
-	uP_RI_QP_RDMA_READ_ENABLE = 0x01,
-	uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
-	uP_RI_QP_BIND_ENABLE = 0x04,
-	uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
-	uP_RI_QP_STAG0_ENABLE = 0x10
-} __packed;
-
-enum rdma_init_rtr_types {
-	RTR_READ = 1,
-	RTR_WRITE = 2,
-	RTR_SEND = 3,
-};
-
-#define S_RTR_TYPE	2
-#define M_RTR_TYPE	0x3
-#define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
-#define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
-
-#define S_CHAN		4
-#define M_CHAN		0x3
-#define V_CHAN(x)	((x) << S_CHAN)
-#define G_CHAN(x)	((((x) >> S_CHAN)) & M_CHAN)
-
-struct t3_rdma_init_attr {
-	u32 tid;
-	u32 qpid;
-	u32 pdid;
-	u32 scqid;
-	u32 rcqid;
-	u32 rq_addr;
-	u32 rq_size;
-	enum t3_mpa_attrs mpaattrs;
-	enum t3_qp_caps qpcaps;
-	u16 tcp_emss;
-	u32 ord;
-	u32 ird;
-	u64 qp_dma_addr;
-	u32 qp_dma_size;
-	enum rdma_init_rtr_types rtr_type;
-	u16 flags;
-	u16 rqe_count;
-	u32 irs;
-	u32 chan;
-};
-
-struct t3_rdma_init_wr {
-	struct fw_riwrh wrh;	/* 0 */
-	union t3_wrid wrid;	/* 1 */
-	__be32 qpid;		/* 2 */
-	__be32 pdid;
-	__be32 scqid;		/* 3 */
-	__be32 rcqid;
-	__be32 rq_addr;		/* 4 */
-	__be32 rq_size;
-	u8 mpaattrs;		/* 5 */
-	u8 qpcaps;
-	__be16 ulpdu_size;
-	__be16 flags_rtr_type;
-	__be16 rqe_count;
-	__be32 ord;		/* 6 */
-	__be32 ird;
-	__be64 qp_dma_addr;	/* 7 */
-	__be32 qp_dma_size;	/* 8 */
-	__be32 irs;
-};
-
-struct t3_genbit {
-	u64 flit[15];
-	__be64 genbit;
-};
-
-struct t3_wq_in_err {
-	u64 flit[13];
-	u64 err;
-};
-
-enum rdma_init_wr_flags {
-	MPA_INITIATOR = (1<<0),
-	PRIV_QP = (1<<1),
-};
-
-union t3_wr {
-	struct t3_send_wr send;
-	struct t3_rdma_write_wr write;
-	struct t3_rdma_read_wr read;
-	struct t3_receive_wr recv;
-	struct t3_fastreg_wr fastreg;
-	struct t3_pbl_frag pbl_frag;
-	struct t3_local_inv_wr local_inv;
-	struct t3_bind_mw_wr bind;
-	struct t3_bypass_wr bypass;
-	struct t3_rdma_init_wr init;
-	struct t3_modify_qp_wr qp_mod;
-	struct t3_genbit genbit;
-	struct t3_wq_in_err wq_in_err;
-	__be64 flit[16];
-};
-
-#define T3_SQ_CQE_FLIT	  13
-#define T3_SQ_COOKIE_FLIT 14
-
-#define T3_RQ_COOKIE_FLIT 13
-#define T3_RQ_CQE_FLIT	  14
-
-static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
-{
-	return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
-}
-
-enum t3_wr_hdr_bits {
-	T3_EOP = 1,
-	T3_SOP = 2,
-	T3_SOPEOP = T3_EOP|T3_SOP,
-};
-
-static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
-				  enum t3_wr_flags flags, u8 genbit, u32 tid,
-				  u8 len, u8 sopeop)
-{
-	wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
-					 V_FW_RIWR_SOPEOP(sopeop) |
-					 V_FW_RIWR_FLAGS(flags));
-	wmb();
-	wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
-				       V_FW_RIWR_TID(tid) |
-				       V_FW_RIWR_LEN(len));
-	/* 2nd gen bit... */
-	((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
-}
-
-/*
- * T3 ULP2_TX commands
- */
-enum t3_utx_mem_op {
-	T3_UTX_MEM_READ = 2,
-	T3_UTX_MEM_WRITE = 3
-};
-
-/* T3 MC7 RDMA TPT entry format */
-
-enum tpt_mem_type {
-	TPT_NON_SHARED_MR = 0x0,
-	TPT_SHARED_MR = 0x1,
-	TPT_MW = 0x2,
-	TPT_MW_RELAXED_PROTECTION = 0x3
-};
-
-enum tpt_addr_type {
-	TPT_ZBTO = 0,
-	TPT_VATO = 1
-};
-
-enum tpt_mem_perm {
-	TPT_MW_BIND = 0x10,
-	TPT_LOCAL_READ = 0x8,
-	TPT_LOCAL_WRITE = 0x4,
-	TPT_REMOTE_READ = 0x2,
-	TPT_REMOTE_WRITE = 0x1
-};
-
-struct tpt_entry {
-	__be32 valid_stag_pdid;
-	__be32 flags_pagesize_qpid;
-
-	__be32 rsvd_pbl_addr;
-	__be32 len;
-	__be32 va_hi;
-	__be32 va_low_or_fbo;
-
-	__be32 rsvd_bind_cnt_or_pstag;
-	__be32 rsvd_pbl_size;
-};
-
-#define S_TPT_VALID		31
-#define V_TPT_VALID(x)		((x) << S_TPT_VALID)
-#define F_TPT_VALID		V_TPT_VALID(1U)
-
-#define S_TPT_STAG_KEY		23
-#define M_TPT_STAG_KEY		0xFF
-#define V_TPT_STAG_KEY(x)	((x) << S_TPT_STAG_KEY)
-#define G_TPT_STAG_KEY(x)	(((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
-
-#define S_TPT_STAG_STATE	22
-#define V_TPT_STAG_STATE(x)	((x) << S_TPT_STAG_STATE)
-#define F_TPT_STAG_STATE	V_TPT_STAG_STATE(1U)
-
-#define S_TPT_STAG_TYPE		20
-#define M_TPT_STAG_TYPE		0x3
-#define V_TPT_STAG_TYPE(x)	((x) << S_TPT_STAG_TYPE)
-#define G_TPT_STAG_TYPE(x)	(((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
-
-#define S_TPT_PDID		0
-#define M_TPT_PDID		0xFFFFF
-#define V_TPT_PDID(x)		((x) << S_TPT_PDID)
-#define G_TPT_PDID(x)		(((x) >> S_TPT_PDID) & M_TPT_PDID)
-
-#define S_TPT_PERM		28
-#define M_TPT_PERM		0xF
-#define V_TPT_PERM(x)		((x) << S_TPT_PERM)
-#define G_TPT_PERM(x)		(((x) >> S_TPT_PERM) & M_TPT_PERM)
-
-#define S_TPT_REM_INV_DIS	27
-#define V_TPT_REM_INV_DIS(x)	((x) << S_TPT_REM_INV_DIS)
-#define F_TPT_REM_INV_DIS	V_TPT_REM_INV_DIS(1U)
-
-#define S_TPT_ADDR_TYPE		26
-#define V_TPT_ADDR_TYPE(x)	((x) << S_TPT_ADDR_TYPE)
-#define F_TPT_ADDR_TYPE		V_TPT_ADDR_TYPE(1U)
-
-#define S_TPT_MW_BIND_ENABLE	25
-#define V_TPT_MW_BIND_ENABLE(x)	((x) << S_TPT_MW_BIND_ENABLE)
-#define F_TPT_MW_BIND_ENABLE    V_TPT_MW_BIND_ENABLE(1U)
-
-#define S_TPT_PAGE_SIZE		20
-#define M_TPT_PAGE_SIZE		0x1F
-#define V_TPT_PAGE_SIZE(x)	((x) << S_TPT_PAGE_SIZE)
-#define G_TPT_PAGE_SIZE(x)	(((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
-
-#define S_TPT_PBL_ADDR		0
-#define M_TPT_PBL_ADDR		0x1FFFFFFF
-#define V_TPT_PBL_ADDR(x)	((x) << S_TPT_PBL_ADDR)
-#define G_TPT_PBL_ADDR(x)       (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
-
-#define S_TPT_QPID		0
-#define M_TPT_QPID		0xFFFFF
-#define V_TPT_QPID(x)		((x) << S_TPT_QPID)
-#define G_TPT_QPID(x)		(((x) >> S_TPT_QPID) & M_TPT_QPID)
-
-#define S_TPT_PSTAG		0
-#define M_TPT_PSTAG		0xFFFFFF
-#define V_TPT_PSTAG(x)		((x) << S_TPT_PSTAG)
-#define G_TPT_PSTAG(x)		(((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
-
-#define S_TPT_PBL_SIZE		0
-#define M_TPT_PBL_SIZE		0xFFFFF
-#define V_TPT_PBL_SIZE(x)	((x) << S_TPT_PBL_SIZE)
-#define G_TPT_PBL_SIZE(x)	(((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
-
-/*
- * CQE defs
- */
-struct t3_cqe {
-	__be32 header;
-	__be32 len;
-	union {
-		struct {
-			__be32 stag;
-			__be32 msn;
-		} rcqe;
-		struct {
-			u32 wrid_hi;
-			u32 wrid_low;
-		} scqe;
-	} u;
-};
-
-#define S_CQE_OOO	  31
-#define M_CQE_OOO	  0x1
-#define G_CQE_OOO(x)	  ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
-#define V_CEQ_OOO(x)	  ((x)<<S_CQE_OOO)
-
-#define S_CQE_QPID        12
-#define M_CQE_QPID        0x7FFFF
-#define G_CQE_QPID(x)     ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
-#define V_CQE_QPID(x)	  ((x)<<S_CQE_QPID)
-
-#define S_CQE_SWCQE       11
-#define M_CQE_SWCQE       0x1
-#define G_CQE_SWCQE(x)    ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
-#define V_CQE_SWCQE(x)	  ((x)<<S_CQE_SWCQE)
-
-#define S_CQE_GENBIT      10
-#define M_CQE_GENBIT      0x1
-#define G_CQE_GENBIT(x)   (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
-#define V_CQE_GENBIT(x)	  ((x)<<S_CQE_GENBIT)
-
-#define S_CQE_STATUS      5
-#define M_CQE_STATUS      0x1F
-#define G_CQE_STATUS(x)   ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
-#define V_CQE_STATUS(x)   ((x)<<S_CQE_STATUS)
-
-#define S_CQE_TYPE        4
-#define M_CQE_TYPE        0x1
-#define G_CQE_TYPE(x)     ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
-#define V_CQE_TYPE(x)     ((x)<<S_CQE_TYPE)
-
-#define S_CQE_OPCODE      0
-#define M_CQE_OPCODE      0xF
-#define G_CQE_OPCODE(x)   ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
-#define V_CQE_OPCODE(x)   ((x)<<S_CQE_OPCODE)
-
-#define SW_CQE(x)         (G_CQE_SWCQE(be32_to_cpu((x).header)))
-#define CQE_OOO(x)        (G_CQE_OOO(be32_to_cpu((x).header)))
-#define CQE_QPID(x)       (G_CQE_QPID(be32_to_cpu((x).header)))
-#define CQE_GENBIT(x)     (G_CQE_GENBIT(be32_to_cpu((x).header)))
-#define CQE_TYPE(x)       (G_CQE_TYPE(be32_to_cpu((x).header)))
-#define SQ_TYPE(x)	  (CQE_TYPE((x)))
-#define RQ_TYPE(x)	  (!CQE_TYPE((x)))
-#define CQE_STATUS(x)     (G_CQE_STATUS(be32_to_cpu((x).header)))
-#define CQE_OPCODE(x)     (G_CQE_OPCODE(be32_to_cpu((x).header)))
-
-#define CQE_SEND_OPCODE(x)( \
-	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
-	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
-	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
-	(G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
-
-#define CQE_LEN(x)        (be32_to_cpu((x).len))
-
-/* used for RQ completion processing */
-#define CQE_WRID_STAG(x)  (be32_to_cpu((x).u.rcqe.stag))
-#define CQE_WRID_MSN(x)   (be32_to_cpu((x).u.rcqe.msn))
-
-/* used for SQ completion processing */
-#define CQE_WRID_SQ_WPTR(x)	((x).u.scqe.wrid_hi)
-#define CQE_WRID_WPTR(x)	((x).u.scqe.wrid_low)
-
-/* generic accessor macros */
-#define CQE_WRID_HI(x)		((x).u.scqe.wrid_hi)
-#define CQE_WRID_LOW(x)		((x).u.scqe.wrid_low)
-
-#define TPT_ERR_SUCCESS                     0x0
-#define TPT_ERR_STAG                        0x1	 /* STAG invalid: either the */
-						 /* STAG is offlimt, being 0, */
-						 /* or STAG_key mismatch */
-#define TPT_ERR_PDID                        0x2	 /* PDID mismatch */
-#define TPT_ERR_QPID                        0x3	 /* QPID mismatch */
-#define TPT_ERR_ACCESS                      0x4	 /* Invalid access right */
-#define TPT_ERR_WRAP                        0x5	 /* Wrap error */
-#define TPT_ERR_BOUND                       0x6	 /* base and bounds voilation */
-#define TPT_ERR_INVALIDATE_SHARED_MR        0x7	 /* attempt to invalidate a  */
-						 /* shared memory region */
-#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8	 /* attempt to invalidate a  */
-						 /* shared memory region */
-#define TPT_ERR_ECC                         0x9	 /* ECC error detected */
-#define TPT_ERR_ECC_PSTAG                   0xA	 /* ECC error detected when  */
-						 /* reading PSTAG for a MW  */
-						 /* Invalidate */
-#define TPT_ERR_PBL_ADDR_BOUND              0xB	 /* pbl addr out of bounds:  */
-						 /* software error */
-#define TPT_ERR_SWFLUSH			    0xC	 /* SW FLUSHED */
-#define TPT_ERR_CRC                         0x10 /* CRC error */
-#define TPT_ERR_MARKER                      0x11 /* Marker error */
-#define TPT_ERR_PDU_LEN_ERR                 0x12 /* invalid PDU length */
-#define TPT_ERR_OUT_OF_RQE                  0x13 /* out of RQE */
-#define TPT_ERR_DDP_VERSION                 0x14 /* wrong DDP version */
-#define TPT_ERR_RDMA_VERSION                0x15 /* wrong RDMA version */
-#define TPT_ERR_OPCODE                      0x16 /* invalid rdma opcode */
-#define TPT_ERR_DDP_QUEUE_NUM               0x17 /* invalid ddp queue number */
-#define TPT_ERR_MSN                         0x18 /* MSN error */
-#define TPT_ERR_TBIT                        0x19 /* tag bit not set correctly */
-#define TPT_ERR_MO                          0x1A /* MO not 0 for TERMINATE  */
-						 /* or READ_REQ */
-#define TPT_ERR_MSN_GAP                     0x1B
-#define TPT_ERR_MSN_RANGE                   0x1C
-#define TPT_ERR_IRD_OVERFLOW                0x1D
-#define TPT_ERR_RQE_ADDR_BOUND              0x1E /* RQE addr out of bounds:  */
-						 /* software error */
-#define TPT_ERR_INTERNAL_ERR                0x1F /* internal error (opcode  */
-						 /* mismatch) */
-
-struct t3_swsq {
-	__u64			wr_id;
-	struct t3_cqe		cqe;
-	__u32			sq_wptr;
-	__be32			read_len;
-	int			opcode;
-	int			complete;
-	int			signaled;
-};
-
-struct t3_swrq {
-	__u64			wr_id;
-	__u32			pbl_addr;
-};
-
-/*
- * A T3 WQ implements both the SQ and RQ.
- */
-struct t3_wq {
-	union t3_wr *queue;		/* DMA accessible memory */
-	dma_addr_t dma_addr;		/* DMA address for HW */
-	DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */
-	u32 error;			/* 1 once we go to ERROR */
-	u32 qpid;
-	u32 wptr;			/* idx to next available WR slot */
-	u32 size_log2;			/* total wq size */
-	struct t3_swsq *sq;		/* SW SQ */
-	struct t3_swsq *oldest_read;	/* tracks oldest pending read */
-	u32 sq_wptr;			/* sq_wptr - sq_rptr == count of */
-	u32 sq_rptr;			/* pending wrs */
-	u32 sq_size_log2;		/* sq size */
-	struct t3_swrq *rq;		/* SW RQ (holds consumer wr_ids */
-	u32 rq_wptr;			/* rq_wptr - rq_rptr == count of */
-	u32 rq_rptr;			/* pending wrs */
-	struct t3_swrq *rq_oldest_wr;	/* oldest wr on the SW RQ */
-	u32 rq_size_log2;		/* rq size */
-	u32 rq_addr;			/* rq adapter address */
-	void __iomem *doorbell;		/* kernel db */
-	u64 udb;			/* user db if any */
-	struct cxio_rdev *rdev;
-};
-
-struct t3_cq {
-	u32 cqid;
-	u32 rptr;
-	u32 wptr;
-	u32 size_log2;
-	dma_addr_t dma_addr;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	struct t3_cqe *queue;
-	struct t3_cqe *sw_queue;
-	u32 sw_rptr;
-	u32 sw_wptr;
-};
-
-#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
-					 CQE_GENBIT(*cqe))
-
-struct t3_cq_status_page {
-	u32 cq_err;
-};
-
-static inline int cxio_cq_in_error(struct t3_cq *cq)
-{
-	return ((struct t3_cq_status_page *)
-		&cq->queue[1 << cq->size_log2])->cq_err;
-}
-
-static inline void cxio_set_cq_in_error(struct t3_cq *cq)
-{
-	((struct t3_cq_status_page *)
-	 &cq->queue[1 << cq->size_log2])->cq_err = 1;
-}
-
-static inline void cxio_set_wq_in_error(struct t3_wq *wq)
-{
-	wq->queue->wq_in_err.err |= 1;
-}
-
-static inline void cxio_disable_wq_db(struct t3_wq *wq)
-{
-	wq->queue->wq_in_err.err |= 2;
-}
-
-static inline void cxio_enable_wq_db(struct t3_wq *wq)
-{
-	wq->queue->wq_in_err.err &= ~2;
-}
-
-static inline int cxio_wq_db_enabled(struct t3_wq *wq)
-{
-	return !(wq->queue->wq_in_err.err & 2);
-}
-
-static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
-{
-	struct t3_cqe *cqe;
-
-	cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
-	if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
-		return cqe;
-	return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
-{
-	struct t3_cqe *cqe;
-
-	if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
-		cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
-		return cqe;
-	}
-	return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
-{
-	struct t3_cqe *cqe;
-
-	if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
-		cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
-		return cqe;
-	}
-	cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
-	if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
-		return cqe;
-	return NULL;
-}
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
deleted file mode 100644
index 56a8ab6210cf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-#include <rdma/cxgb3-abi.h>
-#include "iwch.h"
-#include "iwch_cm.h"
-
-#define DRV_VERSION "1.1"
-
-MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
-MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-static void iwch_event_handler(struct t3cdev *, u32, u32);
-
-struct cxgb3_client t3c_client = {
-	.name = "iw_cxgb3",
-	.add = open_rnic_dev,
-	.remove = close_rnic_dev,
-	.handlers = t3c_handlers,
-	.redirect = iwch_ep_redirect,
-	.event_handler = iwch_event_handler
-};
-
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(dev_mutex);
-
-static void disable_dbs(struct iwch_dev *rnicp)
-{
-	unsigned long index;
-	struct iwch_qp *qhp;
-
-	xa_lock_irq(&rnicp->qps);
-	xa_for_each(&rnicp->qps, index, qhp)
-		cxio_disable_wq_db(&qhp->wq);
-	xa_unlock_irq(&rnicp->qps);
-}
-
-static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
-{
-	unsigned long index;
-	struct iwch_qp *qhp;
-
-	xa_lock_irq(&rnicp->qps);
-	xa_for_each(&rnicp->qps, index, qhp) {
-		if (ring_db)
-			ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell,
-					qhp->wq.qpid);
-		cxio_enable_wq_db(&qhp->wq);
-	}
-	xa_unlock_irq(&rnicp->qps);
-}
-
-static void iwch_db_drop_task(struct work_struct *work)
-{
-	struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
-					      db_drop_task.work);
-	enable_dbs(rnicp, 1);
-}
-
-static void rnic_init(struct iwch_dev *rnicp)
-{
-	pr_debug("%s iwch_dev %p\n", __func__,  rnicp);
-	xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ);
-	xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ);
-	xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ);
-	INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
-
-	rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
-	rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
-	rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
-	rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
-	rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
-	rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
-	rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
-	rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
-	rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
-	rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
-	rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
-	rnicp->attr.can_resize_wq = 0;
-	rnicp->attr.max_rdma_reads_per_qp = 8;
-	rnicp->attr.max_rdma_read_resources =
-	    rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
-	rnicp->attr.max_rdma_read_qp_depth = 8;	/* IRD */
-	rnicp->attr.max_rdma_read_depth =
-	    rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
-	rnicp->attr.rq_overflow_handled = 0;
-	rnicp->attr.can_modify_ird = 0;
-	rnicp->attr.can_modify_ord = 0;
-	rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
-	rnicp->attr.stag0_value = 1;
-	rnicp->attr.zbva_support = 1;
-	rnicp->attr.local_invalidate_fence = 1;
-	rnicp->attr.cq_overflow_detection = 1;
-	return;
-}
-
-static void open_rnic_dev(struct t3cdev *tdev)
-{
-	struct iwch_dev *rnicp;
-
-	pr_debug("%s t3cdev %p\n", __func__,  tdev);
-	pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
-	rnicp = ib_alloc_device(iwch_dev, ibdev);
-	if (!rnicp) {
-		pr_err("Cannot allocate ib device\n");
-		return;
-	}
-	rnicp->rdev.ulp = rnicp;
-	rnicp->rdev.t3cdev_p = tdev;
-
-	mutex_lock(&dev_mutex);
-
-	if (cxio_rdev_open(&rnicp->rdev)) {
-		mutex_unlock(&dev_mutex);
-		pr_err("Unable to open CXIO rdev\n");
-		ib_dealloc_device(&rnicp->ibdev);
-		return;
-	}
-
-	rnic_init(rnicp);
-
-	list_add_tail(&rnicp->entry, &dev_list);
-	mutex_unlock(&dev_mutex);
-
-	if (iwch_register_device(rnicp)) {
-		pr_err("Unable to register device\n");
-		close_rnic_dev(tdev);
-	}
-	pr_info("Initialized device %s\n",
-		pci_name(rnicp->rdev.rnic_info.pdev));
-	return;
-}
-
-static void close_rnic_dev(struct t3cdev *tdev)
-{
-	struct iwch_dev *dev, *tmp;
-	pr_debug("%s t3cdev %p\n", __func__,  tdev);
-	mutex_lock(&dev_mutex);
-	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
-		if (dev->rdev.t3cdev_p == tdev) {
-			dev->rdev.flags = CXIO_ERROR_FATAL;
-			synchronize_net();
-			cancel_delayed_work_sync(&dev->db_drop_task);
-			list_del(&dev->entry);
-			iwch_unregister_device(dev);
-			cxio_rdev_close(&dev->rdev);
-			WARN_ON(!xa_empty(&dev->cqs));
-			WARN_ON(!xa_empty(&dev->qps));
-			WARN_ON(!xa_empty(&dev->mrs));
-			ib_dealloc_device(&dev->ibdev);
-			break;
-		}
-	}
-	mutex_unlock(&dev_mutex);
-}
-
-static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
-{
-	struct cxio_rdev *rdev = tdev->ulp;
-	struct iwch_dev *rnicp;
-	struct ib_event event;
-	u32 portnum = port_id + 1;
-	int dispatch = 0;
-
-	if (!rdev)
-		return;
-	rnicp = rdev_to_iwch_dev(rdev);
-	switch (evt) {
-	case OFFLOAD_STATUS_DOWN: {
-		rdev->flags = CXIO_ERROR_FATAL;
-		synchronize_net();
-		event.event  = IB_EVENT_DEVICE_FATAL;
-		dispatch = 1;
-		break;
-		}
-	case OFFLOAD_PORT_DOWN: {
-		event.event  = IB_EVENT_PORT_ERR;
-		dispatch = 1;
-		break;
-		}
-	case OFFLOAD_PORT_UP: {
-		event.event  = IB_EVENT_PORT_ACTIVE;
-		dispatch = 1;
-		break;
-		}
-	case OFFLOAD_DB_FULL: {
-		disable_dbs(rnicp);
-		break;
-		}
-	case OFFLOAD_DB_EMPTY: {
-		enable_dbs(rnicp, 1);
-		break;
-		}
-	case OFFLOAD_DB_DROP: {
-		unsigned long delay = 1000;
-		unsigned short r;
-
-		disable_dbs(rnicp);
-		get_random_bytes(&r, 2);
-		delay += r & 1023;
-
-		/*
-		 * delay is between 1000-2023 usecs.
-		 */
-		schedule_delayed_work(&rnicp->db_drop_task,
-			usecs_to_jiffies(delay));
-		break;
-		}
-	}
-
-	if (dispatch) {
-		event.device = &rnicp->ibdev;
-		event.element.port_num = portnum;
-		ib_dispatch_event(&event);
-	}
-
-	return;
-}
-
-static int __init iwch_init_module(void)
-{
-	int err;
-
-	err = cxio_hal_init();
-	if (err)
-		return err;
-	err = iwch_cm_init();
-	if (err)
-		return err;
-	cxio_register_ev_cb(iwch_ev_dispatch);
-	cxgb3_register_client(&t3c_client);
-	return 0;
-}
-
-static void __exit iwch_exit_module(void)
-{
-	cxgb3_unregister_client(&t3c_client);
-	cxio_unregister_ev_cb(iwch_ev_dispatch);
-	iwch_cm_term();
-	cxio_hal_exit();
-}
-
-module_init(iwch_init_module);
-module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
deleted file mode 100644
index 310a937bffcf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_H__
-#define __IWCH_H__
-
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/xarray.h>
-#include <linux/workqueue.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-
-struct iwch_pd;
-struct iwch_cq;
-struct iwch_qp;
-struct iwch_mr;
-
-struct iwch_rnic_attributes {
-	u32 max_qps;
-	u32 max_wrs;				/* Max for any SQ/RQ */
-	u32 max_sge_per_wr;
-	u32 max_sge_per_rdma_write_wr;	/* for RDMA Write WR */
-	u32 max_cqs;
-	u32 max_cqes_per_cq;
-	u32 max_mem_regs;
-	u32 max_phys_buf_entries;		/* for phys buf list */
-	u32 max_pds;
-
-	/*
-	 * The memory page sizes supported by this RNIC.
-	 * Bit position i in bitmap indicates page of
-	 * size (4k)^i.  Phys block list mode unsupported.
-	 */
-	u32 mem_pgsizes_bitmask;
-	u64 max_mr_size;
-	u8 can_resize_wq;
-
-	/*
-	 * The maximum number of RDMA Reads that can be outstanding
-	 * per QP with this RNIC as the target.
-	 */
-	u32 max_rdma_reads_per_qp;
-
-	/*
-	 * The maximum number of resources used for RDMA Reads
-	 * by this RNIC with this RNIC as the target.
-	 */
-	u32 max_rdma_read_resources;
-
-	/*
-	 * The max depth per QP for initiation of RDMA Read
-	 * by this RNIC.
-	 */
-	u32 max_rdma_read_qp_depth;
-
-	/*
-	 * The maximum depth for initiation of RDMA Read
-	 * operations by this RNIC on all QPs
-	 */
-	u32 max_rdma_read_depth;
-	u8 rq_overflow_handled;
-	u32 can_modify_ird;
-	u32 can_modify_ord;
-	u32 max_mem_windows;
-	u32 stag0_value;
-	u8 zbva_support;
-	u8 local_invalidate_fence;
-	u32 cq_overflow_detection;
-};
-
-struct iwch_dev {
-	struct ib_device ibdev;
-	struct cxio_rdev rdev;
-	u32 device_cap_flags;
-	struct iwch_rnic_attributes attr;
-	struct xarray cqs;
-	struct xarray qps;
-	struct xarray mrs;
-	struct list_head entry;
-	struct delayed_work db_drop_task;
-};
-
-static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct iwch_dev, ibdev);
-}
-
-static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
-{
-	return container_of(rdev, struct iwch_dev, rdev);
-}
-
-static inline int t3b_device(const struct iwch_dev *rhp)
-{
-	return rhp->rdev.t3cdev_p->type == T3B;
-}
-
-static inline int t3a_device(const struct iwch_dev *rhp)
-{
-	return rhp->rdev.t3cdev_p->type == T3A;
-}
-
-static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
-{
-	return xa_load(&rhp->cqs, cqid);
-}
-
-static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
-{
-	return xa_load(&rhp->qps, qpid);
-}
-
-static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
-{
-	return xa_load(&rhp->mrs, mmid);
-}
-
-extern struct cxgb3_client t3c_client;
-extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
deleted file mode 100644
index 0bca72cb4d9a..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ /dev/null
@@ -1,2258 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-#include <linux/inetdevice.h>
-
-#include <net/neighbour.h>
-#include <net/netevent.h>
-#include <net/route.h>
-
-#include "tcb.h"
-#include "cxgb3_offload.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-
-static char *states[] = {
-	"idle",
-	"listen",
-	"connecting",
-	"mpa_wait_req",
-	"mpa_req_sent",
-	"mpa_req_rcvd",
-	"mpa_rep_sent",
-	"fpdu_mode",
-	"aborting",
-	"closing",
-	"moribund",
-	"dead",
-	NULL,
-};
-
-int peer2peer = 0;
-module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
-
-static int ep_timeout_secs = 60;
-module_param(ep_timeout_secs, int, 0644);
-MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
-				   "in seconds (default=60)");
-
-static int mpa_rev = 1;
-module_param(mpa_rev, int, 0644);
-MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
-		 "1 is spec compliant. (default=1)");
-
-static int markers_enabled = 0;
-module_param(markers_enabled, int, 0644);
-MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
-
-static int crc_enabled = 1;
-module_param(crc_enabled, int, 0644);
-MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
-
-static int rcv_win = 256 * 1024;
-module_param(rcv_win, int, 0644);
-MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
-
-static int snd_win = 32 * 1024;
-module_param(snd_win, int, 0644);
-MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
-
-static unsigned int nocong = 0;
-module_param(nocong, uint, 0644);
-MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
-
-static unsigned int cong_flavor = 1;
-module_param(cong_flavor, uint, 0644);
-MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
-
-static struct workqueue_struct *workq;
-
-static struct sk_buff_head rxq;
-
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
-static void ep_timeout(struct timer_list *t);
-static void connect_reply_upcall(struct iwch_ep *ep, int status);
-
-static void start_ep_timer(struct iwch_ep *ep)
-{
-	pr_debug("%s ep %p\n", __func__, ep);
-	if (timer_pending(&ep->timer)) {
-		pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
-		del_timer_sync(&ep->timer);
-	} else
-		get_ep(&ep->com);
-	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
-	add_timer(&ep->timer);
-}
-
-static void stop_ep_timer(struct iwch_ep *ep)
-{
-	pr_debug("%s ep %p\n", __func__, ep);
-	if (!timer_pending(&ep->timer)) {
-		WARN(1, "%s timer stopped when its not running!  ep %p state %u\n",
-			__func__, ep, ep->com.state);
-		return;
-	}
-	del_timer_sync(&ep->timer);
-	put_ep(&ep->com);
-}
-
-static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
-{
-	int	error = 0;
-	struct cxio_rdev *rdev;
-
-	rdev = (struct cxio_rdev *)tdev->ulp;
-	if (cxio_fatal_error(rdev)) {
-		kfree_skb(skb);
-		return -EIO;
-	}
-	error = l2t_send(tdev, skb, l2e);
-	if (error < 0)
-		kfree_skb(skb);
-	return error < 0 ? error : 0;
-}
-
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
-{
-	int	error = 0;
-	struct cxio_rdev *rdev;
-
-	rdev = (struct cxio_rdev *)tdev->ulp;
-	if (cxio_fatal_error(rdev)) {
-		kfree_skb(skb);
-		return -EIO;
-	}
-	error = cxgb3_ofld_send(tdev, skb);
-	if (error < 0)
-		kfree_skb(skb);
-	return error < 0 ? error : 0;
-}
-
-static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
-{
-	struct cpl_tid_release *req;
-
-	skb = get_skb(skb, sizeof(*req), GFP_KERNEL);
-	if (!skb)
-		return;
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
-	skb->priority = CPL_PRIORITY_SETUP;
-	iwch_cxgb3_ofld_send(tdev, skb);
-	return;
-}
-
-int iwch_quiesce_tid(struct iwch_ep *ep)
-{
-	struct cpl_set_tcb_field *req;
-	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
-	if (!skb)
-		return -ENOMEM;
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
-	req->reply = 0;
-	req->cpu_idx = 0;
-	req->word = htons(W_TCB_RX_QUIESCE);
-	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
-	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
-	skb->priority = CPL_PRIORITY_DATA;
-	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-int iwch_resume_tid(struct iwch_ep *ep)
-{
-	struct cpl_set_tcb_field *req;
-	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
-	if (!skb)
-		return -ENOMEM;
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
-	req->reply = 0;
-	req->cpu_idx = 0;
-	req->word = htons(W_TCB_RX_QUIESCE);
-	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
-	req->val = 0;
-
-	skb->priority = CPL_PRIORITY_DATA;
-	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static void set_emss(struct iwch_ep *ep, u16 opt)
-{
-	pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
-	ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
-	if (G_TCPOPT_TSTAMP(opt))
-		ep->emss -= 12;
-	if (ep->emss < 128)
-		ep->emss = 128;
-	pr_debug("emss=%d\n", ep->emss);
-}
-
-static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
-{
-	unsigned long flags;
-	enum iwch_ep_state state;
-
-	spin_lock_irqsave(&epc->lock, flags);
-	state = epc->state;
-	spin_unlock_irqrestore(&epc->lock, flags);
-	return state;
-}
-
-static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
-	epc->state = new;
-}
-
-static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&epc->lock, flags);
-	pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
-	__state_set(epc, new);
-	spin_unlock_irqrestore(&epc->lock, flags);
-	return;
-}
-
-static void *alloc_ep(int size, gfp_t gfp)
-{
-	struct iwch_ep_common *epc;
-
-	epc = kzalloc(size, gfp);
-	if (epc) {
-		kref_init(&epc->kref);
-		spin_lock_init(&epc->lock);
-		init_waitqueue_head(&epc->waitq);
-	}
-	pr_debug("%s alloc ep %p\n", __func__, epc);
-	return epc;
-}
-
-void __free_ep(struct kref *kref)
-{
-	struct iwch_ep *ep;
-	ep = container_of(container_of(kref, struct iwch_ep_common, kref),
-			  struct iwch_ep, com);
-	pr_debug("%s ep %p state %s\n",
-		 __func__, ep, states[state_read(&ep->com)]);
-	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
-		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
-		dst_release(ep->dst);
-		l2t_release(ep->com.tdev, ep->l2t);
-	}
-	kfree(ep);
-}
-
-static void release_ep_resources(struct iwch_ep *ep)
-{
-	pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-	set_bit(RELEASE_RESOURCES, &ep->com.flags);
-	put_ep(&ep->com);
-}
-
-static int status2errno(int status)
-{
-	switch (status) {
-	case CPL_ERR_NONE:
-		return 0;
-	case CPL_ERR_CONN_RESET:
-		return -ECONNRESET;
-	case CPL_ERR_ARP_MISS:
-		return -EHOSTUNREACH;
-	case CPL_ERR_CONN_TIMEDOUT:
-		return -ETIMEDOUT;
-	case CPL_ERR_TCAM_FULL:
-		return -ENOMEM;
-	case CPL_ERR_CONN_EXIST:
-		return -EADDRINUSE;
-	default:
-		return -EIO;
-	}
-}
-
-/*
- * Try and reuse skbs already allocated...
- */
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
-{
-	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
-		skb_trim(skb, 0);
-		skb_get(skb);
-	} else {
-		skb = alloc_skb(len, gfp);
-	}
-	return skb;
-}
-
-static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
-				 __be32 peer_ip, __be16 local_port,
-				 __be16 peer_port, u8 tos)
-{
-	struct rtable *rt;
-	struct flowi4 fl4;
-
-	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
-				   peer_port, local_port, IPPROTO_TCP,
-				   tos, 0);
-	if (IS_ERR(rt))
-		return NULL;
-	return rt;
-}
-
-static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
-	int i = 0;
-
-	while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
-		++i;
-	return i;
-}
-
-static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
-{
-	pr_debug("%s t3cdev %p\n", __func__, dev);
-	kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for an active open.
- */
-static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
-	pr_err("ARP failure during connect\n");
-	kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
- * and send it along.
- */
-static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
-	struct cpl_abort_req *req = cplhdr(skb);
-
-	pr_debug("%s t3cdev %p\n", __func__, dev);
-	req->cmd = CPL_ABORT_NO_RST;
-	iwch_cxgb3_ofld_send(dev, skb);
-}
-
-static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
-{
-	struct cpl_close_con_req *req;
-	struct sk_buff *skb;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	skb = get_skb(NULL, sizeof(*req), gfp);
-	if (!skb) {
-		pr_err("%s - failed to alloc skb\n", __func__);
-		return -ENOMEM;
-	}
-	skb->priority = CPL_PRIORITY_DATA;
-	set_arp_failure_handler(skb, arp_failure_discard);
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
-	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
-	struct cpl_abort_req *req;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	skb = get_skb(skb, sizeof(*req), gfp);
-	if (!skb) {
-		pr_err("%s - failed to alloc skb\n", __func__);
-		return -ENOMEM;
-	}
-	skb->priority = CPL_PRIORITY_DATA;
-	set_arp_failure_handler(skb, abort_arp_failure);
-	req = skb_put_zero(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
-	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
-	req->cmd = CPL_ABORT_SEND_RST;
-	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_connect(struct iwch_ep *ep)
-{
-	struct cpl_act_open_req *req;
-	struct sk_buff *skb;
-	u32 opt0h, opt0l, opt2;
-	unsigned int mtu_idx;
-	int wscale;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-
-	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("%s - failed to alloc skb\n", __func__);
-		return -ENOMEM;
-	}
-	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
-	wscale = compute_wscale(rcv_win);
-	opt0h = V_NAGLE(0) |
-	    V_NO_CONG(nocong) |
-	    V_KEEP_ALIVE(1) |
-	    F_TCAM_BYPASS |
-	    V_WND_SCALE(wscale) |
-	    V_MSS_IDX(mtu_idx) |
-	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
-	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
-	opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
-	       V_CONG_CONTROL_FLAVOR(cong_flavor);
-	skb->priority = CPL_PRIORITY_SETUP;
-	set_arp_failure_handler(skb, act_open_req_arp_failure);
-
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
-	req->local_port = ep->com.local_addr.sin_port;
-	req->peer_port = ep->com.remote_addr.sin_port;
-	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
-	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
-	req->opt0h = htonl(opt0h);
-	req->opt0l = htonl(opt0l);
-	req->params = 0;
-	req->opt2 = htonl(opt2);
-	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
-{
-	int mpalen;
-	struct tx_data_wr *req;
-	struct mpa_message *mpa;
-	int len;
-
-	pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
-
-	BUG_ON(skb_cloned(skb));
-
-	mpalen = sizeof(*mpa) + ep->plen;
-	if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
-		kfree_skb(skb);
-		skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
-		if (!skb) {
-			connect_reply_upcall(ep, -ENOMEM);
-			return;
-		}
-	}
-	skb_trim(skb, 0);
-	skb_reserve(skb, sizeof(*req));
-	skb_put(skb, mpalen);
-	skb->priority = CPL_PRIORITY_DATA;
-	mpa = (struct mpa_message *) skb->data;
-	memset(mpa, 0, sizeof(*mpa));
-	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
-	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
-		     (markers_enabled ? MPA_MARKERS : 0);
-	mpa->private_data_size = htons(ep->plen);
-	mpa->revision = mpa_rev;
-
-	if (ep->plen)
-		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
-
-	/*
-	 * Reference the mpa skb.  This ensures the data area
-	 * will remain in memory until the hw acks the tx.
-	 * Function tx_ack() will deref it.
-	 */
-	skb_get(skb);
-	set_arp_failure_handler(skb, arp_failure_discard);
-	skb_reset_transport_header(skb);
-	len = skb->len;
-	req = skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
-	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
-	req->len = htonl(len);
-	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
-			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_INIT);
-	req->sndseq = htonl(ep->snd_seq);
-	BUG_ON(ep->mpa_skb);
-	ep->mpa_skb = skb;
-	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-	start_ep_timer(ep);
-	state_set(&ep->com, MPA_REQ_SENT);
-	return;
-}
-
-static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
-	int mpalen;
-	struct tx_data_wr *req;
-	struct mpa_message *mpa;
-	struct sk_buff *skb;
-
-	pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
-	mpalen = sizeof(*mpa) + plen;
-
-	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("%s - cannot alloc skb!\n", __func__);
-		return -ENOMEM;
-	}
-	skb_reserve(skb, sizeof(*req));
-	mpa = skb_put(skb, mpalen);
-	memset(mpa, 0, sizeof(*mpa));
-	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
-	mpa->flags = MPA_REJECT;
-	mpa->revision = mpa_rev;
-	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
-
-	/*
-	 * Reference the mpa skb again.  This ensures the data area
-	 * will remain in memory until the hw acks the tx.
-	 * Function tx_ack() will deref it.
-	 */
-	skb_get(skb);
-	skb->priority = CPL_PRIORITY_DATA;
-	set_arp_failure_handler(skb, arp_failure_discard);
-	skb_reset_transport_header(skb);
-	req = skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
-	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
-	req->len = htonl(mpalen);
-	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
-			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_INIT);
-	req->sndseq = htonl(ep->snd_seq);
-	BUG_ON(ep->mpa_skb);
-	ep->mpa_skb = skb;
-	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
-	int mpalen;
-	struct tx_data_wr *req;
-	struct mpa_message *mpa;
-	int len;
-	struct sk_buff *skb;
-
-	pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
-	mpalen = sizeof(*mpa) + plen;
-
-	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("%s - cannot alloc skb!\n", __func__);
-		return -ENOMEM;
-	}
-	skb->priority = CPL_PRIORITY_DATA;
-	skb_reserve(skb, sizeof(*req));
-	mpa = skb_put(skb, mpalen);
-	memset(mpa, 0, sizeof(*mpa));
-	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
-	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
-		     (markers_enabled ? MPA_MARKERS : 0);
-	mpa->revision = mpa_rev;
-	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
-
-	/*
-	 * Reference the mpa skb.  This ensures the data area
-	 * will remain in memory until the hw acks the tx.
-	 * Function tx_ack() will deref it.
-	 */
-	skb_get(skb);
-	set_arp_failure_handler(skb, arp_failure_discard);
-	skb_reset_transport_header(skb);
-	len = skb->len;
-	req = skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
-	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
-	req->len = htonl(len);
-	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
-			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_INIT);
-	req->sndseq = htonl(ep->snd_seq);
-	ep->mpa_skb = skb;
-	state_set(&ep->com, MPA_REP_SENT);
-	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct cpl_act_establish *req = cplhdr(skb);
-	unsigned int tid = GET_TID(req);
-
-	pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
-
-	dst_confirm(ep->dst);
-
-	/* setup the hwtid for this connection */
-	ep->hwtid = tid;
-	cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
-
-	ep->snd_seq = ntohl(req->snd_isn);
-	ep->rcv_seq = ntohl(req->rcv_isn);
-
-	set_emss(ep, ntohs(req->tcp_opt));
-
-	/* dealloc the atid */
-	cxgb3_free_atid(ep->com.tdev, ep->atid);
-
-	/* start MPA negotiation */
-	send_mpa_req(ep, skb);
-
-	return 0;
-}
-
-static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
-	pr_debug("%s ep %p\n", __FILE__, ep);
-	state_set(&ep->com, ABORTING);
-	send_abort(ep, skb, gfp);
-}
-
-static void close_complete_upcall(struct iwch_ep *ep)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_CLOSE;
-	if (ep->com.cm_id) {
-		pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
-			 ep, ep->com.cm_id, ep->hwtid);
-		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
-	}
-}
-
-static void peer_close_upcall(struct iwch_ep *ep)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_DISCONNECT;
-	if (ep->com.cm_id) {
-		pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
-			 ep, ep->com.cm_id, ep->hwtid);
-		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-	}
-}
-
-static void peer_abort_upcall(struct iwch_ep *ep)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_CLOSE;
-	event.status = -ECONNRESET;
-	if (ep->com.cm_id) {
-		pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
-			 ep->com.cm_id, ep->hwtid);
-		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
-	}
-}
-
-static void connect_reply_upcall(struct iwch_ep *ep, int status)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p status %d\n", __func__, ep, status);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_CONNECT_REPLY;
-	event.status = status;
-	memcpy(&event.local_addr, &ep->com.local_addr,
-	       sizeof(ep->com.local_addr));
-	memcpy(&event.remote_addr, &ep->com.remote_addr,
-	       sizeof(ep->com.remote_addr));
-
-	if ((status == 0) || (status == -ECONNREFUSED)) {
-		event.private_data_len = ep->plen;
-		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
-	}
-	if (ep->com.cm_id) {
-		pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
-			 ep->hwtid, status);
-		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-	}
-	if (status < 0) {
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
-	}
-}
-
-static void connect_request_upcall(struct iwch_ep *ep)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_CONNECT_REQUEST;
-	memcpy(&event.local_addr, &ep->com.local_addr,
-	       sizeof(ep->com.local_addr));
-	memcpy(&event.remote_addr, &ep->com.remote_addr,
-	       sizeof(ep->com.local_addr));
-	event.private_data_len = ep->plen;
-	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
-	event.provider_data = ep;
-	/*
-	 * Until ird/ord negotiation via MPAv2 support is added, send max
-	 * supported values
-	 */
-	event.ird = event.ord = 8;
-	if (state_read(&ep->parent_ep->com) != DEAD) {
-		get_ep(&ep->com);
-		ep->parent_ep->com.cm_id->event_handler(
-						ep->parent_ep->com.cm_id,
-						&event);
-	}
-	put_ep(&ep->parent_ep->com);
-	ep->parent_ep = NULL;
-}
-
-static void established_upcall(struct iwch_ep *ep)
-{
-	struct iw_cm_event event;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	memset(&event, 0, sizeof(event));
-	event.event = IW_CM_EVENT_ESTABLISHED;
-	/*
-	 * Until ird/ord negotiation via MPAv2 support is added, send max
-	 * supported values
-	 */
-	event.ird = event.ord = 8;
-	if (ep->com.cm_id) {
-		pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-	}
-}
-
-static int update_rx_credits(struct iwch_ep *ep, u32 credits)
-{
-	struct cpl_rx_data_ack *req;
-	struct sk_buff *skb;
-
-	pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
-	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("update_rx_credits - cannot alloc skb!\n");
-		return 0;
-	}
-
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
-	req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
-	skb->priority = CPL_PRIORITY_ACK;
-	iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-	return credits;
-}
-
-static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
-{
-	struct mpa_message *mpa;
-	u16 plen;
-	struct iwch_qp_attributes attrs;
-	enum iwch_qp_attr_mask mask;
-	int err;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-
-	/*
-	 * Stop mpa timer.  If it expired, then the state has
-	 * changed and we bail since ep_timeout already aborted
-	 * the connection.
-	 */
-	stop_ep_timer(ep);
-	if (state_read(&ep->com) != MPA_REQ_SENT)
-		return;
-
-	/*
-	 * If we get more than the supported amount of private data
-	 * then we must fail this connection.
-	 */
-	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	/*
-	 * copy the new data into our accumulation buffer.
-	 */
-	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
-				  skb->len);
-	ep->mpa_pkt_len += skb->len;
-
-	/*
-	 * if we don't even have the mpa message, then bail.
-	 */
-	if (ep->mpa_pkt_len < sizeof(*mpa))
-		return;
-	mpa = (struct mpa_message *) ep->mpa_pkt;
-
-	/* Validate MPA header. */
-	if (mpa->revision != mpa_rev) {
-		err = -EPROTO;
-		goto err;
-	}
-	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
-		err = -EPROTO;
-		goto err;
-	}
-
-	plen = ntohs(mpa->private_data_size);
-
-	/*
-	 * Fail if there's too much private data.
-	 */
-	if (plen > MPA_MAX_PRIVATE_DATA) {
-		err = -EPROTO;
-		goto err;
-	}
-
-	/*
-	 * If plen does not account for pkt size
-	 */
-	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
-		err = -EPROTO;
-		goto err;
-	}
-
-	ep->plen = (u8) plen;
-
-	/*
-	 * If we don't have all the pdata yet, then bail.
-	 * We'll continue process when more data arrives.
-	 */
-	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
-		return;
-
-	if (mpa->flags & MPA_REJECT) {
-		err = -ECONNREFUSED;
-		goto err;
-	}
-
-	/*
-	 * If we get here we have accumulated the entire mpa
-	 * start reply message including private data. And
-	 * the MPA header is valid.
-	 */
-	state_set(&ep->com, FPDU_MODE);
-	ep->mpa_attr.initiator = 1;
-	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
-	ep->mpa_attr.recv_marker_enabled = markers_enabled;
-	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
-		 __func__,
-		 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-		 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
-	attrs.mpa_attr = ep->mpa_attr;
-	attrs.max_ird = ep->ird;
-	attrs.max_ord = ep->ord;
-	attrs.llp_stream_handle = ep;
-	attrs.next_state = IWCH_QP_STATE_RTS;
-
-	mask = IWCH_QP_ATTR_NEXT_STATE |
-	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
-	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
-
-	/* bind QP and TID with INIT_WR */
-	err = iwch_modify_qp(ep->com.qp->rhp,
-			     ep->com.qp, mask, &attrs, 1);
-	if (err)
-		goto err;
-
-	if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
-		iwch_post_zb_read(ep);
-	}
-
-	goto out;
-err:
-	abort_connection(ep, skb, GFP_KERNEL);
-out:
-	connect_reply_upcall(ep, err);
-	return;
-}
-
-static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
-{
-	struct mpa_message *mpa;
-	u16 plen;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-
-	/*
-	 * Stop mpa timer.  If it expired, then the state has
-	 * changed and we bail since ep_timeout already aborted
-	 * the connection.
-	 */
-	stop_ep_timer(ep);
-	if (state_read(&ep->com) != MPA_REQ_WAIT)
-		return;
-
-	/*
-	 * If we get more than the supported amount of private data
-	 * then we must fail this connection.
-	 */
-	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
-		abort_connection(ep, skb, GFP_KERNEL);
-		return;
-	}
-
-	pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-
-	/*
-	 * Copy the new data into our accumulation buffer.
-	 */
-	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
-				  skb->len);
-	ep->mpa_pkt_len += skb->len;
-
-	/*
-	 * If we don't even have the mpa message, then bail.
-	 * We'll continue process when more data arrives.
-	 */
-	if (ep->mpa_pkt_len < sizeof(*mpa))
-		return;
-	pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-	mpa = (struct mpa_message *) ep->mpa_pkt;
-
-	/*
-	 * Validate MPA Header.
-	 */
-	if (mpa->revision != mpa_rev) {
-		abort_connection(ep, skb, GFP_KERNEL);
-		return;
-	}
-
-	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
-		abort_connection(ep, skb, GFP_KERNEL);
-		return;
-	}
-
-	plen = ntohs(mpa->private_data_size);
-
-	/*
-	 * Fail if there's too much private data.
-	 */
-	if (plen > MPA_MAX_PRIVATE_DATA) {
-		abort_connection(ep, skb, GFP_KERNEL);
-		return;
-	}
-
-	/*
-	 * If plen does not account for pkt size
-	 */
-	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
-		abort_connection(ep, skb, GFP_KERNEL);
-		return;
-	}
-	ep->plen = (u8) plen;
-
-	/*
-	 * If we don't have all the pdata yet, then bail.
-	 */
-	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
-		return;
-
-	/*
-	 * If we get here we have accumulated the entire mpa
-	 * start reply message including private data.
-	 */
-	ep->mpa_attr.initiator = 0;
-	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
-	ep->mpa_attr.recv_marker_enabled = markers_enabled;
-	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
-		 __func__,
-		 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-		 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
-	state_set(&ep->com, MPA_REQ_RCVD);
-
-	/* drive upcall */
-	connect_request_upcall(ep);
-	return;
-}
-
-static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct cpl_rx_data *hdr = cplhdr(skb);
-	unsigned int dlen = ntohs(hdr->len);
-
-	pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
-
-	skb_pull(skb, sizeof(*hdr));
-	skb_trim(skb, dlen);
-
-	ep->rcv_seq += dlen;
-	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
-
-	switch (state_read(&ep->com)) {
-	case MPA_REQ_SENT:
-		process_mpa_reply(ep, skb);
-		break;
-	case MPA_REQ_WAIT:
-		process_mpa_request(ep, skb);
-		break;
-	case MPA_REP_SENT:
-		break;
-	default:
-		pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
-		       __func__, ep, state_read(&ep->com), ep->hwtid);
-
-		/*
-		 * The ep will timeout and inform the ULP of the failure.
-		 * See ep_timeout().
-		 */
-		break;
-	}
-
-	/* update RX credits */
-	update_rx_credits(ep, dlen);
-
-	return CPL_RET_BUF_DONE;
-}
-
-/*
- * Upcall from the adapter indicating data has been transmitted.
- * For us its just the single MPA request or reply.  We can now free
- * the skb holding the mpa message.
- */
-static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct cpl_wr_ack *hdr = cplhdr(skb);
-	unsigned int credits = ntohs(hdr->credits);
-	unsigned long flags;
-	int post_zb = 0;
-
-	pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
-
-	if (credits == 0) {
-		pr_debug("%s 0 credit ack  ep %p state %u\n",
-			 __func__, ep, state_read(&ep->com));
-		return CPL_RET_BUF_DONE;
-	}
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-	BUG_ON(credits != 1);
-	dst_confirm(ep->dst);
-	if (!ep->mpa_skb) {
-		pr_debug("%s rdma_init wr_ack ep %p state %u\n",
-			 __func__, ep, ep->com.state);
-		if (ep->mpa_attr.initiator) {
-			pr_debug("%s initiator ep %p state %u\n",
-				 __func__, ep, ep->com.state);
-			if (peer2peer && ep->com.state == FPDU_MODE)
-				post_zb = 1;
-		} else {
-			pr_debug("%s responder ep %p state %u\n",
-				 __func__, ep, ep->com.state);
-			if (ep->com.state == MPA_REQ_RCVD) {
-				ep->com.rpl_done = 1;
-				wake_up(&ep->com.waitq);
-			}
-		}
-	} else {
-		pr_debug("%s lsm ack ep %p state %u freeing skb\n",
-			 __func__, ep, ep->com.state);
-		kfree_skb(ep->mpa_skb);
-		ep->mpa_skb = NULL;
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-	if (post_zb)
-		iwch_post_zb_read(ep);
-	return CPL_RET_BUF_DONE;
-}
-
-static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	unsigned long flags;
-	int release = 0;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	BUG_ON(!ep);
-
-	/*
-	 * We get 2 abort replies from the HW.  The first one must
-	 * be ignored except for scribbling that we need one more.
-	 */
-	if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
-		return CPL_RET_BUF_DONE;
-	}
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-	switch (ep->com.state) {
-	case ABORTING:
-		close_complete_upcall(ep);
-		__state_set(&ep->com, DEAD);
-		release = 1;
-		break;
-	default:
-		pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
-		break;
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-
-	if (release)
-		release_ep_resources(ep);
-	return CPL_RET_BUF_DONE;
-}
-
-/*
- * Return whether a failed active open has allocated a TID
- */
-static inline int act_open_has_tid(int status)
-{
-	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
-	       status != CPL_ERR_ARP_MISS;
-}
-
-static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct cpl_act_open_rpl *rpl = cplhdr(skb);
-
-	pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
-		 status2errno(rpl->status));
-	connect_reply_upcall(ep, status2errno(rpl->status));
-	state_set(&ep->com, DEAD);
-	if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
-		release_tid(ep->com.tdev, GET_TID(rpl), NULL);
-	cxgb3_free_atid(ep->com.tdev, ep->atid);
-	dst_release(ep->dst);
-	l2t_release(ep->com.tdev, ep->l2t);
-	put_ep(&ep->com);
-	return CPL_RET_BUF_DONE;
-}
-
-static int listen_start(struct iwch_listen_ep *ep)
-{
-	struct sk_buff *skb;
-	struct cpl_pass_open_req *req;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("t3c_listen_start failed to alloc skb!\n");
-		return -ENOMEM;
-	}
-
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
-	req->local_port = ep->com.local_addr.sin_port;
-	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
-	req->peer_port = 0;
-	req->peer_ip = 0;
-	req->peer_netmask = 0;
-	req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
-	req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
-	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
-
-	skb->priority = 1;
-	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_listen_ep *ep = ctx;
-	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
-
-	pr_debug("%s ep %p status %d error %d\n", __func__, ep,
-		 rpl->status, status2errno(rpl->status));
-	ep->com.rpl_err = status2errno(rpl->status);
-	ep->com.rpl_done = 1;
-	wake_up(&ep->com.waitq);
-
-	return CPL_RET_BUF_DONE;
-}
-
-static int listen_stop(struct iwch_listen_ep *ep)
-{
-	struct sk_buff *skb;
-	struct cpl_close_listserv_req *req;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-	if (!skb) {
-		pr_err("%s - failed to alloc skb\n", __func__);
-		return -ENOMEM;
-	}
-	req = skb_put(skb, sizeof(*req));
-	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	req->cpu_idx = 0;
-	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
-	skb->priority = 1;
-	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
-			     void *ctx)
-{
-	struct iwch_listen_ep *ep = ctx;
-	struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	ep->com.rpl_err = status2errno(rpl->status);
-	ep->com.rpl_done = 1;
-	wake_up(&ep->com.waitq);
-	return CPL_RET_BUF_DONE;
-}
-
-static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
-{
-	struct cpl_pass_accept_rpl *rpl;
-	unsigned int mtu_idx;
-	u32 opt0h, opt0l, opt2;
-	int wscale;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	BUG_ON(skb_cloned(skb));
-	skb_trim(skb, sizeof(*rpl));
-	skb_get(skb);
-	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
-	wscale = compute_wscale(rcv_win);
-	opt0h = V_NAGLE(0) |
-	    V_NO_CONG(nocong) |
-	    V_KEEP_ALIVE(1) |
-	    F_TCAM_BYPASS |
-	    V_WND_SCALE(wscale) |
-	    V_MSS_IDX(mtu_idx) |
-	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
-	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
-	opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
-	       V_CONG_CONTROL_FLAVOR(cong_flavor);
-
-	rpl = cplhdr(skb);
-	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
-	rpl->peer_ip = peer_ip;
-	rpl->opt0h = htonl(opt0h);
-	rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
-	rpl->opt2 = htonl(opt2);
-	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
-	skb->priority = CPL_PRIORITY_SETUP;
-	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-
-	return;
-}
-
-static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
-		      struct sk_buff *skb)
-{
-	pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
-		 peer_ip);
-	BUG_ON(skb_cloned(skb));
-	skb_trim(skb, sizeof(struct cpl_tid_release));
-	skb_get(skb);
-
-	if (tdev->type != T3A)
-		release_tid(tdev, hwtid, skb);
-	else {
-		struct cpl_pass_accept_rpl *rpl;
-
-		rpl = cplhdr(skb);
-		skb->priority = CPL_PRIORITY_SETUP;
-		rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
-		OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
-						      hwtid));
-		rpl->peer_ip = peer_ip;
-		rpl->opt0h = htonl(F_TCAM_BYPASS);
-		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
-		rpl->opt2 = 0;
-		rpl->rsvd = rpl->opt2;
-		iwch_cxgb3_ofld_send(tdev, skb);
-	}
-}
-
-static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *child_ep, *parent_ep = ctx;
-	struct cpl_pass_accept_req *req = cplhdr(skb);
-	unsigned int hwtid = GET_TID(req);
-	struct dst_entry *dst;
-	struct l2t_entry *l2t;
-	struct rtable *rt;
-	struct iff_mac tim;
-
-	pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
-
-	if (state_read(&parent_ep->com) != LISTEN) {
-		pr_err("%s - listening ep not in LISTEN\n", __func__);
-		goto reject;
-	}
-
-	/*
-	 * Find the netdev for this connection request.
-	 */
-	tim.mac_addr = req->dst_mac;
-	tim.vlan_tag = ntohs(req->vlan_tag);
-	if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
-		pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
-		goto reject;
-	}
-
-	/* Find output route */
-	rt = find_route(tdev,
-			req->local_ip,
-			req->peer_ip,
-			req->local_port,
-			req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
-	if (!rt) {
-		pr_err("%s - failed to find dst entry!\n", __func__);
-		goto reject;
-	}
-	dst = &rt->dst;
-	l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
-	if (!l2t) {
-		pr_err("%s - failed to allocate l2t entry!\n", __func__);
-		dst_release(dst);
-		goto reject;
-	}
-	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
-	if (!child_ep) {
-		pr_err("%s - failed to allocate ep entry!\n", __func__);
-		l2t_release(tdev, l2t);
-		dst_release(dst);
-		goto reject;
-	}
-	state_set(&child_ep->com, CONNECTING);
-	child_ep->com.tdev = tdev;
-	child_ep->com.cm_id = NULL;
-	child_ep->com.local_addr.sin_family = AF_INET;
-	child_ep->com.local_addr.sin_port = req->local_port;
-	child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
-	child_ep->com.remote_addr.sin_family = AF_INET;
-	child_ep->com.remote_addr.sin_port = req->peer_port;
-	child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
-	get_ep(&parent_ep->com);
-	child_ep->parent_ep = parent_ep;
-	child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
-	child_ep->l2t = l2t;
-	child_ep->dst = dst;
-	child_ep->hwtid = hwtid;
-	timer_setup(&child_ep->timer, ep_timeout, 0);
-	cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
-	accept_cr(child_ep, req->peer_ip, skb);
-	goto out;
-reject:
-	reject_cr(tdev, hwtid, req->peer_ip, skb);
-out:
-	return CPL_RET_BUF_DONE;
-}
-
-static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct cpl_pass_establish *req = cplhdr(skb);
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	ep->snd_seq = ntohl(req->snd_isn);
-	ep->rcv_seq = ntohl(req->rcv_isn);
-
-	set_emss(ep, ntohs(req->tcp_opt));
-
-	dst_confirm(ep->dst);
-	state_set(&ep->com, MPA_REQ_WAIT);
-	start_ep_timer(ep);
-
-	return CPL_RET_BUF_DONE;
-}
-
-static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct iwch_qp_attributes attrs;
-	unsigned long flags;
-	int disconnect = 1;
-	int release = 0;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	dst_confirm(ep->dst);
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-	switch (ep->com.state) {
-	case MPA_REQ_WAIT:
-		__state_set(&ep->com, CLOSING);
-		break;
-	case MPA_REQ_SENT:
-		__state_set(&ep->com, CLOSING);
-		connect_reply_upcall(ep, -ECONNRESET);
-		break;
-	case MPA_REQ_RCVD:
-
-		/*
-		 * We're gonna mark this puppy DEAD, but keep
-		 * the reference on it until the ULP accepts or
-		 * rejects the CR. Also wake up anyone waiting
-		 * in rdma connection migration (see iwch_accept_cr()).
-		 */
-		__state_set(&ep->com, CLOSING);
-		ep->com.rpl_done = 1;
-		ep->com.rpl_err = -ECONNRESET;
-		pr_debug("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
-		break;
-	case MPA_REP_SENT:
-		__state_set(&ep->com, CLOSING);
-		ep->com.rpl_done = 1;
-		ep->com.rpl_err = -ECONNRESET;
-		pr_debug("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
-		break;
-	case FPDU_MODE:
-		start_ep_timer(ep);
-		__state_set(&ep->com, CLOSING);
-		attrs.next_state = IWCH_QP_STATE_CLOSING;
-		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
-			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
-		peer_close_upcall(ep);
-		break;
-	case ABORTING:
-		disconnect = 0;
-		break;
-	case CLOSING:
-		__state_set(&ep->com, MORIBUND);
-		disconnect = 0;
-		break;
-	case MORIBUND:
-		stop_ep_timer(ep);
-		if (ep->com.cm_id && ep->com.qp) {
-			attrs.next_state = IWCH_QP_STATE_IDLE;
-			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
-				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
-		}
-		close_complete_upcall(ep);
-		__state_set(&ep->com, DEAD);
-		release = 1;
-		disconnect = 0;
-		break;
-	case DEAD:
-		disconnect = 0;
-		break;
-	default:
-		BUG_ON(1);
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-	if (disconnect)
-		iwch_ep_disconnect(ep, 0, GFP_KERNEL);
-	if (release)
-		release_ep_resources(ep);
-	return CPL_RET_BUF_DONE;
-}
-
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
-	return status == CPL_ERR_RTX_NEG_ADVICE ||
-	       status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
-static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct cpl_abort_req_rss *req = cplhdr(skb);
-	struct iwch_ep *ep = ctx;
-	struct cpl_abort_rpl *rpl;
-	struct sk_buff *rpl_skb;
-	struct iwch_qp_attributes attrs;
-	int ret;
-	int release = 0;
-	unsigned long flags;
-
-	if (is_neg_adv_abort(req->status)) {
-		pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
-			 ep->hwtid);
-		t3_l2t_send_event(ep->com.tdev, ep->l2t);
-		return CPL_RET_BUF_DONE;
-	}
-
-	/*
-	 * We get 2 peer aborts from the HW.  The first one must
-	 * be ignored except for scribbling that we need one more.
-	 */
-	if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
-		return CPL_RET_BUF_DONE;
-	}
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-	pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
-	switch (ep->com.state) {
-	case CONNECTING:
-		break;
-	case MPA_REQ_WAIT:
-		stop_ep_timer(ep);
-		break;
-	case MPA_REQ_SENT:
-		stop_ep_timer(ep);
-		connect_reply_upcall(ep, -ECONNRESET);
-		break;
-	case MPA_REP_SENT:
-		ep->com.rpl_done = 1;
-		ep->com.rpl_err = -ECONNRESET;
-		pr_debug("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
-		break;
-	case MPA_REQ_RCVD:
-
-		/*
-		 * We're gonna mark this puppy DEAD, but keep
-		 * the reference on it until the ULP accepts or
-		 * rejects the CR. Also wake up anyone waiting
-		 * in rdma connection migration (see iwch_accept_cr()).
-		 */
-		ep->com.rpl_done = 1;
-		ep->com.rpl_err = -ECONNRESET;
-		pr_debug("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
-		break;
-	case MORIBUND:
-	case CLOSING:
-		stop_ep_timer(ep);
-		/*FALLTHROUGH*/
-	case FPDU_MODE:
-		if (ep->com.cm_id && ep->com.qp) {
-			attrs.next_state = IWCH_QP_STATE_ERROR;
-			ret = iwch_modify_qp(ep->com.qp->rhp,
-				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
-				     &attrs, 1);
-			if (ret)
-				pr_err("%s - qp <- error failed!\n", __func__);
-		}
-		peer_abort_upcall(ep);
-		break;
-	case ABORTING:
-		break;
-	case DEAD:
-		pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
-		spin_unlock_irqrestore(&ep->com.lock, flags);
-		return CPL_RET_BUF_DONE;
-	default:
-		BUG_ON(1);
-		break;
-	}
-	dst_confirm(ep->dst);
-	if (ep->com.state != ABORTING) {
-		__state_set(&ep->com, DEAD);
-		release = 1;
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-
-	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
-	if (!rpl_skb) {
-		pr_err("%s - cannot allocate skb!\n", __func__);
-		release = 1;
-		goto out;
-	}
-	rpl_skb->priority = CPL_PRIORITY_DATA;
-	rpl = skb_put(rpl_skb, sizeof(*rpl));
-	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
-	rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
-	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
-	rpl->cmd = CPL_ABORT_NO_RST;
-	iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-out:
-	if (release)
-		release_ep_resources(ep);
-	return CPL_RET_BUF_DONE;
-}
-
-static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-	struct iwch_qp_attributes attrs;
-	unsigned long flags;
-	int release = 0;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	BUG_ON(!ep);
-
-	/* The cm_id may be null if we failed to connect */
-	spin_lock_irqsave(&ep->com.lock, flags);
-	switch (ep->com.state) {
-	case CLOSING:
-		__state_set(&ep->com, MORIBUND);
-		break;
-	case MORIBUND:
-		stop_ep_timer(ep);
-		if ((ep->com.cm_id) && (ep->com.qp)) {
-			attrs.next_state = IWCH_QP_STATE_IDLE;
-			iwch_modify_qp(ep->com.qp->rhp,
-					     ep->com.qp,
-					     IWCH_QP_ATTR_NEXT_STATE,
-					     &attrs, 1);
-		}
-		close_complete_upcall(ep);
-		__state_set(&ep->com, DEAD);
-		release = 1;
-		break;
-	case ABORTING:
-	case DEAD:
-		break;
-	default:
-		BUG_ON(1);
-		break;
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-	if (release)
-		release_ep_resources(ep);
-	return CPL_RET_BUF_DONE;
-}
-
-/*
- * T3A does 3 things when a TERM is received:
- * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
- * 2) generate an async event on the QP with the TERMINATE opcode
- * 3) post a TERMINATE opcode cqe into the associated CQ.
- *
- * For (1), we save the message in the qp for later consumer consumption.
- * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
- * For (3), we toss the CQE in cxio_poll_cq().
- *
- * terminate() handles case (1)...
- */
-static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep *ep = ctx;
-
-	if (state_read(&ep->com) != FPDU_MODE)
-		return CPL_RET_BUF_DONE;
-
-	pr_debug("%s ep %p\n", __func__, ep);
-	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
-	pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
-	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
-				  skb->len);
-	ep->com.qp->attr.terminate_msg_len = skb->len;
-	ep->com.qp->attr.is_terminate_local = 0;
-	return CPL_RET_BUF_DONE;
-}
-
-static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct cpl_rdma_ec_status *rep = cplhdr(skb);
-	struct iwch_ep *ep = ctx;
-
-	pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
-		 rep->status);
-	if (rep->status) {
-		struct iwch_qp_attributes attrs;
-
-		pr_err("%s BAD CLOSE - Aborting tid %u\n",
-		       __func__, ep->hwtid);
-		stop_ep_timer(ep);
-		attrs.next_state = IWCH_QP_STATE_ERROR;
-		iwch_modify_qp(ep->com.qp->rhp,
-			       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
-			       &attrs, 1);
-		abort_connection(ep, NULL, GFP_KERNEL);
-	}
-	return CPL_RET_BUF_DONE;
-}
-
-static void ep_timeout(struct timer_list *t)
-{
-	struct iwch_ep *ep = from_timer(ep, t, timer);
-	struct iwch_qp_attributes attrs;
-	unsigned long flags;
-	int abort = 1;
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-	pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
-		 ep->com.state);
-	switch (ep->com.state) {
-	case MPA_REQ_SENT:
-		__state_set(&ep->com, ABORTING);
-		connect_reply_upcall(ep, -ETIMEDOUT);
-		break;
-	case MPA_REQ_WAIT:
-		__state_set(&ep->com, ABORTING);
-		break;
-	case CLOSING:
-	case MORIBUND:
-		if (ep->com.cm_id && ep->com.qp) {
-			attrs.next_state = IWCH_QP_STATE_ERROR;
-			iwch_modify_qp(ep->com.qp->rhp,
-				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
-				     &attrs, 1);
-		}
-		__state_set(&ep->com, ABORTING);
-		break;
-	default:
-		WARN(1, "%s unexpected state ep %p state %u\n",
-			__func__, ep, ep->com.state);
-		abort = 0;
-	}
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-	if (abort)
-		abort_connection(ep, NULL, GFP_ATOMIC);
-	put_ep(&ep->com);
-}
-
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-	struct iwch_ep *ep = to_ep(cm_id);
-
-	pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-
-	if (state_read(&ep->com) == DEAD) {
-		put_ep(&ep->com);
-		return -ECONNRESET;
-	}
-	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
-	if (mpa_rev == 0)
-		abort_connection(ep, NULL, GFP_KERNEL);
-	else {
-		send_mpa_reject(ep, pdata, pdata_len);
-		iwch_ep_disconnect(ep, 0, GFP_KERNEL);
-	}
-	put_ep(&ep->com);
-	return 0;
-}
-
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
-	int err;
-	struct iwch_qp_attributes attrs;
-	enum iwch_qp_attr_mask mask;
-	struct iwch_ep *ep = to_ep(cm_id);
-	struct iwch_dev *h = to_iwch_dev(cm_id->device);
-	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
-
-	pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-	if (state_read(&ep->com) == DEAD) {
-		err = -ECONNRESET;
-		goto err;
-	}
-
-	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
-	BUG_ON(!qp);
-
-	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
-	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
-		abort_connection(ep, NULL, GFP_KERNEL);
-		err = -EINVAL;
-		goto err;
-	}
-
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->com.qp = qp;
-
-	ep->ird = conn_param->ird;
-	ep->ord = conn_param->ord;
-
-	if (peer2peer && ep->ird == 0)
-		ep->ird = 1;
-
-	pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
-
-	/* bind QP to EP and move to RTS */
-	attrs.mpa_attr = ep->mpa_attr;
-	attrs.max_ird = ep->ird;
-	attrs.max_ord = ep->ord;
-	attrs.llp_stream_handle = ep;
-	attrs.next_state = IWCH_QP_STATE_RTS;
-
-	/* bind QP and TID with INIT_WR */
-	mask = IWCH_QP_ATTR_NEXT_STATE |
-			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
-			     IWCH_QP_ATTR_MPA_ATTR |
-			     IWCH_QP_ATTR_MAX_IRD |
-			     IWCH_QP_ATTR_MAX_ORD;
-
-	err = iwch_modify_qp(ep->com.qp->rhp,
-			     ep->com.qp, mask, &attrs, 1);
-	if (err)
-		goto err1;
-
-	/* if needed, wait for wr_ack */
-	if (iwch_rqes_posted(qp)) {
-		wait_event(ep->com.waitq, ep->com.rpl_done);
-		err = ep->com.rpl_err;
-		if (err)
-			goto err1;
-	}
-
-	err = send_mpa_reply(ep, conn_param->private_data,
-			     conn_param->private_data_len);
-	if (err)
-		goto err1;
-
-
-	state_set(&ep->com, FPDU_MODE);
-	established_upcall(ep);
-	put_ep(&ep->com);
-	return 0;
-err1:
-	ep->com.cm_id = NULL;
-	ep->com.qp = NULL;
-	cm_id->rem_ref(cm_id);
-err:
-	put_ep(&ep->com);
-	return err;
-}
-
-static int is_loopback_dst(struct iw_cm_id *cm_id)
-{
-	struct net_device *dev;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
-	dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
-	if (!dev)
-		return 0;
-	dev_put(dev);
-	return 1;
-}
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
-	struct iwch_dev *h = to_iwch_dev(cm_id->device);
-	struct iwch_ep *ep;
-	struct rtable *rt;
-	int err = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
-	if (cm_id->m_remote_addr.ss_family != PF_INET) {
-		err = -ENOSYS;
-		goto out;
-	}
-
-	if (is_loopback_dst(cm_id)) {
-		err = -ENOSYS;
-		goto out;
-	}
-
-	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
-	if (!ep) {
-		pr_err("%s - cannot alloc ep\n", __func__);
-		err = -ENOMEM;
-		goto out;
-	}
-	timer_setup(&ep->timer, ep_timeout, 0);
-	ep->plen = conn_param->private_data_len;
-	if (ep->plen)
-		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
-		       conn_param->private_data, ep->plen);
-	ep->ird = conn_param->ird;
-	ep->ord = conn_param->ord;
-
-	if (peer2peer && ep->ord == 0)
-		ep->ord = 1;
-
-	ep->com.tdev = h->rdev.t3cdev_p;
-
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->com.qp = get_qhp(h, conn_param->qpn);
-	BUG_ON(!ep->com.qp);
-	pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
-		 ep->com.qp, cm_id);
-
-	/*
-	 * Allocate an active TID to initiate a TCP connection.
-	 */
-	ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
-	if (ep->atid == -1) {
-		pr_err("%s - cannot alloc atid\n", __func__);
-		err = -ENOMEM;
-		goto fail2;
-	}
-
-	/* find a route */
-	rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
-			raddr->sin_addr.s_addr, laddr->sin_port,
-			raddr->sin_port, IPTOS_LOWDELAY);
-	if (!rt) {
-		pr_err("%s - cannot find route\n", __func__);
-		err = -EHOSTUNREACH;
-		goto fail3;
-	}
-	ep->dst = &rt->dst;
-	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
-			     &raddr->sin_addr.s_addr);
-	if (!ep->l2t) {
-		pr_err("%s - cannot alloc l2e\n", __func__);
-		err = -ENOMEM;
-		goto fail4;
-	}
-
-	state_set(&ep->com, CONNECTING);
-	ep->tos = IPTOS_LOWDELAY;
-	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
-	       sizeof(ep->com.local_addr));
-	memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
-	       sizeof(ep->com.remote_addr));
-
-	/* send connect request to rnic */
-	err = send_connect(ep);
-	if (!err)
-		goto out;
-
-	l2t_release(h->rdev.t3cdev_p, ep->l2t);
-fail4:
-	dst_release(ep->dst);
-fail3:
-	cxgb3_free_atid(ep->com.tdev, ep->atid);
-fail2:
-	cm_id->rem_ref(cm_id);
-	put_ep(&ep->com);
-out:
-	return err;
-}
-
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
-	int err = 0;
-	struct iwch_dev *h = to_iwch_dev(cm_id->device);
-	struct iwch_listen_ep *ep;
-
-
-	might_sleep();
-
-	if (cm_id->m_local_addr.ss_family != PF_INET) {
-		err = -ENOSYS;
-		goto fail1;
-	}
-
-	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
-	if (!ep) {
-		pr_err("%s - cannot alloc ep\n", __func__);
-		err = -ENOMEM;
-		goto fail1;
-	}
-	pr_debug("%s ep %p\n", __func__, ep);
-	ep->com.tdev = h->rdev.t3cdev_p;
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->backlog = backlog;
-	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
-	       sizeof(ep->com.local_addr));
-
-	/*
-	 * Allocate a server TID.
-	 */
-	ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
-	if (ep->stid == -1) {
-		pr_err("%s - cannot alloc atid\n", __func__);
-		err = -ENOMEM;
-		goto fail2;
-	}
-
-	state_set(&ep->com, LISTEN);
-	err = listen_start(ep);
-	if (err)
-		goto fail3;
-
-	/* wait for pass_open_rpl */
-	wait_event(ep->com.waitq, ep->com.rpl_done);
-	err = ep->com.rpl_err;
-	if (!err) {
-		cm_id->provider_data = ep;
-		goto out;
-	}
-fail3:
-	cxgb3_free_stid(ep->com.tdev, ep->stid);
-fail2:
-	cm_id->rem_ref(cm_id);
-	put_ep(&ep->com);
-fail1:
-out:
-	return err;
-}
-
-int iwch_destroy_listen(struct iw_cm_id *cm_id)
-{
-	int err;
-	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
-
-	pr_debug("%s ep %p\n", __func__, ep);
-
-	might_sleep();
-	state_set(&ep->com, DEAD);
-	ep->com.rpl_done = 0;
-	ep->com.rpl_err = 0;
-	err = listen_stop(ep);
-	if (err)
-		goto done;
-	wait_event(ep->com.waitq, ep->com.rpl_done);
-	cxgb3_free_stid(ep->com.tdev, ep->stid);
-done:
-	err = ep->com.rpl_err;
-	cm_id->rem_ref(cm_id);
-	put_ep(&ep->com);
-	return err;
-}
-
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
-{
-	int ret=0;
-	unsigned long flags;
-	int close = 0;
-	int fatal = 0;
-	struct t3cdev *tdev;
-	struct cxio_rdev *rdev;
-
-	spin_lock_irqsave(&ep->com.lock, flags);
-
-	pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
-		 states[ep->com.state], abrupt);
-
-	tdev = (struct t3cdev *)ep->com.tdev;
-	rdev = (struct cxio_rdev *)tdev->ulp;
-	if (cxio_fatal_error(rdev)) {
-		fatal = 1;
-		close_complete_upcall(ep);
-		ep->com.state = DEAD;
-	}
-	switch (ep->com.state) {
-	case MPA_REQ_WAIT:
-	case MPA_REQ_SENT:
-	case MPA_REQ_RCVD:
-	case MPA_REP_SENT:
-	case FPDU_MODE:
-		close = 1;
-		if (abrupt)
-			ep->com.state = ABORTING;
-		else {
-			ep->com.state = CLOSING;
-			start_ep_timer(ep);
-		}
-		set_bit(CLOSE_SENT, &ep->com.flags);
-		break;
-	case CLOSING:
-		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
-			close = 1;
-			if (abrupt) {
-				stop_ep_timer(ep);
-				ep->com.state = ABORTING;
-			} else
-				ep->com.state = MORIBUND;
-		}
-		break;
-	case MORIBUND:
-	case ABORTING:
-	case DEAD:
-		pr_debug("%s ignoring disconnect ep %p state %u\n",
-			 __func__, ep, ep->com.state);
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	spin_unlock_irqrestore(&ep->com.lock, flags);
-	if (close) {
-		if (abrupt)
-			ret = send_abort(ep, NULL, gfp);
-		else
-			ret = send_halfclose(ep, gfp);
-		if (ret)
-			fatal = 1;
-	}
-	if (fatal)
-		release_ep_resources(ep);
-	return ret;
-}
-
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
-		     struct l2t_entry *l2t)
-{
-	struct iwch_ep *ep = ctx;
-
-	if (ep->dst != old)
-		return 0;
-
-	pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
-		 l2t);
-	dst_hold(new);
-	l2t_release(ep->com.tdev, ep->l2t);
-	ep->l2t = l2t;
-	dst_release(old);
-	ep->dst = new;
-	return 1;
-}
-
-/*
- * All the CM events are handled on a work queue to have a safe context.
- * These are the real handlers that are called from the work queue.
- */
-static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
-	[CPL_ACT_ESTABLISH]	= act_establish,
-	[CPL_ACT_OPEN_RPL]	= act_open_rpl,
-	[CPL_RX_DATA]		= rx_data,
-	[CPL_TX_DMA_ACK]	= tx_ack,
-	[CPL_ABORT_RPL_RSS]	= abort_rpl,
-	[CPL_ABORT_RPL]		= abort_rpl,
-	[CPL_PASS_OPEN_RPL]	= pass_open_rpl,
-	[CPL_CLOSE_LISTSRV_RPL]	= close_listsrv_rpl,
-	[CPL_PASS_ACCEPT_REQ]	= pass_accept_req,
-	[CPL_PASS_ESTABLISH]	= pass_establish,
-	[CPL_PEER_CLOSE]	= peer_close,
-	[CPL_ABORT_REQ_RSS]	= peer_abort,
-	[CPL_CLOSE_CON_RPL]	= close_con_rpl,
-	[CPL_RDMA_TERMINATE]	= terminate,
-	[CPL_RDMA_EC_STATUS]	= ec_status,
-};
-
-static void process_work(struct work_struct *work)
-{
-	struct sk_buff *skb = NULL;
-	void *ep;
-	struct t3cdev *tdev;
-	int ret;
-
-	while ((skb = skb_dequeue(&rxq))) {
-		ep = *((void **) (skb->cb));
-		tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
-		ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
-		if (ret & CPL_RET_BUF_DONE)
-			kfree_skb(skb);
-
-		/*
-		 * ep was referenced in sched(), and is freed here.
-		 */
-		put_ep((struct iwch_ep_common *)ep);
-	}
-}
-
-static DECLARE_WORK(skb_work, process_work);
-
-static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct iwch_ep_common *epc = ctx;
-
-	get_ep(epc);
-
-	/*
-	 * Save ctx and tdev in the skb->cb area.
-	 */
-	*((void **) skb->cb) = ctx;
-	*((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
-
-	/*
-	 * Queue the skb and schedule the worker thread.
-	 */
-	skb_queue_tail(&rxq, skb);
-	queue_work(workq, &skb_work);
-	return 0;
-}
-
-static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
-	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
-
-	if (rpl->status != CPL_ERR_NONE) {
-		pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
-		       rpl->status, GET_TID(rpl));
-	}
-	return CPL_RET_BUF_DONE;
-}
-
-/*
- * All upcalls from the T3 Core go to sched() to schedule the
- * processing on a work queue.
- */
-cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
-	[CPL_ACT_ESTABLISH]	= sched,
-	[CPL_ACT_OPEN_RPL]	= sched,
-	[CPL_RX_DATA]		= sched,
-	[CPL_TX_DMA_ACK]	= sched,
-	[CPL_ABORT_RPL_RSS]	= sched,
-	[CPL_ABORT_RPL]		= sched,
-	[CPL_PASS_OPEN_RPL]	= sched,
-	[CPL_CLOSE_LISTSRV_RPL]	= sched,
-	[CPL_PASS_ACCEPT_REQ]	= sched,
-	[CPL_PASS_ESTABLISH]	= sched,
-	[CPL_PEER_CLOSE]	= sched,
-	[CPL_CLOSE_CON_RPL]	= sched,
-	[CPL_ABORT_REQ_RSS]	= sched,
-	[CPL_RDMA_TERMINATE]	= sched,
-	[CPL_RDMA_EC_STATUS]	= sched,
-	[CPL_SET_TCB_RPL]	= set_tcb_rpl,
-};
-
-int __init iwch_cm_init(void)
-{
-	skb_queue_head_init(&rxq);
-
-	workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
-	if (!workq)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void __exit iwch_cm_term(void)
-{
-	flush_workqueue(workq);
-	destroy_workqueue(workq);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
deleted file mode 100644
index cc7fe644d260..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _IWCH_CM_H_
-#define _IWCH_CM_H_
-
-#include <linux/inet.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-
-#define MPA_KEY_REQ "MPA ID Req Frame"
-#define MPA_KEY_REP "MPA ID Rep Frame"
-
-#define MPA_MAX_PRIVATE_DATA	256
-#define MPA_REV		0	/* XXX - amso1100 uses rev 0 ! */
-#define MPA_REJECT		0x20
-#define MPA_CRC			0x40
-#define MPA_MARKERS		0x80
-#define MPA_FLAGS_MASK		0xE0
-
-#define put_ep(ep) {							\
-	pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n",		\
-		 __func__, __LINE__, ep, kref_read(&((ep)->kref)));	\
-	WARN_ON(kref_read(&((ep)->kref)) < 1);				\
-	kref_put(&((ep)->kref), __free_ep);				\
-}
-
-#define get_ep(ep) {							\
-	pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n",		\
-		 __func__, __LINE__, ep, kref_read(&((ep)->kref)));	\
-	kref_get(&((ep)->kref));					\
-}
-
-struct mpa_message {
-	u8 key[16];
-	u8 flags;
-	u8 revision;
-	__be16 private_data_size;
-	u8 private_data[0];
-};
-
-struct terminate_message {
-	u8 layer_etype;
-	u8 ecode;
-	__be16 hdrct_rsvd;
-	u8 len_hdrs[0];
-};
-
-#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
-
-enum iwch_layers_types {
-	LAYER_RDMAP		= 0x00,
-	LAYER_DDP		= 0x10,
-	LAYER_MPA		= 0x20,
-	RDMAP_LOCAL_CATA	= 0x00,
-	RDMAP_REMOTE_PROT	= 0x01,
-	RDMAP_REMOTE_OP		= 0x02,
-	DDP_LOCAL_CATA		= 0x00,
-	DDP_TAGGED_ERR		= 0x01,
-	DDP_UNTAGGED_ERR	= 0x02,
-	DDP_LLP			= 0x03
-};
-
-enum iwch_rdma_ecodes {
-	RDMAP_INV_STAG		= 0x00,
-	RDMAP_BASE_BOUNDS	= 0x01,
-	RDMAP_ACC_VIOL		= 0x02,
-	RDMAP_STAG_NOT_ASSOC	= 0x03,
-	RDMAP_TO_WRAP		= 0x04,
-	RDMAP_INV_VERS		= 0x05,
-	RDMAP_INV_OPCODE	= 0x06,
-	RDMAP_STREAM_CATA	= 0x07,
-	RDMAP_GLOBAL_CATA	= 0x08,
-	RDMAP_CANT_INV_STAG	= 0x09,
-	RDMAP_UNSPECIFIED	= 0xff
-};
-
-enum iwch_ddp_ecodes {
-	DDPT_INV_STAG		= 0x00,
-	DDPT_BASE_BOUNDS	= 0x01,
-	DDPT_STAG_NOT_ASSOC	= 0x02,
-	DDPT_TO_WRAP		= 0x03,
-	DDPT_INV_VERS		= 0x04,
-	DDPU_INV_QN		= 0x01,
-	DDPU_INV_MSN_NOBUF	= 0x02,
-	DDPU_INV_MSN_RANGE	= 0x03,
-	DDPU_INV_MO		= 0x04,
-	DDPU_MSG_TOOBIG		= 0x05,
-	DDPU_INV_VERS		= 0x06
-};
-
-enum iwch_mpa_ecodes {
-	MPA_CRC_ERR		= 0x02,
-	MPA_MARKER_ERR		= 0x03
-};
-
-enum iwch_ep_state {
-	IDLE = 0,
-	LISTEN,
-	CONNECTING,
-	MPA_REQ_WAIT,
-	MPA_REQ_SENT,
-	MPA_REQ_RCVD,
-	MPA_REP_SENT,
-	FPDU_MODE,
-	ABORTING,
-	CLOSING,
-	MORIBUND,
-	DEAD,
-};
-
-enum iwch_ep_flags {
-	PEER_ABORT_IN_PROGRESS	= 0,
-	ABORT_REQ_IN_PROGRESS	= 1,
-	RELEASE_RESOURCES	= 2,
-	CLOSE_SENT		= 3,
-};
-
-struct iwch_ep_common {
-	struct iw_cm_id *cm_id;
-	struct iwch_qp *qp;
-	struct t3cdev *tdev;
-	enum iwch_ep_state state;
-	struct kref kref;
-	spinlock_t lock;
-	struct sockaddr_in local_addr;
-	struct sockaddr_in remote_addr;
-	wait_queue_head_t waitq;
-	int rpl_done;
-	int rpl_err;
-	unsigned long flags;
-};
-
-struct iwch_listen_ep {
-	struct iwch_ep_common com;
-	unsigned int stid;
-	int backlog;
-};
-
-struct iwch_ep {
-	struct iwch_ep_common com;
-	struct iwch_ep *parent_ep;
-	struct timer_list timer;
-	unsigned int atid;
-	u32 hwtid;
-	u32 snd_seq;
-	u32 rcv_seq;
-	struct l2t_entry *l2t;
-	struct dst_entry *dst;
-	struct sk_buff *mpa_skb;
-	struct iwch_mpa_attributes mpa_attr;
-	unsigned int mpa_pkt_len;
-	u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
-	u8 tos;
-	u16 emss;
-	u16 plen;
-	u32 ird;
-	u32 ord;
-};
-
-static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
-{
-	return cm_id->provider_data;
-}
-
-static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
-{
-	return cm_id->provider_data;
-}
-
-static inline int compute_wscale(int win)
-{
-	int wscale = 0;
-
-	while (wscale < 14 && (65535<<wscale) < win)
-		wscale++;
-	return wscale;
-}
-
-/* CM prototypes */
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
-void __free_ep(struct kref *kref);
-void iwch_rearp(struct iwch_ep *ep);
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
-
-int __init iwch_cm_init(void);
-void __exit iwch_cm_term(void);
-extern int peer2peer;
-
-#endif				/* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
deleted file mode 100644
index a098c0140580..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "iwch_provider.h"
-#include "iwch.h"
-
-static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
-			      struct iwch_qp *qhp, struct ib_wc *wc)
-{
-	struct t3_wq *wq = qhp ? &qhp->wq : NULL;
-	struct t3_cqe cqe;
-	u32 credit = 0;
-	u8 cqe_flushed;
-	u64 cookie;
-	int ret = 1;
-
-	ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
-				   &credit);
-	if (t3a_device(chp->rhp) && credit) {
-		pr_debug("%s updating %d cq credits on id %d\n", __func__,
-			 credit, chp->cq.cqid);
-		cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
-	}
-
-	if (ret) {
-		ret = -EAGAIN;
-		goto out;
-	}
-	ret = 1;
-
-	wc->wr_id = cookie;
-	wc->qp = qhp ? &qhp->ibqp : NULL;
-	wc->vendor_err = CQE_STATUS(cqe);
-	wc->wc_flags = 0;
-
-	pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
-		 __func__,
-		 CQE_QPID(cqe), CQE_TYPE(cqe),
-		 CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
-		 CQE_WRID_LOW(cqe), (unsigned long long)cookie);
-
-	if (CQE_TYPE(cqe) == 0) {
-		if (!CQE_STATUS(cqe))
-			wc->byte_len = CQE_LEN(cqe);
-		else
-			wc->byte_len = 0;
-		wc->opcode = IB_WC_RECV;
-		if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
-		    CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
-			wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
-			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-		}
-	} else {
-		switch (CQE_OPCODE(cqe)) {
-		case T3_RDMA_WRITE:
-			wc->opcode = IB_WC_RDMA_WRITE;
-			break;
-		case T3_READ_REQ:
-			wc->opcode = IB_WC_RDMA_READ;
-			wc->byte_len = CQE_LEN(cqe);
-			break;
-		case T3_SEND:
-		case T3_SEND_WITH_SE:
-		case T3_SEND_WITH_INV:
-		case T3_SEND_WITH_SE_INV:
-			wc->opcode = IB_WC_SEND;
-			break;
-		case T3_LOCAL_INV:
-			wc->opcode = IB_WC_LOCAL_INV;
-			break;
-		case T3_FAST_REGISTER:
-			wc->opcode = IB_WC_REG_MR;
-			break;
-		default:
-			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
-			       CQE_OPCODE(cqe), CQE_QPID(cqe));
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
-	if (cqe_flushed)
-		wc->status = IB_WC_WR_FLUSH_ERR;
-	else {
-
-		switch (CQE_STATUS(cqe)) {
-		case TPT_ERR_SUCCESS:
-			wc->status = IB_WC_SUCCESS;
-			break;
-		case TPT_ERR_STAG:
-			wc->status = IB_WC_LOC_ACCESS_ERR;
-			break;
-		case TPT_ERR_PDID:
-			wc->status = IB_WC_LOC_PROT_ERR;
-			break;
-		case TPT_ERR_QPID:
-		case TPT_ERR_ACCESS:
-			wc->status = IB_WC_LOC_ACCESS_ERR;
-			break;
-		case TPT_ERR_WRAP:
-			wc->status = IB_WC_GENERAL_ERR;
-			break;
-		case TPT_ERR_BOUND:
-			wc->status = IB_WC_LOC_LEN_ERR;
-			break;
-		case TPT_ERR_INVALIDATE_SHARED_MR:
-		case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
-			wc->status = IB_WC_MW_BIND_ERR;
-			break;
-		case TPT_ERR_CRC:
-		case TPT_ERR_MARKER:
-		case TPT_ERR_PDU_LEN_ERR:
-		case TPT_ERR_OUT_OF_RQE:
-		case TPT_ERR_DDP_VERSION:
-		case TPT_ERR_RDMA_VERSION:
-		case TPT_ERR_DDP_QUEUE_NUM:
-		case TPT_ERR_MSN:
-		case TPT_ERR_TBIT:
-		case TPT_ERR_MO:
-		case TPT_ERR_MSN_RANGE:
-		case TPT_ERR_IRD_OVERFLOW:
-		case TPT_ERR_OPCODE:
-			wc->status = IB_WC_FATAL_ERR;
-			break;
-		case TPT_ERR_SWFLUSH:
-			wc->status = IB_WC_WR_FLUSH_ERR;
-			break;
-		default:
-			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
-			       CQE_STATUS(cqe), CQE_QPID(cqe));
-			ret = -EINVAL;
-		}
-	}
-out:
-	return ret;
-}
-
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- *	0			EMPTY;
- *	1			cqe returned
- *	-EAGAIN		caller must try again
- *	any other -errno	fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
-			    struct ib_wc *wc)
-{
-	struct iwch_qp *qhp;
-	struct t3_cqe *rd_cqe;
-	int ret;
-
-	rd_cqe = cxio_next_cqe(&chp->cq);
-
-	if (!rd_cqe)
-		return 0;
-
-	qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
-	if (qhp) {
-		spin_lock(&qhp->lock);
-		ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
-		spin_unlock(&qhp->lock);
-	} else {
-		ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
-	}
-	return ret;
-}
-
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
-	struct iwch_dev *rhp;
-	struct iwch_cq *chp;
-	unsigned long flags;
-	int npolled;
-	int err = 0;
-
-	chp = to_iwch_cq(ibcq);
-	rhp = chp->rhp;
-
-	spin_lock_irqsave(&chp->lock, flags);
-	for (npolled = 0; npolled < num_entries; ++npolled) {
-
-		/*
-		 * Because T3 can post CQEs that are _not_ associated
-		 * with a WR, we might have to poll again after removing
-		 * one of these.
-		 */
-		do {
-			err = iwch_poll_cq_one(rhp, chp, wc + npolled);
-		} while (err == -EAGAIN);
-		if (err <= 0)
-			break;
-	}
-	spin_unlock_irqrestore(&chp->lock, flags);
-
-	if (err < 0)
-		return err;
-	else {
-		return npolled;
-	}
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
deleted file mode 100644
index 9d356c1301c7..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/gfp.h>
-#include <linux/mman.h>
-#include <net/sock.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
-			  struct respQ_msg_t *rsp_msg,
-			  enum ib_event_type ib_event,
-			  int send_term)
-{
-	struct ib_event event;
-	struct iwch_qp_attributes attrs;
-	struct iwch_qp *qhp;
-	unsigned long flag;
-
-	xa_lock(&rnicp->qps);
-	qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
-
-	if (!qhp) {
-		pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
-		       __func__, CQE_STATUS(rsp_msg->cqe),
-		       CQE_QPID(rsp_msg->cqe));
-		xa_unlock(&rnicp->qps);
-		return;
-	}
-
-	if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
-	    (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
-		pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
-			 __func__,
-			 qhp->attr.state, qhp->wq.qpid,
-			 CQE_STATUS(rsp_msg->cqe));
-		xa_unlock(&rnicp->qps);
-		return;
-	}
-
-	pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
-	       __func__,
-	       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
-	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
-	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
-	atomic_inc(&qhp->refcnt);
-	xa_unlock(&rnicp->qps);
-
-	if (qhp->attr.state == IWCH_QP_STATE_RTS) {
-		attrs.next_state = IWCH_QP_STATE_TERMINATE;
-		iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
-			       &attrs, 1);
-		if (send_term)
-			iwch_post_terminate(qhp, rsp_msg);
-	}
-
-	event.event = ib_event;
-	event.device = chp->ibcq.device;
-	if (ib_event == IB_EVENT_CQ_ERR)
-		event.element.cq = &chp->ibcq;
-	else
-		event.element.qp = &qhp->ibqp;
-
-	if (qhp->ibqp.event_handler)
-		(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
-
-	spin_lock_irqsave(&chp->comp_handler_lock, flag);
-	(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
-	spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-
-	if (atomic_dec_and_test(&qhp->refcnt))
-		wake_up(&qhp->wait);
-}
-
-void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
-{
-	struct iwch_dev *rnicp;
-	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
-	struct iwch_cq *chp;
-	struct iwch_qp *qhp;
-	u32 cqid = RSPQ_CQID(rsp_msg);
-	unsigned long flag;
-
-	rnicp = (struct iwch_dev *) rdev_p->ulp;
-	xa_lock(&rnicp->qps);
-	chp = get_chp(rnicp, cqid);
-	qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
-	if (!chp || !qhp) {
-		pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
-		       cqid, CQE_QPID(rsp_msg->cqe),
-		       CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
-		       CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
-		       CQE_WRID_LOW(rsp_msg->cqe));
-		xa_unlock(&rnicp->qps);
-		goto out;
-	}
-	iwch_qp_add_ref(&qhp->ibqp);
-	atomic_inc(&chp->refcnt);
-	xa_unlock(&rnicp->qps);
-
-	/*
-	 * 1) completion of our sending a TERMINATE.
-	 * 2) incoming TERMINATE message.
-	 */
-	if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
-	    (CQE_STATUS(rsp_msg->cqe) == 0)) {
-		if (SQ_TYPE(rsp_msg->cqe)) {
-			pr_debug("%s QPID 0x%x ep %p disconnecting\n",
-				 __func__, qhp->wq.qpid, qhp->ep);
-			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
-		} else {
-			pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
-				 qhp->wq.qpid);
-			post_qp_event(rnicp, chp, rsp_msg,
-				      IB_EVENT_QP_REQ_ERR, 0);
-			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
-		}
-		goto done;
-	}
-
-	/* Bad incoming Read request */
-	if (SQ_TYPE(rsp_msg->cqe) &&
-	    (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
-		goto done;
-	}
-
-	/* Bad incoming write */
-	if (RQ_TYPE(rsp_msg->cqe) &&
-	    (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
-		goto done;
-	}
-
-	switch (CQE_STATUS(rsp_msg->cqe)) {
-
-	/* Completion Events */
-	case TPT_ERR_SUCCESS:
-
-		/*
-		 * Confirm the destination entry if this is a RECV completion.
-		 */
-		if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
-			dst_confirm(qhp->ep->dst);
-		spin_lock_irqsave(&chp->comp_handler_lock, flag);
-		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
-		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-		break;
-
-	case TPT_ERR_STAG:
-	case TPT_ERR_PDID:
-	case TPT_ERR_QPID:
-	case TPT_ERR_ACCESS:
-	case TPT_ERR_WRAP:
-	case TPT_ERR_BOUND:
-	case TPT_ERR_INVALIDATE_SHARED_MR:
-	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
-		break;
-
-	/* Device Fatal Errors */
-	case TPT_ERR_ECC:
-	case TPT_ERR_ECC_PSTAG:
-	case TPT_ERR_INTERNAL_ERR:
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
-		break;
-
-	/* QP Fatal Errors */
-	case TPT_ERR_OUT_OF_RQE:
-	case TPT_ERR_PBL_ADDR_BOUND:
-	case TPT_ERR_CRC:
-	case TPT_ERR_MARKER:
-	case TPT_ERR_PDU_LEN_ERR:
-	case TPT_ERR_DDP_VERSION:
-	case TPT_ERR_RDMA_VERSION:
-	case TPT_ERR_OPCODE:
-	case TPT_ERR_DDP_QUEUE_NUM:
-	case TPT_ERR_MSN:
-	case TPT_ERR_TBIT:
-	case TPT_ERR_MO:
-	case TPT_ERR_MSN_GAP:
-	case TPT_ERR_MSN_RANGE:
-	case TPT_ERR_RQE_ADDR_BOUND:
-	case TPT_ERR_IRD_OVERFLOW:
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
-		break;
-
-	default:
-		pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
-		       CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
-		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
-		break;
-	}
-done:
-	if (atomic_dec_and_test(&chp->refcnt))
-	        wake_up(&chp->wait);
-	iwch_qp_rem_ref(&qhp->ibqp);
-out:
-	dev_kfree_skb_irq(skb);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
deleted file mode 100644
index ce0f2741821d..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-
-static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
-{
-	u32 mmid;
-
-	mhp->attr.state = 1;
-	mhp->attr.stag = stag;
-	mmid = stag >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
-	return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
-}
-
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-		      struct iwch_mr *mhp, int shift)
-{
-	u32 stag;
-	int ret;
-
-	if (cxio_register_phys_mem(&rhp->rdev,
-				   &stag, mhp->attr.pdid,
-				   mhp->attr.perms,
-				   mhp->attr.zbva,
-				   mhp->attr.va_fbo,
-				   mhp->attr.len,
-				   shift - 12,
-				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
-		return -ENOMEM;
-
-	ret = iwch_finish_mem_reg(mhp, stag);
-	if (ret)
-		cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-		       mhp->attr.pbl_addr);
-	return ret;
-}
-
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
-{
-	mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
-						    npages << 3);
-
-	if (!mhp->attr.pbl_addr)
-		return -ENOMEM;
-
-	mhp->attr.pbl_size = npages;
-
-	return 0;
-}
-
-void iwch_free_pbl(struct iwch_mr *mhp)
-{
-	cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
-			      mhp->attr.pbl_size << 3);
-}
-
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
-{
-	return cxio_write_pbl(&mhp->rhp->rdev, pages,
-			      mhp->attr.pbl_addr + (offset << 3), npages);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
deleted file mode 100644
index dcf02ec02810..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ /dev/null
@@ -1,1321 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/sched/mm.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/uverbs_ioctl.h>
-
-#include "cxio_hal.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-#include <rdma/cxgb3-abi.h>
-#include "common.h"
-
-static void iwch_dealloc_ucontext(struct ib_ucontext *context)
-{
-	struct iwch_dev *rhp = to_iwch_dev(context->device);
-	struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
-	struct iwch_mm_entry *mm, *tmp;
-
-	pr_debug("%s context %p\n", __func__, context);
-	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
-		kfree(mm);
-	cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
-}
-
-static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
-			       struct ib_udata *udata)
-{
-	struct ib_device *ibdev = ucontext->device;
-	struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
-	struct iwch_dev *rhp = to_iwch_dev(ibdev);
-
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-	cxio_init_ucontext(&rhp->rdev, &context->uctx);
-	INIT_LIST_HEAD(&context->mmaps);
-	spin_lock_init(&context->mmap_lock);
-	return 0;
-}
-
-static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
-{
-	struct iwch_cq *chp;
-
-	pr_debug("%s ib_cq %p\n", __func__, ib_cq);
-	chp = to_iwch_cq(ib_cq);
-
-	xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
-	atomic_dec(&chp->refcnt);
-	wait_event(chp->wait, !atomic_read(&chp->refcnt));
-
-	cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
-}
-
-static int iwch_create_cq(struct ib_cq *ibcq,
-			  const struct ib_cq_init_attr *attr,
-			  struct ib_udata *udata)
-{
-	struct ib_device *ibdev = ibcq->device;
-	int entries = attr->cqe;
-	struct iwch_dev *rhp = to_iwch_dev(ibcq->device);
-	struct iwch_cq *chp = to_iwch_cq(ibcq);
-	struct iwch_create_cq_resp uresp;
-	struct iwch_create_cq_req ureq;
-	static int warned;
-	size_t resplen;
-
-	pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
-	if (attr->flags)
-		return -EINVAL;
-
-	if (udata) {
-		if (!t3a_device(rhp)) {
-			if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
-				return  -EFAULT;
-
-			chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
-		}
-	}
-
-	if (t3a_device(rhp)) {
-
-		/*
-		 * T3A: Add some fluff to handle extra CQEs inserted
-		 * for various errors.
-		 * Additional CQE possibilities:
-		 *      TERMINATE,
-		 *      incoming RDMA WRITE Failures
-		 *      incoming RDMA READ REQUEST FAILUREs
-		 * NOTE: We cannot ensure the CQ won't overflow.
-		 */
-		entries += 16;
-	}
-	entries = roundup_pow_of_two(entries);
-	chp->cq.size_log2 = ilog2(entries);
-
-	if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata))
-		return -ENOMEM;
-
-	chp->rhp = rhp;
-	chp->ibcq.cqe = 1 << chp->cq.size_log2;
-	spin_lock_init(&chp->lock);
-	spin_lock_init(&chp->comp_handler_lock);
-	atomic_set(&chp->refcnt, 1);
-	init_waitqueue_head(&chp->wait);
-	if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) {
-		cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
-		return -ENOMEM;
-	}
-
-	if (udata) {
-		struct iwch_mm_entry *mm;
-		struct iwch_ucontext *ucontext = rdma_udata_to_drv_context(
-			udata, struct iwch_ucontext, ibucontext);
-
-		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
-		if (!mm) {
-			iwch_destroy_cq(&chp->ibcq, udata);
-			return -ENOMEM;
-		}
-		uresp.cqid = chp->cq.cqid;
-		uresp.size_log2 = chp->cq.size_log2;
-		spin_lock(&ucontext->mmap_lock);
-		uresp.key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
-		spin_unlock(&ucontext->mmap_lock);
-		mm->key = uresp.key;
-		mm->addr = virt_to_phys(chp->cq.queue);
-		if (udata->outlen < sizeof(uresp)) {
-			if (!warned++)
-				pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
-			mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
-					     sizeof(struct t3_cqe));
-			resplen = sizeof(struct iwch_create_cq_resp_v0);
-		} else {
-			mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
-					     sizeof(struct t3_cqe));
-			uresp.memsize = mm->len;
-			uresp.reserved = 0;
-			resplen = sizeof(uresp);
-		}
-		if (ib_copy_to_udata(udata, &uresp, resplen)) {
-			kfree(mm);
-			iwch_destroy_cq(&chp->ibcq, udata);
-			return -EFAULT;
-		}
-		insert_mmap(ucontext, mm);
-	}
-	pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n",
-		 chp->cq.cqid, chp, (1 << chp->cq.size_log2),
-		 &chp->cq.dma_addr);
-	return 0;
-}
-
-static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
-	struct iwch_dev *rhp;
-	struct iwch_cq *chp;
-	enum t3_cq_opcode cq_op;
-	int err;
-	unsigned long flag;
-	u32 rptr;
-
-	chp = to_iwch_cq(ibcq);
-	rhp = chp->rhp;
-	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		cq_op = CQ_ARM_SE;
-	else
-		cq_op = CQ_ARM_AN;
-	if (chp->user_rptr_addr) {
-		if (get_user(rptr, chp->user_rptr_addr))
-			return -EFAULT;
-		spin_lock_irqsave(&chp->lock, flag);
-		chp->cq.rptr = rptr;
-	} else
-		spin_lock_irqsave(&chp->lock, flag);
-	pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
-	err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
-	spin_unlock_irqrestore(&chp->lock, flag);
-	if (err < 0)
-		pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
-	if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
-		err = 0;
-	return err;
-}
-
-static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	int len = vma->vm_end - vma->vm_start;
-	u32 key = vma->vm_pgoff << PAGE_SHIFT;
-	struct cxio_rdev *rdev_p;
-	int ret = 0;
-	struct iwch_mm_entry *mm;
-	struct iwch_ucontext *ucontext;
-	u64 addr;
-
-	pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
-		 key, len);
-
-	if (vma->vm_start & (PAGE_SIZE-1)) {
-	        return -EINVAL;
-	}
-
-	rdev_p = &(to_iwch_dev(context->device)->rdev);
-	ucontext = to_iwch_ucontext(context);
-
-	mm = remove_mmap(ucontext, key, len);
-	if (!mm)
-		return -EINVAL;
-	addr = mm->addr;
-	kfree(mm);
-
-	if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
-	    (addr < (rdev_p->rnic_info.udbell_physbase +
-		       rdev_p->rnic_info.udbell_len))) {
-
-		/*
-		 * Map T3 DB register.
-		 */
-		if (vma->vm_flags & VM_READ) {
-			return -EPERM;
-		}
-
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-		vma->vm_flags &= ~VM_MAYREAD;
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 addr >> PAGE_SHIFT,
-				         len, vma->vm_page_prot);
-	} else {
-
-		/*
-		 * Map WQ or CQ contig dma memory...
-		 */
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      addr >> PAGE_SHIFT,
-				      len, vma->vm_page_prot);
-	}
-
-	return ret;
-}
-
-static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_pd *php;
-
-	php = to_iwch_pd(pd);
-	rhp = php->rhp;
-	pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
-	cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
-}
-
-static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
-	struct iwch_pd *php = to_iwch_pd(pd);
-	struct ib_device *ibdev = pd->device;
-	u32 pdid;
-	struct iwch_dev *rhp;
-
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-	rhp = (struct iwch_dev *) ibdev;
-	pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
-	if (!pdid)
-		return -EINVAL;
-
-	php->pdid = pdid;
-	php->rhp = rhp;
-	if (udata) {
-		struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};
-
-		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-			iwch_deallocate_pd(&php->ibpd, udata);
-			return -EFAULT;
-		}
-	}
-	pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
-	return 0;
-}
-
-static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_mr *mhp;
-	u32 mmid;
-
-	pr_debug("%s ib_mr %p\n", __func__, ib_mr);
-
-	mhp = to_iwch_mr(ib_mr);
-	kfree(mhp->pages);
-	rhp = mhp->rhp;
-	mmid = mhp->attr.stag >> 8;
-	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
-		       mhp->attr.pbl_addr);
-	iwch_free_pbl(mhp);
-	xa_erase_irq(&rhp->mrs, mmid);
-	if (mhp->kva)
-		kfree((void *) (unsigned long) mhp->kva);
-	ib_umem_release(mhp->umem);
-	pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
-	kfree(mhp);
-	return 0;
-}
-
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	const u64 total_size = 0xffffffff;
-	const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
-	struct iwch_pd *php = to_iwch_pd(pd);
-	struct iwch_dev *rhp = php->rhp;
-	struct iwch_mr *mhp;
-	__be64 *page_list;
-	int shift = 26, npages, ret, i;
-
-	pr_debug("%s ib_pd %p\n", __func__, pd);
-
-	/*
-	 * T3 only supports 32 bits of size.
-	 */
-	if (sizeof(phys_addr_t) > 4) {
-		pr_warn_once("Cannot support dma_mrs on this platform\n");
-		return ERR_PTR(-ENOTSUPP);
-	}
-
-	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-	if (!mhp)
-		return ERR_PTR(-ENOMEM);
-
-	mhp->rhp = rhp;
-
-	npages = (total_size + (1ULL << shift) - 1) >> shift;
-	if (!npages) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
-	if (!page_list) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	for (i = 0; i < npages; i++)
-		page_list[i] = cpu_to_be64((u64)i << shift);
-
-	pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
-		 __func__, mask, shift, total_size, npages);
-
-	ret = iwch_alloc_pbl(mhp, npages);
-	if (ret) {
-		kfree(page_list);
-		goto err_pbl;
-	}
-
-	ret = iwch_write_pbl(mhp, page_list, npages, 0);
-	kfree(page_list);
-	if (ret)
-		goto err_pbl;
-
-	mhp->attr.pdid = php->pdid;
-	mhp->attr.zbva = 0;
-
-	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-	mhp->attr.va_fbo = 0;
-	mhp->attr.page_size = shift - 12;
-
-	mhp->attr.len = (u32) total_size;
-	mhp->attr.pbl_size = npages;
-	ret = iwch_register_mem(rhp, php, mhp, shift);
-	if (ret)
-		goto err_pbl;
-
-	return &mhp->ibmr;
-
-err_pbl:
-	iwch_free_pbl(mhp);
-
-err:
-	kfree(mhp);
-	return ERR_PTR(ret);
-}
-
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				      u64 virt, int acc, struct ib_udata *udata)
-{
-	__be64 *pages;
-	int shift, n, i;
-	int err = 0;
-	struct iwch_dev *rhp;
-	struct iwch_pd *php;
-	struct iwch_mr *mhp;
-	struct iwch_reg_user_mr_resp uresp;
-	struct sg_dma_page_iter sg_iter;
-	pr_debug("%s ib_pd %p\n", __func__, pd);
-
-	php = to_iwch_pd(pd);
-	rhp = php->rhp;
-	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-	if (!mhp)
-		return ERR_PTR(-ENOMEM);
-
-	mhp->rhp = rhp;
-
-	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
-	if (IS_ERR(mhp->umem)) {
-		err = PTR_ERR(mhp->umem);
-		kfree(mhp);
-		return ERR_PTR(err);
-	}
-
-	shift = PAGE_SHIFT;
-
-	n = ib_umem_num_pages(mhp->umem);
-
-	err = iwch_alloc_pbl(mhp, n);
-	if (err)
-		goto err;
-
-	pages = (__be64 *) __get_free_page(GFP_KERNEL);
-	if (!pages) {
-		err = -ENOMEM;
-		goto err_pbl;
-	}
-
-	i = n = 0;
-
-	for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
-		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
-		if (i == PAGE_SIZE / sizeof(*pages)) {
-			err = iwch_write_pbl(mhp, pages, i, n);
-			if (err)
-				goto pbl_done;
-			n += i;
-			i = 0;
-		}
-	}
-
-	if (i)
-		err = iwch_write_pbl(mhp, pages, i, n);
-
-pbl_done:
-	free_page((unsigned long) pages);
-	if (err)
-		goto err_pbl;
-
-	mhp->attr.pdid = php->pdid;
-	mhp->attr.zbva = 0;
-	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-	mhp->attr.va_fbo = virt;
-	mhp->attr.page_size = shift - 12;
-	mhp->attr.len = (u32) length;
-
-	err = iwch_register_mem(rhp, php, mhp, shift);
-	if (err)
-		goto err_pbl;
-
-	if (udata && !t3a_device(rhp)) {
-		uresp.pbl_addr = (mhp->attr.pbl_addr -
-				 rhp->rdev.rnic_info.pbl_base) >> 3;
-		pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
-			 uresp.pbl_addr);
-
-		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
-			iwch_dereg_mr(&mhp->ibmr, udata);
-			err = -EFAULT;
-			goto err;
-		}
-	}
-
-	return &mhp->ibmr;
-
-err_pbl:
-	iwch_free_pbl(mhp);
-
-err:
-	ib_umem_release(mhp->umem);
-	kfree(mhp);
-	return ERR_PTR(err);
-}
-
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
-				   struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_pd *php;
-	struct iwch_mw *mhp;
-	u32 mmid;
-	u32 stag = 0;
-	int ret;
-
-	if (type != IB_MW_TYPE_1)
-		return ERR_PTR(-EINVAL);
-
-	php = to_iwch_pd(pd);
-	rhp = php->rhp;
-	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-	if (!mhp)
-		return ERR_PTR(-ENOMEM);
-	ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
-	if (ret) {
-		kfree(mhp);
-		return ERR_PTR(ret);
-	}
-	mhp->rhp = rhp;
-	mhp->attr.pdid = php->pdid;
-	mhp->attr.type = TPT_MW;
-	mhp->attr.stag = stag;
-	mmid = (stag) >> 8;
-	mhp->ibmw.rkey = stag;
-	if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
-		cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
-		kfree(mhp);
-		return ERR_PTR(-ENOMEM);
-	}
-	pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
-	return &(mhp->ibmw);
-}
-
-static int iwch_dealloc_mw(struct ib_mw *mw)
-{
-	struct iwch_dev *rhp;
-	struct iwch_mw *mhp;
-	u32 mmid;
-
-	mhp = to_iwch_mw(mw);
-	rhp = mhp->rhp;
-	mmid = (mw->rkey) >> 8;
-	cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
-	xa_erase_irq(&rhp->mrs, mmid);
-	pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
-	kfree(mhp);
-	return 0;
-}
-
-static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
-				   u32 max_num_sg, struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_pd *php;
-	struct iwch_mr *mhp;
-	u32 mmid;
-	u32 stag = 0;
-	int ret = -ENOMEM;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG ||
-	    max_num_sg > T3_MAX_FASTREG_DEPTH)
-		return ERR_PTR(-EINVAL);
-
-	php = to_iwch_pd(pd);
-	rhp = php->rhp;
-	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
-	if (!mhp)
-		goto err;
-
-	mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
-	if (!mhp->pages)
-		goto pl_err;
-
-	mhp->rhp = rhp;
-	ret = iwch_alloc_pbl(mhp, max_num_sg);
-	if (ret)
-		goto err1;
-	mhp->attr.pbl_size = max_num_sg;
-	ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
-				 mhp->attr.pbl_size, mhp->attr.pbl_addr);
-	if (ret)
-		goto err2;
-	mhp->attr.pdid = php->pdid;
-	mhp->attr.type = TPT_NON_SHARED_MR;
-	mhp->attr.stag = stag;
-	mhp->attr.state = 1;
-	mmid = (stag) >> 8;
-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL);
-	if (ret)
-		goto err3;
-
-	pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
-	return &(mhp->ibmr);
-err3:
-	cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
-		       mhp->attr.pbl_addr);
-err2:
-	iwch_free_pbl(mhp);
-err1:
-	kfree(mhp->pages);
-pl_err:
-	kfree(mhp);
-err:
-	return ERR_PTR(ret);
-}
-
-static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
-{
-	struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
-	if (unlikely(mhp->npages == mhp->attr.pbl_size))
-		return -ENOMEM;
-
-	mhp->pages[mhp->npages++] = addr;
-
-	return 0;
-}
-
-static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
-			  int sg_nents, unsigned int *sg_offset)
-{
-	struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
-	mhp->npages = 0;
-
-	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
-}
-
-static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_qp *qhp;
-	struct iwch_qp_attributes attrs;
-	struct iwch_ucontext *ucontext;
-
-	qhp = to_iwch_qp(ib_qp);
-	rhp = qhp->rhp;
-
-	attrs.next_state = IWCH_QP_STATE_ERROR;
-	iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
-	wait_event(qhp->wait, !qhp->ep);
-
-	xa_erase_irq(&rhp->qps, qhp->wq.qpid);
-
-	atomic_dec(&qhp->refcnt);
-	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
-
-	ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
-					     ibucontext);
-	cxio_destroy_qp(&rhp->rdev, &qhp->wq,
-			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
-	pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
-		 ib_qp, qhp->wq.qpid, qhp);
-	kfree(qhp);
-	return 0;
-}
-
-static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *attrs,
-			     struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_qp *qhp;
-	struct iwch_pd *php;
-	struct iwch_cq *schp;
-	struct iwch_cq *rchp;
-	struct iwch_create_qp_resp uresp;
-	int wqsize, sqsize, rqsize;
-	struct iwch_ucontext *ucontext;
-
-	pr_debug("%s ib_pd %p\n", __func__, pd);
-	if (attrs->qp_type != IB_QPT_RC)
-		return ERR_PTR(-EINVAL);
-	php = to_iwch_pd(pd);
-	rhp = php->rhp;
-	schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
-	rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
-	if (!schp || !rchp)
-		return ERR_PTR(-EINVAL);
-
-	/* The RQT size must be # of entries + 1 rounded up to a power of two */
-	rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
-	if (rqsize == attrs->cap.max_recv_wr)
-		rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
-
-	/* T3 doesn't support RQT depth < 16 */
-	if (rqsize < 16)
-		rqsize = 16;
-
-	if (rqsize > T3_MAX_RQ_SIZE)
-		return ERR_PTR(-EINVAL);
-
-	if (attrs->cap.max_inline_data > T3_MAX_INLINE)
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * NOTE: The SQ and total WQ sizes don't need to be
-	 * a power of two.  However, all the code assumes
-	 * they are. EG: Q_FREECNT() and friends.
-	 */
-	sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
-	wqsize = roundup_pow_of_two(rqsize + sqsize);
-
-	/*
-	 * Kernel users need more wq space for fastreg WRs which can take
-	 * 2 WR fragments.
-	 */
-	ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
-					     ibucontext);
-	if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
-		wqsize = roundup_pow_of_two(rqsize +
-				roundup_pow_of_two(attrs->cap.max_send_wr * 2));
-	pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
-		 wqsize, sqsize, rqsize);
-	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
-	if (!qhp)
-		return ERR_PTR(-ENOMEM);
-	qhp->wq.size_log2 = ilog2(wqsize);
-	qhp->wq.rq_size_log2 = ilog2(rqsize);
-	qhp->wq.sq_size_log2 = ilog2(sqsize);
-	if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
-			   ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
-		kfree(qhp);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	attrs->cap.max_recv_wr = rqsize - 1;
-	attrs->cap.max_send_wr = sqsize;
-	attrs->cap.max_inline_data = T3_MAX_INLINE;
-
-	qhp->rhp = rhp;
-	qhp->attr.pd = php->pdid;
-	qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
-	qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
-	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
-	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
-	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
-	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
-	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
-	qhp->attr.state = IWCH_QP_STATE_IDLE;
-	qhp->attr.next_state = IWCH_QP_STATE_IDLE;
-
-	/*
-	 * XXX - These don't get passed in from the openib user
-	 * at create time.  The CM sets them via a QP modify.
-	 * Need to fix...  I think the CM should
-	 */
-	qhp->attr.enable_rdma_read = 1;
-	qhp->attr.enable_rdma_write = 1;
-	qhp->attr.enable_bind = 1;
-	qhp->attr.max_ord = 1;
-	qhp->attr.max_ird = 1;
-
-	spin_lock_init(&qhp->lock);
-	init_waitqueue_head(&qhp->wait);
-	atomic_set(&qhp->refcnt, 1);
-
-	if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) {
-		cxio_destroy_qp(&rhp->rdev, &qhp->wq,
-			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-		kfree(qhp);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (udata) {
-
-		struct iwch_mm_entry *mm1, *mm2;
-
-		mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL);
-		if (!mm1) {
-			iwch_destroy_qp(&qhp->ibqp, udata);
-			return ERR_PTR(-ENOMEM);
-		}
-
-		mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
-		if (!mm2) {
-			kfree(mm1);
-			iwch_destroy_qp(&qhp->ibqp, udata);
-			return ERR_PTR(-ENOMEM);
-		}
-
-		uresp.qpid = qhp->wq.qpid;
-		uresp.size_log2 = qhp->wq.size_log2;
-		uresp.sq_size_log2 = qhp->wq.sq_size_log2;
-		uresp.rq_size_log2 = qhp->wq.rq_size_log2;
-		spin_lock(&ucontext->mmap_lock);
-		uresp.key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
-		uresp.db_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
-		spin_unlock(&ucontext->mmap_lock);
-		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
-			kfree(mm1);
-			kfree(mm2);
-			iwch_destroy_qp(&qhp->ibqp, udata);
-			return ERR_PTR(-EFAULT);
-		}
-		mm1->key = uresp.key;
-		mm1->addr = virt_to_phys(qhp->wq.queue);
-		mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr));
-		insert_mmap(ucontext, mm1);
-		mm2->key = uresp.db_key;
-		mm2->addr = qhp->wq.udb & PAGE_MASK;
-		mm2->len = PAGE_SIZE;
-		insert_mmap(ucontext, mm2);
-	}
-	qhp->ibqp.qp_num = qhp->wq.qpid;
-	pr_debug(
-		"%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n",
-		__func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
-		qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2,
-		qhp->wq.rq_addr);
-	return &qhp->ibqp;
-}
-
-static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		      int attr_mask, struct ib_udata *udata)
-{
-	struct iwch_dev *rhp;
-	struct iwch_qp *qhp;
-	enum iwch_qp_attr_mask mask = 0;
-	struct iwch_qp_attributes attrs = {};
-
-	pr_debug("%s ib_qp %p\n", __func__, ibqp);
-
-	/* iwarp does not support the RTR state */
-	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
-		attr_mask &= ~IB_QP_STATE;
-
-	/* Make sure we still have something left to do */
-	if (!attr_mask)
-		return 0;
-
-	qhp = to_iwch_qp(ibqp);
-	rhp = qhp->rhp;
-
-	attrs.next_state = iwch_convert_state(attr->qp_state);
-	attrs.enable_rdma_read = (attr->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
-	attrs.enable_rdma_write = (attr->qp_access_flags &
-				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
-	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-
-	mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
-	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
-			(IWCH_QP_ATTR_ENABLE_RDMA_READ |
-			 IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
-			 IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
-
-	return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp)
-{
-	pr_debug("%s ib_qp %p\n", __func__, qp);
-	atomic_inc(&(to_iwch_qp(qp)->refcnt));
-}
-
-void iwch_qp_rem_ref(struct ib_qp *qp)
-{
-	pr_debug("%s ib_qp %p\n", __func__, qp);
-	if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
-	        wake_up(&(to_iwch_qp(qp)->wait));
-}
-
-static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
-{
-	pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
-	return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
-}
-
-
-static int iwch_query_pkey(struct ib_device *ibdev,
-			   u8 port, u16 index, u16 * pkey)
-{
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-	*pkey = 0;
-	return 0;
-}
-
-static int iwch_query_gid(struct ib_device *ibdev, u8 port,
-			  int index, union ib_gid *gid)
-{
-	struct iwch_dev *dev;
-
-	pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
-		 __func__, ibdev, port, index, gid);
-	dev = to_iwch_dev(ibdev);
-	BUG_ON(port == 0 || port > 2);
-	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-	memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
-	return 0;
-}
-
-static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
-{
-	struct ethtool_drvinfo info;
-	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-	char *cp, *next;
-	unsigned fw_maj, fw_min, fw_mic;
-
-	lldev->ethtool_ops->get_drvinfo(lldev, &info);
-
-	next = info.fw_version + 1;
-	cp = strsep(&next, ".");
-	sscanf(cp, "%i", &fw_maj);
-	cp = strsep(&next, ".");
-	sscanf(cp, "%i", &fw_min);
-	cp = strsep(&next, ".");
-	sscanf(cp, "%i", &fw_mic);
-
-	return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
-	       (fw_mic & 0xffff);
-}
-
-static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			     struct ib_udata *uhw)
-{
-
-	struct iwch_dev *dev;
-
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	dev = to_iwch_dev(ibdev);
-	memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
-	props->hw_ver = dev->rdev.t3cdev_p->type;
-	props->fw_ver = fw_vers_string_to_u64(dev);
-	props->device_cap_flags = dev->device_cap_flags;
-	props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
-	props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
-	props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
-	props->max_mr_size = dev->attr.max_mr_size;
-	props->max_qp = dev->attr.max_qps;
-	props->max_qp_wr = dev->attr.max_wrs;
-	props->max_send_sge = dev->attr.max_sge_per_wr;
-	props->max_recv_sge = dev->attr.max_sge_per_wr;
-	props->max_sge_rd = 1;
-	props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
-	props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
-	props->max_cq = dev->attr.max_cqs;
-	props->max_cqe = dev->attr.max_cqes_per_cq;
-	props->max_mr = dev->attr.max_mem_regs;
-	props->max_pd = dev->attr.max_pds;
-	props->local_ca_ack_delay = 0;
-	props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
-
-	return 0;
-}
-
-static int iwch_query_port(struct ib_device *ibdev,
-			   u8 port, struct ib_port_attr *props)
-{
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-
-	props->port_cap_flags =
-	    IB_PORT_CM_SUP |
-	    IB_PORT_SNMP_TUNNEL_SUP |
-	    IB_PORT_REINIT_SUP |
-	    IB_PORT_DEVICE_MGMT_SUP |
-	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-	props->gid_tbl_len = 1;
-	props->pkey_tbl_len = 1;
-	props->active_width = 2;
-	props->active_speed = IB_SPEED_DDR;
-	props->max_msg_sz = -1;
-
-	return 0;
-}
-
-static ssize_t hw_rev_show(struct device *dev,
-			   struct device_attribute *attr, char *buf)
-{
-	struct iwch_dev *iwch_dev =
-			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
-	pr_debug("%s dev 0x%p\n", __func__, dev);
-	return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
-}
-static DEVICE_ATTR_RO(hw_rev);
-
-static ssize_t hca_type_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
-{
-	struct iwch_dev *iwch_dev =
-			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-	struct ethtool_drvinfo info;
-	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
-	pr_debug("%s dev 0x%p\n", __func__, dev);
-	lldev->ethtool_ops->get_drvinfo(lldev, &info);
-	return sprintf(buf, "%s\n", info.driver);
-}
-static DEVICE_ATTR_RO(hca_type);
-
-static ssize_t board_id_show(struct device *dev,
-			     struct device_attribute *attr, char *buf)
-{
-	struct iwch_dev *iwch_dev =
-			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
-	pr_debug("%s dev 0x%p\n", __func__, dev);
-	return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
-		       iwch_dev->rdev.rnic_info.pdev->device);
-}
-static DEVICE_ATTR_RO(board_id);
-
-enum counters {
-	IPINRECEIVES,
-	IPINHDRERRORS,
-	IPINADDRERRORS,
-	IPINUNKNOWNPROTOS,
-	IPINDISCARDS,
-	IPINDELIVERS,
-	IPOUTREQUESTS,
-	IPOUTDISCARDS,
-	IPOUTNOROUTES,
-	IPREASMTIMEOUT,
-	IPREASMREQDS,
-	IPREASMOKS,
-	IPREASMFAILS,
-	TCPACTIVEOPENS,
-	TCPPASSIVEOPENS,
-	TCPATTEMPTFAILS,
-	TCPESTABRESETS,
-	TCPCURRESTAB,
-	TCPINSEGS,
-	TCPOUTSEGS,
-	TCPRETRANSSEGS,
-	TCPINERRS,
-	TCPOUTRSTS,
-	TCPRTOMIN,
-	TCPRTOMAX,
-	NR_COUNTERS
-};
-
-static const char * const names[] = {
-	[IPINRECEIVES] = "ipInReceives",
-	[IPINHDRERRORS] = "ipInHdrErrors",
-	[IPINADDRERRORS] = "ipInAddrErrors",
-	[IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
-	[IPINDISCARDS] = "ipInDiscards",
-	[IPINDELIVERS] = "ipInDelivers",
-	[IPOUTREQUESTS] = "ipOutRequests",
-	[IPOUTDISCARDS] = "ipOutDiscards",
-	[IPOUTNOROUTES] = "ipOutNoRoutes",
-	[IPREASMTIMEOUT] = "ipReasmTimeout",
-	[IPREASMREQDS] = "ipReasmReqds",
-	[IPREASMOKS] = "ipReasmOKs",
-	[IPREASMFAILS] = "ipReasmFails",
-	[TCPACTIVEOPENS] = "tcpActiveOpens",
-	[TCPPASSIVEOPENS] = "tcpPassiveOpens",
-	[TCPATTEMPTFAILS] = "tcpAttemptFails",
-	[TCPESTABRESETS] = "tcpEstabResets",
-	[TCPCURRESTAB] = "tcpCurrEstab",
-	[TCPINSEGS] = "tcpInSegs",
-	[TCPOUTSEGS] = "tcpOutSegs",
-	[TCPRETRANSSEGS] = "tcpRetransSegs",
-	[TCPINERRS] = "tcpInErrs",
-	[TCPOUTRSTS] = "tcpOutRsts",
-	[TCPRTOMIN] = "tcpRtoMin",
-	[TCPRTOMAX] = "tcpRtoMax",
-};
-
-static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
-					      u8 port_num)
-{
-	BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
-	/* Our driver only supports device level stats */
-	if (port_num != 0)
-		return NULL;
-
-	return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
-			u8 port, int index)
-{
-	struct iwch_dev *dev;
-	struct tp_mib_stats m;
-	int ret;
-
-	if (port != 0 || !stats)
-		return -ENOSYS;
-
-	pr_debug("%s ibdev %p\n", __func__, ibdev);
-	dev = to_iwch_dev(ibdev);
-	ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
-	if (ret)
-		return -ENOSYS;
-
-	stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) +	m.ipInReceive_lo;
-	stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
-	stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
-	stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
-	stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
-	stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
-	stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
-	stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
-	stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
-	stats->value[IPREASMTIMEOUT] = 	m.ipReasmTimeout;
-	stats->value[IPREASMREQDS] = m.ipReasmReqds;
-	stats->value[IPREASMOKS] = m.ipReasmOKs;
-	stats->value[IPREASMFAILS] = m.ipReasmFails;
-	stats->value[TCPACTIVEOPENS] =	m.tcpActiveOpens;
-	stats->value[TCPPASSIVEOPENS] =	m.tcpPassiveOpens;
-	stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
-	stats->value[TCPESTABRESETS] = m.tcpEstabResets;
-	stats->value[TCPCURRESTAB] = m.tcpOutRsts;
-	stats->value[TCPINSEGS] = m.tcpCurrEstab;
-	stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
-	stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
-	stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo,
-	stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
-	stats->value[TCPRTOMIN] = m.tcpRtoMin;
-	stats->value[TCPRTOMAX] = m.tcpRtoMax;
-
-	return stats->num_counters;
-}
-
-static struct attribute *iwch_class_attributes[] = {
-	&dev_attr_hw_rev.attr,
-	&dev_attr_hca_type.attr,
-	&dev_attr_board_id.attr,
-	NULL
-};
-
-static const struct attribute_group iwch_attr_group = {
-	.attrs = iwch_class_attributes,
-};
-
-static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
-			       struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-	err = ib_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-
-	return 0;
-}
-
-static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
-{
-	struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
-	struct ethtool_drvinfo info;
-	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
-	pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
-	lldev->ethtool_ops->get_drvinfo(lldev, &info);
-	snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
-}
-
-static const struct ib_device_ops iwch_dev_ops = {
-	.owner = THIS_MODULE,
-	.driver_id = RDMA_DRIVER_CXGB3,
-	.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION,
-	.uverbs_no_driver_id_binding = 1,
-
-	.alloc_hw_stats	= iwch_alloc_stats,
-	.alloc_mr = iwch_alloc_mr,
-	.alloc_mw = iwch_alloc_mw,
-	.alloc_pd = iwch_allocate_pd,
-	.alloc_ucontext = iwch_alloc_ucontext,
-	.create_cq = iwch_create_cq,
-	.create_qp = iwch_create_qp,
-	.dealloc_mw = iwch_dealloc_mw,
-	.dealloc_pd = iwch_deallocate_pd,
-	.dealloc_ucontext = iwch_dealloc_ucontext,
-	.dereg_mr = iwch_dereg_mr,
-	.destroy_cq = iwch_destroy_cq,
-	.destroy_qp = iwch_destroy_qp,
-	.get_dev_fw_str = get_dev_fw_ver_str,
-	.get_dma_mr = iwch_get_dma_mr,
-	.get_hw_stats = iwch_get_mib,
-	.get_port_immutable = iwch_port_immutable,
-	.iw_accept = iwch_accept_cr,
-	.iw_add_ref = iwch_qp_add_ref,
-	.iw_connect = iwch_connect,
-	.iw_create_listen = iwch_create_listen,
-	.iw_destroy_listen = iwch_destroy_listen,
-	.iw_get_qp = iwch_get_qp,
-	.iw_reject = iwch_reject_cr,
-	.iw_rem_ref = iwch_qp_rem_ref,
-	.map_mr_sg = iwch_map_mr_sg,
-	.mmap = iwch_mmap,
-	.modify_qp = iwch_ib_modify_qp,
-	.poll_cq = iwch_poll_cq,
-	.post_recv = iwch_post_receive,
-	.post_send = iwch_post_send,
-	.query_device = iwch_query_device,
-	.query_gid = iwch_query_gid,
-	.query_pkey = iwch_query_pkey,
-	.query_port = iwch_query_port,
-	.reg_user_mr = iwch_reg_user_mr,
-	.req_notify_cq = iwch_arm_cq,
-	INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
-	INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq),
-	INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
-};
-
-static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev)
-{
-	int ret;
-	int i;
-
-	for (i = 0; i < rdev->port_info.nports; i++) {
-		ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i],
-					   i + 1);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
-int iwch_register_device(struct iwch_dev *dev)
-{
-	int err;
-
-	pr_debug("%s iwch_dev %p\n", __func__, dev);
-	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
-	dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
-				IB_DEVICE_MEM_WINDOW |
-				IB_DEVICE_MEM_MGT_EXTENSIONS;
-
-	/* cxgb3 supports STag 0. */
-	dev->ibdev.local_dma_lkey = 0;
-
-	dev->ibdev.uverbs_cmd_mask =
-	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
-	dev->ibdev.node_type = RDMA_NODE_RNIC;
-	BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
-	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
-	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
-	dev->ibdev.num_comp_vectors = 1;
-	dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
-
-	memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name,
-	       sizeof(dev->ibdev.iw_ifname));
-
-	rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
-	ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
-	err = set_netdevs(&dev->ibdev, &dev->rdev);
-	if (err)
-		return err;
-
-	return ib_register_device(&dev->ibdev, "cxgb3_%d");
-}
-
-void iwch_unregister_device(struct iwch_dev *dev)
-{
-	pr_debug("%s iwch_dev %p\n", __func__, dev);
-	ib_unregister_device(&dev->ibdev);
-	return;
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
deleted file mode 100644
index 8adbe9658935..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_PROVIDER_H__
-#define __IWCH_PROVIDER_H__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <rdma/ib_verbs.h>
-#include <asm/types.h>
-#include "t3cdev.h"
-#include "iwch.h"
-#include "cxio_wr.h"
-#include "cxio_hal.h"
-
-struct iwch_pd {
-	struct ib_pd ibpd;
-	u32 pdid;
-	struct iwch_dev *rhp;
-};
-
-static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct iwch_pd, ibpd);
-}
-
-struct tpt_attributes {
-	u32 stag;
-	u32 state:1;
-	u32 type:2;
-	u32 rsvd:1;
-	enum tpt_mem_perm perms;
-	u32 remote_invaliate_disable:1;
-	u32 zbva:1;
-	u32 mw_bind_enable:1;
-	u32 page_size:5;
-
-	u32 pdid;
-	u32 qpid;
-	u32 pbl_addr;
-	u32 len;
-	u64 va_fbo;
-	u32 pbl_size;
-};
-
-struct iwch_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct iwch_dev *rhp;
-	u64 kva;
-	struct tpt_attributes attr;
-	u64 *pages;
-	u32 npages;
-};
-
-typedef struct iwch_mw iwch_mw_handle;
-
-static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct iwch_mr, ibmr);
-}
-
-struct iwch_mw {
-	struct ib_mw ibmw;
-	struct iwch_dev *rhp;
-	u64 kva;
-	struct tpt_attributes attr;
-};
-
-static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
-{
-	return container_of(ibmw, struct iwch_mw, ibmw);
-}
-
-struct iwch_cq {
-	struct ib_cq ibcq;
-	struct iwch_dev *rhp;
-	struct t3_cq cq;
-	spinlock_t lock;
-	spinlock_t comp_handler_lock;
-	atomic_t refcnt;
-	wait_queue_head_t wait;
-	u32 __user *user_rptr_addr;
-};
-
-static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct iwch_cq, ibcq);
-}
-
-enum IWCH_QP_FLAGS {
-	QP_QUIESCED = 0x01
-};
-
-struct iwch_mpa_attributes {
-	u8 initiator;
-	u8 recv_marker_enabled;
-	u8 xmit_marker_enabled;	/* iWARP: enable inbound Read Resp. */
-	u8 crc_enabled;
-	u8 version;	/* 0 or 1 */
-};
-
-struct iwch_qp_attributes {
-	u32 scq;
-	u32 rcq;
-	u32 sq_num_entries;
-	u32 rq_num_entries;
-	u32 sq_max_sges;
-	u32 sq_max_sges_rdma_write;
-	u32 rq_max_sges;
-	u32 state;
-	u8 enable_rdma_read;
-	u8 enable_rdma_write;	/* enable inbound Read Resp. */
-	u8 enable_bind;
-	u8 enable_mmid0_fastreg;	/* Enable STAG0 + Fast-register */
-	/*
-	 * Next QP state. If specify the current state, only the
-	 * QP attributes will be modified.
-	 */
-	u32 max_ord;
-	u32 max_ird;
-	u32 pd;	/* IN */
-	u32 next_state;
-	char terminate_buffer[52];
-	u32 terminate_msg_len;
-	u8 is_terminate_local;
-	struct iwch_mpa_attributes mpa_attr;	/* IN-OUT */
-	struct iwch_ep *llp_stream_handle;
-	char *stream_msg_buf;	/* Last stream msg. before Idle -> RTS */
-	u32 stream_msg_buf_len;	/* Only on Idle -> RTS */
-};
-
-struct iwch_qp {
-	struct ib_qp ibqp;
-	struct iwch_dev *rhp;
-	struct iwch_ep *ep;
-	struct iwch_qp_attributes attr;
-	struct t3_wq wq;
-	spinlock_t lock;
-	atomic_t refcnt;
-	wait_queue_head_t wait;
-	enum IWCH_QP_FLAGS flags;
-};
-
-static inline int qp_quiesced(struct iwch_qp *qhp)
-{
-	return qhp->flags & QP_QUIESCED;
-}
-
-static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct iwch_qp, ibqp);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp);
-void iwch_qp_rem_ref(struct ib_qp *qp);
-
-struct iwch_ucontext {
-	struct ib_ucontext ibucontext;
-	struct cxio_ucontext uctx;
-	u32 key;
-	spinlock_t mmap_lock;
-	struct list_head mmaps;
-};
-
-static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
-{
-	return container_of(c, struct iwch_ucontext, ibucontext);
-}
-
-struct iwch_mm_entry {
-	struct list_head entry;
-	u64 addr;
-	u32 key;
-	unsigned len;
-};
-
-static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
-						u32 key, unsigned len)
-{
-	struct list_head *pos, *nxt;
-	struct iwch_mm_entry *mm;
-
-	spin_lock(&ucontext->mmap_lock);
-	list_for_each_safe(pos, nxt, &ucontext->mmaps) {
-
-		mm = list_entry(pos, struct iwch_mm_entry, entry);
-		if (mm->key == key && mm->len == len) {
-			list_del_init(&mm->entry);
-			spin_unlock(&ucontext->mmap_lock);
-			pr_debug("%s key 0x%x addr 0x%llx len %d\n",
-				 __func__, key,
-				 (unsigned long long)mm->addr, mm->len);
-			return mm;
-		}
-	}
-	spin_unlock(&ucontext->mmap_lock);
-	return NULL;
-}
-
-static inline void insert_mmap(struct iwch_ucontext *ucontext,
-			       struct iwch_mm_entry *mm)
-{
-	spin_lock(&ucontext->mmap_lock);
-	pr_debug("%s key 0x%x addr 0x%llx len %d\n",
-		 __func__, mm->key, (unsigned long long)mm->addr, mm->len);
-	list_add_tail(&mm->entry, &ucontext->mmaps);
-	spin_unlock(&ucontext->mmap_lock);
-}
-
-enum iwch_qp_attr_mask {
-	IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
-	IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
-	IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
-	IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
-	IWCH_QP_ATTR_MAX_ORD = 1 << 11,
-	IWCH_QP_ATTR_MAX_IRD = 1 << 12,
-	IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
-	IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
-	IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
-	IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
-	IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
-				     IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
-				     IWCH_QP_ATTR_MAX_ORD |
-				     IWCH_QP_ATTR_MAX_IRD |
-				     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
-				     IWCH_QP_ATTR_STREAM_MSG_BUFFER |
-				     IWCH_QP_ATTR_MPA_ATTR |
-				     IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
-};
-
-int iwch_modify_qp(struct iwch_dev *rhp,
-				struct iwch_qp *qhp,
-				enum iwch_qp_attr_mask mask,
-				struct iwch_qp_attributes *attrs,
-				int internal);
-
-enum iwch_qp_state {
-	IWCH_QP_STATE_IDLE,
-	IWCH_QP_STATE_RTS,
-	IWCH_QP_STATE_ERROR,
-	IWCH_QP_STATE_TERMINATE,
-	IWCH_QP_STATE_CLOSING,
-	IWCH_QP_STATE_TOT
-};
-
-static inline int iwch_convert_state(enum ib_qp_state ib_state)
-{
-	switch (ib_state) {
-	case IB_QPS_RESET:
-	case IB_QPS_INIT:
-		return IWCH_QP_STATE_IDLE;
-	case IB_QPS_RTS:
-		return IWCH_QP_STATE_RTS;
-	case IB_QPS_SQD:
-		return IWCH_QP_STATE_CLOSING;
-	case IB_QPS_SQE:
-		return IWCH_QP_STATE_TERMINATE;
-	case IB_QPS_ERR:
-		return IWCH_QP_STATE_ERROR;
-	default:
-		return -1;
-	}
-}
-
-static inline u32 iwch_ib_to_tpt_access(int acc)
-{
-	return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
-	       (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
-	       (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
-	       (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
-	       TPT_LOCAL_READ;
-}
-
-static inline u32 iwch_ib_to_tpt_bind_access(int acc)
-{
-	return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
-	       (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
-}
-
-enum iwch_mmid_state {
-	IWCH_STAG_STATE_VALID,
-	IWCH_STAG_STATE_INVALID
-};
-
-enum iwch_qp_query_flags {
-	IWCH_QP_QUERY_CONTEXT_NONE = 0x0,	/* No ctx; Only attrs */
-	IWCH_QP_QUERY_CONTEXT_GET = 0x1,	/* Get ctx + attrs */
-	IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2,	/* Not Supported */
-
-	/*
-	 * Quiesce QP context; Consumer
-	 * will NOT replay outstanding WR
-	 */
-	IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
-	IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
-	IWCH_QP_QUERY_TEST_USERWRITE = 0x32	/* Test special */
-};
-
-u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
-		   const struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
-		      const struct ib_recv_wr **bad_wr);
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
-int iwch_post_zb_read(struct iwch_ep *ep);
-int iwch_register_device(struct iwch_dev *dev);
-void iwch_unregister_device(struct iwch_dev *dev);
-void stop_read_rep_timer(struct iwch_qp *qhp);
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
-		      struct iwch_mr *mhp, int shift);
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
-void iwch_free_pbl(struct iwch_mr *mhp);
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-
-#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
deleted file mode 100644
index c649faad63f9..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-
-#define NO_SUPPORT -1
-
-static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
-			   u8 *flit_cnt)
-{
-	int i;
-	u32 plen;
-
-	switch (wr->opcode) {
-	case IB_WR_SEND:
-		if (wr->send_flags & IB_SEND_SOLICITED)
-			wqe->send.rdmaop = T3_SEND_WITH_SE;
-		else
-			wqe->send.rdmaop = T3_SEND;
-		wqe->send.rem_stag = 0;
-		break;
-	case IB_WR_SEND_WITH_INV:
-		if (wr->send_flags & IB_SEND_SOLICITED)
-			wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
-		else
-			wqe->send.rdmaop = T3_SEND_WITH_INV;
-		wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
-		break;
-	default:
-		return -EINVAL;
-	}
-	if (wr->num_sge > T3_MAX_SGE)
-		return -EINVAL;
-	wqe->send.reserved[0] = 0;
-	wqe->send.reserved[1] = 0;
-	wqe->send.reserved[2] = 0;
-	plen = 0;
-	for (i = 0; i < wr->num_sge; i++) {
-		if ((plen + wr->sg_list[i].length) < plen)
-			return -EMSGSIZE;
-
-		plen += wr->sg_list[i].length;
-		wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
-		wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-		wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
-	}
-	wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
-	*flit_cnt = 4 + ((wr->num_sge) << 1);
-	wqe->send.plen = cpu_to_be32(plen);
-	return 0;
-}
-
-static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
-			    u8 *flit_cnt)
-{
-	int i;
-	u32 plen;
-	if (wr->num_sge > T3_MAX_SGE)
-		return -EINVAL;
-	wqe->write.rdmaop = T3_RDMA_WRITE;
-	wqe->write.reserved[0] = 0;
-	wqe->write.reserved[1] = 0;
-	wqe->write.reserved[2] = 0;
-	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
-	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-
-	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-		plen = 4;
-		wqe->write.sgl[0].stag = wr->ex.imm_data;
-		wqe->write.sgl[0].len = cpu_to_be32(0);
-		wqe->write.num_sgle = cpu_to_be32(0);
-		*flit_cnt = 6;
-	} else {
-		plen = 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			if ((plen + wr->sg_list[i].length) < plen) {
-				return -EMSGSIZE;
-			}
-			plen += wr->sg_list[i].length;
-			wqe->write.sgl[i].stag =
-			    cpu_to_be32(wr->sg_list[i].lkey);
-			wqe->write.sgl[i].len =
-			    cpu_to_be32(wr->sg_list[i].length);
-			wqe->write.sgl[i].to =
-			    cpu_to_be64(wr->sg_list[i].addr);
-		}
-		wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
-		*flit_cnt = 5 + ((wr->num_sge) << 1);
-	}
-	wqe->write.plen = cpu_to_be32(plen);
-	return 0;
-}
-
-static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
-			   u8 *flit_cnt)
-{
-	if (wr->num_sge > 1)
-		return -EINVAL;
-	wqe->read.rdmaop = T3_READ_REQ;
-	if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
-		wqe->read.local_inv = 1;
-	else
-		wqe->read.local_inv = 0;
-	wqe->read.reserved[0] = 0;
-	wqe->read.reserved[1] = 0;
-	wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
-	wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
-	wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
-	wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
-	wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
-	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
-	return 0;
-}
-
-static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
-			u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
-{
-	struct iwch_mr *mhp = to_iwch_mr(wr->mr);
-	int i;
-	__be64 *p;
-
-	if (mhp->npages > T3_MAX_FASTREG_DEPTH)
-		return -EINVAL;
-	*wr_cnt = 1;
-	wqe->fastreg.stag = cpu_to_be32(wr->key);
-	wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
-	wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
-	wqe->fastreg.va_base_lo_fbo =
-				cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
-	wqe->fastreg.page_type_perms = cpu_to_be32(
-		V_FR_PAGE_COUNT(mhp->npages) |
-		V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
-		V_FR_TYPE(TPT_VATO) |
-		V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
-	p = &wqe->fastreg.pbl_addrs[0];
-	for (i = 0; i < mhp->npages; i++, p++) {
-
-		/* If we need a 2nd WR, then set it up */
-		if (i == T3_MAX_FASTREG_FRAG) {
-			*wr_cnt = 2;
-			wqe = (union t3_wr *)(wq->queue +
-				Q_PTR2IDX((wq->wptr+1), wq->size_log2));
-			build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
-			       Q_GENBIT(wq->wptr + 1, wq->size_log2),
-			       0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
-			       T3_EOP);
-
-			p = &wqe->pbl_frag.pbl_addrs[0];
-		}
-		*p = cpu_to_be64((u64)mhp->pages[i]);
-	}
-	*flit_cnt = 5 + mhp->npages;
-	if (*flit_cnt > 15)
-		*flit_cnt = 15;
-	return 0;
-}
-
-static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
-			  u8 *flit_cnt)
-{
-	wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
-	wqe->local_inv.reserved = 0;
-	*flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
-	return 0;
-}
-
-static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
-			    u32 num_sgle, u32 * pbl_addr, u8 * page_size)
-{
-	int i;
-	struct iwch_mr *mhp;
-	u64 offset;
-	for (i = 0; i < num_sgle; i++) {
-
-		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
-		if (!mhp) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EIO;
-		}
-		if (!mhp->attr.state) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EIO;
-		}
-		if (mhp->attr.zbva) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EIO;
-		}
-
-		if (sg_list[i].addr < mhp->attr.va_fbo) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EINVAL;
-		}
-		if (sg_list[i].addr + ((u64) sg_list[i].length) <
-		    sg_list[i].addr) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EINVAL;
-		}
-		if (sg_list[i].addr + ((u64) sg_list[i].length) >
-		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
-			pr_debug("%s %d\n", __func__, __LINE__);
-			return -EINVAL;
-		}
-		offset = sg_list[i].addr - mhp->attr.va_fbo;
-		offset += mhp->attr.va_fbo &
-			  ((1UL << (12 + mhp->attr.page_size)) - 1);
-		pbl_addr[i] = ((mhp->attr.pbl_addr -
-			        rhp->rdev.rnic_info.pbl_base) >> 3) +
-			      (offset >> (12 + mhp->attr.page_size));
-		page_size[i] = mhp->attr.page_size;
-	}
-	return 0;
-}
-
-static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-			   const struct ib_recv_wr *wr)
-{
-	int i, err = 0;
-	u32 pbl_addr[T3_MAX_SGE];
-	u8 page_size[T3_MAX_SGE];
-
-	err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
-			       page_size);
-	if (err)
-		return err;
-	wqe->recv.pagesz[0] = page_size[0];
-	wqe->recv.pagesz[1] = page_size[1];
-	wqe->recv.pagesz[2] = page_size[2];
-	wqe->recv.pagesz[3] = page_size[3];
-	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
-	for (i = 0; i < wr->num_sge; i++) {
-		wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
-		wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-
-		/* to in the WQE == the offset into the page */
-		wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) &
-				((1UL << (12 + page_size[i])) - 1));
-
-		/* pbl_addr is the adapters address in the PBL */
-		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
-	}
-	for (; i < T3_MAX_SGE; i++) {
-		wqe->recv.sgl[i].stag = 0;
-		wqe->recv.sgl[i].len = 0;
-		wqe->recv.sgl[i].to = 0;
-		wqe->recv.pbl_addr[i] = 0;
-	}
-	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
-			     qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
-	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
-			     qhp->wq.rq_size_log2)].pbl_addr = 0;
-	return 0;
-}
-
-static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-				const struct ib_recv_wr *wr)
-{
-	int i;
-	u32 pbl_addr;
-	u32 pbl_offset;
-
-
-	/*
-	 * The T3 HW requires the PBL in the HW recv descriptor to reference
-	 * a PBL entry.  So we allocate the max needed PBL memory here and pass
-	 * it to the uP in the recv WR.  The uP will build the PBL and setup
-	 * the HW recv descriptor.
-	 */
-	pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
-	if (!pbl_addr)
-		return -ENOMEM;
-
-	/*
-	 * Compute the 8B aligned offset.
-	 */
-	pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
-
-	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
-
-	for (i = 0; i < wr->num_sge; i++) {
-
-		/*
-		 * Use a 128MB page size. This and an imposed 128MB
-		 * sge length limit allows us to require only a 2-entry HW
-		 * PBL for each SGE.  This restriction is acceptable since
-		 * since it is not possible to allocate 128MB of contiguous
-		 * DMA coherent memory!
-		 */
-		if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
-			return -EINVAL;
-		wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
-
-		/*
-		 * T3 restricts a recv to all zero-stag or all non-zero-stag.
-		 */
-		if (wr->sg_list[i].lkey != 0)
-			return -EINVAL;
-		wqe->recv.sgl[i].stag = 0;
-		wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-		wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
-		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
-		pbl_offset += 2;
-	}
-	for (; i < T3_MAX_SGE; i++) {
-		wqe->recv.pagesz[i] = 0;
-		wqe->recv.sgl[i].stag = 0;
-		wqe->recv.sgl[i].len = 0;
-		wqe->recv.sgl[i].to = 0;
-		wqe->recv.pbl_addr[i] = 0;
-	}
-	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
-			     qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
-	qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
-			     qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
-	return 0;
-}
-
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
-		   const struct ib_send_wr **bad_wr)
-{
-	int err = 0;
-	u8 uninitialized_var(t3_wr_flit_cnt);
-	enum t3_wr_opcode t3_wr_opcode = 0;
-	enum t3_wr_flags t3_wr_flags;
-	struct iwch_qp *qhp;
-	u32 idx;
-	union t3_wr *wqe;
-	u32 num_wrs;
-	unsigned long flag;
-	struct t3_swsq *sqp;
-	int wr_cnt = 1;
-
-	qhp = to_iwch_qp(ibqp);
-	spin_lock_irqsave(&qhp->lock, flag);
-	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
-		spin_unlock_irqrestore(&qhp->lock, flag);
-		err = -EINVAL;
-		goto out;
-	}
-	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
-		  qhp->wq.sq_size_log2);
-	if (num_wrs == 0) {
-		spin_unlock_irqrestore(&qhp->lock, flag);
-		err = -ENOMEM;
-		goto out;
-	}
-	while (wr) {
-		if (num_wrs == 0) {
-			err = -ENOMEM;
-			break;
-		}
-		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-		wqe = (union t3_wr *) (qhp->wq.queue + idx);
-		t3_wr_flags = 0;
-		if (wr->send_flags & IB_SEND_SOLICITED)
-			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
-		if (wr->send_flags & IB_SEND_SIGNALED)
-			t3_wr_flags |= T3_COMPLETION_FLAG;
-		sqp = qhp->wq.sq +
-		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
-		switch (wr->opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_INV:
-			if (wr->send_flags & IB_SEND_FENCE)
-				t3_wr_flags |= T3_READ_FENCE_FLAG;
-			t3_wr_opcode = T3_WR_SEND;
-			err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
-			break;
-		case IB_WR_RDMA_WRITE:
-		case IB_WR_RDMA_WRITE_WITH_IMM:
-			t3_wr_opcode = T3_WR_WRITE;
-			err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
-			break;
-		case IB_WR_RDMA_READ:
-		case IB_WR_RDMA_READ_WITH_INV:
-			t3_wr_opcode = T3_WR_READ;
-			t3_wr_flags = 0; /* T3 reads are always signaled */
-			err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
-			if (err)
-				break;
-			sqp->read_len = wqe->read.local_len;
-			if (!qhp->wq.oldest_read)
-				qhp->wq.oldest_read = sqp;
-			break;
-		case IB_WR_REG_MR:
-			t3_wr_opcode = T3_WR_FASTREG;
-			err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
-					   &wr_cnt, &qhp->wq);
-			break;
-		case IB_WR_LOCAL_INV:
-			if (wr->send_flags & IB_SEND_FENCE)
-				t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
-			t3_wr_opcode = T3_WR_INV_STAG;
-			err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
-			break;
-		default:
-			pr_debug("%s post of type=%d TBD!\n", __func__,
-				 wr->opcode);
-			err = -EINVAL;
-		}
-		if (err)
-			break;
-		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
-		sqp->wr_id = wr->wr_id;
-		sqp->opcode = wr2opcode(t3_wr_opcode);
-		sqp->sq_wptr = qhp->wq.sq_wptr;
-		sqp->complete = 0;
-		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
-
-		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
-			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
-			       0, t3_wr_flit_cnt,
-			       (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
-		pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
-			 __func__, (unsigned long long)wr->wr_id, idx,
-			 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
-			 sqp->opcode);
-		wr = wr->next;
-		num_wrs--;
-		qhp->wq.wptr += wr_cnt;
-		++(qhp->wq.sq_wptr);
-	}
-	spin_unlock_irqrestore(&qhp->lock, flag);
-	if (cxio_wq_db_enabled(&qhp->wq))
-		ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
-	if (err)
-		*bad_wr = wr;
-	return err;
-}
-
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
-		      const struct ib_recv_wr **bad_wr)
-{
-	int err = 0;
-	struct iwch_qp *qhp;
-	u32 idx;
-	union t3_wr *wqe;
-	u32 num_wrs;
-	unsigned long flag;
-
-	qhp = to_iwch_qp(ibqp);
-	spin_lock_irqsave(&qhp->lock, flag);
-	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
-		spin_unlock_irqrestore(&qhp->lock, flag);
-		err = -EINVAL;
-		goto out;
-	}
-	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
-			    qhp->wq.rq_size_log2) - 1;
-	if (!wr) {
-		spin_unlock_irqrestore(&qhp->lock, flag);
-		err = -ENOMEM;
-		goto out;
-	}
-	while (wr) {
-		if (wr->num_sge > T3_MAX_SGE) {
-			err = -EINVAL;
-			break;
-		}
-		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-		wqe = (union t3_wr *) (qhp->wq.queue + idx);
-		if (num_wrs)
-			if (wr->sg_list[0].lkey)
-				err = build_rdma_recv(qhp, wqe, wr);
-			else
-				err = build_zero_stag_recv(qhp, wqe, wr);
-		else
-			err = -ENOMEM;
-
-		if (err)
-			break;
-
-		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
-			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
-			       0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
-		pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n",
-			 __func__, (unsigned long long)wr->wr_id,
-			 idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
-		++(qhp->wq.rq_wptr);
-		++(qhp->wq.wptr);
-		wr = wr->next;
-		num_wrs--;
-	}
-	spin_unlock_irqrestore(&qhp->lock, flag);
-	if (cxio_wq_db_enabled(&qhp->wq))
-		ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
-	if (err)
-		*bad_wr = wr;
-	return err;
-}
-
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
-				    u8 *layer_type, u8 *ecode)
-{
-	int status = TPT_ERR_INTERNAL_ERR;
-	int tagged = 0;
-	int opcode = -1;
-	int rqtype = 0;
-	int send_inv = 0;
-
-	if (rsp_msg) {
-		status = CQE_STATUS(rsp_msg->cqe);
-		opcode = CQE_OPCODE(rsp_msg->cqe);
-		rqtype = RQ_TYPE(rsp_msg->cqe);
-		send_inv = (opcode == T3_SEND_WITH_INV) ||
-		           (opcode == T3_SEND_WITH_SE_INV);
-		tagged = (opcode == T3_RDMA_WRITE) ||
-			 (rqtype && (opcode == T3_READ_RESP));
-	}
-
-	switch (status) {
-	case TPT_ERR_STAG:
-		if (send_inv) {
-			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
-			*ecode = RDMAP_CANT_INV_STAG;
-		} else {
-			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-			*ecode = RDMAP_INV_STAG;
-		}
-		break;
-	case TPT_ERR_PDID:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-		if ((opcode == T3_SEND_WITH_INV) ||
-		    (opcode == T3_SEND_WITH_SE_INV))
-			*ecode = RDMAP_CANT_INV_STAG;
-		else
-			*ecode = RDMAP_STAG_NOT_ASSOC;
-		break;
-	case TPT_ERR_QPID:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-		*ecode = RDMAP_STAG_NOT_ASSOC;
-		break;
-	case TPT_ERR_ACCESS:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-		*ecode = RDMAP_ACC_VIOL;
-		break;
-	case TPT_ERR_WRAP:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-		*ecode = RDMAP_TO_WRAP;
-		break;
-	case TPT_ERR_BOUND:
-		if (tagged) {
-			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
-			*ecode = DDPT_BASE_BOUNDS;
-		} else {
-			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
-			*ecode = RDMAP_BASE_BOUNDS;
-		}
-		break;
-	case TPT_ERR_INVALIDATE_SHARED_MR:
-	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
-		*ecode = RDMAP_CANT_INV_STAG;
-		break;
-	case TPT_ERR_ECC:
-	case TPT_ERR_ECC_PSTAG:
-	case TPT_ERR_INTERNAL_ERR:
-		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
-		*ecode = 0;
-		break;
-	case TPT_ERR_OUT_OF_RQE:
-		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-		*ecode = DDPU_INV_MSN_NOBUF;
-		break;
-	case TPT_ERR_PBL_ADDR_BOUND:
-		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
-		*ecode = DDPT_BASE_BOUNDS;
-		break;
-	case TPT_ERR_CRC:
-		*layer_type = LAYER_MPA|DDP_LLP;
-		*ecode = MPA_CRC_ERR;
-		break;
-	case TPT_ERR_MARKER:
-		*layer_type = LAYER_MPA|DDP_LLP;
-		*ecode = MPA_MARKER_ERR;
-		break;
-	case TPT_ERR_PDU_LEN_ERR:
-		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-		*ecode = DDPU_MSG_TOOBIG;
-		break;
-	case TPT_ERR_DDP_VERSION:
-		if (tagged) {
-			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
-			*ecode = DDPT_INV_VERS;
-		} else {
-			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-			*ecode = DDPU_INV_VERS;
-		}
-		break;
-	case TPT_ERR_RDMA_VERSION:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
-		*ecode = RDMAP_INV_VERS;
-		break;
-	case TPT_ERR_OPCODE:
-		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
-		*ecode = RDMAP_INV_OPCODE;
-		break;
-	case TPT_ERR_DDP_QUEUE_NUM:
-		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-		*ecode = DDPU_INV_QN;
-		break;
-	case TPT_ERR_MSN:
-	case TPT_ERR_MSN_GAP:
-	case TPT_ERR_MSN_RANGE:
-	case TPT_ERR_IRD_OVERFLOW:
-		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-		*ecode = DDPU_INV_MSN_RANGE;
-		break;
-	case TPT_ERR_TBIT:
-		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
-		*ecode = 0;
-		break;
-	case TPT_ERR_MO:
-		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
-		*ecode = DDPU_INV_MO;
-		break;
-	default:
-		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
-		*ecode = 0;
-		break;
-	}
-}
-
-int iwch_post_zb_read(struct iwch_ep *ep)
-{
-	union t3_wr *wqe;
-	struct sk_buff *skb;
-	u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
-
-	pr_debug("%s enter\n", __func__);
-	skb = alloc_skb(40, GFP_KERNEL);
-	if (!skb) {
-		pr_err("%s cannot send zb_read!!\n", __func__);
-		return -ENOMEM;
-	}
-	wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr));
-	wqe->read.rdmaop = T3_READ_REQ;
-	wqe->read.reserved[0] = 0;
-	wqe->read.reserved[1] = 0;
-	wqe->read.rem_stag = cpu_to_be32(1);
-	wqe->read.rem_to = cpu_to_be64(1);
-	wqe->read.local_stag = cpu_to_be32(1);
-	wqe->read.local_len = cpu_to_be32(0);
-	wqe->read.local_to = cpu_to_be64(1);
-	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
-	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)|
-						V_FW_RIWR_LEN(flit_cnt));
-	skb->priority = CPL_PRIORITY_DATA;
-	return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * This posts a TERMINATE with layer=RDMA, type=catastrophic.
- */
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
-{
-	union t3_wr *wqe;
-	struct terminate_message *term;
-	struct sk_buff *skb;
-
-	pr_debug("%s %d\n", __func__, __LINE__);
-	skb = alloc_skb(40, GFP_ATOMIC);
-	if (!skb) {
-		pr_err("%s cannot send TERMINATE!\n", __func__);
-		return -ENOMEM;
-	}
-	wqe = skb_put_zero(skb, 40);
-	wqe->send.rdmaop = T3_TERMINATE;
-
-	/* immediate data length */
-	wqe->send.plen = htonl(4);
-
-	/* immediate data starts here. */
-	term = (struct terminate_message *)wqe->send.sgl;
-	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
-	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
-			 V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
-	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
-	skb->priority = CPL_PRIORITY_DATA;
-	return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * Assumes qhp lock is held.
- */
-static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
-				struct iwch_cq *schp)
-	__releases(&qhp->lock)
-	__acquires(&qhp->lock)
-{
-	int count;
-	int flushed;
-
-	lockdep_assert_held(&qhp->lock);
-
-	pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
-	/* take a ref on the qhp since we must release the lock */
-	atomic_inc(&qhp->refcnt);
-	spin_unlock(&qhp->lock);
-
-	/* locking hierarchy: cq lock first, then qp lock. */
-	spin_lock(&rchp->lock);
-	spin_lock(&qhp->lock);
-	cxio_flush_hw_cq(&rchp->cq);
-	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
-	spin_unlock(&qhp->lock);
-	spin_unlock(&rchp->lock);
-	if (flushed) {
-		spin_lock(&rchp->comp_handler_lock);
-		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
-		spin_unlock(&rchp->comp_handler_lock);
-	}
-
-	/* locking hierarchy: cq lock first, then qp lock. */
-	spin_lock(&schp->lock);
-	spin_lock(&qhp->lock);
-	cxio_flush_hw_cq(&schp->cq);
-	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
-	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
-	spin_unlock(&qhp->lock);
-	spin_unlock(&schp->lock);
-	if (flushed) {
-		spin_lock(&schp->comp_handler_lock);
-		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
-		spin_unlock(&schp->comp_handler_lock);
-	}
-
-	/* deref */
-	if (atomic_dec_and_test(&qhp->refcnt))
-	        wake_up(&qhp->wait);
-
-	spin_lock(&qhp->lock);
-}
-
-static void flush_qp(struct iwch_qp *qhp)
-{
-	struct iwch_cq *rchp, *schp;
-
-	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
-	schp = get_chp(qhp->rhp, qhp->attr.scq);
-
-	if (qhp->ibqp.uobject) {
-		cxio_set_wq_in_error(&qhp->wq);
-		cxio_set_cq_in_error(&rchp->cq);
-		spin_lock(&rchp->comp_handler_lock);
-		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
-		spin_unlock(&rchp->comp_handler_lock);
-		if (schp != rchp) {
-			cxio_set_cq_in_error(&schp->cq);
-			spin_lock(&schp->comp_handler_lock);
-			(*schp->ibcq.comp_handler)(&schp->ibcq,
-						   schp->ibcq.cq_context);
-			spin_unlock(&schp->comp_handler_lock);
-		}
-		return;
-	}
-	__flush_qp(qhp, rchp, schp);
-}
-
-
-/*
- * Return count of RECV WRs posted
- */
-u16 iwch_rqes_posted(struct iwch_qp *qhp)
-{
-	union t3_wr *wqe = qhp->wq.queue;
-	u16 count = 0;
-
-	while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
-		count++;
-		wqe++;
-	}
-	pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
-	return count;
-}
-
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
-				enum iwch_qp_attr_mask mask,
-				struct iwch_qp_attributes *attrs)
-{
-	struct t3_rdma_init_attr init_attr;
-	int ret;
-
-	init_attr.tid = qhp->ep->hwtid;
-	init_attr.qpid = qhp->wq.qpid;
-	init_attr.pdid = qhp->attr.pd;
-	init_attr.scqid = qhp->attr.scq;
-	init_attr.rcqid = qhp->attr.rcq;
-	init_attr.rq_addr = qhp->wq.rq_addr;
-	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
-	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
-		qhp->attr.mpa_attr.recv_marker_enabled |
-		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
-		(qhp->attr.mpa_attr.crc_enabled << 2);
-
-	init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
-			   uP_RI_QP_RDMA_WRITE_ENABLE |
-			   uP_RI_QP_BIND_ENABLE;
-	if (!qhp->ibqp.uobject)
-		init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
-				    uP_RI_QP_FAST_REGISTER_ENABLE;
-
-	init_attr.tcp_emss = qhp->ep->emss;
-	init_attr.ord = qhp->attr.max_ord;
-	init_attr.ird = qhp->attr.max_ird;
-	init_attr.qp_dma_addr = qhp->wq.dma_addr;
-	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-	init_attr.rqe_count = iwch_rqes_posted(qhp);
-	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
-	init_attr.chan = qhp->ep->l2t->smt_idx;
-	if (peer2peer) {
-		init_attr.rtr_type = RTR_READ;
-		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
-			init_attr.ord = 1;
-		if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
-			init_attr.ird = 1;
-	} else
-		init_attr.rtr_type = 0;
-	init_attr.irs = qhp->ep->rcv_seq;
-	pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
-		 __func__,
-		 init_attr.rq_addr, init_attr.rq_size,
-		 init_attr.flags, init_attr.qpcaps);
-	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
-	pr_debug("%s ret %d\n", __func__, ret);
-	return ret;
-}
-
-int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
-				enum iwch_qp_attr_mask mask,
-				struct iwch_qp_attributes *attrs,
-				int internal)
-{
-	int ret = 0;
-	struct iwch_qp_attributes newattr = qhp->attr;
-	unsigned long flag;
-	int disconnect = 0;
-	int terminate = 0;
-	int abort = 0;
-	int free = 0;
-	struct iwch_ep *ep = NULL;
-
-	pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
-		 qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
-		 (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
-
-	spin_lock_irqsave(&qhp->lock, flag);
-
-	/* Process attr changes if in IDLE */
-	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
-		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
-			ret = -EIO;
-			goto out;
-		}
-		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
-			newattr.enable_rdma_read = attrs->enable_rdma_read;
-		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
-			newattr.enable_rdma_write = attrs->enable_rdma_write;
-		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
-			newattr.enable_bind = attrs->enable_bind;
-		if (mask & IWCH_QP_ATTR_MAX_ORD) {
-			if (attrs->max_ord >
-			    rhp->attr.max_rdma_read_qp_depth) {
-				ret = -EINVAL;
-				goto out;
-			}
-			newattr.max_ord = attrs->max_ord;
-		}
-		if (mask & IWCH_QP_ATTR_MAX_IRD) {
-			if (attrs->max_ird >
-			    rhp->attr.max_rdma_reads_per_qp) {
-				ret = -EINVAL;
-				goto out;
-			}
-			newattr.max_ird = attrs->max_ird;
-		}
-		qhp->attr = newattr;
-	}
-
-	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
-		goto out;
-	if (qhp->attr.state == attrs->next_state)
-		goto out;
-
-	switch (qhp->attr.state) {
-	case IWCH_QP_STATE_IDLE:
-		switch (attrs->next_state) {
-		case IWCH_QP_STATE_RTS:
-			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			qhp->attr.mpa_attr = attrs->mpa_attr;
-			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
-			qhp->ep = qhp->attr.llp_stream_handle;
-			qhp->attr.state = IWCH_QP_STATE_RTS;
-
-			/*
-			 * Ref the endpoint here and deref when we
-			 * disassociate the endpoint from the QP.  This
-			 * happens in CLOSING->IDLE transition or *->ERROR
-			 * transition.
-			 */
-			get_ep(&qhp->ep->com);
-			spin_unlock_irqrestore(&qhp->lock, flag);
-			ret = rdma_init(rhp, qhp, mask, attrs);
-			spin_lock_irqsave(&qhp->lock, flag);
-			if (ret)
-				goto err;
-			break;
-		case IWCH_QP_STATE_ERROR:
-			qhp->attr.state = IWCH_QP_STATE_ERROR;
-			flush_qp(qhp);
-			break;
-		default:
-			ret = -EINVAL;
-			goto out;
-		}
-		break;
-	case IWCH_QP_STATE_RTS:
-		switch (attrs->next_state) {
-		case IWCH_QP_STATE_CLOSING:
-			BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
-			qhp->attr.state = IWCH_QP_STATE_CLOSING;
-			if (!internal) {
-				abort=0;
-				disconnect = 1;
-				ep = qhp->ep;
-				get_ep(&ep->com);
-			}
-			break;
-		case IWCH_QP_STATE_TERMINATE:
-			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
-			if (qhp->ibqp.uobject)
-				cxio_set_wq_in_error(&qhp->wq);
-			if (!internal)
-				terminate = 1;
-			break;
-		case IWCH_QP_STATE_ERROR:
-			qhp->attr.state = IWCH_QP_STATE_ERROR;
-			if (!internal) {
-				abort=1;
-				disconnect = 1;
-				ep = qhp->ep;
-				get_ep(&ep->com);
-			}
-			goto err;
-			break;
-		default:
-			ret = -EINVAL;
-			goto out;
-		}
-		break;
-	case IWCH_QP_STATE_CLOSING:
-		if (!internal) {
-			ret = -EINVAL;
-			goto out;
-		}
-		switch (attrs->next_state) {
-			case IWCH_QP_STATE_IDLE:
-				flush_qp(qhp);
-				qhp->attr.state = IWCH_QP_STATE_IDLE;
-				qhp->attr.llp_stream_handle = NULL;
-				put_ep(&qhp->ep->com);
-				qhp->ep = NULL;
-				wake_up(&qhp->wait);
-				break;
-			case IWCH_QP_STATE_ERROR:
-				goto err;
-			default:
-				ret = -EINVAL;
-				goto err;
-		}
-		break;
-	case IWCH_QP_STATE_ERROR:
-		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
-		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		qhp->attr.state = IWCH_QP_STATE_IDLE;
-		break;
-	case IWCH_QP_STATE_TERMINATE:
-		if (!internal) {
-			ret = -EINVAL;
-			goto out;
-		}
-		goto err;
-		break;
-	default:
-		pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
-		ret = -EINVAL;
-		goto err;
-		break;
-	}
-	goto out;
-err:
-	pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
-		 qhp->wq.qpid);
-
-	/* disassociate the LLP connection */
-	qhp->attr.llp_stream_handle = NULL;
-	ep = qhp->ep;
-	qhp->ep = NULL;
-	qhp->attr.state = IWCH_QP_STATE_ERROR;
-	free=1;
-	wake_up(&qhp->wait);
-	BUG_ON(!ep);
-	flush_qp(qhp);
-out:
-	spin_unlock_irqrestore(&qhp->lock, flag);
-
-	if (terminate)
-		iwch_post_terminate(qhp, NULL);
-
-	/*
-	 * If disconnect is 1, then we need to initiate a disconnect
-	 * on the EP.  This can be a normal close (RTS->CLOSING) or
-	 * an abnormal close (RTS/CLOSING->ERROR).
-	 */
-	if (disconnect) {
-		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
-		put_ep(&ep->com);
-	}
-
-	/*
-	 * If free is 1, then we've disassociated the EP from the QP
-	 * and we need to dereference the EP.
-	 */
-	if (free)
-		put_ep(&ep->com);
-
-	pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
-	return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
deleted file mode 100644
index c702dc199e18..000000000000
--- a/drivers/infiniband/hw/cxgb3/tcb.h
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _TCB_DEFS_H
-#define _TCB_DEFS_H
-
-#define W_TCB_T_STATE    0
-#define S_TCB_T_STATE    0
-#define M_TCB_T_STATE    0xfULL
-#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
-
-#define W_TCB_TIMER    0
-#define S_TCB_TIMER    4
-#define M_TCB_TIMER    0x1ULL
-#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
-
-#define W_TCB_DACK_TIMER    0
-#define S_TCB_DACK_TIMER    5
-#define M_TCB_DACK_TIMER    0x1ULL
-#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
-
-#define W_TCB_DEL_FLAG    0
-#define S_TCB_DEL_FLAG    6
-#define M_TCB_DEL_FLAG    0x1ULL
-#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
-
-#define W_TCB_L2T_IX    0
-#define S_TCB_L2T_IX    7
-#define M_TCB_L2T_IX    0x7ffULL
-#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
-
-#define W_TCB_SMAC_SEL    0
-#define S_TCB_SMAC_SEL    18
-#define M_TCB_SMAC_SEL    0x3ULL
-#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
-
-#define W_TCB_TOS    0
-#define S_TCB_TOS    20
-#define M_TCB_TOS    0x3fULL
-#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
-
-#define W_TCB_MAX_RT    0
-#define S_TCB_MAX_RT    26
-#define M_TCB_MAX_RT    0xfULL
-#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
-
-#define W_TCB_T_RXTSHIFT    0
-#define S_TCB_T_RXTSHIFT    30
-#define M_TCB_T_RXTSHIFT    0xfULL
-#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
-
-#define W_TCB_T_DUPACKS    1
-#define S_TCB_T_DUPACKS    2
-#define M_TCB_T_DUPACKS    0xfULL
-#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
-
-#define W_TCB_T_MAXSEG    1
-#define S_TCB_T_MAXSEG    6
-#define M_TCB_T_MAXSEG    0xfULL
-#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
-
-#define W_TCB_T_FLAGS1    1
-#define S_TCB_T_FLAGS1    10
-#define M_TCB_T_FLAGS1    0xffffffffULL
-#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
-
-#define W_TCB_T_MIGRATION    1
-#define S_TCB_T_MIGRATION    20
-#define M_TCB_T_MIGRATION    0x1ULL
-#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
-
-#define W_TCB_T_FLAGS2    2
-#define S_TCB_T_FLAGS2    10
-#define M_TCB_T_FLAGS2    0x7fULL
-#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
-
-#define W_TCB_SND_SCALE    2
-#define S_TCB_SND_SCALE    17
-#define M_TCB_SND_SCALE    0xfULL
-#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
-
-#define W_TCB_RCV_SCALE    2
-#define S_TCB_RCV_SCALE    21
-#define M_TCB_RCV_SCALE    0xfULL
-#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
-
-#define W_TCB_SND_UNA_RAW    2
-#define S_TCB_SND_UNA_RAW    25
-#define M_TCB_SND_UNA_RAW    0x7ffffffULL
-#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
-
-#define W_TCB_SND_NXT_RAW    3
-#define S_TCB_SND_NXT_RAW    20
-#define M_TCB_SND_NXT_RAW    0x7ffffffULL
-#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
-
-#define W_TCB_RCV_NXT    4
-#define S_TCB_RCV_NXT    15
-#define M_TCB_RCV_NXT    0xffffffffULL
-#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
-
-#define W_TCB_RCV_ADV    5
-#define S_TCB_RCV_ADV    15
-#define M_TCB_RCV_ADV    0xffffULL
-#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
-
-#define W_TCB_SND_MAX_RAW    5
-#define S_TCB_SND_MAX_RAW    31
-#define M_TCB_SND_MAX_RAW    0x7ffffffULL
-#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
-
-#define W_TCB_SND_CWND    6
-#define S_TCB_SND_CWND    26
-#define M_TCB_SND_CWND    0x7ffffffULL
-#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
-
-#define W_TCB_SND_SSTHRESH    7
-#define S_TCB_SND_SSTHRESH    21
-#define M_TCB_SND_SSTHRESH    0x7ffffffULL
-#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
-
-#define W_TCB_T_RTT_TS_RECENT_AGE    8
-#define S_TCB_T_RTT_TS_RECENT_AGE    16
-#define M_TCB_T_RTT_TS_RECENT_AGE    0xffffffffULL
-#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
-
-#define W_TCB_T_RTSEQ_RECENT    9
-#define S_TCB_T_RTSEQ_RECENT    16
-#define M_TCB_T_RTSEQ_RECENT    0xffffffffULL
-#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
-
-#define W_TCB_T_SRTT    10
-#define S_TCB_T_SRTT    16
-#define M_TCB_T_SRTT    0xffffULL
-#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
-
-#define W_TCB_T_RTTVAR    11
-#define S_TCB_T_RTTVAR    0
-#define M_TCB_T_RTTVAR    0xffffULL
-#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
-
-#define W_TCB_TS_LAST_ACK_SENT_RAW    11
-#define S_TCB_TS_LAST_ACK_SENT_RAW    16
-#define M_TCB_TS_LAST_ACK_SENT_RAW    0x7ffffffULL
-#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
-
-#define W_TCB_DIP    12
-#define S_TCB_DIP    11
-#define M_TCB_DIP    0xffffffffULL
-#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
-
-#define W_TCB_SIP    13
-#define S_TCB_SIP    11
-#define M_TCB_SIP    0xffffffffULL
-#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
-
-#define W_TCB_DP    14
-#define S_TCB_DP    11
-#define M_TCB_DP    0xffffULL
-#define V_TCB_DP(x) ((x) << S_TCB_DP)
-
-#define W_TCB_SP    14
-#define S_TCB_SP    27
-#define M_TCB_SP    0xffffULL
-#define V_TCB_SP(x) ((x) << S_TCB_SP)
-
-#define W_TCB_TIMESTAMP    15
-#define S_TCB_TIMESTAMP    11
-#define M_TCB_TIMESTAMP    0xffffffffULL
-#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
-
-#define W_TCB_TIMESTAMP_OFFSET    16
-#define S_TCB_TIMESTAMP_OFFSET    11
-#define M_TCB_TIMESTAMP_OFFSET    0xfULL
-#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
-
-#define W_TCB_TX_MAX    16
-#define S_TCB_TX_MAX    15
-#define M_TCB_TX_MAX    0xffffffffULL
-#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
-
-#define W_TCB_TX_HDR_PTR_RAW    17
-#define S_TCB_TX_HDR_PTR_RAW    15
-#define M_TCB_TX_HDR_PTR_RAW    0x1ffffULL
-#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
-
-#define W_TCB_TX_LAST_PTR_RAW    18
-#define S_TCB_TX_LAST_PTR_RAW    0
-#define M_TCB_TX_LAST_PTR_RAW    0x1ffffULL
-#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
-
-#define W_TCB_TX_COMPACT    18
-#define S_TCB_TX_COMPACT    17
-#define M_TCB_TX_COMPACT    0x1ULL
-#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
-
-#define W_TCB_RX_COMPACT    18
-#define S_TCB_RX_COMPACT    18
-#define M_TCB_RX_COMPACT    0x1ULL
-#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
-
-#define W_TCB_RCV_WND    18
-#define S_TCB_RCV_WND    19
-#define M_TCB_RCV_WND    0x7ffffffULL
-#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
-
-#define W_TCB_RX_HDR_OFFSET    19
-#define S_TCB_RX_HDR_OFFSET    14
-#define M_TCB_RX_HDR_OFFSET    0x7ffffffULL
-#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
-
-#define W_TCB_RX_FRAG0_START_IDX_RAW    20
-#define S_TCB_RX_FRAG0_START_IDX_RAW    9
-#define M_TCB_RX_FRAG0_START_IDX_RAW    0x7ffffffULL
-#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
-
-#define W_TCB_RX_FRAG1_START_IDX_OFFSET    21
-#define S_TCB_RX_FRAG1_START_IDX_OFFSET    4
-#define M_TCB_RX_FRAG1_START_IDX_OFFSET    0x7ffffffULL
-#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
-
-#define W_TCB_RX_FRAG0_LEN    21
-#define S_TCB_RX_FRAG0_LEN    31
-#define M_TCB_RX_FRAG0_LEN    0x7ffffffULL
-#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
-
-#define W_TCB_RX_FRAG1_LEN    22
-#define S_TCB_RX_FRAG1_LEN    26
-#define M_TCB_RX_FRAG1_LEN    0x7ffffffULL
-#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
-
-#define W_TCB_NEWRENO_RECOVER    23
-#define S_TCB_NEWRENO_RECOVER    21
-#define M_TCB_NEWRENO_RECOVER    0x7ffffffULL
-#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
-
-#define W_TCB_PDU_HAVE_LEN    24
-#define S_TCB_PDU_HAVE_LEN    16
-#define M_TCB_PDU_HAVE_LEN    0x1ULL
-#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
-
-#define W_TCB_PDU_LEN    24
-#define S_TCB_PDU_LEN    17
-#define M_TCB_PDU_LEN    0xffffULL
-#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
-
-#define W_TCB_RX_QUIESCE    25
-#define S_TCB_RX_QUIESCE    1
-#define M_TCB_RX_QUIESCE    0x1ULL
-#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
-
-#define W_TCB_RX_PTR_RAW    25
-#define S_TCB_RX_PTR_RAW    2
-#define M_TCB_RX_PTR_RAW    0x1ffffULL
-#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
-
-#define W_TCB_CPU_NO    25
-#define S_TCB_CPU_NO    19
-#define M_TCB_CPU_NO    0x7fULL
-#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
-
-#define W_TCB_ULP_TYPE    25
-#define S_TCB_ULP_TYPE    26
-#define M_TCB_ULP_TYPE    0xfULL
-#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
-
-#define W_TCB_RX_FRAG1_PTR_RAW    25
-#define S_TCB_RX_FRAG1_PTR_RAW    30
-#define M_TCB_RX_FRAG1_PTR_RAW    0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    26
-#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    15
-#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    0x7ffffffULL
-#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
-
-#define W_TCB_RX_FRAG2_PTR_RAW    27
-#define S_TCB_RX_FRAG2_PTR_RAW    10
-#define M_TCB_RX_FRAG2_PTR_RAW    0x1ffffULL
-#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_LEN_RAW    27
-#define S_TCB_RX_FRAG2_LEN_RAW    27
-#define M_TCB_RX_FRAG2_LEN_RAW    0x7ffffffULL
-#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_PTR_RAW    28
-#define S_TCB_RX_FRAG3_PTR_RAW    22
-#define M_TCB_RX_FRAG3_PTR_RAW    0x1ffffULL
-#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
-
-#define W_TCB_RX_FRAG3_LEN_RAW    29
-#define S_TCB_RX_FRAG3_LEN_RAW    7
-#define M_TCB_RX_FRAG3_LEN_RAW    0x7ffffffULL
-#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    30
-#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    2
-#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    0x7ffffffULL
-#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
-
-#define W_TCB_PDU_HDR_LEN    30
-#define S_TCB_PDU_HDR_LEN    29
-#define M_TCB_PDU_HDR_LEN    0xffULL
-#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
-
-#define W_TCB_SLUSH1    31
-#define S_TCB_SLUSH1    5
-#define M_TCB_SLUSH1    0x7ffffULL
-#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
-
-#define W_TCB_ULP_RAW    31
-#define S_TCB_ULP_RAW    24
-#define M_TCB_ULP_RAW    0xffULL
-#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
-
-#define W_TCB_DDP_RDMAP_VERSION    25
-#define S_TCB_DDP_RDMAP_VERSION    30
-#define M_TCB_DDP_RDMAP_VERSION    0x1ULL
-#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
-
-#define W_TCB_MARKER_ENABLE_RX    25
-#define S_TCB_MARKER_ENABLE_RX    31
-#define M_TCB_MARKER_ENABLE_RX    0x1ULL
-#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
-
-#define W_TCB_MARKER_ENABLE_TX    26
-#define S_TCB_MARKER_ENABLE_TX    0
-#define M_TCB_MARKER_ENABLE_TX    0x1ULL
-#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
-
-#define W_TCB_CRC_ENABLE    26
-#define S_TCB_CRC_ENABLE    1
-#define M_TCB_CRC_ENABLE    0x1ULL
-#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
-
-#define W_TCB_IRS_ULP    26
-#define S_TCB_IRS_ULP    2
-#define M_TCB_IRS_ULP    0x1ffULL
-#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
-
-#define W_TCB_ISS_ULP    26
-#define S_TCB_ISS_ULP    11
-#define M_TCB_ISS_ULP    0x1ffULL
-#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
-
-#define W_TCB_TX_PDU_LEN    26
-#define S_TCB_TX_PDU_LEN    20
-#define M_TCB_TX_PDU_LEN    0x3fffULL
-#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
-
-#define W_TCB_TX_PDU_OUT    27
-#define S_TCB_TX_PDU_OUT    2
-#define M_TCB_TX_PDU_OUT    0x1ULL
-#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
-
-#define W_TCB_CQ_IDX_SQ    27
-#define S_TCB_CQ_IDX_SQ    3
-#define M_TCB_CQ_IDX_SQ    0xffffULL
-#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
-
-#define W_TCB_CQ_IDX_RQ    27
-#define S_TCB_CQ_IDX_RQ    19
-#define M_TCB_CQ_IDX_RQ    0xffffULL
-#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
-
-#define W_TCB_QP_ID    28
-#define S_TCB_QP_ID    3
-#define M_TCB_QP_ID    0xffffULL
-#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
-
-#define W_TCB_PD_ID    28
-#define S_TCB_PD_ID    19
-#define M_TCB_PD_ID    0xffffULL
-#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
-
-#define W_TCB_STAG    29
-#define S_TCB_STAG    3
-#define M_TCB_STAG    0xffffffffULL
-#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
-
-#define W_TCB_RQ_START    30
-#define S_TCB_RQ_START    3
-#define M_TCB_RQ_START    0x3ffffffULL
-#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
-
-#define W_TCB_RQ_MSN    30
-#define S_TCB_RQ_MSN    29
-#define M_TCB_RQ_MSN    0x3ffULL
-#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
-
-#define W_TCB_RQ_MAX_OFFSET    31
-#define S_TCB_RQ_MAX_OFFSET    7
-#define M_TCB_RQ_MAX_OFFSET    0xfULL
-#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
-
-#define W_TCB_RQ_WRITE_PTR    31
-#define S_TCB_RQ_WRITE_PTR    11
-#define M_TCB_RQ_WRITE_PTR    0x3ffULL
-#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
-
-#define W_TCB_INB_WRITE_PERM    31
-#define S_TCB_INB_WRITE_PERM    21
-#define M_TCB_INB_WRITE_PERM    0x1ULL
-#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
-
-#define W_TCB_INB_READ_PERM    31
-#define S_TCB_INB_READ_PERM    22
-#define M_TCB_INB_READ_PERM    0x1ULL
-#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
-
-#define W_TCB_ORD_L_BIT_VLD    31
-#define S_TCB_ORD_L_BIT_VLD    23
-#define M_TCB_ORD_L_BIT_VLD    0x1ULL
-#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
-
-#define W_TCB_RDMAP_OPCODE    31
-#define S_TCB_RDMAP_OPCODE    24
-#define M_TCB_RDMAP_OPCODE    0xfULL
-#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
-
-#define W_TCB_TX_FLUSH    31
-#define S_TCB_TX_FLUSH    28
-#define M_TCB_TX_FLUSH    0x1ULL
-#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
-
-#define W_TCB_TX_OOS_RXMT    31
-#define S_TCB_TX_OOS_RXMT    29
-#define M_TCB_TX_OOS_RXMT    0x1ULL
-#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
-
-#define W_TCB_TX_OOS_TXMT    31
-#define S_TCB_TX_OOS_TXMT    30
-#define M_TCB_TX_OOS_TXMT    0x1ULL
-#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
-
-#define W_TCB_SLUSH_AUX2    31
-#define S_TCB_SLUSH_AUX2    31
-#define M_TCB_SLUSH_AUX2    0x1ULL
-#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
-
-#define W_TCB_RX_FRAG1_PTR_RAW2    25
-#define S_TCB_RX_FRAG1_PTR_RAW2    30
-#define M_TCB_RX_FRAG1_PTR_RAW2    0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
-
-#define W_TCB_RX_DDP_FLAGS    26
-#define S_TCB_RX_DDP_FLAGS    15
-#define M_TCB_RX_DDP_FLAGS    0x3ffULL
-#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
-
-#define W_TCB_SLUSH_AUX3    26
-#define S_TCB_SLUSH_AUX3    31
-#define M_TCB_SLUSH_AUX3    0x1ffULL
-#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
-
-#define W_TCB_RX_DDP_BUF0_OFFSET    27
-#define S_TCB_RX_DDP_BUF0_OFFSET    8
-#define M_TCB_RX_DDP_BUF0_OFFSET    0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
-
-#define W_TCB_RX_DDP_BUF0_LEN    27
-#define S_TCB_RX_DDP_BUF0_LEN    30
-#define M_TCB_RX_DDP_BUF0_LEN    0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
-
-#define W_TCB_RX_DDP_BUF1_OFFSET    28
-#define S_TCB_RX_DDP_BUF1_OFFSET    20
-#define M_TCB_RX_DDP_BUF1_OFFSET    0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
-
-#define W_TCB_RX_DDP_BUF1_LEN    29
-#define S_TCB_RX_DDP_BUF1_LEN    10
-#define M_TCB_RX_DDP_BUF1_LEN    0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
-
-#define W_TCB_RX_DDP_BUF0_TAG    30
-#define S_TCB_RX_DDP_BUF0_TAG    0
-#define M_TCB_RX_DDP_BUF0_TAG    0xffffffffULL
-#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
-
-#define W_TCB_RX_DDP_BUF1_TAG    31
-#define S_TCB_RX_DDP_BUF1_TAG    0
-#define M_TCB_RX_DDP_BUF1_TAG    0xffffffffULL
-#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
-
-#define S_TF_DACK    10
-#define V_TF_DACK(x) ((x) << S_TF_DACK)
-
-#define S_TF_NAGLE    11
-#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
-
-#define S_TF_RECV_SCALE    12
-#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
-
-#define S_TF_RECV_TSTMP    13
-#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
-
-#define S_TF_RECV_SACK    14
-#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
-
-#define S_TF_TURBO    15
-#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
-
-#define S_TF_KEEPALIVE    16
-#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
-
-#define S_TF_TCAM_BYPASS    17
-#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
-
-#define S_TF_CORE_FIN    18
-#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
-
-#define S_TF_CORE_MORE    19
-#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
-
-#define S_TF_MIGRATING    20
-#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
-
-#define S_TF_ACTIVE_OPEN    21
-#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
-
-#define S_TF_ASK_MODE    22
-#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
-
-#define S_TF_NON_OFFLOAD    23
-#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
-
-#define S_TF_MOD_SCHD    24
-#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
-
-#define S_TF_MOD_SCHD_REASON0    25
-#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
-
-#define S_TF_MOD_SCHD_REASON1    26
-#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
-
-#define S_TF_MOD_SCHD_RX    27
-#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
-
-#define S_TF_CORE_PUSH    28
-#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
-
-#define S_TF_RCV_COALESCE_ENABLE    29
-#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
-
-#define S_TF_RCV_COALESCE_PUSH    30
-#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
-
-#define S_TF_RCV_COALESCE_LAST_PSH    31
-#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
-
-#define S_TF_RCV_COALESCE_HEARTBEAT    32
-#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
-
-#define S_TF_HALF_CLOSE    33
-#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
-
-#define S_TF_DACK_MSS    34
-#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
-
-#define S_TF_CCTRL_SEL0    35
-#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
-
-#define S_TF_CCTRL_SEL1    36
-#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
-
-#define S_TF_TCP_NEWRENO_FAST_RECOVERY    37
-#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
-
-#define S_TF_TX_PACE_AUTO    38
-#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
-
-#define S_TF_PEER_FIN_HELD    39
-#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
-
-#define S_TF_CORE_URG    40
-#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
-
-#define S_TF_RDMA_ERROR    41
-#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
-
-#define S_TF_SSWS_DISABLED    42
-#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
-
-#define S_TF_DUPACK_COUNT_ODD    43
-#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
-
-#define S_TF_TX_CHANNEL    44
-#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
-
-#define S_TF_RX_CHANNEL    45
-#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
-
-#define S_TF_TX_PACE_FIXED    46
-#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
-
-#define S_TF_RDMA_FLM_ERROR    47
-#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
-
-#define S_TF_RX_FLOW_CONTROL_DISABLE    48
-#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
-
-#endif /* _TCB_DEFS_H */
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 347dc242fb88..ee1182f9b627 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3379,7 +3379,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
 			err = pick_local_ipaddrs(dev, cm_id);
 			if (err)
-				goto fail2;
+				goto fail3;
 		}
 
 		/* find a route */
@@ -3401,7 +3401,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
 			err = pick_local_ip6addrs(dev, cm_id);
 			if (err)
-				goto fail2;
+				goto fail3;
 		}
 
 		/* find a route */
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 35c284af574d..fe3a7e8561df 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -543,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mhp->rhp = rhp;
 
-	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
+	mhp->umem = ib_umem_get(udata, start, length, acc);
 	if (IS_ERR(mhp->umem))
 		goto err_free_skb;
 
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index d373ac0fe2cb..ba83d942997c 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 			   struct ib_port_attr *props)
 {
+	int ret = 0;
 	pr_debug("ibdev %p\n", ibdev);
+	ret = ib_get_eth_speed(ibdev, port, &props->active_speed,
+			       &props->active_width);
 
 	props->port_cap_flags =
 	    IB_PORT_CM_SUP |
@@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
 	props->gid_tbl_len = 1;
 	props->pkey_tbl_len = 1;
-	props->active_width = 2;
-	props->active_speed = IB_SPEED_DDR;
 	props->max_msg_sz = -1;
 
-	return 0;
+	return ret;
 }
 
 static ssize_t hw_rev_show(struct device *dev,
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 2283e432693e..aa7396a1588a 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -60,8 +60,6 @@ struct efa_dev {
 	u64 mem_bar_len;
 	u64 db_bar_addr;
 	u64 db_bar_len;
-	u8 addr[EFA_GID_SIZE];
-	u32 mtu;
 
 	int admin_msix_vector_idx;
 	struct efa_irq admin_irq;
@@ -71,8 +69,6 @@ struct efa_dev {
 
 struct efa_ucontext {
 	struct ib_ucontext ibucontext;
-	struct xarray mmap_xa;
-	u32 mmap_xa_page;
 	u16 uarn;
 };
 
@@ -91,6 +87,7 @@ struct efa_cq {
 	struct efa_ucontext *ucontext;
 	dma_addr_t dma_addr;
 	void *cpu_addr;
+	struct rdma_user_mmap_entry *mmap_entry;
 	size_t size;
 	u16 cq_idx;
 };
@@ -101,6 +98,13 @@ struct efa_qp {
 	void *rq_cpu_addr;
 	size_t rq_size;
 	enum ib_qp_state state;
+
+	/* Used for saving mmap_xa entries */
+	struct rdma_user_mmap_entry *sq_db_mmap_entry;
+	struct rdma_user_mmap_entry *llq_desc_mmap_entry;
+	struct rdma_user_mmap_entry *rq_db_mmap_entry;
+	struct rdma_user_mmap_entry *rq_mmap_entry;
+
 	u32 qp_handle;
 	u32 max_send_wr;
 	u32 max_recv_wr;
@@ -147,6 +151,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
 int efa_mmap(struct ib_ucontext *ibucontext,
 	     struct vm_area_struct *vma);
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 int efa_create_ah(struct ib_ah *ibah,
 		  struct rdma_ah_attr *ah_attr,
 		  u32 flags,
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 2be0469d545f..e96bcb16bd2b 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -362,9 +362,13 @@ struct efa_admin_reg_mr_cmd {
 
 	/*
 	 * permissions
-	 * 0 : local_write_enable - Write permissions: value
-	 *    of 1 needed for RQ buffers and for RDMA write
-	 * 7:1 : reserved1 - remote access flags, etc
+	 * 0 : local_write_enable - Local write permissions:
+	 *    must be set for RQ buffers and buffers posted for
+	 *    RDMA Read requests
+	 * 1 : reserved1 - MBZ
+	 * 2 : remote_read_enable - Remote read permissions:
+	 *    must be set to enable RDMA read from the region
+	 * 7:3 : reserved2 - MBZ
 	 */
 	u8 permissions;
 
@@ -558,6 +562,16 @@ struct efa_admin_feature_device_attr_desc {
 
 	/* Indicates how many bits are used virtual address access */
 	u8 virt_addr_width;
+
+	/*
+	 * 0 : rdma_read - If set, RDMA Read is supported on
+	 *    TX queues
+	 * 31:1 : reserved - MBZ
+	 */
+	u32 device_caps;
+
+	/* Max RDMA transfer size in bytes */
+	u32 max_rdma_size;
 };
 
 struct efa_admin_feature_queue_attr_desc {
@@ -604,6 +618,9 @@ struct efa_admin_feature_queue_attr_desc {
 
 	/* The maximum size of LLQ in bytes */
 	u32 max_llq_size;
+
+	/* Maximum number of SGEs for a single RDMA read WQE */
+	u16 max_wr_rdma_sges;
 };
 
 struct efa_admin_feature_aenq_desc {
@@ -618,6 +635,7 @@ struct efa_admin_feature_network_attr_desc {
 	/* Raw address data in network byte order */
 	u8 addr[16];
 
+	/* max packet payload size in bytes */
 	u32 mtu;
 };
 
@@ -780,6 +798,8 @@ struct efa_admin_mmio_req_read_less_resp {
 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT     7
 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK      BIT(7)
 #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK        BIT(0)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT       2
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK        BIT(2)
 
 /* create_cq_cmd */
 #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
@@ -791,4 +811,7 @@ struct efa_admin_mmio_req_read_less_resp {
 /* get_set_feature_common_desc */
 #define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK   GENMASK(1, 0)
 
+/* feature_device_attr_desc */
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK   BIT(0)
+
 #endif /* _EFA_ADMIN_CMDS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 3c412bc5b94f..0778f4f7dccd 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -317,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
 						       struct efa_admin_acq_entry *comp,
 						       size_t comp_size_in_bytes)
 {
+	struct efa_admin_aq_entry *aqe;
 	struct efa_comp_ctx *comp_ctx;
 	u16 queue_size_mask;
 	u16 cmd_id;
@@ -350,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
 
 	reinit_completion(&comp_ctx->wait_event);
 
-	memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
+	aqe = &aq->sq.entries[pi];
+	memset(aqe, 0, sizeof(*aqe));
+	memcpy(aqe, cmd, cmd_size_in_bytes);
 
 	aq->sq.pc++;
 	atomic64_inc(&aq->stats.submitted_cmd);
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index c079f1332082..e20bd84a1014 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -230,8 +230,7 @@ int efa_com_register_mr(struct efa_com_dev *edev,
 	mr_cmd.flags |= params->page_shift &
 		EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
 	mr_cmd.iova = params->iova;
-	mr_cmd.permissions |= params->permissions &
-			      EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
+	mr_cmd.permissions = params->permissions;
 
 	if (params->inline_pbl) {
 		memcpy(mr_cmd.pbl.inline_pbl_array,
@@ -423,28 +422,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev,
 	return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
 }
 
-int efa_com_get_network_attr(struct efa_com_dev *edev,
-			     struct efa_com_get_network_attr_result *result)
-{
-	struct efa_admin_get_feature_resp resp;
-	int err;
-
-	err = efa_com_get_feature(edev, &resp,
-				  EFA_ADMIN_NETWORK_ATTR);
-	if (err) {
-		ibdev_err_ratelimited(edev->efa_dev,
-				      "Failed to get network attributes %d\n",
-				      err);
-		return err;
-	}
-
-	memcpy(result->addr, resp.u.network_attr.addr,
-	       sizeof(resp.u.network_attr.addr));
-	result->mtu = resp.u.network_attr.mtu;
-
-	return 0;
-}
-
 int efa_com_get_device_attr(struct efa_com_dev *edev,
 			    struct efa_com_get_device_attr_result *result)
 {
@@ -467,6 +444,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
 	result->phys_addr_width = resp.u.device_attr.phys_addr_width;
 	result->virt_addr_width = resp.u.device_attr.virt_addr_width;
 	result->db_bar = resp.u.device_attr.db_bar;
+	result->max_rdma_size = resp.u.device_attr.max_rdma_size;
+	result->device_caps = resp.u.device_attr.device_caps;
 
 	if (result->admin_api_version < 1) {
 		ibdev_err_ratelimited(
@@ -500,6 +479,19 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
 	result->max_ah = resp.u.queue_attr.max_ah;
 	result->max_llq_size = resp.u.queue_attr.max_llq_size;
 	result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+	result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+
+	err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
+	if (err) {
+		ibdev_err_ratelimited(edev->efa_dev,
+				      "Failed to get network attributes %d\n",
+				      err);
+		return err;
+	}
+
+	memcpy(result->addr, resp.u.network_attr.addr,
+	       sizeof(resp.u.network_attr.addr));
+	result->mtu = resp.u.network_attr.mtu;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 7f6c13052f49..31db5a0cbd5b 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params {
 	u16 pdn;
 };
 
-struct efa_com_get_network_attr_result {
-	u8 addr[EFA_GID_SIZE];
-	u32 mtu;
-};
-
 struct efa_com_get_device_attr_result {
+	u8 addr[EFA_GID_SIZE];
 	u64 page_size_cap;
 	u64 max_mr_pages;
+	u32 mtu;
 	u32 fw_version;
 	u32 admin_api_version;
 	u32 device_version;
@@ -124,9 +121,12 @@ struct efa_com_get_device_attr_result {
 	u32 max_pd;
 	u32 max_ah;
 	u32 max_llq_size;
+	u32 max_rdma_size;
+	u32 device_caps;
 	u16 sub_cqs_per_cq;
 	u16 max_sq_sge;
 	u16 max_rq_sge;
+	u16 max_wr_rdma_sge;
 	u8 db_bar;
 };
 
@@ -181,12 +181,7 @@ struct efa_com_reg_mr_params {
 	 * address mapping
 	 */
 	u8 page_shift;
-	/*
-	 * permissions
-	 * 0: local_write_enable - Write permissions: value of 1 needed
-	 * for RQ buffers and for RDMA write:1: reserved1 - remote
-	 * access flags, etc
-	 */
+	/* see permissions field of struct efa_admin_reg_mr_cmd */
 	u8 permissions;
 	u8 inline_pbl;
 	u8 indirect;
@@ -271,8 +266,6 @@ int efa_com_create_ah(struct efa_com_dev *edev,
 		      struct efa_com_create_ah_result *result);
 int efa_com_destroy_ah(struct efa_com_dev *edev,
 		       struct efa_com_destroy_ah_params *params);
-int efa_com_get_network_attr(struct efa_com_dev *edev,
-			     struct efa_com_get_network_attr_result *result);
 int efa_com_get_device_attr(struct efa_com_dev *edev,
 			    struct efa_com_get_device_attr_result *result);
 int efa_com_get_hw_hints(struct efa_com_dev *edev,
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 83858f7e83d0..faf3ff1bca2a 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -30,15 +30,6 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
 	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
 	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
 
-static void efa_update_network_attr(struct efa_dev *dev,
-				    struct efa_com_get_network_attr_result *network_attr)
-{
-	memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr));
-	dev->mtu = network_attr->mtu;
-
-	dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr);
-}
-
 /* This handler will called for unknown event group or unimplemented handlers */
 static void unimplemented_aenq_handler(void *data,
 				       struct efa_admin_aenq_entry *aenq_e)
@@ -217,6 +208,7 @@ static const struct ib_device_ops efa_dev_ops = {
 	.get_link_layer = efa_port_link_layer,
 	.get_port_immutable = efa_get_port_immutable,
 	.mmap = efa_mmap,
+	.mmap_free = efa_mmap_free,
 	.modify_qp = efa_modify_qp,
 	.query_device = efa_query_device,
 	.query_gid = efa_query_gid,
@@ -233,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = {
 
 static int efa_ib_device_add(struct efa_dev *dev)
 {
-	struct efa_com_get_network_attr_result network_attr;
 	struct efa_com_get_hw_hints_result hw_hints;
 	struct pci_dev *pdev = dev->pdev;
 	int err;
@@ -249,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev)
 	if (err)
 		return err;
 
-	err = efa_com_get_network_attr(&dev->edev, &network_attr);
-	if (err)
-		goto err_release_doorbell_bar;
-
-	efa_update_network_attr(dev, &network_attr);
-
 	err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
 	if (err)
 		goto err_release_doorbell_bar;
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 4edae89e8e3c..c9d294caa27a 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -13,10 +13,6 @@
 
 #include "efa.h"
 
-#define EFA_MMAP_FLAG_SHIFT 56
-#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
-#define EFA_MMAP_INVALID U64_MAX
-
 enum {
 	EFA_MMAP_DMA_PAGE = 0,
 	EFA_MMAP_IO_WC,
@@ -27,20 +23,12 @@ enum {
 	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
 	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
 
-struct efa_mmap_entry {
-	void  *obj;
+struct efa_user_mmap_entry {
+	struct rdma_user_mmap_entry rdma_entry;
 	u64 address;
-	u64 length;
-	u32 mmap_page;
 	u8 mmap_flag;
 };
 
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
-{
-	return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
-	       ((u64)efa->mmap_page << PAGE_SHIFT);
-}
-
 #define EFA_DEFINE_STATS(op) \
 	op(EFA_TX_BYTES, "tx_bytes") \
 	op(EFA_TX_PKTS, "tx_pkts") \
@@ -82,8 +70,6 @@ static const char *const efa_stats_names[] = {
 #define EFA_CHUNK_USED_SIZE \
 	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
 
-#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
-
 struct pbl_chunk {
 	dma_addr_t dma_addr;
 	u64 *buf;
@@ -147,6 +133,17 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah)
 	return container_of(ibah, struct efa_ah, ibah);
 }
 
+static inline struct efa_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
+}
+
+static inline bool is_rdma_read_cap(struct efa_dev *dev)
+{
+	return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
+}
+
 #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
 				 FIELD_SIZEOF(typeof(x), fld) <= (sz))
 
@@ -172,106 +169,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
 	return addr;
 }
 
-/*
- * This is only called when the ucontext is destroyed and there can be no
- * concurrent query via mmap or allocate on the xarray, thus we can be sure no
- * other thread is using the entry pointer. We also know that all the BAR
- * pages have either been zap'd or munmaped at this point.  Normal pages are
- * refcounted and will be freed at the proper time.
- */
-static void mmap_entries_remove_free(struct efa_dev *dev,
-				     struct efa_ucontext *ucontext)
-{
-	struct efa_mmap_entry *entry;
-	unsigned long mmap_page;
-
-	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
-		xa_erase(&ucontext->mmap_xa, mmap_page);
-
-		ibdev_dbg(
-			&dev->ibdev,
-			"mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
-			entry->obj, get_mmap_key(entry), entry->address,
-			entry->length);
-		if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
-			/* DMA mapping is already gone, now free the pages */
-			free_pages_exact(phys_to_virt(entry->address),
-					 entry->length);
-		kfree(entry);
-	}
-}
-
-static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
-					     struct efa_ucontext *ucontext,
-					     u64 key, u64 len)
-{
-	struct efa_mmap_entry *entry;
-	u64 mmap_page;
-
-	mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
-	if (mmap_page > U32_MAX)
-		return NULL;
-
-	entry = xa_load(&ucontext->mmap_xa, mmap_page);
-	if (!entry || get_mmap_key(entry) != key || entry->length != len)
-		return NULL;
-
-	ibdev_dbg(&dev->ibdev,
-		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
-		  entry->obj, key, entry->address, entry->length);
-
-	return entry;
-}
-
-/*
- * Note this locking scheme cannot support removal of entries, except during
- * ucontext destruction when the core code guarentees no concurrency.
- */
-static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
-			     void *obj, u64 address, u64 length, u8 mmap_flag)
-{
-	struct efa_mmap_entry *entry;
-	u32 next_mmap_page;
-	int err;
-
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return EFA_MMAP_INVALID;
-
-	entry->obj = obj;
-	entry->address = address;
-	entry->length = length;
-	entry->mmap_flag = mmap_flag;
-
-	xa_lock(&ucontext->mmap_xa);
-	if (check_add_overflow(ucontext->mmap_xa_page,
-			       (u32)(length >> PAGE_SHIFT),
-			       &next_mmap_page))
-		goto err_unlock;
-
-	entry->mmap_page = ucontext->mmap_xa_page;
-	ucontext->mmap_xa_page = next_mmap_page;
-	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
-			  GFP_KERNEL);
-	if (err)
-		goto err_unlock;
-
-	xa_unlock(&ucontext->mmap_xa);
-
-	ibdev_dbg(
-		&dev->ibdev,
-		"mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
-		entry->obj, entry->address, entry->length, get_mmap_key(entry));
-
-	return get_mmap_key(entry);
-
-err_unlock:
-	xa_unlock(&ucontext->mmap_xa);
-	kfree(entry);
-	return EFA_MMAP_INVALID;
-
-}
-
 int efa_query_device(struct ib_device *ibdev,
 		     struct ib_device_attr *props,
 		     struct ib_udata *udata)
@@ -306,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev,
 				 dev_attr->max_rq_depth);
 	props->max_send_sge = dev_attr->max_sq_sge;
 	props->max_recv_sge = dev_attr->max_rq_sge;
+	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
 
 	if (udata && udata->outlen) {
 		resp.max_sq_sge = dev_attr->max_sq_sge;
 		resp.max_rq_sge = dev_attr->max_rq_sge;
 		resp.max_sq_wr = dev_attr->max_sq_depth;
 		resp.max_rq_wr = dev_attr->max_rq_depth;
+		resp.max_rdma_size = dev_attr->max_rdma_size;
+
+		if (is_rdma_read_cap(dev))
+			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
 
 		err = ib_copy_to_udata(udata, &resp,
 				       min(sizeof(resp), udata->outlen));
@@ -338,9 +240,9 @@ int efa_query_port(struct ib_device *ibdev, u8 port,
 	props->pkey_tbl_len = 1;
 	props->active_speed = IB_SPEED_EDR;
 	props->active_width = IB_WIDTH_4X;
-	props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
-	props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
-	props->max_msg_sz = dev->mtu;
+	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+	props->max_msg_sz = dev->dev_attr.mtu;
 	props->max_vl_num = 1;
 
 	return 0;
@@ -401,7 +303,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
 {
 	struct efa_dev *dev = to_edev(ibdev);
 
-	memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
 
 	return 0;
 }
@@ -485,8 +387,19 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
 	return efa_com_destroy_qp(&dev->edev, &params);
 }
 
+static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx,
+					    struct efa_qp *qp)
+{
+	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
+	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
+	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
+	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
+}
+
 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
+	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+		struct efa_ucontext, ibucontext);
 	struct efa_dev *dev = to_edev(ibqp->pd->device);
 	struct efa_qp *qp = to_eqp(ibqp);
 	int err;
@@ -505,61 +418,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 				 DMA_TO_DEVICE);
 	}
 
+	efa_qp_user_mmap_entries_remove(ucontext, qp);
 	kfree(qp);
 	return 0;
 }
 
+static struct rdma_user_mmap_entry*
+efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+			   u64 address, size_t length,
+			   u8 mmap_flag, u64 *offset)
+{
+	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	int err;
+
+	if (!entry)
+		return NULL;
+
+	entry->address = address;
+	entry->mmap_flag = mmap_flag;
+
+	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
+					  length);
+	if (err) {
+		kfree(entry);
+		return NULL;
+	}
+	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+	return &entry->rdma_entry;
+}
+
 static int qp_mmap_entries_setup(struct efa_qp *qp,
 				 struct efa_dev *dev,
 				 struct efa_ucontext *ucontext,
 				 struct efa_com_create_qp_params *params,
 				 struct efa_ibv_create_qp_resp *resp)
 {
-	/*
-	 * Once an entry is inserted it might be mmapped, hence cannot be
-	 * cleaned up until dealloc_ucontext.
-	 */
-	resp->sq_db_mmap_key =
-		mmap_entry_insert(dev, ucontext, qp,
-				  dev->db_bar_addr + resp->sq_db_offset,
-				  PAGE_SIZE, EFA_MMAP_IO_NC);
-	if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+	size_t length;
+	u64 address;
+
+	address = dev->db_bar_addr + resp->sq_db_offset;
+	qp->sq_db_mmap_entry =
+		efa_user_mmap_entry_insert(&ucontext->ibucontext,
+					   address,
+					   PAGE_SIZE, EFA_MMAP_IO_NC,
+					   &resp->sq_db_mmap_key);
+	if (!qp->sq_db_mmap_entry)
 		return -ENOMEM;
 
 	resp->sq_db_offset &= ~PAGE_MASK;
 
-	resp->llq_desc_mmap_key =
-		mmap_entry_insert(dev, ucontext, qp,
-				  dev->mem_bar_addr + resp->llq_desc_offset,
-				  PAGE_ALIGN(params->sq_ring_size_in_bytes +
-					     (resp->llq_desc_offset & ~PAGE_MASK)),
-				  EFA_MMAP_IO_WC);
-	if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
-		return -ENOMEM;
+	address = dev->mem_bar_addr + resp->llq_desc_offset;
+	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+			    (resp->llq_desc_offset & ~PAGE_MASK));
+
+	qp->llq_desc_mmap_entry =
+		efa_user_mmap_entry_insert(&ucontext->ibucontext,
+					   address, length,
+					   EFA_MMAP_IO_WC,
+					   &resp->llq_desc_mmap_key);
+	if (!qp->llq_desc_mmap_entry)
+		goto err_remove_mmap;
 
 	resp->llq_desc_offset &= ~PAGE_MASK;
 
 	if (qp->rq_size) {
-		resp->rq_db_mmap_key =
-			mmap_entry_insert(dev, ucontext, qp,
-					  dev->db_bar_addr + resp->rq_db_offset,
-					  PAGE_SIZE, EFA_MMAP_IO_NC);
-		if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
-			return -ENOMEM;
+		address = dev->db_bar_addr + resp->rq_db_offset;
+
+		qp->rq_db_mmap_entry =
+			efa_user_mmap_entry_insert(&ucontext->ibucontext,
+						   address, PAGE_SIZE,
+						   EFA_MMAP_IO_NC,
+						   &resp->rq_db_mmap_key);
+		if (!qp->rq_db_mmap_entry)
+			goto err_remove_mmap;
 
 		resp->rq_db_offset &= ~PAGE_MASK;
 
-		resp->rq_mmap_key =
-			mmap_entry_insert(dev, ucontext, qp,
-					  virt_to_phys(qp->rq_cpu_addr),
-					  qp->rq_size, EFA_MMAP_DMA_PAGE);
-		if (resp->rq_mmap_key == EFA_MMAP_INVALID)
-			return -ENOMEM;
+		address = virt_to_phys(qp->rq_cpu_addr);
+		qp->rq_mmap_entry =
+			efa_user_mmap_entry_insert(&ucontext->ibucontext,
+						   address, qp->rq_size,
+						   EFA_MMAP_DMA_PAGE,
+						   &resp->rq_mmap_key);
+		if (!qp->rq_mmap_entry)
+			goto err_remove_mmap;
 
 		resp->rq_mmap_size = qp->rq_size;
 	}
 
 	return 0;
+
+err_remove_mmap:
+	efa_qp_user_mmap_entries_remove(ucontext, qp);
+
+	return -ENOMEM;
 }
 
 static int efa_qp_validate_cap(struct efa_dev *dev,
@@ -634,7 +587,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 	struct efa_dev *dev = to_edev(ibpd->device);
 	struct efa_ibv_create_qp_resp resp = {};
 	struct efa_ibv_create_qp cmd = {};
-	bool rq_entry_inserted = false;
 	struct efa_ucontext *ucontext;
 	struct efa_qp *qp;
 	int err;
@@ -742,7 +694,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 	if (err)
 		goto err_destroy_qp;
 
-	rq_entry_inserted = true;
 	qp->qp_handle = create_qp_resp.qp_handle;
 	qp->ibqp.qp_num = create_qp_resp.qp_num;
 	qp->ibqp.qp_type = init_attr->qp_type;
@@ -759,7 +710,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 			ibdev_dbg(&dev->ibdev,
 				  "Failed to copy udata for qp[%u]\n",
 				  create_qp_resp.qp_num);
-			goto err_destroy_qp;
+			goto err_remove_mmap_entries;
 		}
 	}
 
@@ -767,13 +718,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 
 	return &qp->ibqp;
 
+err_remove_mmap_entries:
+	efa_qp_user_mmap_entries_remove(ucontext, qp);
 err_destroy_qp:
 	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
 err_free_mapped:
 	if (qp->rq_size) {
 		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
 				 DMA_TO_DEVICE);
-		if (!rq_entry_inserted)
+
+		if (!qp->rq_mmap_entry)
 			free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
 	}
 err_free_qp:
@@ -897,16 +851,18 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	efa_destroy_cq_idx(dev, cq->cq_idx);
 	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
 			 DMA_FROM_DEVICE);
+	rdma_user_mmap_entry_remove(cq->mmap_entry);
 }
 
 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
 				 struct efa_ibv_create_cq_resp *resp)
 {
 	resp->q_mmap_size = cq->size;
-	resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
-					     virt_to_phys(cq->cpu_addr),
-					     cq->size, EFA_MMAP_DMA_PAGE);
-	if (resp->q_mmap_key == EFA_MMAP_INVALID)
+	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+						    virt_to_phys(cq->cpu_addr),
+						    cq->size, EFA_MMAP_DMA_PAGE,
+						    &resp->q_mmap_key);
+	if (!cq->mmap_entry)
 		return -ENOMEM;
 
 	return 0;
@@ -924,7 +880,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct efa_dev *dev = to_edev(ibdev);
 	struct efa_ibv_create_cq cmd = {};
 	struct efa_cq *cq = to_ecq(ibcq);
-	bool cq_entry_inserted = false;
 	int entries = attr->cqe;
 	int err;
 
@@ -1013,15 +968,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		goto err_destroy_cq;
 	}
 
-	cq_entry_inserted = true;
-
 	if (udata->outlen) {
 		err = ib_copy_to_udata(udata, &resp,
 				       min(sizeof(resp), udata->outlen));
 		if (err) {
 			ibdev_dbg(ibdev,
 				  "Failed to copy udata for create_cq\n");
-			goto err_destroy_cq;
+			goto err_remove_mmap;
 		}
 	}
 
@@ -1030,13 +983,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	return 0;
 
+err_remove_mmap:
+	rdma_user_mmap_entry_remove(cq->mmap_entry);
 err_destroy_cq:
 	efa_destroy_cq_idx(dev, cq->cq_idx);
 err_free_mapped:
 	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
 			 DMA_FROM_DEVICE);
-	if (!cq_entry_inserted)
+	if (!cq->mmap_entry)
 		free_pages_exact(cq->cpu_addr, cq->size);
+
 err_out:
 	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
 	return err;
@@ -1396,6 +1352,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	struct efa_com_reg_mr_params params = {};
 	struct efa_com_reg_mr_result result = {};
 	struct pbl_context pbl;
+	int supp_access_flags;
 	unsigned int pg_sz;
 	struct efa_mr *mr;
 	int inline_size;
@@ -1409,10 +1366,14 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 		goto err_out;
 	}
 
-	if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+	supp_access_flags =
+		IB_ACCESS_LOCAL_WRITE |
+		(is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);
+
+	if (access_flags & ~supp_access_flags) {
 		ibdev_dbg(&dev->ibdev,
 			  "Unsupported access flags[%#x], supported[%#x]\n",
-			  access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
+			  access_flags, supp_access_flags);
 		err = -EOPNOTSUPP;
 		goto err_out;
 	}
@@ -1423,7 +1384,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 		goto err_out;
 	}
 
-	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+	mr->umem = ib_umem_get(udata, start, length, access_flags);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		ibdev_dbg(&dev->ibdev,
@@ -1434,7 +1395,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	params.pd = to_epd(ibpd)->pdn;
 	params.iova = virt_addr;
 	params.mr_length_in_bytes = length;
-	params.permissions = access_flags & 0x1;
+	params.permissions = access_flags;
 
 	pg_sz = ib_umem_find_best_pgsz(mr->umem,
 				       dev->dev_attr.page_size_cap,
@@ -1556,7 +1517,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
 		goto err_out;
 
 	ucontext->uarn = result.uarn;
-	xa_init(&ucontext->mmap_xa);
 
 	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
 	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
@@ -1585,38 +1545,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
 	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
 	struct efa_dev *dev = to_edev(ibucontext->device);
 
-	mmap_entries_remove_free(dev, ucontext);
 	efa_dealloc_uar(dev, ucontext->uarn);
 }
 
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+	/* DMA mapping is already gone, now free the pages */
+	if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
+		free_pages_exact(phys_to_virt(entry->address),
+				 entry->rdma_entry.npages * PAGE_SIZE);
+	kfree(entry);
+}
+
 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
-		      struct vm_area_struct *vma, u64 key, u64 length)
+		      struct vm_area_struct *vma)
 {
-	struct efa_mmap_entry *entry;
+	struct rdma_user_mmap_entry *rdma_entry;
+	struct efa_user_mmap_entry *entry;
 	unsigned long va;
+	int err = 0;
 	u64 pfn;
-	int err;
 
-	entry = mmap_entry_get(dev, ucontext, key, length);
-	if (!entry) {
-		ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
-			  key);
+	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+	if (!rdma_entry) {
+		ibdev_dbg(&dev->ibdev,
+			  "pgoff[%#lx] does not have valid entry\n",
+			  vma->vm_pgoff);
 		return -EINVAL;
 	}
+	entry = to_emmap(rdma_entry);
 
 	ibdev_dbg(&dev->ibdev,
-		  "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
-		  entry->address, length, entry->mmap_flag);
+		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+		  entry->address, rdma_entry->npages * PAGE_SIZE,
+		  entry->mmap_flag);
 
 	pfn = entry->address >> PAGE_SHIFT;
 	switch (entry->mmap_flag) {
 	case EFA_MMAP_IO_NC:
-		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
-					pgprot_noncached(vma->vm_page_prot));
+		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+					entry->rdma_entry.npages * PAGE_SIZE,
+					pgprot_noncached(vma->vm_page_prot),
+					rdma_entry);
 		break;
 	case EFA_MMAP_IO_WC:
-		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
-					pgprot_writecombine(vma->vm_page_prot));
+		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+					entry->rdma_entry.npages * PAGE_SIZE,
+					pgprot_writecombine(vma->vm_page_prot),
+					rdma_entry);
 		break;
 	case EFA_MMAP_DMA_PAGE:
 		for (va = vma->vm_start; va < vma->vm_end;
@@ -1633,12 +1611,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
 	if (err) {
 		ibdev_dbg(
 			&dev->ibdev,
-			"Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
-			entry->address, length, entry->mmap_flag, err);
-		return err;
+			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+			entry->address, rdma_entry->npages * PAGE_SIZE,
+			entry->mmap_flag, err);
 	}
 
-	return 0;
+	rdma_user_mmap_entry_put(rdma_entry);
+	return err;
 }
 
 int efa_mmap(struct ib_ucontext *ibucontext,
@@ -1646,26 +1625,13 @@ int efa_mmap(struct ib_ucontext *ibucontext,
 {
 	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
 	struct efa_dev *dev = to_edev(ibucontext->device);
-	u64 length = vma->vm_end - vma->vm_start;
-	u64 key = vma->vm_pgoff << PAGE_SHIFT;
+	size_t length = vma->vm_end - vma->vm_start;
 
 	ibdev_dbg(&dev->ibdev,
-		  "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
-		  vma->vm_start, vma->vm_end, length, key);
-
-	if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
-		ibdev_dbg(&dev->ibdev,
-			  "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
-			  length, PAGE_SIZE, vma->vm_flags);
-		return -EINVAL;
-	}
-
-	if (vma->vm_flags & VM_EXEC) {
-		ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
-		return -EPERM;
-	}
+		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
 
-	return __efa_mmap(dev, ucontext, vma, key, length);
+	return __efa_mmap(dev, ucontext, vma);
 }
 
 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index d8ff063a5419..a51bcd2b4391 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -4915,16 +4915,11 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
  */
 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
-		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index)
+		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
+		     size_t *out_mad_size, u16 *out_mad_pkey_index)
 {
-	switch (in_mad->base_version) {
+	switch (in_mad->mad_hdr.base_version) {
 	case OPA_MGMT_BASE_VERSION:
-		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
-			dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
-			return IB_MAD_RESULT_FAILURE;
-		}
 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
 					    in_wc, in_grh,
 					    (struct opa_mad *)in_mad,
@@ -4932,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 					    out_mad_size,
 					    out_mad_pkey_index);
 	case IB_MGMT_BASE_VERSION:
-		return hfi1_process_ib_mad(ibdev, mad_flags, port,
-					  in_wc, in_grh,
-					  (const struct ib_mad *)in_mad,
-					  (struct ib_mad *)out_mad);
+		return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+					   in_grh, in_mad, out_mad);
 	default:
 		break;
 	}
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index cbf7faa5038c..36593f2efe26 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
 			   u32 config_data, const char *message)
 {
 	u8 i;
-	int ret = HCMD_SUCCESS;
+	int ret;
 
 	for (i = 0; i < 4; i++) {
 		ret = load_8051_config(ppd->dd, field_id, i, config_data);
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index ae9582ddbc8f..b0e9bf7cd150 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
 void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
-		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index);
+		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
+		     size_t *out_mad_size, u16 *out_mad_pkey_index);
 
 /*
  * The PSN_MASK and PSN_SHIFT allow for
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index d602b698b57e..4921c1e40ccd 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,23 +1,34 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config INFINIBAND_HNS
-	bool "HNS RoCE Driver"
+	tristate "HNS RoCE Driver"
 	depends on NET_VENDOR_HISILICON
 	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on (HNS_DSAF && HNS_ENET) || HNS3
 	---help---
 	  This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
 	  is used in Hisilicon Hip06 and more further ICT SoC based on
 	  platform device.
 
+	  To compile HIP06 or HIP08 driver as module, choose M here.
+
 config INFINIBAND_HNS_HIP06
-	tristate "Hisilicon Hip06 Family RoCE support"
+	bool "Hisilicon Hip06 Family RoCE support"
 	depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
+	depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y)
 	---help---
 	  RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
 	  Hip07 SoC. These RoCE engines are platform devices.
 
+	  To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
+	  module will be called hns-roce-hw-v1
+
 config INFINIBAND_HNS_HIP08
-	tristate "Hisilicon Hip08 Family RoCE support"
+	bool "Hisilicon Hip08 Family RoCE support"
 	depends on INFINIBAND_HNS && PCI && HNS3
+	depends on INFINIBAND_HNS=m || HNS3=y
 	---help---
 	  RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
 	  The RoCE engine is a PCI device.
+
+	  To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
+	  module will be called hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index 449a2d81319d..e105945b94a1 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
 	hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
 	hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
 
+ifdef CONFIG_INFINIBAND_HNS_HIP06
 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o
+endif
 
+ifdef CONFIG_INFINIBAND_HNS_HIP08
 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs)
-obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
+endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 90e08c0c332d..8a522e14ef62 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
 	const struct ib_gid_attr *gid_attr;
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_ah *ah = to_hr_ah(ibah);
-	u16 vlan_tag = 0xffff;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+	u16 vlan_id = 0xffff;
 	bool vlan_en = false;
 	int ret;
 
 	gid_attr = ah_attr->grh.sgid_attr;
-	ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL);
+	ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
 	if (ret)
 		return ret;
 
 	/* Get mac address */
 	memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
 
-	if (vlan_tag < VLAN_CFI_MASK) {
+	if (vlan_id < VLAN_N_VID) {
 		vlan_en = true;
-		vlan_tag |= (rdma_ah_get_sl(ah_attr) &
+		vlan_id |= (rdma_ah_get_sl(ah_attr) &
 			     HNS_ROCE_VLAN_SL_BIT_MASK) <<
 			     HNS_ROCE_VLAN_SL_SHIFT;
 	}
 
 	ah->av.port = rdma_ah_get_port_num(ah_attr);
 	ah->av.gid_index = grh->sgid_index;
-	ah->av.vlan = vlan_tag;
+	ah->av.vlan_id = vlan_id;
 	ah->av.vlan_en = vlan_en;
-	dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
-		ah->av.vlan);
+	dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index,
+		ah->av.vlan_id);
 
 	if (rdma_ah_get_static_rate(ah_attr))
 		ah->av.stat_rate = IB_RATE_10_GBPS;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 8c063c598d2a..da574c26e063 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
 			bitmap->last = 0;
 		*obj |= bitmap->top;
 	} else {
-		ret = -1;
+		ret = -EINVAL;
 	}
 
 	spin_unlock(&bitmap->lock);
@@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
 		}
 		*obj |= bitmap->top;
 	} else {
-		ret = -1;
+		ret = -EINVAL;
 	}
 
 	spin_unlock(&bitmap->lock);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 2b6ac646ca9a..1915bacaded0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -115,12 +115,12 @@ enum {
 
 enum {
 	/* TPT commands */
-	HNS_ROCE_CMD_SW2HW_MPT		= 0xd,
-	HNS_ROCE_CMD_HW2SW_MPT		= 0xf,
+	HNS_ROCE_CMD_CREATE_MPT		= 0xd,
+	HNS_ROCE_CMD_DESTROY_MPT	= 0xf,
 
 	/* CQ commands */
-	HNS_ROCE_CMD_SW2HW_CQ		= 0x16,
-	HNS_ROCE_CMD_HW2SW_CQ		= 0x17,
+	HNS_ROCE_CMD_CREATE_CQC		= 0x16,
+	HNS_ROCE_CMD_DESTROY_CQC	= 0x17,
 
 	/* QP/EE commands */
 	HNS_ROCE_CMD_RST2INIT_QP	= 0x19,
@@ -129,14 +129,14 @@ enum {
 	HNS_ROCE_CMD_RTS2RTS_QP		= 0x1c,
 	HNS_ROCE_CMD_2ERR_QP		= 0x1e,
 	HNS_ROCE_CMD_RTS2SQD_QP		= 0x1f,
-	HNS_ROCE_CMD_SQD2SQD_QP		= 0x38,
 	HNS_ROCE_CMD_SQD2RTS_QP		= 0x20,
 	HNS_ROCE_CMD_2RST_QP		= 0x21,
 	HNS_ROCE_CMD_QUERY_QP		= 0x22,
-	HNS_ROCE_CMD_SW2HW_SRQ		= 0x70,
+	HNS_ROCE_CMD_SQD2SQD_QP		= 0x38,
+	HNS_ROCE_CMD_CREATE_SRQ		= 0x70,
 	HNS_ROCE_CMD_MODIFY_SRQC	= 0x72,
 	HNS_ROCE_CMD_QUERY_SRQC		= 0x73,
-	HNS_ROCE_CMD_HW2SW_SRQ		= 0x74,
+	HNS_ROCE_CMD_DESTROY_SRQ	= 0x74,
 };
 
 int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 22541d19cd09..af1d8823b3f0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -39,51 +39,8 @@
 #include <rdma/hns-abi.h>
 #include "hns_roce_common.h"
 
-static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
-{
-	struct ib_cq *ibcq = &hr_cq->ib_cq;
-
-	ibcq->comp_handler(ibcq, ibcq->cq_context);
-}
-
-static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq,
-				 enum hns_roce_event event_type)
-{
-	struct hns_roce_dev *hr_dev;
-	struct ib_event event;
-	struct ib_cq *ibcq;
-
-	ibcq = &hr_cq->ib_cq;
-	hr_dev = to_hr_dev(ibcq->device);
-
-	if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
-	    event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
-	    event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
-		dev_err(hr_dev->dev,
-			"hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n",
-			event_type, hr_cq->cqn);
-		return;
-	}
-
-	if (ibcq->event_handler) {
-		event.device = ibcq->device;
-		event.event = IB_EVENT_CQ_ERR;
-		event.element.cq = ibcq;
-		ibcq->event_handler(&event, ibcq->cq_context);
-	}
-}
-
-static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
-			     struct hns_roce_cmd_mailbox *mailbox,
-			     unsigned long cq_num)
-{
-	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
-			    HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
-			     struct hns_roce_mtt *hr_mtt,
-			     struct hns_roce_cq *hr_cq, int vector)
+static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_cq *hr_cq)
 {
 	struct hns_roce_cmd_mailbox *mailbox;
 	struct hns_roce_hem_table *mtt_table;
@@ -101,35 +58,32 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
 	else
 		mtt_table = &hr_dev->mr_table.mtt_table;
 
-	mtts = hns_roce_table_find(hr_dev, mtt_table,
-				   hr_mtt->first_seg, &dma_handle);
-	if (!mtts) {
-		dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n");
-		return -EINVAL;
-	}
+	mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg,
+				   &dma_handle);
 
-	if (vector >= hr_dev->caps.num_comp_vectors) {
-		dev_err(dev, "CQ alloc.Invalid vector.\n");
+	if (!mtts) {
+		dev_err(dev, "Failed to find mtt for CQ buf.\n");
 		return -EINVAL;
 	}
-	hr_cq->vector = vector;
 
 	ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
-	if (ret == -1) {
-		dev_err(dev, "CQ alloc.Failed to alloc index.\n");
-		return -ENOMEM;
+	if (ret) {
+		dev_err(dev, "Num of CQ out of range.\n");
+		return ret;
 	}
 
 	/* Get CQC memory HEM(Hardware Entry Memory) table */
 	ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
 	if (ret) {
-		dev_err(dev, "CQ alloc.Failed to get context mem.\n");
+		dev_err(dev,
+			"Get context mem failed(%d) when CQ(0x%lx) alloc.\n",
+			ret, hr_cq->cqn);
 		goto err_out;
 	}
 
 	ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
 	if (ret) {
-		dev_err(dev, "CQ alloc failed xa_store.\n");
+		dev_err(dev, "Failed to xa_store CQ.\n");
 		goto err_put;
 	}
 
@@ -140,14 +94,16 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
 		goto err_xa;
 	}
 
-	hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle,
-			      nent, vector);
+	hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
 
 	/* Send mailbox to hw */
-	ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn);
+	ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0,
+			HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS);
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 	if (ret) {
-		dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n");
+		dev_err(dev,
+			"Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n",
+			ret, hr_cq->cqn);
 		goto err_xa;
 	}
 
@@ -170,24 +126,17 @@ err_out:
 	return ret;
 }
 
-static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
-			     struct hns_roce_cmd_mailbox *mailbox,
-			     unsigned long cq_num)
-{
-	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
-				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
-				 HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
 	struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
 	struct device *dev = hr_dev->dev;
 	int ret;
 
-	ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn);
+	ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1,
+				HNS_ROCE_CMD_DESTROY_CQC,
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
 	if (ret)
-		dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
+		dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
 			hr_cq->cqn);
 
 	xa_erase(&cq_table->array, hr_cq->cqn);
@@ -204,103 +153,91 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 	hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
 }
 
-static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
-				   struct ib_udata *udata,
-				   struct hns_roce_cq_buf *buf,
-				   struct ib_umem **umem, u64 buf_addr, int cqe)
+static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+		       struct hns_roce_ib_create_cq ucmd,
+		       struct ib_udata *udata)
 {
-	int ret;
-	u32 page_shift;
+	struct hns_roce_buf *buf = &hr_cq->buf;
+	struct hns_roce_mtt *mtt = &hr_cq->mtt;
+	struct ib_umem **umem = &hr_cq->umem;
 	u32 npages;
+	int ret;
 
-	*umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
-			    IB_ACCESS_LOCAL_WRITE, 1);
+	*umem = ib_umem_get(udata, ucmd.buf_addr, buf->size,
+			    IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
 
 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
-		buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
+		mtt->mtt_type = MTT_TYPE_CQE;
 	else
-		buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
-
-	if (hr_dev->caps.cqe_buf_pg_sz) {
-		npages = (ib_umem_page_count(*umem) +
-			(1 << hr_dev->caps.cqe_buf_pg_sz) - 1) /
-			(1 << hr_dev->caps.cqe_buf_pg_sz);
-		page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
-		ret = hns_roce_mtt_init(hr_dev, npages, page_shift,
-					&buf->hr_mtt);
-	} else {
-		ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
-					PAGE_SHIFT, &buf->hr_mtt);
-	}
+		mtt->mtt_type = MTT_TYPE_WQE;
+
+	npages = DIV_ROUND_UP(ib_umem_page_count(*umem),
+			      1 << hr_dev->caps.cqe_buf_pg_sz);
+	ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt);
 	if (ret)
 		goto err_buf;
 
-	ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem);
+	ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem);
 	if (ret)
 		goto err_mtt;
 
 	return 0;
 
 err_mtt:
-	hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+	hns_roce_mtt_cleanup(hr_dev, mtt);
 
 err_buf:
 	ib_umem_release(*umem);
 	return ret;
 }
 
-static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
-				    struct hns_roce_cq_buf *buf, u32 nent)
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
+	struct hns_roce_buf *buf = &hr_cq->buf;
+	struct hns_roce_mtt *mtt = &hr_cq->mtt;
 	int ret;
-	u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
 
-	ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
-				 (1 << page_shift) * 2, &buf->hr_buf,
-				 page_shift);
+	ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2,
+				 buf, buf->page_shift);
 	if (ret)
 		goto out;
 
 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
-		buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
+		mtt->mtt_type = MTT_TYPE_CQE;
 	else
-		buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
+		mtt->mtt_type = MTT_TYPE_WQE;
 
-	ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
-				buf->hr_buf.page_shift, &buf->hr_mtt);
+	ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt);
 	if (ret)
 		goto err_buf;
 
-	ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
+	ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf);
 	if (ret)
 		goto err_mtt;
 
 	return 0;
 
 err_mtt:
-	hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+	hns_roce_mtt_cleanup(hr_dev, mtt);
 
 err_buf:
-	hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
-			  &buf->hr_buf);
+	hns_roce_buf_free(hr_dev, buf->size, buf);
+
 out:
 	return ret;
 }
 
-static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
-				    struct hns_roce_cq_buf *buf, int cqe)
+static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
-	hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
-			  &buf->hr_buf);
+	hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
 }
 
 static int create_user_cq(struct hns_roce_dev *hr_dev,
 			  struct hns_roce_cq *hr_cq,
 			  struct ib_udata *udata,
-			  struct hns_roce_ib_create_cq_resp *resp,
-			  int cq_entries)
+			  struct hns_roce_ib_create_cq_resp *resp)
 {
 	struct hns_roce_ib_create_cq ucmd;
 	struct device *dev = hr_dev->dev;
@@ -314,9 +251,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev,
 	}
 
 	/* Get user space address, write it into mtt table */
-	ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
-				      &hr_cq->umem, ucmd.buf_addr,
-				      cq_entries);
+	ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata);
 	if (ret) {
 		dev_err(dev, "Failed to get_cq_umem.\n");
 		return ret;
@@ -337,17 +272,16 @@ static int create_user_cq(struct hns_roce_dev *hr_dev,
 	return 0;
 
 err_mtt:
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
 	ib_umem_release(hr_cq->umem);
 
 	return ret;
 }
 
 static int create_kernel_cq(struct hns_roce_dev *hr_dev,
-			    struct hns_roce_cq *hr_cq, int cq_entries)
+			    struct hns_roce_cq *hr_cq)
 {
 	struct device *dev = hr_dev->dev;
-	struct hns_roce_uar *uar;
 	int ret;
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
@@ -361,15 +295,14 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev,
 	}
 
 	/* Init mtt table and write buff address to mtt table */
-	ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries);
+	ret = alloc_cq_buf(hr_dev, hr_cq);
 	if (ret) {
 		dev_err(dev, "Failed to alloc_cq_buf.\n");
 		goto err_db;
 	}
 
-	uar = &hr_dev->priv_uar;
 	hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
-			 DB_REG_OFFSET * uar->index;
+			 DB_REG_OFFSET * hr_dev->priv_uar.index;
 
 	return 0;
 
@@ -392,64 +325,69 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev,
 	    (udata->outlen >= sizeof(*resp)))
 		hns_roce_db_unmap_user(context, &hr_cq->db);
 
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
 	ib_umem_release(hr_cq->umem);
 }
 
 static void destroy_kernel_cq(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_cq *hr_cq)
 {
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-	hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe);
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
+	free_cq_buf(hr_dev, hr_cq);
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
 		hns_roce_free_db(hr_dev, &hr_cq->db);
 }
 
-int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
-			  const struct ib_cq_init_attr *attr,
-			  struct ib_udata *udata)
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+		       struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_ib_create_cq_resp resp = {};
 	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+	struct device *dev = hr_dev->dev;
 	int vector = attr->comp_vector;
-	int cq_entries = attr->cqe;
+	u32 cq_entries = attr->cqe;
 	int ret;
 
 	if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
-		dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
+		dev_err(dev, "Create CQ failed. entries=%d, max=%d\n",
 			cq_entries, hr_dev->caps.max_cqes);
 		return -EINVAL;
 	}
 
-	if (hr_dev->caps.min_cqes)
-		cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+	if (vector >= hr_dev->caps.num_comp_vectors) {
+		dev_err(dev, "Create CQ failed, vector=%d, max=%d\n",
+			vector, hr_dev->caps.num_comp_vectors);
+		return -EINVAL;
+	}
 
-	cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
-	hr_cq->ib_cq.cqe = cq_entries - 1;
+	cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+	cq_entries = roundup_pow_of_two(cq_entries);
+	hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
+	hr_cq->cq_depth = cq_entries;
+	hr_cq->vector = vector;
+	hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+	hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
 	spin_lock_init(&hr_cq->lock);
 
 	if (udata) {
-		ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries);
+		ret = create_user_cq(hr_dev, hr_cq, udata, &resp);
 		if (ret) {
 			dev_err(dev, "Create cq failed in user mode!\n");
 			goto err_cq;
 		}
 	} else {
-		ret = create_kernel_cq(hr_dev, hr_cq, cq_entries);
+		ret = create_kernel_cq(hr_dev, hr_cq);
 		if (ret) {
 			dev_err(dev, "Create cq failed in kernel mode!\n");
 			goto err_cq;
 		}
 	}
 
-	/* Allocate cq index, fill cq_context */
-	ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt,
-				hr_cq, vector);
+	ret = hns_roce_alloc_cqc(hr_dev, hr_cq);
 	if (ret) {
-		dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
+		dev_err(dev, "Alloc CQ failed(%d).\n", ret);
 		goto err_dbmap;
 	}
 
@@ -462,11 +400,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
 	if (!udata && hr_cq->tptr_addr)
 		*hr_cq->tptr_addr = 0;
 
-	/* Get created cq handler and carry out event */
-	hr_cq->comp = hns_roce_ib_cq_comp;
-	hr_cq->event = hns_roce_ib_cq_event;
-	hr_cq->cq_depth = cq_entries;
-
 	if (udata) {
 		resp.cqn = hr_cq->cqn;
 		ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -477,7 +410,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
 	return 0;
 
 err_cqc:
-	hns_roce_free_cq(hr_dev, hr_cq);
+	hns_roce_free_cqc(hr_dev, hr_cq);
 
 err_dbmap:
 	if (udata)
@@ -489,7 +422,7 @@ err_cq:
 	return ret;
 }
 
-void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
 	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
@@ -499,8 +432,8 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 		return;
 	}
 
-	hns_roce_free_cq(hr_dev, hr_cq);
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+	hns_roce_free_cqc(hr_dev, hr_cq);
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
 
 	ib_umem_release(hr_cq->umem);
 	if (udata) {
@@ -512,7 +445,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 					       &hr_cq->db);
 	} else {
 		/* Free the buff of stored cq */
-		hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+		free_cq_buf(hr_dev, hr_cq);
 		if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
 			hns_roce_free_db(hr_dev, &hr_cq->db);
 	}
@@ -520,38 +453,57 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
 {
-	struct device *dev = hr_dev->dev;
-	struct hns_roce_cq *cq;
+	struct hns_roce_cq *hr_cq;
+	struct ib_cq *ibcq;
 
-	cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1));
-	if (!cq) {
-		dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn);
+	hr_cq = xa_load(&hr_dev->cq_table.array,
+			cqn & (hr_dev->caps.num_cqs - 1));
+	if (!hr_cq) {
+		dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n",
+			 cqn);
 		return;
 	}
 
-	++cq->arm_sn;
-	cq->comp(cq);
+	++hr_cq->arm_sn;
+	ibcq = &hr_cq->ib_cq;
+	if (ibcq->comp_handler)
+		ibcq->comp_handler(ibcq, ibcq->cq_context);
 }
 
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
 {
-	struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
 	struct device *dev = hr_dev->dev;
-	struct hns_roce_cq *cq;
+	struct hns_roce_cq *hr_cq;
+	struct ib_event event;
+	struct ib_cq *ibcq;
 
-	cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1));
-	if (cq)
-		atomic_inc(&cq->refcount);
+	hr_cq = xa_load(&hr_dev->cq_table.array,
+			cqn & (hr_dev->caps.num_cqs - 1));
+	if (!hr_cq) {
+		dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn);
+		return;
+	}
 
-	if (!cq) {
-		dev_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+	if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+	    event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+	    event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+		dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n",
+			event_type, cqn);
 		return;
 	}
 
-	cq->event(cq, (enum hns_roce_event)event_type);
+	atomic_inc(&hr_cq->refcount);
+
+	ibcq = &hr_cq->ib_cq;
+	if (ibcq->event_handler) {
+		event.device = ibcq->device;
+		event.element.cq = ibcq;
+		event.event = IB_EVENT_CQ_ERR;
+		ibcq->event_handler(&event, ibcq->cq_context);
+	}
 
-	if (atomic_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	if (atomic_dec_and_test(&hr_cq->refcount))
+		complete(&hr_cq->free);
 }
 
 int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index c00714c2f16a..10af6958ab69 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context,
 
 	refcount_set(&page->refcount, 1);
 	page->user_virt = page_addr;
-	page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0);
 	if (IS_ERR(page->umem)) {
 		ret = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 96d1302abde1..5617434cbfb4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -45,7 +45,7 @@
 
 #define HNS_ROCE_MAX_MSG_LEN			0x80000000
 
-#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
+#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
 
 #define HNS_ROCE_IB_MIN_SQ_STRIDE		6
 
@@ -53,8 +53,6 @@
 
 #define BA_BYTE_LEN				8
 
-#define BITS_PER_BYTE				8
-
 /* Hardware specification only for v1 engine */
 #define HNS_ROCE_MIN_CQE_NUM			0x40
 #define HNS_ROCE_MIN_WQE_NUM			0x20
@@ -426,7 +424,6 @@ struct hns_roce_wq {
 	u64		*wrid;     /* Work request ID */
 	spinlock_t	lock;
 	int		wqe_cnt;  /* WQE num */
-	u32		max_post;
 	int		max_gs;
 	int		offset;
 	int		wqe_shift;	/* WQE size */
@@ -451,6 +448,7 @@ struct hns_roce_buf {
 	struct hns_roce_buf_list	*page_list;
 	int				nbufs;
 	u32				npages;
+	u32				size;
 	int				page_shift;
 };
 
@@ -482,22 +480,14 @@ struct hns_roce_db {
 	int		order;
 };
 
-struct hns_roce_cq_buf {
-	struct hns_roce_buf hr_buf;
-	struct hns_roce_mtt hr_mtt;
-};
-
 struct hns_roce_cq {
 	struct ib_cq			ib_cq;
-	struct hns_roce_cq_buf		hr_buf;
+	struct hns_roce_buf		buf;
+	struct hns_roce_mtt		mtt;
 	struct hns_roce_db		db;
 	u8				db_en;
 	spinlock_t			lock;
 	struct ib_umem			*umem;
-	void (*comp)(struct hns_roce_cq *cq);
-	void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type);
-
-	struct hns_roce_uar		*uar;
 	u32				cq_depth;
 	u32				cons_index;
 	u32				*set_ci_db;
@@ -521,9 +511,8 @@ struct hns_roce_idx_que {
 
 struct hns_roce_srq {
 	struct ib_srq		ibsrq;
-	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 	unsigned long		srqn;
-	int			max;
+	u32			wqe_cnt;
 	int			max_gs;
 	int			wqe_shift;
 	void __iomem		*db_reg_l;
@@ -539,8 +528,8 @@ struct hns_roce_srq {
 	spinlock_t		lock;
 	int			head;
 	int			tail;
-	u16			wqe_ctr;
 	struct mutex		mutex;
+	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 };
 
 struct hns_roce_uar_table {
@@ -582,7 +571,7 @@ struct hns_roce_av {
 	u8          tclass;
 	u8          dgid[HNS_ROCE_GID_SIZE];
 	u8          mac[ETH_ALEN];
-	u16         vlan;
+	u16         vlan_id;
 	bool	    vlan_en;
 };
 
@@ -695,10 +684,6 @@ struct hns_roce_qp {
 	struct hns_roce_rinl_buf rq_inl_buf;
 };
 
-struct hns_roce_sqp {
-	struct hns_roce_qp	hr_qp;
-};
-
 struct hns_roce_ib_iboe {
 	spinlock_t		lock;
 	struct net_device      *netdevs[HNS_ROCE_MAX_PORTS];
@@ -821,8 +806,8 @@ struct hns_roce_caps {
 	int		max_qp_init_rdma;
 	int		max_qp_dest_rdma;
 	int		num_cqs;
-	int		max_cqes;
-	int		min_cqes;
+	u32		max_cqes;
+	u32		min_cqes;
 	u32		min_wqes;
 	int		reserved_cqs;
 	int		reserved_srqs;
@@ -953,7 +938,7 @@ struct hns_roce_hw {
 	int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
 	void (*write_cqc)(struct hns_roce_dev *hr_dev,
 			  struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
-			  dma_addr_t dma_handle, int nent, u32 vector);
+			  dma_addr_t dma_handle);
 	int (*set_hem)(struct hns_roce_dev *hr_dev,
 		       struct hns_roce_hem_table *table, int obj, int step_idx);
 	int (*clear_hem)(struct hns_roce_dev *hr_dev,
@@ -1092,11 +1077,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
 	return container_of(ibsrq, struct hns_roce_srq, ibsrq);
 }
 
-static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
-{
-	return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
-}
-
 static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
 {
 	__raw_writeq(*(u64 *) val, dest);
@@ -1198,9 +1178,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 		       unsigned int *sg_offset);
 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
-		       struct hns_roce_cmd_mailbox *mailbox,
-		       unsigned long mpt_index);
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_cmd_mailbox *mailbox,
+			    unsigned long mpt_index);
 unsigned long key_to_hw_index(u32 key);
 
 struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
@@ -1257,12 +1237,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 __be32 send_ieth(const struct ib_send_wr *wr);
 int to_hr_qp_type(int qp_type);
 
-int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
-			  const struct ib_cq_init_attr *attr,
-			  struct ib_udata *udata);
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+		       struct ib_udata *udata);
 
-void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
+void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
 
 int hns_roce_db_map_user(struct hns_roce_ucontext *context,
 			 struct ib_udata *udata, unsigned long virt,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 5f74bf55f471..2a2b2112f886 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -732,7 +732,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	if (!cq)
 		return -ENOMEM;
 
-	ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL);
+	ret = hns_roce_create_cq(cq, &cq_init_attr, NULL);
 	if (ret) {
 		dev_err(dev, "Create cq for reserved loop qp failed!");
 		goto alloc_cq_failed;
@@ -868,7 +868,7 @@ alloc_pd_failed:
 	kfree(pd);
 
 alloc_mem_failed:
-	hns_roce_ib_destroy_cq(cq, NULL);
+	hns_roce_destroy_cq(cq, NULL);
 alloc_cq_failed:
 	kfree(cq);
 	return ret;
@@ -897,7 +897,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
 				i, ret);
 	}
 
-	hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
+	hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
 	kfree(&free_mr->mr_free_cq->ib_cq);
 	hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL);
 	kfree(&free_mr->mr_free_pd->ibpd);
@@ -1114,9 +1114,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
 	free_mr = &priv->free_mr;
 
 	if (mr->enabled) {
-		if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
-				       & (hr_dev->caps.num_mtpts - 1)))
-			dev_warn(dev, "HW2SW_MPT failed!\n");
+		if (hns_roce_hw_destroy_mpt(hr_dev, NULL,
+					    key_to_hw_index(mr->key) &
+					    (hr_dev->caps.num_mtpts - 1)))
+			dev_warn(dev, "DESTROY_MPT failed!\n");
 	}
 
 	mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
@@ -1979,8 +1980,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
-				   n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+	return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
 }
 
 static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
@@ -1989,7 +1989,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
 
 	/* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
 	return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
-		!!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
+		!!(n & hr_cq->cq_depth)) ? hr_cqe : NULL;
 }
 
 static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
@@ -2072,8 +2072,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
 
 static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 				  struct hns_roce_cq *hr_cq, void *mb_buf,
-				  u64 *mtts, dma_addr_t dma_handle, int nent,
-				  u32 vector)
+				  u64 *mtts, dma_addr_t dma_handle)
 {
 	struct hns_roce_cq_context *cq_context = NULL;
 	struct hns_roce_buf_list *tptr_buf;
@@ -2108,9 +2107,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->cqc_byte_12,
 		       CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
 		       CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
-		       ilog2((unsigned int)nent));
+		       ilog2(hr_cq->cq_depth));
 	roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
-		       CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
+		       CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector);
 
 	cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
 
@@ -3644,10 +3643,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
 	}
 
-	if (hr_qp->ibqp.qp_type == IB_QPT_RC)
-		kfree(hr_qp);
-	else
-		kfree(hr_to_hr_sqp(hr_qp));
+	kfree(hr_qp);
 	return 0;
 }
 
@@ -3658,10 +3654,9 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	struct device *dev = &hr_dev->pdev->dev;
 	u32 cqe_cnt_ori;
 	u32 cqe_cnt_cur;
-	u32 cq_buf_size;
 	int wait_time = 0;
 
-	hns_roce_free_cq(hr_dev, hr_cq);
+	hns_roce_free_cqc(hr_dev, hr_cq);
 
 	/*
 	 * Before freeing cq buffer, we need to ensure that the outstanding CQE
@@ -3686,13 +3681,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 		wait_time++;
 	}
 
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt);
 
 	ib_umem_release(hr_cq->umem);
 	if (!udata) {
 		/* Free the buff of stored cq */
-		cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
-		hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
+		hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
 	}
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index e82567fcdeb7..cb8071a3e0d5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			roce_set_field(ud_sq_wqe->byte_36,
 				       V2_UD_SEND_WQE_BYTE_36_VLAN_M,
 				       V2_UD_SEND_WQE_BYTE_36_VLAN_S,
-				       le16_to_cpu(ah->av.vlan));
+				       ah->av.vlan_id);
 			roce_set_field(ud_sq_wqe->byte_36,
 				       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
 				       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
@@ -2447,8 +2447,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
-				   n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+	return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
 }
 
 static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2457,7 +2456,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 
 	/* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
 	return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^
-		!!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL;
+		!!(n & hr_cq->cq_depth)) ? cqe : NULL;
 }
 
 static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
@@ -2550,8 +2549,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
 
 static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 				  struct hns_roce_cq *hr_cq, void *mb_buf,
-				  u64 *mtts, dma_addr_t dma_handle, int nent,
-				  u32 vector)
+				  u64 *mtts, dma_addr_t dma_handle)
 {
 	struct hns_roce_v2_cq_context *cq_context;
 
@@ -2563,9 +2561,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
 		       V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
-		       V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
+		       V2_CQC_BYTE_4_SHIFT_S,
+		       ilog2(hr_cq->cq_depth));
 	roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
-		       V2_CQC_BYTE_4_CEQN_S, vector);
+		       V2_CQC_BYTE_4_CEQN_S, hr_cq->vector);
 
 	roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
 		       V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
@@ -4061,8 +4060,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 	const struct ib_gid_attr *gid_attr = NULL;
 	int is_roce_protocol;
+	u16 vlan_id = 0xffff;
 	bool is_udp = false;
-	u16 vlan = 0xffff;
 	u8 ib_port;
 	u8 hr_port;
 	int ret;
@@ -4074,7 +4073,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 
 	if (is_roce_protocol) {
 		gid_attr = attr->ah_attr.grh.sgid_attr;
-		ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
+		ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
 		if (ret)
 			return ret;
 
@@ -4083,7 +4082,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 				 IB_GID_TYPE_ROCE_UDP_ENCAP);
 	}
 
-	if (vlan < VLAN_CFI_MASK) {
+	if (vlan_id < VLAN_N_VID) {
 		roce_set_bit(context->byte_76_srqn_op_en,
 			     V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
@@ -4095,7 +4094,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 	}
 
 	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
-		       V2_QPC_BYTE_24_VLAN_ID_S, vlan);
+		       V2_QPC_BYTE_24_VLAN_ID_S, vlan_id);
 	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
 		       V2_QPC_BYTE_24_VLAN_ID_S, 0);
 
@@ -4650,16 +4649,14 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 {
 	struct hns_roce_cq *send_cq, *recv_cq;
 	struct ib_device *ibdev = &hr_dev->ib_dev;
-	int ret;
+	int ret = 0;
 
 	if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
 		/* Modify qp to reset before destroying qp */
 		ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
 					    hr_qp->state, IB_QPS_RESET);
-		if (ret) {
+		if (ret)
 			ibdev_err(ibdev, "modify QP to Reset failed.\n");
-			return ret;
-		}
 	}
 
 	send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
@@ -4715,7 +4712,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 		kfree(hr_qp->rq_inl_buf.wqe_list);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
@@ -4725,16 +4722,11 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	int ret;
 
 	ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
-	if (ret) {
+	if (ret)
 		ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n",
 			  hr_qp->qpn, ret);
-		return ret;
-	}
 
-	if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
-		kfree(hr_to_hr_sqp(hr_qp));
-	else
-		kfree(hr_qp);
+	kfree(hr_qp);
 
 	return 0;
 }
@@ -4951,10 +4943,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
 static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 {
 	struct hns_roce_dev *hr_dev = eq->hr_dev;
-	__le32 doorbell[2];
-
-	doorbell[0] = 0;
-	doorbell[1] = 0;
+	__le32 doorbell[2] = {};
 
 	if (eq->type_flag == HNS_ROCE_AEQ) {
 		roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
@@ -6047,7 +6036,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
 		       hr_dev->caps.srqwqe_hop_num));
 	roce_set_field(srq_context->byte_4_srqn_srqst,
 		       SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
-		       ilog2(srq->max));
+		       ilog2(srq->wqe_cnt));
 
 	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
 		       SRQC_BYTE_4_SRQN_S, srq->srqn);
@@ -6092,11 +6081,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
 		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
 		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
-		       hr_dev->caps.idx_ba_pg_sz);
+		       hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET);
 	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
 		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
 		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
-		       hr_dev->caps.idx_buf_pg_sz);
+		       hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET);
 
 	srq_context->idx_nxt_blk_addr =
 		cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT);
@@ -6133,7 +6122,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
 	int ret;
 
 	if (srq_attr_mask & IB_SRQ_LIMIT) {
-		if (srq_attr->srq_limit >= srq->max)
+		if (srq_attr->srq_limit >= srq->wqe_cnt)
 			return -EINVAL;
 
 		mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -6193,7 +6182,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 				  SRQC_BYTE_8_SRQ_LIMIT_WL_S);
 
 	attr->srq_limit = limit_wl;
-	attr->max_wr    = srq->max - 1;
+	attr->max_wr    = srq->wqe_cnt - 1;
 	attr->max_sge   = srq->max_gs;
 
 	memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
@@ -6246,7 +6235,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 
 	spin_lock_irqsave(&srq->lock, flags);
 
-	ind = srq->head & (srq->max - 1);
+	ind = srq->head & (srq->wqe_cnt - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (unlikely(wr->num_sge > srq->max_gs)) {
@@ -6261,7 +6250,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 			break;
 		}
 
-		wqe_idx = find_empty_entry(&srq->idx_que, srq->max);
+		wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
 		if (wqe_idx < 0) {
 			ret = -ENOMEM;
 			*bad_wr = wr;
@@ -6285,7 +6274,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 		}
 
 		srq->wrid[wqe_idx] = wr->wr_id;
-		ind = (ind + 1) & (srq->max - 1);
+		ind = (ind + 1) & (srq->wqe_cnt - 1);
 	}
 
 	if (likely(nreq)) {
@@ -6380,12 +6369,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
 
-static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
+static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
 				  struct hnae3_handle *handle)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 	int i;
 
+	hr_dev->pci_dev = handle->pdev;
+	hr_dev->dev = &handle->pdev->dev;
 	hr_dev->hw = &hns_roce_hw_v2;
 	hr_dev->dfx = &hns_roce_dfx_hw_v2;
 	hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
@@ -6410,8 +6401,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
 
 	hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
 	priv->handle = handle;
-
-	return 0;
 }
 
 static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
@@ -6429,14 +6418,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
 		goto error_failed_kzalloc;
 	}
 
-	hr_dev->pci_dev = handle->pdev;
-	hr_dev->dev = &handle->pdev->dev;
-
-	ret = hns_roce_hw_v2_get_cfg(hr_dev, handle);
-	if (ret) {
-		dev_err(hr_dev->dev, "Get Configuration failed!\n");
-		goto error_failed_get_cfg;
-	}
+	hns_roce_hw_v2_get_cfg(hr_dev, handle);
 
 	ret = hns_roce_init(hr_dev);
 	if (ret) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 43219d2f7de0..76a14db7028d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -87,8 +87,8 @@
 #define HNS_ROCE_V2_MTT_ENTRY_SZ		64
 #define HNS_ROCE_V2_CQE_ENTRY_SIZE		32
 #define HNS_ROCE_V2_SCCC_ENTRY_SZ		32
-#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ		4096
-#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ		4096
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ		PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ		PAGE_SIZE
 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED		0xFFFFF000
 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM		2
 #define HNS_ROCE_INVALID_LKEY			0x100
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index b5d196c119ee..854ef6e74788 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
 
 	netdev = hr_dev->iboe.netdevs[port];
 	if (!netdev) {
-		dev_err(dev, "port(%d) can't find netdev\n", port);
+		dev_err(dev, "Can't find netdev on port(%u)!\n", port);
 		return -ENODEV;
 	}
 
@@ -253,7 +253,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
 	net_dev = hr_dev->iboe.netdevs[port];
 	if (!net_dev) {
 		spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-		dev_err(dev, "find netdev %d failed!\r\n", port);
+		dev_err(dev, "Find netdev %u failed!\n", port);
 		return -EINVAL;
 	}
 
@@ -301,12 +301,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
 	return 0;
 }
 
-static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
-				struct ib_port_modify *props)
-{
-	return 0;
-}
-
 static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 				   struct ib_udata *udata)
 {
@@ -359,7 +353,8 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 		return rdma_user_mmap_io(context, vma,
 					 to_hr_ucontext(context)->uar.pfn,
 					 PAGE_SIZE,
-					 pgprot_noncached(vma->vm_page_prot));
+					 pgprot_noncached(vma->vm_page_prot),
+					 NULL);
 
 	/* vm_pgoff: 1 -- TPTR */
 	case 1:
@@ -372,7 +367,8 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 		return rdma_user_mmap_io(context, vma,
 					 hr_dev->tptr_dma_addr >> PAGE_SHIFT,
 					 hr_dev->tptr_size,
-					 vma->vm_page_prot);
+					 vma->vm_page_prot,
+					 NULL);
 
 	default:
 		return -EINVAL;
@@ -423,14 +419,14 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.alloc_pd = hns_roce_alloc_pd,
 	.alloc_ucontext = hns_roce_alloc_ucontext,
 	.create_ah = hns_roce_create_ah,
-	.create_cq = hns_roce_ib_create_cq,
+	.create_cq = hns_roce_create_cq,
 	.create_qp = hns_roce_create_qp,
 	.dealloc_pd = hns_roce_dealloc_pd,
 	.dealloc_ucontext = hns_roce_dealloc_ucontext,
 	.del_gid = hns_roce_del_gid,
 	.dereg_mr = hns_roce_dereg_mr,
 	.destroy_ah = hns_roce_destroy_ah,
-	.destroy_cq = hns_roce_ib_destroy_cq,
+	.destroy_cq = hns_roce_destroy_cq,
 	.disassociate_ucontext = hns_roce_disassociate_ucontext,
 	.fill_res_entry = hns_roce_fill_res_entry,
 	.get_dma_mr = hns_roce_get_dma_mr,
@@ -438,7 +434,6 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.get_port_immutable = hns_roce_port_immutable,
 	.mmap = hns_roce_mmap,
 	.modify_device = hns_roce_modify_device,
-	.modify_port = hns_roce_modify_port,
 	.modify_qp = hns_roce_modify_qp,
 	.query_ah = hns_roce_query_ah,
 	.query_device = hns_roce_query_device,
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 5f8416ba09a9..9ad19170c3f9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key)
 	return (key << 24) | (key >> 8);
 }
 
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
-			      struct hns_roce_cmd_mailbox *mailbox,
-			      unsigned long mpt_index)
+static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
+				  struct hns_roce_cmd_mailbox *mailbox,
+				  unsigned long mpt_index)
 {
 	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
-				 HNS_ROCE_CMD_SW2HW_MPT,
+				 HNS_ROCE_CMD_CREATE_MPT,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
-			      struct hns_roce_cmd_mailbox *mailbox,
-			      unsigned long mpt_index)
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_cmd_mailbox *mailbox,
+			    unsigned long mpt_index)
 {
 	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
-				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
+				 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
@@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
 		}
 	}
 	spin_unlock(&buddy->lock);
-	return -1;
+	return -EINVAL;
 
  found:
 	clear_bit(*seg, buddy->bits[o]);
@@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
 	}
 
 	ret = hns_roce_buddy_alloc(buddy, order, seg);
-	if (ret == -1)
-		return -1;
+	if (ret)
+		return ret;
 
-	if (hns_roce_table_get_range(hr_dev, table, *seg,
-				     *seg + (1 << order) - 1)) {
+	ret = hns_roce_table_get_range(hr_dev, table, *seg,
+				       *seg + (1 << order) - 1);
+	if (ret) {
 		hns_roce_buddy_free(buddy, *seg, order);
-		return -1;
+		return ret;
 	}
 
 	return 0;
@@ -578,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 
 	/* Allocate a key for mr from mr_table */
 	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
-	if (ret == -1)
+	if (ret)
 		return -ENOMEM;
 
 	mr->iova = iova;			/* MR va starting addr */
@@ -707,10 +708,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 	int ret;
 
 	if (mr->enabled) {
-		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
-					 & (hr_dev->caps.num_mtpts - 1));
+		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+					      key_to_hw_index(mr->key) &
+					      (hr_dev->caps.num_mtpts - 1));
 		if (ret)
-			dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+			dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
 	}
 
 	if (mr->size != ~0ULL) {
@@ -763,10 +765,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 		goto err_page;
 	}
 
-	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
-				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
 	if (ret) {
-		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+		dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
 		goto err_page;
 	}
 
@@ -1143,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+	mr->umem = ib_umem_get(udata, start, length, access_flags);
 	if (IS_ERR(mr->umem)) {
 		ret = PTR_ERR(mr->umem);
 		goto err_free;
@@ -1228,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
 	}
 	ib_umem_release(mr->umem);
 
-	mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+	mr->umem = ib_umem_get(udata, start, length, mr_access_flags);
 	if (IS_ERR(mr->umem)) {
 		ret = PTR_ERR(mr->umem);
 		mr->umem = NULL;
@@ -1308,9 +1310,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 	if (ret)
 		goto free_cmd_mbox;
 
-	ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
+	ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
 	if (ret)
-		dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+		dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
 
 	mr->enabled = 0;
 
@@ -1332,9 +1334,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 			goto free_cmd_mbox;
 	}
 
-	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
 	if (ret) {
-		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+		dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
 		ib_umem_release(mr->umem);
 		goto free_cmd_mbox;
 	}
@@ -1448,10 +1450,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
 	int ret;
 
 	if (mw->enabled) {
-		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
-					 & (hr_dev->caps.num_mtpts - 1));
+		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+					      key_to_hw_index(mw->rkey) &
+					      (hr_dev->caps.num_mtpts - 1));
 		if (ret)
-			dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
 
 		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
 				   key_to_hw_index(mw->rkey));
@@ -1487,10 +1490,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
 		goto err_page;
 	}
 
-	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
-				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
 	if (ret) {
-		dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
+		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
 		goto err_page;
 	}
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 912b89b4da34..780c780fdb22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 
 	/* Using bitmap to manager UAR index */
 	ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
-	if (ret == -1)
+	if (ret)
 		return -ENOMEM;
 
 	if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index bd78ff90d998..a6565b674801 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -318,7 +318,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
 					      * hr_qp->rq.max_gs);
 	}
 
-	cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
+	cap->max_recv_wr = hr_qp->rq.wqe_cnt;
 	cap->max_recv_sge = hr_qp->rq.max_gs;
 
 	return 0;
@@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
 	u8 max_sq_stride = ilog2(roundup_sq_stride);
 
 	/* Sanity check SQ size before proceeding */
-	if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
-	     ucmd->log_sq_stride > max_sq_stride ||
-	     ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+	if (ucmd->log_sq_stride > max_sq_stride ||
+	    ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
 		ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n");
 		return -EINVAL;
 	}
@@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 	u32 max_cnt;
 	int ret;
 
+	if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) ||
+	    hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes)
+		return -EINVAL;
+
 	ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
 	if (ret) {
 		ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n");
 		return ret;
 	}
 
-	hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
 	hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
 
 	max_cnt = max(1U, cap->max_send_sge);
@@ -391,37 +393,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 
 	/* Get buf size, SQ and RQ  are aligned to page_szie */
 	if (hr_dev->caps.max_sq_sg <= 2) {
-		hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+		hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt <<
 					     hr_qp->rq.wqe_shift), PAGE_SIZE) +
-				   HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+				   HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 
 		hr_qp->sq.offset = 0;
-		hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+		hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 	} else {
 		page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 		hr_qp->sge.sge_cnt = ex_sge_num ?
 		   max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
-		hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+		hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt <<
 					     hr_qp->rq.wqe_shift), page_size) +
-				   HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
+				   HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt <<
 					     hr_qp->sge.sge_shift), page_size) +
-				   HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+				   HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), page_size);
 
 		hr_qp->sq.offset = 0;
 		if (ex_sge_num) {
-			hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
+			hr_qp->sge.offset = HNS_ROCE_ALIGN_UP(
 							(hr_qp->sq.wqe_cnt <<
 							hr_qp->sq.wqe_shift),
 							page_size);
 			hr_qp->rq.offset = hr_qp->sge.offset +
-					HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
+					HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt <<
 						hr_qp->sge.sge_shift),
 						page_size);
 		} else {
-			hr_qp->rq.offset = HNS_ROCE_ALOGN_UP(
+			hr_qp->rq.offset = HNS_ROCE_ALIGN_UP(
 							(hr_qp->sq.wqe_cnt <<
 							hr_qp->sq.wqe_shift),
 							page_size);
@@ -591,24 +593,24 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
 	page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 	hr_qp->sq.offset = 0;
-	size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
+	size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
 				 page_size);
 
 	if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
 		hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
 					(u32)hr_qp->sge.sge_cnt);
 		hr_qp->sge.offset = size;
-		size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
+		size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt <<
 					  hr_qp->sge.sge_shift, page_size);
 	}
 
 	hr_qp->rq.offset = size;
-	size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
+	size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
 				  page_size);
 	hr_qp->buff_size = size;
 
 	/* Get wr and sge number which send */
-	cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
+	cap->max_send_wr = hr_qp->sq.wqe_cnt;
 	cap->max_send_sge = hr_qp->sq.max_gs;
 
 	/* We don't support inline sends for kernel QPs (yet) */
@@ -743,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		}
 
 		hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
-					  hr_qp->buff_size, 0, 0);
+					  hr_qp->buff_size, 0);
 		if (IS_ERR(hr_qp->umem)) {
 			dev_err(dev, "ib_umem_get error for create qp\n");
 			ret = PTR_ERR(hr_qp->umem);
@@ -1017,7 +1019,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
 	struct ib_device *ibdev = &hr_dev->ib_dev;
-	struct hns_roce_sqp *hr_sqp;
 	struct hns_roce_qp *hr_qp;
 	int ret;
 
@@ -1030,7 +1031,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 		ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
 						hr_qp);
 		if (ret) {
-			ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n",
+			ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n",
 				  hr_qp->qpn, ret);
 			kfree(hr_qp);
 			return ERR_PTR(ret);
@@ -1047,11 +1048,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 			return ERR_PTR(-EINVAL);
 		}
 
-		hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
-		if (!hr_sqp)
+		hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+		if (!hr_qp)
 			return ERR_PTR(-ENOMEM);
 
-		hr_qp = &hr_sqp->hr_qp;
 		hr_qp->port = init_attr->port_num - 1;
 		hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
 
@@ -1066,7 +1066,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 						hr_qp->ibqp.qp_num, hr_qp);
 		if (ret) {
 			ibdev_err(ibdev, "Create GSI QP failed!\n");
-			kfree(hr_sqp);
+			kfree(hr_qp);
 			return ERR_PTR(ret);
 		}
 
@@ -1289,7 +1289,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
 	u32 cur;
 
 	cur = hr_wq->head - hr_wq->tail;
-	if (likely(cur + nreq < hr_wq->max_post))
+	if (likely(cur + nreq < hr_wq->wqe_cnt))
 		return false;
 
 	hr_cq = to_hr_cq(ib_cq);
@@ -1297,7 +1297,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
 	cur = hr_wq->head - hr_wq->tail;
 	spin_unlock(&hr_cq->lock);
 
-	return cur + nreq >= hr_wq->max_post;
+	return cur + nreq >= hr_wq->wqe_cnt;
 }
 
 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 0a31d0a3d657..06871731ac43 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
 		goto err;
 
 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
-	if (!table_attr)
+	if (!table_attr) {
+		ret = -EMSGSIZE;
 		goto err;
+	}
 
-	if (hns_roce_fill_cq(msg, context))
+	if (hns_roce_fill_cq(msg, context)) {
+		ret = -EMSGSIZE;
 		goto err_cancel_table;
+	}
 
 	nla_nest_end(msg, table_attr);
 	kfree(context);
@@ -113,7 +117,7 @@ err_cancel_table:
 	nla_nest_cancel(msg, table_attr);
 err:
 	kfree(context);
-	return -EMSGSIZE;
+	return ret;
 }
 
 int hns_roce_fill_res_entry(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 43ea2c13b212..7113ebfdb4f0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
 	}
 }
 
-static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
-			      struct hns_roce_cmd_mailbox *mailbox,
-			      unsigned long srq_num)
+static int hns_roce_hw_create_srq(struct hns_roce_dev *dev,
+				  struct hns_roce_cmd_mailbox *mailbox,
+				  unsigned long srq_num)
 {
 	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
-				 HNS_ROCE_CMD_SW2HW_SRQ,
+				 HNS_ROCE_CMD_CREATE_SRQ,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
-static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
-			     struct hns_roce_cmd_mailbox *mailbox,
-			     unsigned long srq_num)
+static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
+				   struct hns_roce_cmd_mailbox *mailbox,
+				   unsigned long srq_num)
 {
 	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
-				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+				 mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
@@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
 				       srq->mtt.first_seg,
 				       &dma_handle_wqe);
 	if (!mtts_wqe) {
-		dev_err(hr_dev->dev,
-			"SRQ alloc.Failed to find srq buf addr.\n");
+		dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n");
 		return -EINVAL;
 	}
 
@@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
 				       &dma_handle_idx);
 	if (!mtts_idx) {
 		dev_err(hr_dev->dev,
-			"SRQ alloc.Failed to find idx que buf addr.\n");
+			"Failed to find mtt for srq idx queue buf.\n");
 		return -EINVAL;
 	}
 
 	ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
-	if (ret == -1) {
-		dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+	if (ret) {
+		dev_err(hr_dev->dev,
+			"Failed to alloc a bit from srq bitmap.\n");
 		return -ENOMEM;
 	}
 
@@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
 			       mtts_wqe, mtts_idx, dma_handle_wqe,
 			       dma_handle_idx);
 
-	ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+	ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 	if (ret)
 		goto err_xa;
@@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
 	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
 	int ret;
 
-	ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+	ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn);
 	if (ret)
-		dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
+		dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
 			ret, srq->srqn);
 
 	xa_erase(&srq_table->xa, srq->srqn);
@@ -180,22 +180,23 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
 	struct hns_roce_ib_create_srq  ucmd;
-	u32 page_shift;
-	u32 npages;
+	struct hns_roce_buf *buf;
 	int ret;
 
 	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
 		return -EFAULT;
 
-	srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0);
 	if (IS_ERR(srq->umem))
 		return PTR_ERR(srq->umem);
 
-	npages = (ib_umem_page_count(srq->umem) +
-		(1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
-		(1 << hr_dev->caps.srqwqe_buf_pg_sz);
-	page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
-	ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt);
+	buf = &srq->buf;
+	buf->npages = (ib_umem_page_count(srq->umem) +
+		       (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+		      (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+	buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+	ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
+				&srq->mtt);
 	if (ret)
 		goto err_user_buf;
 
@@ -205,16 +206,19 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
 
 	/* config index queue BA */
 	srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
-					srq->idx_que.buf_size, 0, 0);
+					srq->idx_que.buf_size, 0);
 	if (IS_ERR(srq->idx_que.umem)) {
 		dev_err(hr_dev->dev, "ib_umem_get error for index queue\n");
 		ret = PTR_ERR(srq->idx_que.umem);
 		goto err_user_srq_mtt;
 	}
 
-	ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem),
-				PAGE_SHIFT, &srq->idx_que.mtt);
-
+	buf = &srq->idx_que.idx_buf;
+	buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem),
+				   1 << hr_dev->caps.idx_buf_pg_sz);
+	buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+	ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift,
+				&srq->idx_que.mtt);
 	if (ret) {
 		dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n");
 		goto err_user_idx_mtt;
@@ -251,7 +255,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
 	struct hns_roce_idx_que *idx_que = &srq->idx_que;
 
-	idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL);
+	idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
 	if (!idx_que->bitmap)
 		return -ENOMEM;
 
@@ -277,7 +281,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
 		return -ENOMEM;
 
 	srq->head = 0;
-	srq->tail = srq->max - 1;
+	srq->tail = srq->wqe_cnt - 1;
 
 	ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
 				&srq->mtt);
@@ -308,7 +312,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
 	if (ret)
 		goto err_kernel_idx_buf;
 
-	srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+	srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
 	if (!srq->wrid) {
 		ret = -ENOMEM;
 		goto err_kernel_idx_buf;
@@ -354,7 +358,7 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
 }
 
 int hns_roce_create_srq(struct ib_srq *ib_srq,
-			struct ib_srq_init_attr *srq_init_attr,
+			struct ib_srq_init_attr *init_attr,
 			struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
@@ -366,24 +370,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	u32 cqn;
 
 	/* Check the actual SRQ wqe and SRQ sge num */
-	if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
-	    srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+	if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+	    init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
 		return -EINVAL;
 
 	mutex_init(&srq->mutex);
 	spin_lock_init(&srq->lock);
 
-	srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
-	srq->max_gs = srq_init_attr->attr.max_sge;
+	srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+	srq->max_gs = init_attr->attr.max_sge;
 
 	srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
 
 	srq->wqe_shift = ilog2(srq_desc_size);
 
-	srq_buf_size = srq->max * srq_desc_size;
+	srq_buf_size = srq->wqe_cnt * srq_desc_size;
 
 	srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
-	srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+	srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz;
 	srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
 	srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
 
@@ -401,8 +405,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 		}
 	}
 
-	cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
-	      to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+	cqn = ib_srq_has_cq(init_attr->srq_type) ?
+	      to_hr_cq(init_attr->ext.cq)->cqn : 0;
 
 	srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
 
@@ -449,7 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 		hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
 	} else {
 		kvfree(srq->wrid);
-		hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+		hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift,
 				  &srq->buf);
 	}
 	ib_umem_release(srq->idx_que.umem);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 2d6a378e8560..bb78d3280acc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
 	dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
 	if (!dst || dst->error) {
 		if (dst) {
-			dst_release(dst);
 			i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
 				     dst->error);
+			dst_release(dst);
 		}
 		return rc;
 	}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index cd9ee1664a69..86375947bc67 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 
 	if (length > I40IW_MAX_MR_SIZE)
 		return ERR_PTR(-EINVAL);
-	region = ib_umem_get(udata, start, length, acc, 0);
+	region = ib_umem_get(udata, start, length, acc);
 	if (IS_ERR(region))
 		return (struct ib_mr *)region;
 
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index a7d238d312f0..306b21281fa2 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
 	int n;
 
 	*umem = ib_umem_get(udata, buf_addr, cqe * cqe_size,
-			    IB_ACCESS_LOCAL_WRITE, 1);
+			    IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
 
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 0f390351cef0..714f9df5bf39 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
 
 	page->user_virt = (virt & PAGE_MASK);
 	page->refcnt    = 0;
-	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0);
 	if (IS_ERR(page->umem)) {
 		err = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 57079110af9b..abe68708d6d6 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 	}
 	mutex_unlock(&dev->counters_table[port_num - 1].mutex);
 	if (stats_avail) {
-		memset(out_mad->data, 0, sizeof out_mad->data);
 		switch (counter_stats.counter_mode & 0xf) {
 		case 0:
 			edit_counter(&counter_stats,
@@ -984,38 +983,31 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 
 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index)
+			const struct ib_mad *in, struct ib_mad *out,
+			size_t *out_mad_size, u16 *out_mad_pkey_index)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
 	enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
 
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
-
 	/* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
 	 * queries, should be called only by VFs and for that specific purpose
 	 */
 	if (link == IB_LINK_LAYER_INFINIBAND) {
 		if (mlx4_is_slave(dev->dev) &&
-		    (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
-		     (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
-		      in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
-		      in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
-			return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
-						in_grh, in_mad, out_mad);
+		    (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+		     (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+		      in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+		      in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
+			return iboe_process_mad(ibdev, mad_flags, port_num,
+						in_wc, in_grh, in, out);
 
-		return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
-				      in_grh, in_mad, out_mad);
+		return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+				      in, out);
 	}
 
 	if (link == IB_LINK_LAYER_ETHERNET)
 		return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
-					in_grh, in_mad, out_mad);
+					in_grh, in, out);
 
 	return -EINVAL;
 }
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8d2f1e38b891..0b5dc1d5928f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -256,6 +256,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 	int hw_update = 0;
 	int i;
 	struct gid_entry *gids = NULL;
+	u16 vlan_id = 0xffff;
+	u8 mac[ETH_ALEN];
 
 	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
 		return -EINVAL;
@@ -266,12 +268,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 	if (!context)
 		return -EINVAL;
 
+	ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+	if (ret)
+		return ret;
 	port_gid_table = &iboe->gids[attr->port_num - 1];
 	spin_lock_bh(&iboe->lock);
 	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
 		if (!memcmp(&port_gid_table->gids[i].gid,
 			    &attr->gid, sizeof(attr->gid)) &&
-		    port_gid_table->gids[i].gid_type == attr->gid_type)  {
+		    port_gid_table->gids[i].gid_type == attr->gid_type &&
+		    port_gid_table->gids[i].vlan_id == vlan_id)  {
 			found = i;
 			break;
 		}
@@ -291,6 +297,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 				memcpy(&port_gid_table->gids[free].gid,
 				       &attr->gid, sizeof(attr->gid));
 				port_gid_table->gids[free].gid_type = attr->gid_type;
+				port_gid_table->gids[free].vlan_id = vlan_id;
 				port_gid_table->gids[free].ctx->real_index = free;
 				port_gid_table->gids[free].ctx->refcount = 1;
 				hw_update = 1;
@@ -1146,7 +1153,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 		return rdma_user_mmap_io(context, vma,
 					 to_mucontext(context)->uar.pfn,
 					 PAGE_SIZE,
-					 pgprot_noncached(vma->vm_page_prot));
+					 pgprot_noncached(vma->vm_page_prot),
+					 NULL);
 
 	case 1:
 		if (dev->dev->caps.bf_reg_size == 0)
@@ -1155,7 +1163,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 			context, vma,
 			to_mucontext(context)->uar.pfn +
 				dev->dev->caps.num_uars,
-			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
+			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
+			NULL);
 
 	case 3: {
 		struct mlx4_clock_params params;
@@ -1171,7 +1180,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 					    params.bar) +
 			 params.offset) >>
 				PAGE_SHIFT,
-			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
+			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
+			NULL);
 	}
 
 	default:
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index eb53bb4c0c91..d188573187fa 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -508,6 +508,7 @@ struct gid_entry {
 	union ib_gid	gid;
 	enum ib_gid_type gid_type;
 	struct gid_cache_context *ctx;
+	u16 vlan_id;
 };
 
 struct mlx4_port_gid_table {
@@ -786,11 +787,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
 		 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
 		 const void *in_mad, void *response_mad);
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,	u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index);
+			const struct ib_mad *in, struct ib_mad *out,
+			size_t *out_mad_size, u16 *out_mad_pkey_index);
 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
 
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 6ae503cfc526..dfa17bcdcdbc 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
 		up_read(&current->mm->mmap_sem);
 	}
 
-	return ib_umem_get(udata, start, length, access_flags, 0);
+	return ib_umem_get(udata, start, length, access_flags);
 }
 
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bd4aa04416c6..85f57b76e446 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 		       (qp->sq.wqe_cnt << qp->sq.wqe_shift);
 
-	qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0);
+	qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0);
 	if (IS_ERR(qp->umem)) {
 		err = PTR_ERR(qp->umem);
 		goto err;
@@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
 		if (err)
 			goto err;
 
-		qp->umem =
-			ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0);
+		qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0);
 		if (IS_ERR(qp->umem)) {
 			err = PTR_ERR(qp->umem);
 			goto err;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 848db7264cc9..8dcf6e3d9ae2 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
 		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
 			return -EFAULT;
 
-		srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+		srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0);
 		if (IS_ERR(srq->umem))
 			return PTR_ERR(srq->umem);
 
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 9924be8384d8..d0a043ccbe58 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
 		srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
-		cong.o
+		cong.o restrack.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 45f48cde6b9d..dd8d24ee8e1d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	struct mlx5_cqe64 *cqe64;
 	struct mlx5_core_qp *mqp;
 	struct mlx5_ib_wq *wq;
-	struct mlx5_sig_err_cqe *sig_err_cqe;
-	struct mlx5_core_mkey *mmkey;
-	struct mlx5_ib_mr *mr;
 	uint8_t opcode;
 	uint32_t qpn;
 	u16 wqe_ctr;
@@ -519,27 +516,29 @@ repoll:
 			}
 		}
 		break;
-	case MLX5_CQE_SIG_ERR:
-		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+	case MLX5_CQE_SIG_ERR: {
+		struct mlx5_sig_err_cqe *sig_err_cqe =
+			(struct mlx5_sig_err_cqe *)cqe64;
+		struct mlx5_core_sig_ctx *sig;
 
-		xa_lock(&dev->mdev->priv.mkey_table);
-		mmkey = xa_load(&dev->mdev->priv.mkey_table,
+		xa_lock(&dev->sig_mrs);
+		sig = xa_load(&dev->sig_mrs,
 				mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
-		mr = to_mibmr(mmkey);
-		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
-		mr->sig->sig_err_exists = true;
-		mr->sig->sigerr_count++;
+		get_sig_err_item(sig_err_cqe, &sig->err_item);
+		sig->sig_err_exists = true;
+		sig->sigerr_count++;
 
 		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
-			     cq->mcq.cqn, mr->sig->err_item.key,
-			     mr->sig->err_item.err_type,
-			     mr->sig->err_item.sig_err_offset,
-			     mr->sig->err_item.expected,
-			     mr->sig->err_item.actual);
+			     cq->mcq.cqn, sig->err_item.key,
+			     sig->err_item.err_type,
+			     sig->err_item.sig_err_offset,
+			     sig->err_item.expected,
+			     sig->err_item.actual);
 
-		xa_unlock(&dev->mdev->priv.mkey_table);
+		xa_unlock(&dev->sig_mrs);
 		goto repoll;
 	}
+	}
 
 	return 0;
 }
@@ -710,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 
 	cq->buf.umem =
 		ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
-			    IB_ACCESS_LOCAL_WRITE, 1);
+			    IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(cq->buf.umem)) {
 		err = PTR_ERR(cq->buf.umem);
 		return err;
@@ -1111,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 
 	umem = ib_umem_get(udata, ucmd.buf_addr,
 			   (size_t)ucmd.cqe_size * entries,
-			   IB_ACCESS_LOCAL_WRITE, 1);
+			   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(umem)) {
 		err = PTR_ERR(umem);
 		return err;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index d609f4659afb..9d0a18cf9e5e 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -100,6 +100,7 @@ struct devx_obj {
 		struct mlx5_ib_devx_mr	devx_mr;
 		struct mlx5_core_dct	core_dct;
 		struct mlx5_core_cq	core_cq;
+		u32			flow_counter_bulk_size;
 	};
 	struct list_head event_sub; /* holds devx_event_subscription entries */
 };
@@ -192,15 +193,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
 	}
 }
 
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
 {
 	struct devx_obj *devx_obj = obj;
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
 
 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+		if (offset && offset >= devx_obj->flow_counter_bulk_size)
+			return false;
+
 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
 				       devx_obj->dinbox,
 				       flow_counter_id);
+		*counter_id += offset;
 		return true;
 	}
 
@@ -1265,8 +1271,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
 
-	return xa_err(xa_store(&dev->mdev->priv.mkey_table,
-			       mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
+	return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
+			       GFP_KERNEL));
 }
 
 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1345,9 +1351,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 		 * the mmkey, we must wait for that to stop before freeing the
 		 * mkey, as another allocation could get the same mkey #.
 		 */
-		xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+		xa_erase(&obj->ib_dev->odp_mkeys,
 			 mlx5_base_mkey(obj->devx_mr.mmkey.key));
-		synchronize_srcu(&dev->mr_srcu);
+		synchronize_srcu(&dev->odp_srcu);
 	}
 
 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
@@ -1463,6 +1469,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 	if (err)
 		goto obj_free;
 
+	if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+		u8 bulk = MLX5_GET(alloc_flow_counter_in,
+				   cmd_in,
+				   flow_counter_bulk);
+		obj->flow_counter_bulk_size = 128UL * bulk;
+	}
+
 	uobj->object = obj;
 	INIT_LIST_HEAD(&obj->event_sub);
 	obj->ib_dev = dev;
@@ -2121,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 	if (err)
 		return err;
 
-	obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
+	obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access);
 	if (IS_ERR(obj->umem))
 		return PTR_ERR(obj->umem);
 
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 8f4e5f22b84c..12737c509aa2 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
 
 	page->user_virt = (virt & PAGE_MASK);
 	page->refcnt    = 0;
-	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0);
 	if (IS_ERR(page->umem)) {
 		err = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b198ff10cde9..dbee17d22d50 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -85,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
 	int len, ret, i;
 	u32 counter_id = 0;
+	u32 *offset_attr;
+	u32 offset = 0;
 
 	if (!capable(CAP_NET_RAW))
 		return -EPERM;
@@ -151,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	if (len) {
 		devx_obj = arr_flow_actions[0]->object;
 
-		if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+		if (uverbs_attr_is_valid(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+			int num_offsets = uverbs_attr_ptr_get_array_size(
+				attrs,
+				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+				sizeof(u32));
+
+			if (num_offsets != 1)
+				return -EINVAL;
+
+			offset_attr = uverbs_attr_get_alloced_ptr(
+				attrs,
+				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+			offset = *offset_attr;
+		}
+
+		if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset,
+						  &counter_id))
 			return -EINVAL;
+
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 	}
 
@@ -598,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
 			     MLX5_IB_OBJECT_DEVX_OBJ,
 			     UVERBS_ACCESS_READ, 1, 1,
-			     UA_OPTIONAL));
+			     UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+			   UA_OPTIONAL,
+			   UA_ALLOC_AND_COPY));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_DESTROY_FLOW,
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 4950df3f71b6..ac4d8d1b9a07 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
 		},
 		.sq_sig_type = gsi->sq_sig_type,
 		.qp_type = IB_QPT_UD,
-		.create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
 	};
 
 	return ib_create_qp(pd, &init_attr);
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index 649a3364f838..4f0edd4832bd 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -201,3 +201,27 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 
 	return -EINVAL;
 }
+
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+			struct ifla_vf_guid *node_guid,
+			struct ifla_vf_guid *port_guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *rep;
+	int err;
+
+	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	if (!rep)
+		return -ENOMEM;
+
+	err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep);
+	if (err)
+		goto ex;
+
+	port_guid->guid = rep->port_guid;
+	node_guid->guid = rep->node_guid;
+ex:
+	kfree(rep);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 348c1df69cdc..14e0c17de6a9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
 				port);
 }
 
-static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		       const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		       const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-	u16 slid;
-	int err;
-
-	slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
-
-	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
-		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
-	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
-	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-		if (in_mad->mad_hdr.method   != IB_MGMT_METHOD_GET &&
-		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_SET &&
-		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_TRAP_REPRESS)
-			return IB_MAD_RESULT_SUCCESS;
-
-		/* Don't process SMInfo queries -- the SMA can't handle them.
-		 */
-		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
-			return IB_MAD_RESULT_SUCCESS;
-	} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
-		   in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1   ||
-		   in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2   ||
-		   in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
-		if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
-		    in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
-			return IB_MAD_RESULT_SUCCESS;
-	} else {
-		return IB_MAD_RESULT_SUCCESS;
-	}
-
-	err = mlx5_MAD_IFC(to_mdev(ibdev),
-			   mad_flags & IB_MAD_IGNORE_MKEY,
-			   mad_flags & IB_MAD_IGNORE_BKEY,
-			   port_num, in_wc, in_grh, in_mad, out_mad);
-	if (err)
-		return IB_MAD_RESULT_FAILURE;
-
-	/* set return bit in status of directed route responses */
-	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
-
-	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
-		/* no response for trap repress */
-		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
-	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
 static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
 			       void *out)
 {
@@ -271,30 +219,66 @@ done:
 
 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index)
+			const struct ib_mad *in, struct ib_mad *out,
+			size_t *out_mad_size, u16 *out_mad_pkey_index)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-	int ret;
+	u8 mgmt_class = in->mad_hdr.mgmt_class;
+	u8 method = in->mad_hdr.method;
+	u16 slid;
+	int err;
 
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
+	slid = in_wc ? ib_lid_cpu16(in_wc->slid) :
+		       be16_to_cpu(IB_LID_PERMISSIVE);
 
-	memset(out_mad->data, 0, sizeof(out_mad->data));
+	if (method == IB_MGMT_METHOD_TRAP && !slid)
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
 
-	if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
-	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
-	    in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
-		ret = process_pma_cmd(dev, port_num, in_mad, out_mad);
-	} else {
-		ret =  process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
-				   in_mad, out_mad);
+	switch (mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: {
+		if (method != IB_MGMT_METHOD_GET &&
+		    method != IB_MGMT_METHOD_SET &&
+		    method != IB_MGMT_METHOD_TRAP_REPRESS)
+			return IB_MAD_RESULT_SUCCESS;
+
+		/* Don't process SMInfo queries -- the SMA can't handle them.
+		 */
+		if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+			return IB_MAD_RESULT_SUCCESS;
+	} break;
+	case IB_MGMT_CLASS_PERF_MGMT:
+		if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
+		    method == IB_MGMT_METHOD_GET)
+			return process_pma_cmd(dev, port_num, in, out);
+		/* fallthrough */
+	case MLX5_IB_VENDOR_CLASS1:
+		/* fallthrough */
+	case MLX5_IB_VENDOR_CLASS2:
+	case IB_MGMT_CLASS_CONG_MGMT: {
+		if (method != IB_MGMT_METHOD_GET &&
+		    method != IB_MGMT_METHOD_SET)
+			return IB_MAD_RESULT_SUCCESS;
+	} break;
+	default:
+		return IB_MAD_RESULT_SUCCESS;
 	}
-	return ret;
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+			   mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+			   in_grh, in, out);
+	if (err)
+		return IB_MAD_RESULT_FAILURE;
+
+	/* set return bit in status of directed route responses */
+	if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		out->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+	if (method == IB_MGMT_METHOD_TRAP_REPRESS)
+		/* no response for trap repress */
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 46ea4f0b9b51..97b26e9a5234 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -67,6 +67,7 @@
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem_odp.h>
 
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
@@ -693,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
 	get_atomic_caps(dev, atomic_size_qp, props);
 }
 
-static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
-			       struct ib_device_attr *props)
-{
-	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
-
-	get_atomic_caps(dev, atomic_size_qp, props);
-}
-
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
-{
-	struct ib_device_attr props = {};
-
-	get_atomic_caps_dc(dev, &props);
-	return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
-}
 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
 					__be64 *sys_image_guid)
 {
@@ -844,8 +830,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
 	if (uhw->outlen && uhw->outlen < resp_len)
 		return -EINVAL;
-	else
-		resp.response_length = resp_len;
+
+	resp.response_length = resp_len;
 
 	if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
 		return -EINVAL;
@@ -1011,6 +997,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
 	props->max_pi_fast_reg_page_list_len =
 		props->max_fast_reg_page_list_len / 2;
+	props->max_sgl_rd =
+		MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
 	get_atomic_caps_qp(dev, props);
 	props->masked_atomic_cap   = IB_ATOMIC_NONE;
 	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -1161,8 +1149,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 				MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
 			resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
 				MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
-			resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
-				MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+			if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
+				resp.striding_rq_caps
+					.min_single_wqe_log_num_of_strides =
+					MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+			else
+				resp.striding_rq_caps
+					.min_single_wqe_log_num_of_strides =
+					MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
 			resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
 				MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
 			resp.striding_rq_caps.supported_qpts =
@@ -1808,7 +1802,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 		return -EINVAL;
 
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
-	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
+	if (dev->wc_support)
 		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
 	resp.cache_line_size = cache_line_size();
 	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
@@ -2168,7 +2162,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
 
 	err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
-				prot);
+				prot, NULL);
 	if (err) {
 		mlx5_ib_err(dev,
 			    "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
@@ -2210,7 +2204,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	      PAGE_SHIFT) +
 	      page_idx;
 	return rdma_user_mmap_io(context, vma, pfn, map_size,
-				 pgprot_writecombine(vma->vm_page_prot));
+				 pgprot_writecombine(vma->vm_page_prot),
+				 NULL);
 }
 
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2248,7 +2243,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 			PAGE_SHIFT;
 		return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
 					 PAGE_SIZE,
-					 pgprot_noncached(vma->vm_page_prot));
+					 pgprot_noncached(vma->vm_page_prot),
+					 NULL);
 	case MLX5_IB_MMAP_CLOCK_INFO:
 		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
 
@@ -6140,11 +6136,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
-		srcu_barrier(&dev->mr_srcu);
-		cleanup_srcu_struct(&dev->mr_srcu);
-	}
+	WARN_ON(!xa_empty(&dev->odp_mkeys));
+	cleanup_srcu_struct(&dev->odp_srcu);
 
+	WARN_ON(!xa_empty(&dev->sig_mrs));
 	WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
 }
 
@@ -6196,15 +6191,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	mutex_init(&dev->cap_mask_mutex);
 	INIT_LIST_HEAD(&dev->qp_list);
 	spin_lock_init(&dev->reset_flow_resource_lock);
+	xa_init(&dev->odp_mkeys);
+	xa_init(&dev->sig_mrs);
 
 	spin_lock_init(&dev->dm.lock);
 	dev->dm.dev = mdev;
 
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
-		err = init_srcu_struct(&dev->mr_srcu);
-		if (err)
-			goto err_mp;
-	}
+	err = init_srcu_struct(&dev->odp_srcu);
+	if (err)
+		goto err_mp;
 
 	return 0;
 
@@ -6264,6 +6259,9 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.disassociate_ucontext = mlx5_ib_disassociate_ucontext,
 	.drain_rq = mlx5_ib_drain_rq,
 	.drain_sq = mlx5_ib_drain_sq,
+	.enable_driver = mlx5_ib_enable_driver,
+	.fill_res_entry = mlx5_ib_fill_res_entry,
+	.fill_stat_entry = mlx5_ib_fill_stat_entry,
 	.get_dev_fw_str = get_dev_fw_str,
 	.get_dma_mr = mlx5_ib_get_dma_mr,
 	.get_link_layer = mlx5_ib_port_link_layer,
@@ -6310,6 +6308,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
 
 static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
 	.get_vf_config = mlx5_ib_get_vf_config,
+	.get_vf_guid = mlx5_ib_get_vf_guid,
 	.get_vf_stats = mlx5_ib_get_vf_stats,
 	.set_vf_guid = mlx5_ib_set_vf_guid,
 	.set_vf_link_state = mlx5_ib_set_vf_link_state,
@@ -6705,6 +6704,18 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
 	}
 }
 
+int mlx5_ib_enable_driver(struct ib_device *dev)
+{
+	struct mlx5_ib_dev *mdev = to_mdev(dev);
+	int ret;
+
+	ret = mlx5_ib_test_wc(mdev);
+	mlx5_ib_dbg(mdev, "Write-Combining %s",
+		    mdev->wc_support ? "supported" : "not supported");
+
+	return ret;
+}
+
 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 		      const struct mlx5_ib_profile *profile,
 		      int stage)
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index b5aece786b36..048f4e974a61 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -34,6 +34,7 @@
 #include <rdma/ib_umem.h>
 #include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
+#include <linux/jiffies.h>
 
 /* @umem: umem object to scan
  * @addr: ib virtual address requested by the user
@@ -216,3 +217,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 	*offset = buf_off >> ilog2(off_size);
 	return 0;
 }
+
+#define WR_ID_BF 0xBF
+#define WR_ID_END 0xBAD
+#define TEST_WC_NUM_WQES 255
+#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
+static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
+			 bool signaled)
+{
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_wqe_ctrl_seg *ctrl;
+	struct mlx5_bf *bf = &qp->bf;
+	__be32 mmio_wqe[16] = {};
+	unsigned long flags;
+	unsigned int idx;
+	int i;
+
+	if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
+		return -EIO;
+
+	spin_lock_irqsave(&qp->sq.lock, flags);
+
+	idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+	ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
+
+	memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
+	ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
+	ctrl->opmod_idx_opcode =
+		cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
+	ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
+				   (qp->trans_qp.base.mqp.qpn << 8));
+
+	qp->sq.wrid[idx] = wr_id;
+	qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
+	qp->sq.wqe_head[idx] = qp->sq.head + 1;
+	qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
+					MLX5_SEND_WQE_BB);
+	qp->sq.w_list[idx].next = qp->sq.cur_post;
+	qp->sq.head++;
+
+	memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
+	((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
+		MLX5_WQE_CTRL_CQ_UPDATE;
+
+	/* Make sure that descriptors are written before
+	 * updating doorbell record and ringing the doorbell
+	 */
+	wmb();
+
+	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+	/* Make sure doorbell record is visible to the HCA before
+	 * we hit doorbell
+	 */
+	wmb();
+	for (i = 0; i < 8; i++)
+		mlx5_write64(&mmio_wqe[i * 2],
+			     bf->bfreg->map + bf->offset + i * 8);
+
+	bf->offset ^= bf->buf_size;
+
+	spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+	return 0;
+}
+
+static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
+{
+	int ret;
+	struct ib_wc wc = {};
+	unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
+
+	do {
+		ret = ib_poll_cq(cq, 1, &wc);
+		if (ret < 0 || wc.status)
+			return ret < 0 ? ret : -EINVAL;
+		if (ret)
+			break;
+	} while (!time_after(jiffies, end));
+
+	if (!ret)
+		return -ETIMEDOUT;
+
+	if (wc.wr_id != WR_ID_BF)
+		ret = 0;
+
+	return ret;
+}
+
+static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
+{
+	int err, i;
+
+	for (i = 0; i < TEST_WC_NUM_WQES; i++) {
+		err = post_send_nop(dev, qp, WR_ID_BF, false);
+		if (err)
+			return err;
+	}
+
+	return post_send_nop(dev, qp, WR_ID_END, true);
+}
+
+int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
+{
+	struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
+	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+	struct ib_qp_init_attr qp_init_attr = {
+		.cap = { .max_send_wr = TEST_WC_NUM_WQES },
+		.qp_type = IB_QPT_UD,
+		.sq_sig_type = IB_SIGNAL_REQ_WR,
+		.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
+	};
+	struct ib_qp_attr qp_attr = { .port_num = 1 };
+	struct ib_device *ibdev = &dev->ib_dev;
+	struct ib_qp *qp;
+	struct ib_cq *cq;
+	struct ib_pd *pd;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev->mdev, bf))
+		return 0;
+
+	if (!dev->mdev->roce.roce_en &&
+	    port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
+		if (mlx5_core_is_pf(dev->mdev))
+			dev->wc_support = true;
+		return 0;
+	}
+
+	ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
+	if (ret)
+		goto print_err;
+
+	if (!dev->wc_bfreg.wc)
+		goto out1;
+
+	pd = ib_alloc_pd(ibdev, 0);
+	if (IS_ERR(pd)) {
+		ret = PTR_ERR(pd);
+		goto out1;
+	}
+
+	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+	if (IS_ERR(cq)) {
+		ret = PTR_ERR(cq);
+		goto out2;
+	}
+
+	qp_init_attr.recv_cq = cq;
+	qp_init_attr.send_cq = cq;
+	qp = ib_create_qp(pd, &qp_init_attr);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
+		goto out3;
+	}
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = ib_modify_qp(qp, &qp_attr,
+			   IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
+				   IB_QP_QKEY);
+	if (ret)
+		goto out4;
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+	if (ret)
+		goto out4;
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+	if (ret)
+		goto out4;
+
+	ret = test_wc_do_send(dev, qp);
+	if (ret < 0)
+		goto out4;
+
+	ret = test_wc_poll_cq_result(dev, cq);
+	if (ret > 0) {
+		dev->wc_support = true;
+		ret = 0;
+	}
+
+out4:
+	ib_destroy_qp(qp);
+out3:
+	ib_destroy_cq(cq);
+out2:
+	ib_dealloc_pd(pd);
+out1:
+	mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
+print_err:
+	if (ret)
+		mlx5_ib_err(
+			dev,
+			"Error %d while trying to test write-combining support\n",
+			ret);
+	return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1a98ee2e01c4..b983e385a8c5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -247,12 +247,8 @@ struct mlx5_ib_flow_db {
  * These flags are intended for internal use by the mlx5_ib driver, and they
  * rely on the range reserved for that use in the ib_qp_create_flags enum.
  */
-
-/* Create a UD QP whose source QP number is 1 */
-static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
-{
-	return IB_QP_CREATE_RESERVED_START;
-}
+#define MLX5_IB_QP_CREATE_SQPN_QP1	IB_QP_CREATE_RESERVED_START
+#define MLX5_IB_QP_CREATE_WC_TEST	(IB_QP_CREATE_RESERVED_START << 1)
 
 struct wr_list {
 	u16	opcode;
@@ -295,6 +291,7 @@ enum mlx5_ib_wq_flags {
 #define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
 #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
 
 struct mlx5_ib_rwq {
 	struct ib_wq		ibwq;
@@ -585,6 +582,9 @@ struct mlx5_ib_dm {
 					  IB_ACCESS_REMOTE_READ   |\
 					  IB_ZERO_BASED)
 
+#define mlx5_update_odp_stats(mr, counter_name, value)		\
+	atomic64_add(value, &((mr)->odp_stats.counter_name))
+
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
 	void			*descs;
@@ -606,7 +606,6 @@ struct mlx5_ib_mr {
 	struct mlx5_ib_dev     *dev;
 	u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
 	struct mlx5_core_sig_ctx    *sig;
-	unsigned int		live;
 	void			*descs_alloc;
 	int			access_flags; /* Needed for rereg MR */
 
@@ -618,10 +617,18 @@ struct mlx5_ib_mr {
 	u64			data_iova;
 	u64			pi_iova;
 
-	atomic_t		num_leaf_free;
-	wait_queue_head_t       q_leaf_free;
+	/* For ODP and implicit */
+	atomic_t		num_deferred_work;
+	struct xarray		implicit_children;
+	union {
+		struct rcu_head rcu;
+		struct list_head elm;
+		struct work_struct work;
+	} odp_destroy;
+	struct ib_odp_counters	odp_stats;
+	bool			is_odp_implicit;
+
 	struct mlx5_async_work  cb_work;
-	atomic_t		num_pending_prefetch;
 };
 
 static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
@@ -957,7 +964,11 @@ struct mlx5_ib_dev {
 	/* serialize update of capability mask
 	 */
 	struct mutex			cap_mask_mutex;
-	bool				ib_active;
+	u8				ib_active:1;
+	u8				fill_delay:1;
+	u8				is_rep:1;
+	u8				lag_active:1;
+	u8				wc_support:1;
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
@@ -966,7 +977,6 @@ struct mlx5_ib_dev {
 	struct timer_list		delay_timer;
 	/* Prevents soft lock on massive reg MRs */
 	struct mutex			slow_path_mutex;
-	int				fill_delay;
 	struct ib_odp_caps	odp_caps;
 	u64			odp_max_size;
 	struct mlx5_ib_pf_eq	odp_pf_eq;
@@ -975,7 +985,9 @@ struct mlx5_ib_dev {
 	 * Sleepable RCU that prevents destruction of MRs while they are still
 	 * being used by a page fault handler.
 	 */
-	struct srcu_struct      mr_srcu;
+	struct srcu_struct      odp_srcu;
+	struct xarray		odp_mkeys;
+
 	u32			null_mkey;
 	struct mlx5_ib_flow_db	*flow_db;
 	/* protect resources needed as part of reset flow */
@@ -984,11 +996,10 @@ struct mlx5_ib_dev {
 	/* Array with num_ports elements */
 	struct mlx5_ib_port	*port;
 	struct mlx5_sq_bfreg	bfreg;
+	struct mlx5_sq_bfreg	wc_bfreg;
 	struct mlx5_sq_bfreg	fp_bfreg;
 	struct mlx5_ib_delay_drop	delay_drop;
 	const struct mlx5_ib_profile	*profile;
-	bool			is_rep;
-	int				lag_active;
 
 	struct mlx5_ib_lb_state		lb;
 	u8			umr_fence;
@@ -999,6 +1010,8 @@ struct mlx5_ib_dev {
 	struct mlx5_srq_table   srq_table;
 	struct mlx5_async_ctx   async_ctx;
 	struct mlx5_devx_event_table devx_event_table;
+
+	struct xarray sig_mrs;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1162,6 +1175,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     struct ib_udata *udata,
 					     int access_flags);
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
+void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			  u64 length, u64 virt_addr, int access_flags,
 			  struct ib_pd *pd, struct ib_udata *udata);
@@ -1179,9 +1193,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
 			 unsigned int *meta_sg_offset);
 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index);
+			const struct ib_mad *in, struct ib_mad *out,
+			size_t *out_mad_size, u16 *out_mad_pkey_index);
 struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
 				   struct ib_udata *udata);
 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
@@ -1223,6 +1236,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
+
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -1235,7 +1250,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 						      struct ib_rwq_ind_table_init_attr *init_attr,
 						      struct ib_udata *udata);
 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 			       struct ib_ucontext *context,
 			       struct ib_dm_alloc_attr *attr,
@@ -1300,6 +1314,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
 			      u8 port, int state);
 int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
 			 u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+			struct ifla_vf_guid *node_guid,
+			struct ifla_vf_guid *port_guid);
 int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 			u64 guid, int type);
 
@@ -1334,6 +1351,10 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
 						   u8 *native_port_num);
 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
 				  u8 port_num);
+int mlx5_ib_fill_res_entry(struct sk_buff *msg,
+			   struct rdma_restrack_entry *res);
+int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
+			    struct rdma_restrack_entry *res);
 
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
@@ -1349,7 +1370,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
 	struct mlx5_flow_act *flow_act, u32 counter_id,
 	void *cmd_in, int inlen, int dest_id, int dest_type);
 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
 int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
 void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
 #else
@@ -1491,4 +1512,7 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
 
 	return true;
 }
+
+int mlx5_ib_enable_driver(struct ib_device *dev);
+int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 7019c12005f4..60d39b9ec41c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -50,7 +50,6 @@ enum {
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 
 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 {
@@ -59,13 +58,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
 
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
 
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
-		/* Wait until all page fault handlers using the mr complete. */
-		synchronize_srcu(&dev->mr_srcu);
-
-	return err;
+	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -94,8 +89,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u8 key;
 	unsigned long flags;
-	struct xarray *mkeys = &dev->mdev->priv.mkey_table;
-	int err;
 
 	spin_lock_irqsave(&ent->lock, flags);
 	ent->pending--;
@@ -122,13 +115,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
 	ent->size++;
 	spin_unlock_irqrestore(&ent->lock, flags);
 
-	xa_lock_irqsave(mkeys, flags);
-	err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
-				&mr->mmkey, GFP_ATOMIC));
-	xa_unlock_irqrestore(mkeys, flags);
-	if (err)
-		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-
 	if (!completion_done(&ent->compl))
 		complete(&ent->compl);
 }
@@ -218,9 +204,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
 
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
-		synchronize_srcu(&dev->mr_srcu);
-
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
 		list_del(&mr->list);
 		kfree(mr);
@@ -428,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 
 	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
 		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	ent = &cache->ent[entry];
@@ -511,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	c = order2idx(dev, mr->order);
 	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
 
-	if (unreg_umr(dev, mr)) {
+	if (mlx5_mr_cache_invalidate(mr)) {
 		mr->allocated_from_cache = false;
 		destroy_mkey(dev, mr);
 		ent = &cache->ent[c];
@@ -555,10 +538,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	synchronize_srcu(&dev->mr_srcu);
-#endif
-
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
 		list_del(&mr->list);
 		kfree(mr);
@@ -679,6 +658,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+					  struct ib_pd *pd)
+{
+	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+	MLX5_SET(mkc, mkc, lr, 1);
+
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -702,16 +695,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
-	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
-	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
-	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
-	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
-	MLX5_SET(mkc, mkc, lr, 1);
-
 	MLX5_SET(mkc, mkc, length64, 1);
-	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
-	MLX5_SET(mkc, mkc, qpn, 0xffffff);
-	MLX5_SET64(mkc, mkc, start_addr, 0);
+	set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
 
 	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
 	if (err)
@@ -779,7 +764,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		if (order)
 			*order = ilog2(roundup_pow_of_two(*ncont));
 	} else {
-		u = ib_umem_get(udata, start, length, access_flags, 0);
+		u = ib_umem_get(udata, start, length, access_flags);
 		if (IS_ERR(u)) {
 			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
 			return PTR_ERR(u);
@@ -1169,16 +1154,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 
 	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
 	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
-	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
-	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
-	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
-	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
-	MLX5_SET(mkc, mkc, lr, 1);
-
 	MLX5_SET64(mkc, mkc, len, length);
-	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
-	MLX5_SET(mkc, mkc, qpn, 0xffffff);
-	MLX5_SET64(mkc, mkc, start_addr, start_addr);
+	set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
 
 	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
 	if (err)
@@ -1337,10 +1314,15 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	if (is_odp_mr(mr)) {
 		to_ib_umem_odp(mr->umem)->private = mr;
-		atomic_set(&mr->num_pending_prefetch, 0);
+		atomic_set(&mr->num_deferred_work, 0);
+		err = xa_err(xa_store(&dev->odp_mkeys,
+				      mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
+				      GFP_KERNEL));
+		if (err) {
+			dereg_mr(dev, mr);
+			return ERR_PTR(err);
+		}
 	}
-	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
-		smp_store_release(&mr->live, 1);
 
 	return &mr->ibmr;
 error:
@@ -1348,22 +1330,29 @@ error:
 	return ERR_PTR(err);
 }
 
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+/**
+ * mlx5_mr_cache_invalidate - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA inprogress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_umr_wr umrwr = {};
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+	if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		return 0;
 
 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
 			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
-	umrwr.pd = dev->umrc.pd;
+	umrwr.pd = mr->dev->umrc.pd;
 	umrwr.mkey = mr->mmkey.key;
 	umrwr.ignore_free_state = 1;
 
-	return mlx5_ib_post_send_wait(dev, &umrwr);
+	return mlx5_ib_post_send_wait(mr->dev, &umrwr);
 }
 
 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
@@ -1447,7 +1436,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
 		if (mr->allocated_from_cache)
-			err = unreg_umr(dev, mr);
+			err = mlx5_mr_cache_invalidate(mr);
 		else
 			err = destroy_mkey(dev, mr);
 		if (err)
@@ -1560,6 +1549,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 					  mr->sig->psv_wire.psv_idx))
 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
 				     mr->sig->psv_wire.psv_idx);
+		xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
 		kfree(mr->sig);
 		mr->sig = NULL;
 	}
@@ -1575,54 +1565,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	int npages = mr->npages;
 	struct ib_umem *umem = mr->umem;
 
-	if (is_odp_mr(mr)) {
-		struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
-
-		/* Prevent new page faults and
-		 * prefetch requests from succeeding
-		 */
-		WRITE_ONCE(mr->live, 0);
-
-		/* Wait for all running page-fault handlers to finish. */
-		synchronize_srcu(&dev->mr_srcu);
-
-		/* dequeue pending prefetch requests for the mr */
-		if (atomic_read(&mr->num_pending_prefetch))
-			flush_workqueue(system_unbound_wq);
-		WARN_ON(atomic_read(&mr->num_pending_prefetch));
-
-		/* Destroy all page mappings */
-		if (!umem_odp->is_implicit_odp)
-			mlx5_ib_invalidate_range(umem_odp,
-						 ib_umem_start(umem_odp),
-						 ib_umem_end(umem_odp));
-		else
-			mlx5_ib_free_implicit_mr(mr);
-		/*
-		 * We kill the umem before the MR for ODP,
-		 * so that there will not be any invalidations in
-		 * flight, looking at the *mr struct.
-		 */
-		ib_umem_odp_release(umem_odp);
-		atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
-		/* Avoid double-freeing the umem. */
-		umem = NULL;
-	}
+	/* Stop all DMA */
+	if (is_odp_mr(mr))
+		mlx5_ib_fence_odp_mr(mr);
+	else
+		clean_mr(dev, mr);
 
-	clean_mr(dev, mr);
+	if (mr->allocated_from_cache)
+		mlx5_mr_cache_free(dev, mr);
+	else
+		kfree(mr);
 
-	/*
-	 * We should unregister the DMA address from the HCA before
-	 * remove the DMA mapping.
-	 */
-	mlx5_mr_cache_free(dev, mr);
 	ib_umem_release(umem);
-	if (umem)
-		atomic_sub(npages, &dev->mdev->priv.reg_pages);
+	atomic_sub(npages, &dev->mdev->priv.reg_pages);
 
-	if (!mr->allocated_from_cache)
-		kfree(mr);
 }
 
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1634,6 +1590,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 		dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
 	}
 
+	if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
+		mlx5_ib_free_implicit_mr(mmr);
+		return 0;
+	}
+
 	dereg_mr(to_mdev(ibmr->device), mmr);
 
 	return 0;
@@ -1797,8 +1758,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
 	if (err)
 		goto err_free_mtt_mr;
 
+	err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+			      mr->sig, GFP_KERNEL));
+	if (err)
+		goto err_free_descs;
 	return 0;
 
+err_free_descs:
+	destroy_mkey(dev, mr);
+	mlx5_free_priv_descs(mr);
 err_free_mtt_mr:
 	dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
 	mr->mtt_mr = NULL;
@@ -1951,9 +1919,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		err = xa_err(xa_store(&dev->odp_mkeys,
+				      mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
+				      GFP_KERNEL));
+		if (err)
+			goto free_mkey;
+	}
+
 	kfree(in);
 	return &mw->ibmw;
 
+free_mkey:
+	mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
 free:
 	kfree(mw);
 	kfree(in);
@@ -1967,13 +1945,12 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
 	int err;
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
-		xa_erase_irq(&dev->mdev->priv.mkey_table,
-			     mlx5_base_mkey(mmw->mmkey.key));
+		xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
 		/*
 		 * pagefault_single_data_segment() may be accessing mmw under
 		 * SRCU if the user bound an ODP MR to this MW.
 		 */
-		synchronize_srcu(&dev->mr_srcu);
+		synchronize_srcu(&dev->odp_srcu);
 	}
 
 	err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 3f9478d19376..45ee40c2f36e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -93,158 +93,152 @@ struct mlx5_pagefault {
 
 static u64 mlx5_imr_ksm_entries;
 
-static int check_parent(struct ib_umem_odp *odp,
-			       struct mlx5_ib_mr *parent)
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
+			   struct mlx5_ib_mr *imr, int flags)
 {
-	struct mlx5_ib_mr *mr = odp->private;
-
-	return mr && mr->parent == parent && !odp->dying;
-}
-
-static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
-{
-	if (WARN_ON(!mr || !is_odp_mr(mr)))
-		return NULL;
-
-	return to_ib_umem_odp(mr->umem)->per_mm;
-}
-
-static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
-{
-	struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
-	struct ib_ucontext_per_mm *per_mm = odp->per_mm;
-	struct rb_node *rb;
-
-	down_read(&per_mm->umem_rwsem);
-	while (1) {
-		rb = rb_next(&odp->interval_tree.rb);
-		if (!rb)
-			goto not_found;
-		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
-		if (check_parent(odp, parent))
-			goto end;
-	}
-not_found:
-	odp = NULL;
-end:
-	up_read(&per_mm->umem_rwsem);
-	return odp;
-}
-
-static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
-				      struct mlx5_ib_mr *parent)
-{
-	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
-	struct ib_umem_odp *odp;
-	struct rb_node *rb;
-
-	down_read(&per_mm->umem_rwsem);
-	odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
-	if (!odp)
-		goto end;
-
-	while (1) {
-		if (check_parent(odp, parent))
-			goto end;
-		rb = rb_next(&odp->interval_tree.rb);
-		if (!rb)
-			goto not_found;
-		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
-		if (ib_umem_start(odp) > start + length)
-			goto not_found;
-	}
-not_found:
-	odp = NULL;
-end:
-	up_read(&per_mm->umem_rwsem);
-	return odp;
-}
-
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
-			   size_t nentries, struct mlx5_ib_mr *mr, int flags)
-{
-	struct ib_pd *pd = mr->ibmr.pd;
-	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct ib_umem_odp *odp;
-	unsigned long va;
-	int i;
+	struct mlx5_klm *end = pklm + nentries;
 
 	if (flags & MLX5_IB_UPD_XLT_ZAP) {
-		for (i = 0; i < nentries; i++, pklm++) {
+		for (; pklm != end; pklm++, idx++) {
 			pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
-			pklm->key = cpu_to_be32(dev->null_mkey);
+			pklm->key = cpu_to_be32(imr->dev->null_mkey);
 			pklm->va = 0;
 		}
 		return;
 	}
 
 	/*
-	 * The locking here is pretty subtle. Ideally the implicit children
-	 * list would be protected by the umem_mutex, however that is not
+	 * The locking here is pretty subtle. Ideally the implicit_children
+	 * xarray would be protected by the umem_mutex, however that is not
 	 * possible. Instead this uses a weaker update-then-lock pattern:
 	 *
 	 *  srcu_read_lock()
-	 *    <change children list>
+	 *    xa_store()
 	 *    mutex_lock(umem_mutex)
 	 *     mlx5_ib_update_xlt()
 	 *    mutex_unlock(umem_mutex)
 	 *    destroy lkey
 	 *
-	 * ie any change the children list must be followed by the locked
-	 * update_xlt before destroying.
+	 * ie any change the xarray must be followed by the locked update_xlt
+	 * before destroying.
 	 *
 	 * The umem_mutex provides the acquire/release semantic needed to make
-	 * the children list visible to a racing thread. While SRCU is not
+	 * the xa_store() visible to a racing thread. While SRCU is not
 	 * technically required, using it gives consistent use of the SRCU
-	 * locking around the children list.
+	 * locking around the xarray.
 	 */
-	lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
-	lockdep_assert_held(&mr->dev->mr_srcu);
+	lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
+	lockdep_assert_held(&imr->dev->odp_srcu);
 
-	odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
-			 nentries * MLX5_IMR_MTT_SIZE, mr);
+	for (; pklm != end; pklm++, idx++) {
+		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
 
-	for (i = 0; i < nentries; i++, pklm++) {
 		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
-		va = (offset + i) * MLX5_IMR_MTT_SIZE;
-		if (odp && ib_umem_start(odp) == va) {
-			struct mlx5_ib_mr *mtt = odp->private;
-
+		if (mtt) {
 			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
-			odp = odp_next(odp);
+			pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
 		} else {
-			pklm->key = cpu_to_be32(dev->null_mkey);
+			pklm->key = cpu_to_be32(imr->dev->null_mkey);
+			pklm->va = 0;
 		}
-		mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
-			    i, va, be32_to_cpu(pklm->key));
 	}
 }
 
-static void mr_leaf_free_action(struct work_struct *work)
+static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
+{
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+	/* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */
+	mutex_lock(&odp->umem_mutex);
+	if (odp->npages) {
+		mlx5_mr_cache_invalidate(mr);
+		ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp),
+					    ib_umem_end(odp));
+		WARN_ON(odp->npages);
+	}
+	odp->private = NULL;
+	mutex_unlock(&odp->umem_mutex);
+
+	if (!mr->allocated_from_cache) {
+		mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
+		WARN_ON(mr->descs);
+	}
+}
+
+/*
+ * This must be called after the mr has been removed from implicit_children
+ * and the SRCU synchronized.  NOTE: The MR does not necessarily have to be
+ * empty here, parallel page faults could have raced with the free process and
+ * added pages to it.
+ */
+static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
 {
-	struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
-	int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
-	struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+	struct mlx5_ib_mr *imr = mr->parent;
 	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+	unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
 	int srcu_key;
 
-	mr->parent = NULL;
-	synchronize_srcu(&mr->dev->mr_srcu);
+	/* implicit_child_mr's are not allowed to have deferred work */
+	WARN_ON(atomic_read(&mr->num_deferred_work));
 
-	if (smp_load_acquire(&imr->live)) {
-		srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
+	if (need_imr_xlt) {
+		srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
 		mutex_lock(&odp_imr->umem_mutex);
-		mlx5_ib_update_xlt(imr, idx, 1, 0,
+		mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
 				   MLX5_IB_UPD_XLT_INDIRECT |
 				   MLX5_IB_UPD_XLT_ATOMIC);
 		mutex_unlock(&odp_imr->umem_mutex);
-		srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
+		srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
 	}
-	ib_umem_odp_release(odp);
+
+	dma_fence_odp_mr(mr);
+
+	mr->parent = NULL;
 	mlx5_mr_cache_free(mr->dev, mr);
+	ib_umem_odp_release(odp);
+	atomic_dec(&imr->num_deferred_work);
+}
+
+static void free_implicit_child_mr_work(struct work_struct *work)
+{
+	struct mlx5_ib_mr *mr =
+		container_of(work, struct mlx5_ib_mr, odp_destroy.work);
 
-	if (atomic_dec_and_test(&imr->num_leaf_free))
-		wake_up(&imr->q_leaf_free);
+	free_implicit_child_mr(mr, true);
+}
+
+static void free_implicit_child_mr_rcu(struct rcu_head *head)
+{
+	struct mlx5_ib_mr *mr =
+		container_of(head, struct mlx5_ib_mr, odp_destroy.rcu);
+
+	/* Freeing a MR is a sleeping operation, so bounce to a work queue */
+	INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+	queue_work(system_unbound_wq, &mr->odp_destroy.work);
+}
+
+static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
+{
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+	unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
+	struct mlx5_ib_mr *imr = mr->parent;
+
+	xa_lock(&imr->implicit_children);
+	/*
+	 * This can race with mlx5_ib_free_implicit_mr(), the first one to
+	 * reach the xa lock wins the race and destroys the MR.
+	 */
+	if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) !=
+	    mr)
+		goto out_unlock;
+
+	atomic_inc(&imr->num_deferred_work);
+	call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu,
+		  free_implicit_child_mr_rcu);
+
+out_unlock:
+	xa_unlock(&imr->implicit_children);
 }
 
 void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
@@ -254,19 +248,19 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
 				    sizeof(struct mlx5_mtt)) - 1;
 	u64 idx = 0, blk_start_idx = 0;
+	u64 invalidations = 0;
 	int in_block = 0;
 	u64 addr;
 
-	if (!umem_odp) {
-		pr_err("invalidation called on NULL umem or non-ODP umem\n");
-		return;
-	}
-
+	mutex_lock(&umem_odp->umem_mutex);
+	/*
+	 * If npages is zero then umem_odp->private may not be setup yet. This
+	 * does not complete until after the first page is mapped for DMA.
+	 */
+	if (!umem_odp->npages)
+		goto out;
 	mr = umem_odp->private;
 
-	if (!mr || !mr->ibmr.pd)
-		return;
-
 	start = max_t(u64, ib_umem_start(umem_odp), start);
 	end = min_t(u64, ib_umem_end(umem_odp), end);
 
@@ -276,7 +270,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 	 * overwrite the same MTTs.  Concurent invalidations might race us,
 	 * but they will write 0s as well, so no difference in the end result.
 	 */
-	mutex_lock(&umem_odp->umem_mutex);
 	for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
 		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
 		/*
@@ -291,6 +284,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 				blk_start_idx = idx;
 				in_block = 1;
 			}
+
+			/* Count page invalidations */
+			invalidations += idx - blk_start_idx + 1;
 		} else {
 			u64 umr_offset = idx & umr_block_mask;
 
@@ -308,6 +304,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 				   idx - blk_start_idx + 1, 0,
 				   MLX5_IB_UPD_XLT_ZAP |
 				   MLX5_IB_UPD_XLT_ATOMIC);
+
+	mlx5_update_odp_stats(mr, invalidations, invalidations);
+
 	/*
 	 * We are now sure that the device will not access the
 	 * memory. We can safely unmap it, and mark it as dirty if
@@ -316,13 +315,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 
 	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
 
-	if (unlikely(!umem_odp->npages && mr->parent &&
-		     !umem_odp->dying)) {
-		WRITE_ONCE(mr->live, 0);
-		umem_odp->dying = 1;
-		atomic_inc(&mr->parent->num_leaf_free);
-		schedule_work(&umem_odp->work);
-	}
+	if (unlikely(!umem_odp->npages && mr->parent))
+		destroy_unused_implicit_child_mr(mr);
+out:
 	mutex_unlock(&umem_odp->umem_mutex);
 }
 
@@ -390,8 +385,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
 	    !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
 		caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
-
-	return;
 }
 
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
@@ -416,237 +409,213 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 			    wq_num, err);
 }
 
-static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
-					    struct ib_umem_odp *umem_odp,
-					    bool ksm, int access_flags)
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+						unsigned long idx)
 {
-	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_odp *odp;
 	struct mlx5_ib_mr *mr;
+	struct mlx5_ib_mr *ret;
 	int err;
 
-	mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
-					    MLX5_IMR_MTT_CACHE_ENTRY);
+	odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+				      idx * MLX5_IMR_MTT_SIZE,
+				      MLX5_IMR_MTT_SIZE);
+	if (IS_ERR(odp))
+		return ERR_CAST(odp);
 
+	ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY);
 	if (IS_ERR(mr))
-		return mr;
-
-	mr->ibmr.pd = pd;
-
-	mr->dev = dev;
-	mr->access_flags = access_flags;
-	mr->mmkey.iova = 0;
-	mr->umem = &umem_odp->umem;
-
-	if (ksm) {
-		err = mlx5_ib_update_xlt(mr, 0,
-					 mlx5_imr_ksm_entries,
-					 MLX5_KSM_PAGE_SHIFT,
-					 MLX5_IB_UPD_XLT_INDIRECT |
-					 MLX5_IB_UPD_XLT_ZAP |
-					 MLX5_IB_UPD_XLT_ENABLE);
-
-	} else {
-		err = mlx5_ib_update_xlt(mr, 0,
-					 MLX5_IMR_MTT_ENTRIES,
-					 PAGE_SHIFT,
-					 MLX5_IB_UPD_XLT_ZAP |
-					 MLX5_IB_UPD_XLT_ENABLE |
-					 MLX5_IB_UPD_XLT_ATOMIC);
-	}
-
-	if (err)
-		goto fail;
+		goto out_umem;
 
+	mr->ibmr.pd = imr->ibmr.pd;
+	mr->access_flags = imr->access_flags;
+	mr->umem = &odp->umem;
 	mr->ibmr.lkey = mr->mmkey.key;
 	mr->ibmr.rkey = mr->mmkey.key;
-
-	mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
-		    mr->mmkey.key, dev->mdev, mr);
-
-	return mr;
-
-fail:
-	mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
-	mlx5_mr_cache_free(dev, mr);
-
-	return ERR_PTR(err);
-}
-
-static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
-						u64 io_virt, size_t bcnt)
-{
-	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
-	struct ib_umem_odp *odp, *result = NULL;
-	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
-	u64 addr = io_virt & MLX5_IMR_MTT_MASK;
-	int nentries = 0, start_idx = 0, ret;
-	struct mlx5_ib_mr *mtt;
-
-	mutex_lock(&odp_mr->umem_mutex);
-	odp = odp_lookup(addr, 1, mr);
-
-	mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
-		    io_virt, bcnt, addr, odp);
-
-next_mr:
-	if (likely(odp)) {
-		if (nentries)
-			nentries++;
-	} else {
-		odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
-		if (IS_ERR(odp)) {
-			mutex_unlock(&odp_mr->umem_mutex);
-			return ERR_CAST(odp);
-		}
-
-		mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
-					mr->access_flags);
-		if (IS_ERR(mtt)) {
-			mutex_unlock(&odp_mr->umem_mutex);
-			ib_umem_odp_release(odp);
-			return ERR_CAST(mtt);
-		}
-
-		odp->private = mtt;
-		mtt->umem = &odp->umem;
-		mtt->mmkey.iova = addr;
-		mtt->parent = mr;
-		INIT_WORK(&odp->work, mr_leaf_free_action);
-
-		smp_store_release(&mtt->live, 1);
-
-		if (!nentries)
-			start_idx = addr >> MLX5_IMR_MTT_SHIFT;
-		nentries++;
-	}
-
-	/* Return first odp if region not covered by single one */
-	if (likely(!result))
-		result = odp;
-
-	addr += MLX5_IMR_MTT_SIZE;
-	if (unlikely(addr < io_virt + bcnt)) {
-		odp = odp_next(odp);
-		if (odp && ib_umem_start(odp) != addr)
-			odp = NULL;
-		goto next_mr;
+	mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+	mr->parent = imr;
+	odp->private = mr;
+
+	err = mlx5_ib_update_xlt(mr, 0,
+				 MLX5_IMR_MTT_ENTRIES,
+				 PAGE_SHIFT,
+				 MLX5_IB_UPD_XLT_ZAP |
+				 MLX5_IB_UPD_XLT_ENABLE);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out_mr;
 	}
 
-	if (unlikely(nentries)) {
-		ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
-					 MLX5_IB_UPD_XLT_INDIRECT |
-					 MLX5_IB_UPD_XLT_ATOMIC);
-		if (ret) {
-			mlx5_ib_err(dev, "Failed to update PAS\n");
-			result = ERR_PTR(ret);
+	/*
+	 * Once the store to either xarray completes any error unwind has to
+	 * use synchronize_srcu(). Avoid this with xa_reserve()
+	 */
+	ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+			 GFP_KERNEL);
+	if (unlikely(ret)) {
+		if (xa_is_err(ret)) {
+			ret = ERR_PTR(xa_err(ret));
+			goto out_mr;
 		}
+		/*
+		 * Another thread beat us to creating the child mr, use
+		 * theirs.
+		 */
+		goto out_mr;
 	}
 
-	mutex_unlock(&odp_mr->umem_mutex);
-	return result;
+	mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr);
+	return mr;
+
+out_mr:
+	mlx5_mr_cache_free(imr->dev, mr);
+out_umem:
+	ib_umem_odp_release(odp);
+	return ret;
 }
 
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     struct ib_udata *udata,
 					     int access_flags)
 {
-	struct mlx5_ib_mr *imr;
+	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
 	struct ib_umem_odp *umem_odp;
+	struct mlx5_ib_mr *imr;
+	int err;
 
 	umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
 	if (IS_ERR(umem_odp))
 		return ERR_CAST(umem_odp);
 
-	imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
+	imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY);
 	if (IS_ERR(imr)) {
-		ib_umem_odp_release(umem_odp);
-		return ERR_CAST(imr);
+		err = PTR_ERR(imr);
+		goto out_umem;
 	}
 
+	imr->ibmr.pd = &pd->ibpd;
+	imr->access_flags = access_flags;
+	imr->mmkey.iova = 0;
 	imr->umem = &umem_odp->umem;
-	init_waitqueue_head(&imr->q_leaf_free);
-	atomic_set(&imr->num_leaf_free, 0);
-	atomic_set(&imr->num_pending_prefetch, 0);
-	smp_store_release(&imr->live, 1);
+	imr->ibmr.lkey = imr->mmkey.key;
+	imr->ibmr.rkey = imr->mmkey.key;
+	imr->umem = &umem_odp->umem;
+	imr->is_odp_implicit = true;
+	atomic_set(&imr->num_deferred_work, 0);
+	xa_init(&imr->implicit_children);
+
+	err = mlx5_ib_update_xlt(imr, 0,
+				 mlx5_imr_ksm_entries,
+				 MLX5_KSM_PAGE_SHIFT,
+				 MLX5_IB_UPD_XLT_INDIRECT |
+				 MLX5_IB_UPD_XLT_ZAP |
+				 MLX5_IB_UPD_XLT_ENABLE);
+	if (err)
+		goto out_mr;
 
+	err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
+			      &imr->mmkey, GFP_KERNEL));
+	if (err)
+		goto out_mr;
+
+	mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr);
 	return imr;
+out_mr:
+	mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+	mlx5_mr_cache_free(dev, imr);
+out_umem:
+	ib_umem_odp_release(umem_odp);
+	return ERR_PTR(err);
 }
 
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 {
-	struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
-	struct rb_node *node;
+	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+	struct mlx5_ib_dev *dev = imr->dev;
+	struct list_head destroy_list;
+	struct mlx5_ib_mr *mtt;
+	struct mlx5_ib_mr *tmp;
+	unsigned long idx;
 
-	down_read(&per_mm->umem_rwsem);
-	for (node = rb_first_cached(&per_mm->umem_tree); node;
-	     node = rb_next(node)) {
-		struct ib_umem_odp *umem_odp =
-			rb_entry(node, struct ib_umem_odp, interval_tree.rb);
-		struct mlx5_ib_mr *mr = umem_odp->private;
+	INIT_LIST_HEAD(&destroy_list);
 
-		if (mr->parent != imr)
-			continue;
+	xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
+	/*
+	 * This stops the SRCU protected page fault path from touching either
+	 * the imr or any children. The page fault path can only reach the
+	 * children xarray via the imr.
+	 */
+	synchronize_srcu(&dev->odp_srcu);
 
-		mutex_lock(&umem_odp->umem_mutex);
-		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-					    ib_umem_end(umem_odp));
+	xa_lock(&imr->implicit_children);
+	xa_for_each (&imr->implicit_children, idx, mtt) {
+		__xa_erase(&imr->implicit_children, idx);
+		list_add(&mtt->odp_destroy.elm, &destroy_list);
+	}
+	xa_unlock(&imr->implicit_children);
 
-		if (umem_odp->dying) {
-			mutex_unlock(&umem_odp->umem_mutex);
-			continue;
-		}
+	/*
+	 * num_deferred_work can only be incremented inside the odp_srcu, or
+	 * under xa_lock while the child is in the xarray. Thus at this point
+	 * it is only decreasing, and all work holding it is now on the wq.
+	 */
+	if (atomic_read(&imr->num_deferred_work)) {
+		flush_workqueue(system_unbound_wq);
+		WARN_ON(atomic_read(&imr->num_deferred_work));
+	}
+
+	/*
+	 * Fence the imr before we destroy the children. This allows us to
+	 * skip updating the XLT of the imr during destroy of the child mkey
+	 * the imr points to.
+	 */
+	mlx5_mr_cache_invalidate(imr);
+
+	list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm)
+		free_implicit_child_mr(mtt, false);
+
+	mlx5_mr_cache_free(dev, imr);
+	ib_umem_odp_release(odp_imr);
+}
 
-		umem_odp->dying = 1;
-		atomic_inc(&imr->num_leaf_free);
-		schedule_work(&umem_odp->work);
-		mutex_unlock(&umem_odp->umem_mutex);
+/**
+ * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR
+ * @mr: to fence
+ *
+ * On return no parallel threads will be touching this MR and no DMA will be
+ * active.
+ */
+void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
+{
+	/* Prevent new page faults and prefetch requests from succeeding */
+	xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+
+	/* Wait for all running page-fault handlers to finish. */
+	synchronize_srcu(&mr->dev->odp_srcu);
+
+	if (atomic_read(&mr->num_deferred_work)) {
+		flush_workqueue(system_unbound_wq);
+		WARN_ON(atomic_read(&mr->num_deferred_work));
 	}
-	up_read(&per_mm->umem_rwsem);
 
-	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+	dma_fence_odp_mr(mr);
 }
 
-#define MLX5_PF_FLAGS_PREFETCH  BIT(0)
 #define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
-static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
-			u64 io_virt, size_t bcnt, u32 *bytes_mapped,
-			u32 flags)
+static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
+			     u64 user_va, size_t bcnt, u32 *bytes_mapped,
+			     u32 flags)
 {
-	int npages = 0, current_seq, page_shift, ret, np;
-	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
+	int current_seq, page_shift, ret, np;
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
-	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
 	u64 access_mask;
 	u64 start_idx, page_mask;
-	struct ib_umem_odp *odp;
-	size_t size;
-
-	if (odp_mr->is_implicit_odp) {
-		odp = implicit_mr_get_data(mr, io_virt, bcnt);
-
-		if (IS_ERR(odp))
-			return PTR_ERR(odp);
-		mr = odp->private;
-	} else {
-		odp = odp_mr;
-	}
-
-next_mr:
-	size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
 
 	page_shift = odp->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
-	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+	start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift;
 	access_mask = ODP_READ_ALLOWED_BIT;
 
-	if (prefetch && !downgrade && !odp->umem.writable) {
-		/* prefetch with write-access must
-		 * be supported by the MR
-		 */
-		ret = -EINVAL;
-		goto out;
-	}
-
 	if (odp->umem.writable && !downgrade)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
@@ -657,13 +626,10 @@ next_mr:
 	 */
 	smp_rmb();
 
-	ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
-					current_seq);
-
-	if (ret < 0)
-		goto out;
-
-	np = ret;
+	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
+				       current_seq);
+	if (np < 0)
+		return np;
 
 	mutex_lock(&odp->umem_mutex);
 	if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
@@ -681,35 +647,19 @@ next_mr:
 
 	if (ret < 0) {
 		if (ret != -EAGAIN)
-			mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+			mlx5_ib_err(mr->dev,
+				    "Failed to update mkey page tables\n");
 		goto out;
 	}
 
 	if (bytes_mapped) {
 		u32 new_mappings = (np << page_shift) -
-			(io_virt - round_down(io_virt, 1 << page_shift));
-		*bytes_mapped += min_t(u32, new_mappings, size);
-	}
-
-	npages += np << (page_shift - PAGE_SHIFT);
-	bcnt -= size;
-
-	if (unlikely(bcnt)) {
-		struct ib_umem_odp *next;
+			(user_va - round_down(user_va, 1 << page_shift));
 
-		io_virt += size;
-		next = odp_next(odp);
-		if (unlikely(!next || ib_umem_start(next) != io_virt)) {
-			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
-				    io_virt, next);
-			return -EAGAIN;
-		}
-		odp = next;
-		mr = odp->private;
-		goto next_mr;
+		*bytes_mapped += min_t(u32, new_mappings, bcnt);
 	}
 
-	return npages;
+	return np << (page_shift - PAGE_SHIFT);
 
 out:
 	if (ret == -EAGAIN) {
@@ -718,7 +668,7 @@ out:
 		if (!wait_for_completion_timeout(&odp->notifier_completion,
 						 timeout)) {
 			mlx5_ib_warn(
-				dev,
+				mr->dev,
 				"timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
 				current_seq, odp->notifiers_seq,
 				odp->notifiers_count);
@@ -728,6 +678,109 @@ out:
 	return ret;
 }
 
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+				 struct ib_umem_odp *odp_imr, u64 user_va,
+				 size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+	unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+	unsigned long upd_start_idx = end_idx + 1;
+	unsigned long upd_len = 0;
+	unsigned long npages = 0;
+	int err;
+	int ret;
+
+	if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
+		return -EFAULT;
+
+	/* Fault each child mr that intersects with our interval. */
+	while (bcnt) {
+		unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
+		struct ib_umem_odp *umem_odp;
+		struct mlx5_ib_mr *mtt;
+		u64 len;
+
+		mtt = xa_load(&imr->implicit_children, idx);
+		if (unlikely(!mtt)) {
+			mtt = implicit_get_child_mr(imr, idx);
+			if (IS_ERR(mtt)) {
+				ret = PTR_ERR(mtt);
+				goto out;
+			}
+			upd_start_idx = min(upd_start_idx, idx);
+			upd_len = idx - upd_start_idx + 1;
+		}
+
+		umem_odp = to_ib_umem_odp(mtt->umem);
+		len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+		      user_va;
+
+		ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+					bytes_mapped, flags);
+		if (ret < 0)
+			goto out;
+		user_va += len;
+		bcnt -= len;
+		npages += ret;
+	}
+
+	ret = npages;
+
+	/*
+	 * Any time the implicit_children are changed we must perform an
+	 * update of the xlt before exiting to ensure the HW and the
+	 * implicit_children remains synchronized.
+	 */
+out:
+	if (likely(!upd_len))
+		return ret;
+
+	/*
+	 * Notice this is not strictly ordered right, the KSM is updated after
+	 * the implicit_children is updated, so a parallel page fault could
+	 * see a MR that is not yet visible in the KSM.  This is similar to a
+	 * parallel page fault seeing a MR that is being concurrently removed
+	 * from the KSM. Both of these improbable situations are resolved
+	 * safely by resuming the HW and then taking another page fault. The
+	 * next pagefault handler will see the new information.
+	 */
+	mutex_lock(&odp_imr->umem_mutex);
+	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
+				 MLX5_IB_UPD_XLT_INDIRECT |
+					 MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&odp_imr->umem_mutex);
+	if (err) {
+		mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+		return err;
+	}
+	return ret;
+}
+
+/*
+ * Returns:
+ *  -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
+ *           not accessible, or the MR is no longer valid.
+ *  -EAGAIN/-ENOMEM: The operation should be retried
+ *
+ *  -EINVAL/others: General internal malfunction
+ *  >0: Number of pages mapped
+ */
+static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
+			u32 *bytes_mapped, u32 flags)
+{
+	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+	if (!odp->is_implicit_odp) {
+		if (unlikely(io_virt < ib_umem_start(odp) ||
+			     ib_umem_end(odp) - io_virt < bcnt))
+			return -EFAULT;
+		return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+					 flags);
+	}
+	return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+				     flags);
+}
+
 struct pf_frame {
 	struct pf_frame *next;
 	u32 key;
@@ -775,10 +828,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 					 struct ib_pd *pd, u32 key,
 					 u64 io_virt, size_t bcnt,
 					 u32 *bytes_committed,
-					 u32 *bytes_mapped, u32 flags)
+					 u32 *bytes_mapped)
 {
 	int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
-	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
 	struct pf_frame *head = NULL, *frame;
 	struct mlx5_core_mkey *mmkey;
 	struct mlx5_ib_mr *mr;
@@ -787,58 +839,49 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 	size_t offset;
 	int ndescs;
 
-	srcu_key = srcu_read_lock(&dev->mr_srcu);
+	srcu_key = srcu_read_lock(&dev->odp_srcu);
 
 	io_virt += *bytes_committed;
 	bcnt -= *bytes_committed;
 
 next_mr:
-	mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
+	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+	if (!mmkey) {
+		mlx5_ib_dbg(
+			dev,
+			"skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			key);
+		if (bytes_mapped)
+			*bytes_mapped += bcnt;
+		/*
+		 * The user could specify a SGL with multiple lkeys and only
+		 * some of them are ODP. Treat the non-ODP ones as fully
+		 * faulted.
+		 */
+		ret = 0;
+		goto srcu_unlock;
+	}
 	if (!mkey_is_eq(mmkey, key)) {
 		mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
 		ret = -EFAULT;
 		goto srcu_unlock;
 	}
 
-	if (prefetch && mmkey->type != MLX5_MKEY_MR) {
-		mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n");
-		ret = -EINVAL;
-		goto srcu_unlock;
-	}
-
 	switch (mmkey->type) {
 	case MLX5_MKEY_MR:
 		mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-		if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
-			mlx5_ib_dbg(dev, "got dead MR\n");
-			ret = -EFAULT;
-			goto srcu_unlock;
-		}
 
-		if (prefetch) {
-			if (!is_odp_mr(mr) ||
-			    mr->ibmr.pd != pd) {
-				mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n",
-					    is_odp_mr(mr) ?  "MR is not ODP" :
-					    "PD is not of the MR");
-				ret = -EINVAL;
-				goto srcu_unlock;
-			}
-		}
-
-		if (!is_odp_mr(mr)) {
-			mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-				    key);
-			if (bytes_mapped)
-				*bytes_mapped += bcnt;
-			ret = 0;
-			goto srcu_unlock;
-		}
-
-		ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags);
+		ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
 		if (ret < 0)
 			goto srcu_unlock;
 
+		/*
+		 * When prefetching a page, page fault is generated
+		 * in order to bring the page to the main memory.
+		 * In the current flow, page faults are being counted.
+		 */
+		mlx5_update_odp_stats(mr, faults, ret);
+
 		npages += ret;
 		ret = 0;
 		break;
@@ -928,7 +971,7 @@ srcu_unlock:
 	}
 	kfree(out);
 
-	srcu_read_unlock(&dev->mr_srcu, srcu_key);
+	srcu_read_unlock(&dev->odp_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
 }
@@ -1009,7 +1052,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
 		ret = pagefault_single_data_segment(dev, NULL, key,
 						    io_virt, bcnt,
 						    &pfault->bytes_committed,
-						    bytes_mapped, 0);
+						    bytes_mapped);
 		if (ret < 0)
 			break;
 		npages += ret;
@@ -1292,8 +1335,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
 	}
 
 	ret = pagefault_single_data_segment(dev, NULL, rkey, address, length,
-					    &pfault->bytes_committed, NULL,
-					    0);
+					    &pfault->bytes_committed, NULL);
 	if (ret == -EAGAIN) {
 		/* We're racing with an invalidation, don't prefetch */
 		prefetch_activated = 0;
@@ -1320,8 +1362,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
 
 		ret = pagefault_single_data_segment(dev, NULL, rkey, address,
 						    prefetch_len,
-						    &bytes_committed, NULL,
-						    0);
+						    &bytes_committed, NULL);
 		if (ret < 0 && ret != -EAGAIN) {
 			mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
 				    ret, pfault->token, address, prefetch_len);
@@ -1624,114 +1665,128 @@ int mlx5_ib_odp_init(void)
 
 struct prefetch_mr_work {
 	struct work_struct work;
-	struct ib_pd *pd;
 	u32 pf_flags;
 	u32 num_sge;
-	struct ib_sge sg_list[0];
+	struct {
+		u64 io_virt;
+		struct mlx5_ib_mr *mr;
+		size_t length;
+	} frags[];
 };
 
-static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
-				     struct ib_sge *sg_list, u32 num_sge,
-				     u32 from)
+static void destroy_prefetch_work(struct prefetch_mr_work *work)
 {
 	u32 i;
-	int srcu_key;
-
-	srcu_key = srcu_read_lock(&dev->mr_srcu);
 
-	for (i = from; i < num_sge; ++i) {
-		struct mlx5_core_mkey *mmkey;
-		struct mlx5_ib_mr *mr;
-
-		mmkey = xa_load(&dev->mdev->priv.mkey_table,
-				mlx5_base_mkey(sg_list[i].lkey));
-		mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-		atomic_dec(&mr->num_pending_prefetch);
-	}
-
-	srcu_read_unlock(&dev->mr_srcu, srcu_key);
+	for (i = 0; i < work->num_sge; ++i)
+		atomic_dec(&work->frags[i].mr->num_deferred_work);
+	kvfree(work);
 }
 
-static bool num_pending_prefetch_inc(struct ib_pd *pd,
-				     struct ib_sge *sg_list, u32 num_sge)
+static struct mlx5_ib_mr *
+get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+		    u32 lkey)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	bool ret = true;
-	u32 i;
+	struct mlx5_core_mkey *mmkey;
+	struct ib_umem_odp *odp;
+	struct mlx5_ib_mr *mr;
 
-	for (i = 0; i < num_sge; ++i) {
-		struct mlx5_core_mkey *mmkey;
-		struct mlx5_ib_mr *mr;
+	lockdep_assert_held(&dev->odp_srcu);
 
-		mmkey = xa_load(&dev->mdev->priv.mkey_table,
-				mlx5_base_mkey(sg_list[i].lkey));
-		if (!mmkey || mmkey->key != sg_list[i].lkey) {
-			ret = false;
-			break;
-		}
+	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
+	if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
+		return NULL;
 
-		if (mmkey->type != MLX5_MKEY_MR) {
-			ret = false;
-			break;
-		}
+	mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
 
-		mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+	if (mr->ibmr.pd != pd)
+		return NULL;
 
-		if (!smp_load_acquire(&mr->live)) {
-			ret = false;
-			break;
-		}
+	odp = to_ib_umem_odp(mr->umem);
 
-		if (mr->ibmr.pd != pd) {
-			ret = false;
-			break;
-		}
+	/* prefetch with write-access must be supported by the MR */
+	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+	    !odp->umem.writable)
+		return NULL;
 
-		atomic_inc(&mr->num_pending_prefetch);
-	}
+	return mr;
+}
 
-	if (!ret)
-		num_pending_prefetch_dec(dev, sg_list, i, 0);
+static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
+{
+	struct prefetch_mr_work *work =
+		container_of(w, struct prefetch_mr_work, work);
+	u32 bytes_mapped = 0;
+	u32 i;
 
-	return ret;
+	for (i = 0; i < work->num_sge; ++i)
+		pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+			     work->frags[i].length, &bytes_mapped,
+			     work->pf_flags);
+
+	destroy_prefetch_work(work);
 }
 
-static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags,
-				    struct ib_sge *sg_list, u32 num_sge)
+static bool init_prefetch_work(struct ib_pd *pd,
+			       enum ib_uverbs_advise_mr_advice advice,
+			       u32 pf_flags, struct prefetch_mr_work *work,
+			       struct ib_sge *sg_list, u32 num_sge)
 {
 	u32 i;
-	int ret = 0;
-	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+	INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+	work->pf_flags = pf_flags;
 
 	for (i = 0; i < num_sge; ++i) {
-		struct ib_sge *sg = &sg_list[i];
-		int bytes_committed = 0;
+		work->frags[i].io_virt = sg_list[i].addr;
+		work->frags[i].length = sg_list[i].length;
+		work->frags[i].mr =
+			get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+		if (!work->frags[i].mr) {
+			work->num_sge = i - 1;
+			if (i)
+				destroy_prefetch_work(work);
+			return false;
+		}
 
-		ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr,
-						    sg->length,
-						    &bytes_committed, NULL,
-						    pf_flags);
-		if (ret < 0)
-			break;
+		/* Keep the MR pointer will valid outside the SRCU */
+		atomic_inc(&work->frags[i].mr->num_deferred_work);
 	}
-
-	return ret < 0 ? ret : 0;
+	work->num_sge = num_sge;
+	return true;
 }
 
-static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
+static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
+				    enum ib_uverbs_advise_mr_advice advice,
+				    u32 pf_flags, struct ib_sge *sg_list,
+				    u32 num_sge)
 {
-	struct prefetch_mr_work *w =
-		container_of(work, struct prefetch_mr_work, work);
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	u32 bytes_mapped = 0;
+	int srcu_key;
+	int ret = 0;
+	u32 i;
+
+	srcu_key = srcu_read_lock(&dev->odp_srcu);
+	for (i = 0; i < num_sge; ++i) {
+		struct mlx5_ib_mr *mr;
 
-	if (ib_device_try_get(w->pd->device)) {
-		mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list,
-					 w->num_sge);
-		ib_device_put(w->pd->device);
+		mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+		if (!mr) {
+			ret = -ENOENT;
+			goto out;
+		}
+		ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
+				   &bytes_mapped, pf_flags);
+		if (ret < 0)
+			goto out;
 	}
+	ret = 0;
 
-	num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list,
-				 w->num_sge, 0);
-	kvfree(w);
+out:
+	srcu_read_unlock(&dev->odp_srcu, srcu_key);
+	return ret;
 }
 
 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1739,43 +1794,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 			       u32 flags, struct ib_sge *sg_list, u32 num_sge)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
+	u32 pf_flags = 0;
 	struct prefetch_mr_work *work;
-	bool valid_req;
 	int srcu_key;
 
 	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
 		pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
 
 	if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
-		return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list,
+		return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
 						num_sge);
 
-	work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
+	work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
 	if (!work)
 		return -ENOMEM;
 
-	memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
-
-	/* It is guaranteed that the pd when work is executed is the pd when
-	 * work was queued since pd can't be destroyed while it holds MRs and
-	 * destroying a MR leads to flushing the workquque
-	 */
-	work->pd = pd;
-	work->pf_flags = pf_flags;
-	work->num_sge = num_sge;
-
-	INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
-
-	srcu_key = srcu_read_lock(&dev->mr_srcu);
-
-	valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge);
-	if (valid_req)
-		queue_work(system_unbound_wq, &work->work);
-	else
-		kvfree(work);
-
-	srcu_read_unlock(&dev->mr_srcu, srcu_key);
-
-	return valid_req ? 0 : -EINVAL;
+	srcu_key = srcu_read_lock(&dev->odp_srcu);
+	if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
+		srcu_read_unlock(&dev->odp_srcu, srcu_key);
+		return -EINVAL;
+	}
+	queue_work(system_unbound_wq, &work->work);
+	srcu_read_unlock(&dev->odp_srcu, srcu_key);
+	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5fd071c05944..7e51870e9e01 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 {
 	int err;
 
-	*umem = ib_umem_get(udata, addr, size, 0, 0);
+	*umem = ib_umem_get(udata, addr, size, 0);
 	if (IS_ERR(*umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		return PTR_ERR(*umem);
@@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	if (!ucmd->buf_addr)
 		return -EINVAL;
 
-	rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
+	rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0);
 	if (IS_ERR(rwq->umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		err = PTR_ERR(rwq->umem);
@@ -1041,11 +1041,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 					IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
 					IB_QP_CREATE_IPOIB_UD_LSO |
 					IB_QP_CREATE_NETIF_QP |
-					mlx5_ib_create_qp_sqpn_qp1()))
+					MLX5_IB_QP_CREATE_SQPN_QP1 |
+					MLX5_IB_QP_CREATE_WC_TEST))
 		return -EINVAL;
 
 	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
 		qp->bf.bfreg = &dev->fp_bfreg;
+	else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST)
+		qp->bf.bfreg = &dev->wc_bfreg;
 	else
 		qp->bf.bfreg = &dev->bfreg;
 
@@ -1104,7 +1107,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	MLX5_SET(qpc, qpc, fre, 1);
 	MLX5_SET(qpc, qpc, rlky, 1);
 
-	if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+	if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) {
 		MLX5_SET(qpc, qpc, deth_sqpn, 1);
 		qp->flags |= MLX5_IB_QP_SQPN_QP1;
 	}
@@ -2140,7 +2143,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 				return -EINVAL;
 			}
 			if (init_attr->create_flags &
-			    mlx5_ib_create_qp_sqpn_qp1()) {
+			    MLX5_IB_QP_CREATE_SQPN_QP1) {
 				mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
 				return -EINVAL;
 			}
@@ -5330,7 +5333,6 @@ out:
 		 * we hit doorbell */
 		wmb();
 
-		/* currently we support only regular doorbells */
 		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
 		/* Make sure doorbells don't leak out of SQ spinlock
 		 * and reach the HCA out of order.
@@ -5825,7 +5827,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
 	if (qp->flags & MLX5_IB_QP_SQPN_QP1)
-		qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+		qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1;
 
 	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
@@ -5957,12 +5959,21 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	}
 	MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
 	if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
+		/*
+		 * In Firmware number of strides in each WQE is:
+		 *   "512 * 2^single_wqe_log_num_of_strides"
+		 * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are
+		 * accepted as 0 to 9
+		 */
+		static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
+					     2,  3,  4,  5,  6,  7,  8, 9 };
 		MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
 			 rwq->single_stride_log_num_of_bytes -
 			 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
-		MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides -
-			 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES);
+		MLX5_SET(wq, wq, log_wqe_num_of_strides,
+			 fw_map[rwq->log_num_strides -
+				MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
 	}
 	MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
 	MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
@@ -6037,6 +6048,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
 	return 0;
 }
 
+static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
+{
+	if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
+	    (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+		return false;
+
+	if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
+	    (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+		return false;
+
+	return true;
+}
+
 static int prepare_user_rq(struct ib_pd *pd,
 			   struct ib_wq_init_attr *init_attr,
 			   struct ib_udata *udata,
@@ -6084,14 +6108,16 @@ static int prepare_user_rq(struct ib_pd *pd,
 				    MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
 			return -EINVAL;
 		}
-		if ((ucmd.single_wqe_log_num_of_strides >
-		    MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
-		     (ucmd.single_wqe_log_num_of_strides <
-			MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) {
-			mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n",
-				    ucmd.single_wqe_log_num_of_strides,
-				    MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
-				    MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
+		if (!log_of_strides_valid(dev,
+					  ucmd.single_wqe_log_num_of_strides)) {
+			mlx5_ib_dbg(
+				dev,
+				"Invalid log num strides (%u. Range is %u - %u)\n",
+				ucmd.single_wqe_log_num_of_strides,
+				MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
+					MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
+					MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
+				MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
 			return -EINVAL;
 		}
 		rwq->single_stride_log_num_of_bytes =
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
new file mode 100644
index 000000000000..8f6c04f12531
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <uapi/rdma/rdma_netlink.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/restrack.h>
+#include "mlx5_ib.h"
+
+static int fill_stat_mr_entry(struct sk_buff *msg,
+			      struct rdma_restrack_entry *res)
+{
+	struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	struct nlattr *table_attr;
+
+	if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+		return 0;
+
+	table_attr = nla_nest_start(msg,
+				    RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+
+	if (!table_attr)
+		goto err;
+
+	if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
+					 atomic64_read(&mr->odp_stats.faults)))
+		goto err_table;
+	if (rdma_nl_stat_hwcounter_entry(
+		    msg, "page_invalidations",
+		    atomic64_read(&mr->odp_stats.invalidations)))
+		goto err_table;
+
+	nla_nest_end(msg, table_attr);
+	return 0;
+
+err_table:
+	nla_nest_cancel(msg, table_attr);
+err:
+	return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg,
+			     struct rdma_restrack_entry *res)
+{
+	struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	struct nlattr *table_attr;
+
+	if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+		return 0;
+
+	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+	if (!table_attr)
+		goto err;
+
+	if (mr->is_odp_implicit) {
+		if (rdma_nl_put_driver_string(msg, "odp", "implicit"))
+			goto err;
+	} else {
+		if (rdma_nl_put_driver_string(msg, "odp", "explicit"))
+			goto err;
+	}
+
+	nla_nest_end(msg, table_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, table_attr);
+	return -EMSGSIZE;
+}
+
+int mlx5_ib_fill_res_entry(struct sk_buff *msg,
+			   struct rdma_restrack_entry *res)
+{
+	if (res->type == RDMA_RESTRACK_MR)
+		return fill_res_mr_entry(msg, res);
+
+	return 0;
+}
+
+int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
+			    struct rdma_restrack_entry *res)
+{
+	if (res->type == RDMA_RESTRACK_MR)
+		return fill_stat_mr_entry(msg, res);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e7fde86c96b..62939df3c692 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
-	srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0);
 	if (IS_ERR(srq->umem)) {
 		mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
 		err = PTR_ERR(srq->umem);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index bfd4eebc1182..599794c5a78f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
-int mthca_process_mad(struct ib_device *ibdev,
-		      int mad_flags,
-		      u8 port_num,
-		      const struct ib_wc *in_wc,
-		      const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index);
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		      const struct ib_mad *in, struct ib_mad *out,
+		      size_t *out_mad_size, u16 *out_mad_pkey_index);
 int mthca_create_agents(struct mthca_dev *dev);
 void mthca_free_agents(struct mthca_dev *dev);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 7ad517da4917..99aa8183a7f2 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -196,30 +196,19 @@ static void forward_trap(struct mthca_dev *dev,
 	}
 }
 
-int mthca_process_mad(struct ib_device *ibdev,
-		      int mad_flags,
-		      u8 port_num,
-		      const struct ib_wc *in_wc,
-		      const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index)
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		      const struct ib_mad *in, struct ib_mad *out,
+		      size_t *out_mad_size, u16 *out_mad_pkey_index)
 {
 	int err;
 	u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
 	u16 prev_lid = 0;
 	struct ib_port_attr pattr;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
 
 	/* Forward locally generated traps to the SM */
-	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
-	    slid == 0) {
-		forward_trap(to_mdev(ibdev), port_num, in_mad);
+	if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) {
+		forward_trap(to_mdev(ibdev), port_num, in);
 		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
 	}
 
@@ -229,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev,
 	 * Only handle PMA and Mellanox vendor-specific class gets and
 	 * sets for other classes.
 	 */
-	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
-	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-		if (in_mad->mad_hdr.method   != IB_MGMT_METHOD_GET &&
-		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_SET &&
-		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_TRAP_REPRESS)
+	if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+	    in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+		if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+		    in->mad_hdr.method != IB_MGMT_METHOD_SET &&
+		    in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
 			return IB_MAD_RESULT_SUCCESS;
 
 		/*
 		 * Don't process SMInfo queries or vendor-specific
 		 * MADs -- the SMA can't handle them.
 		 */
-		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
-		    ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+		if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+		    ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
 		     IB_SMP_ATTR_VENDOR_MASK))
 			return IB_MAD_RESULT_SUCCESS;
-	} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
-		   in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1     ||
-		   in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
-		if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
-		    in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
+	} else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+		   in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1     ||
+		   in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
+		if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+		    in->mad_hdr.method != IB_MGMT_METHOD_SET)
 			return IB_MAD_RESULT_SUCCESS;
 	} else
 		return IB_MAD_RESULT_SUCCESS;
-	if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
-	     in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
-	    in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
-	    in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+	if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+	     in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+	    in->mad_hdr.method == IB_MGMT_METHOD_SET &&
+	    in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
 	    !ib_query_port(ibdev, port_num, &pattr))
 		prev_lid = ib_lid_cpu16(pattr.lid);
 
-	err = mthca_MAD_IFC(to_mdev(ibdev),
-			    mad_flags & IB_MAD_IGNORE_MKEY,
-			    mad_flags & IB_MAD_IGNORE_BKEY,
-			    port_num, in_wc, in_grh, in_mad, out_mad);
+	err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+			    mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+			    in_grh, in, out);
 	if (err == -EBADMSG)
 		return IB_MAD_RESULT_SUCCESS;
 	else if (err) {
@@ -270,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev,
 		return IB_MAD_RESULT_FAILURE;
 	}
 
-	if (!out_mad->mad_hdr.status) {
-		smp_snoop(ibdev, port_num, in_mad, prev_lid);
-		node_desc_override(ibdev, out_mad);
+	if (!out->mad_hdr.status) {
+		smp_snoop(ibdev, port_num, in, prev_lid);
+		node_desc_override(ibdev, out);
 	}
 
 	/* set return bit in status of directed route responses */
-	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+	if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		out->mad_hdr.status |= cpu_to_be16(1 << 15);
 
-	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+	if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
 		/* no response for trap repress */
 		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 23554d8bf241..33002530fee7 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(udata, start, length, acc,
-			       ucmd.mr_attrs & MTHCA_MR_DMASYNC);
-
+	mr->umem = ib_umem_get(udata, start, length, acc);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 8d3e36d548aa..2b7f00ac41b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -247,35 +247,20 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	return 0;
 }
 
-int ocrdma_process_mad(struct ib_device *ibdev,
-		       int process_mad_flags,
-		       u8 port_num,
-		       const struct ib_wc *in_wc,
-		       const struct ib_grh *in_grh,
-		       const struct ib_mad_hdr *in, size_t in_mad_size,
-		       struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags,
+		       u8 port_num, const struct ib_wc *in_wc,
+		       const struct ib_grh *in_grh, const struct ib_mad *in,
+		       struct ib_mad *out, size_t *out_mad_size,
 		       u16 *out_mad_pkey_index)
 {
-	int status;
+	int status = IB_MAD_RESULT_SUCCESS;
 	struct ocrdma_dev *dev;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
 
-	switch (in_mad->mad_hdr.mgmt_class) {
-	case IB_MGMT_CLASS_PERF_MGMT:
+	if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
 		dev = get_ocrdma_dev(ibdev);
-		if (!ocrdma_pma_counters(dev, out_mad))
-			status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-		else
-			status = IB_MAD_RESULT_SUCCESS;
-		break;
-	default:
-		status = IB_MAD_RESULT_SUCCESS;
-		break;
+		ocrdma_pma_counters(dev, out);
+		status |= IB_MAD_RESULT_REPLY;
 	}
+
 	return status;
 }
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 64cb82c08664..9780afcde780 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
 void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
 int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
 
-int ocrdma_process_mad(struct ib_device *,
-		       int process_mad_flags,
-		       u8 port_num,
-		       const struct ib_wc *in_wc,
-		       const struct ib_grh *in_grh,
-		       const struct ib_mad_hdr *in, size_t in_mad_size,
-		       struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
+		       u8 port_num, const struct ib_wc *in_wc,
+		       const struct ib_grh *in_grh, const struct ib_mad *in,
+		       struct ib_mad *out, size_t *out_mad_size,
 		       u16 *out_mad_pkey_index);
 #endif				/* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index c15cfc6cef81..d8c47d24d6d6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	.get_port_immutable = ocrdma_port_immutable,
 	.map_mr_sg = ocrdma_map_mr_sg,
 	.mmap = ocrdma_mmap,
-	.modify_port = ocrdma_modify_port,
 	.modify_qp = ocrdma_modify_qp,
 	.poll_cq = ocrdma_poll_cq,
 	.post_recv = ocrdma_post_recv,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 6ef89c226ad8..c2e0d0fa44be 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats {
 };
 
 struct ocrdma_rx_qp_err_stats {
-	u32 nak_invalid_requst_errors;
+	u32 nak_invalid_request_errors;
 	u32 nak_remote_operation_errors;
 	u32 nak_count_remote_access_errors;
 	u32 local_length_errors;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index a902942adb5d..5f831e3bdbad 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
 	memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
 
 	pcur = stats;
-	pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
-			(u64)rx_qp_err_stats->nak_invalid_requst_errors);
+	pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors",
+			(u64)rx_qp_err_stats->nak_invalid_request_errors);
 	pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
 			(u64)rx_qp_err_stats->nak_remote_operation_errors);
 	pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
@@ -670,12 +670,10 @@ err:
 	return -EFAULT;
 }
 
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
-			struct ib_mad *out_mad)
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad)
 {
 	struct ib_pma_portcounters *pma_cnt;
 
-	memset(out_mad->data, 0, sizeof out_mad->data);
 	pma_cnt = (void *)(out_mad->data + 40);
 	ocrdma_update_stats(dev);
 
@@ -683,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev,
 	pma_cnt->port_rcv_data     = cpu_to_be32(ocrdma_sysfs_rcv_data(dev));
 	pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev));
 	pma_cnt->port_rcv_packets  = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev));
-	return 0;
 }
 
 static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
index bba1fec4f11f..98feca26ac55 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
 void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
 void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
 void ocrdma_add_port_stats(struct ocrdma_dev *dev);
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
-			struct ib_mad *out_mad);
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad);
 
 #endif	/* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e8267e590772..9bc1ca6f6f9e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	return 0;
 }
 
-int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
-		       struct ib_port_modify *props)
-{
-	return 0;
-}
-
 static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
 			   unsigned long len)
 {
@@ -875,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(status);
-	mr->umem = ib_umem_get(udata, start, len, acc, 0);
+	mr->umem = ib_umem_get(udata, start, len, acc);
 	if (IS_ERR(mr->umem)) {
 		status = -EFAULT;
 		goto umem_err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 32488da1b752..3a5010881be5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
 int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props,
 			struct ib_udata *uhw);
 int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
-		       struct ib_port_modify *props);
 
 enum rdma_protocol_type
 ocrdma_query_protocol(struct ib_device *device, u8 port_num);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index dc71b6e16a07..dcdc85a1ab25 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -212,7 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = {
 	.get_link_layer = qedr_link_layer,
 	.map_mr_sg = qedr_map_mr_sg,
 	.mmap = qedr_mmap,
-	.modify_port = qedr_modify_port,
+	.mmap_free = qedr_mmap_free,
 	.modify_qp = qedr_modify_qp,
 	.modify_srq = qedr_modify_srq,
 	.poll_cq = qedr_poll_cq,
@@ -357,9 +357,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
 		return -ENOMEM;
 
 	spin_lock_init(&dev->sgid_lock);
+	xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ);
 
 	if (IS_IWARP(dev)) {
-		xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ);
+		xa_init(&dev->qps);
 		dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
 	}
 
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 0cfd849b13d6..5488dbd59d3c 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -40,6 +40,7 @@
 #include <linux/qed/qed_rdma_if.h>
 #include <linux/qed/qede_rdma.h>
 #include <linux/qed/roce_common.h>
+#include <linux/completion.h>
 #include "qedr_hsi_rdma.h"
 
 #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
@@ -230,14 +231,16 @@ struct qedr_ucontext {
 	struct qedr_dev *dev;
 	struct qedr_pd *pd;
 	void __iomem *dpi_addr;
+	struct rdma_user_mmap_entry *db_mmap_entry;
 	u64 dpi_phys_addr;
 	u32 dpi_size;
 	u16 dpi;
+	bool db_rec;
+};
 
-	struct list_head mm_head;
-
-	/* Lock to protect mm list */
-	struct mutex mm_list_lock;
+union db_prod32 {
+	struct rdma_pwm_val16_data data;
+	u32 raw;
 };
 
 union db_prod64 {
@@ -265,6 +268,13 @@ struct qedr_userq {
 	struct qedr_pbl *pbl_tbl;
 	u64 buf_addr;
 	size_t buf_len;
+
+	/* doorbell recovery */
+	void __iomem *db_addr;
+	struct qedr_user_db_rec *db_rec_data;
+	struct rdma_user_mmap_entry *db_mmap_entry;
+	void __iomem *db_rec_db2_addr;
+	union db_prod32 db_rec_db2_data;
 };
 
 struct qedr_cq {
@@ -300,19 +310,6 @@ struct qedr_pd {
 	struct qedr_ucontext *uctx;
 };
 
-struct qedr_mm {
-	struct {
-		u64 phy_addr;
-		unsigned long len;
-	} key;
-	struct list_head entry;
-};
-
-union db_prod32 {
-	struct rdma_pwm_val16_data data;
-	u32 raw;
-};
-
 struct qedr_qp_hwq_info {
 	/* WQE Elements */
 	struct qed_chain pbl;
@@ -377,10 +374,20 @@ enum qedr_qp_err_bitmap {
 	QEDR_QP_ERR_RQ_PBL_FULL = 32,
 };
 
+enum qedr_qp_create_type {
+	QEDR_QP_CREATE_NONE,
+	QEDR_QP_CREATE_USER,
+	QEDR_QP_CREATE_KERNEL,
+};
+
+enum qedr_iwarp_cm_flags {
+	QEDR_IWARP_CM_WAIT_FOR_CONNECT    = BIT(0),
+	QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1),
+};
+
 struct qedr_qp {
 	struct ib_qp ibqp;	/* must be first */
 	struct qedr_dev *dev;
-	struct qedr_iw_ep *ep;
 	struct qedr_qp_hwq_info sq;
 	struct qedr_qp_hwq_info rq;
 
@@ -395,6 +402,7 @@ struct qedr_qp {
 	u32 id;
 	struct qedr_pd *pd;
 	enum ib_qp_type qp_type;
+	enum qedr_qp_create_type create_type;
 	struct qed_rdma_qp *qed_qp;
 	u32 qp_id;
 	u16 icid;
@@ -437,8 +445,11 @@ struct qedr_qp {
 	/* Relevant to qps created from user space only (applications) */
 	struct qedr_userq usq;
 	struct qedr_userq urq;
-	atomic_t refcnt;
-	bool destroyed;
+
+	/* synchronization objects used with iwarp ep */
+	struct kref refcnt;
+	struct completion iwarp_cm_comp;
+	unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */
 };
 
 struct qedr_ah {
@@ -476,6 +487,18 @@ struct qedr_mr {
 	u32 npages;
 };
 
+struct qedr_user_mmap_entry {
+	struct rdma_user_mmap_entry rdma_entry;
+	struct qedr_dev *dev;
+	union {
+		u64 io_address;
+		void *address;
+	};
+	size_t length;
+	u16 dpi;
+	u8 mmap_flag;
+};
+
 #define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT)))
 
 #define QEDR_RESP_IMM	(RDMA_CQE_RESPONDER_IMM_FLG_MASK << \
@@ -531,7 +554,7 @@ struct qedr_iw_ep {
 	struct iw_cm_id	*cm_id;
 	struct qedr_qp	*qp;
 	void		*qed_context;
-	u8		during_connect;
+	struct kref	refcnt;
 };
 
 static inline
@@ -574,4 +597,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
 {
 	return container_of(ibsrq, struct qedr_srq, ibsrq);
 }
+
+static inline struct qedr_user_mmap_entry *
+get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
+{
+	return container_of(rdma_entry, struct qedr_user_mmap_entry,
+			    rdma_entry);
+}
 #endif
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 22881d4442b9..792eecd206b6 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info,
 	}
 }
 
+static void qedr_iw_free_qp(struct kref *ref)
+{
+	struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt);
+
+	kfree(qp);
+}
+
+static void
+qedr_iw_free_ep(struct kref *ref)
+{
+	struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt);
+
+	if (ep->qp)
+		kref_put(&ep->qp->refcnt, qedr_iw_free_qp);
+
+	if (ep->cm_id)
+		ep->cm_id->rem_ref(ep->cm_id);
+
+	kfree(ep);
+}
+
 static void
 qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
 {
@@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
 
 	ep->dev = dev;
 	ep->qed_context = params->ep_context;
+	kref_init(&ep->refcnt);
 
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
@@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params)
 {
 	struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
 
-	if (ep->cm_id) {
+	if (ep->cm_id)
 		qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE);
 
-		ep->cm_id->rem_ref(ep->cm_id);
-		ep->cm_id = NULL;
-	}
+	kref_put(&ep->refcnt, qedr_iw_free_ep);
 }
 
 static void
@@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
 	struct qedr_qp *qp = ep->qp;
 	struct iw_cm_event event;
 
-	if (qp->destroyed) {
-		kfree(dwork);
-		qedr_iw_qp_rem_ref(&qp->ibqp);
-		return;
-	}
+	/* The qp won't be released until we release the ep.
+	 * the ep's refcnt was increased before calling this
+	 * function, therefore it is safe to access qp
+	 */
+	if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+			     &qp->iwarp_cm_flags))
+		goto out;
 
 	memset(&event, 0, sizeof(event));
 	event.status = dwork->status;
@@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
 	else
 		qp_params.new_state = QED_ROCE_QP_STATE_SQD;
 
-	kfree(dwork);
 
 	if (ep->cm_id)
 		ep->cm_id->event_handler(ep->cm_id, &event);
@@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
 
 	dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params);
 
-	qedr_iw_qp_rem_ref(&qp->ibqp);
+	complete(&ep->qp->iwarp_cm_comp);
+out:
+	kfree(dwork);
+	kref_put(&ep->refcnt, qedr_iw_free_ep);
 }
 
 static void
@@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context,
 	struct qedr_discon_work *work;
 	struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
 	struct qedr_dev *dev = ep->dev;
-	struct qedr_qp *qp = ep->qp;
 
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work)
 		return;
 
-	qedr_iw_qp_add_ref(&qp->ibqp);
+	/* We can't get a close event before disconnect, but since
+	 * we're scheduling a work queue we need to make sure close
+	 * won't delete the ep, so we increase the refcnt
+	 */
+	kref_get(&ep->refcnt);
+
 	work->ep = ep;
 	work->event = params->event;
 	work->status = params->status;
@@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context,
 	if ((params->status == -ECONNREFUSED) && (!ep->qp)) {
 		DP_DEBUG(dev, QEDR_MSG_IWARP,
 			 "PASSIVE connection refused releasing ep...\n");
-		kfree(ep);
+		kref_put(&ep->refcnt, qedr_iw_free_ep);
 		return;
 	}
 
+	complete(&ep->qp->iwarp_cm_comp);
 	qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED);
 
 	if (params->status < 0)
 		qedr_iw_close_event(context, params);
 }
 
+static void
+qedr_iw_active_complete(void *context,
+			struct qed_iwarp_cm_event_params *params)
+{
+	struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+
+	complete(&ep->qp->iwarp_cm_comp);
+	qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY);
+
+	if (params->status < 0)
+		kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
 static int
 qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params)
 {
@@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params)
 		qedr_iw_mpa_reply(context, params);
 		break;
 	case QED_IWARP_EVENT_PASSIVE_COMPLETE:
-		ep->during_connect = 0;
 		qedr_iw_passive_complete(context, params);
 		break;
-
 	case QED_IWARP_EVENT_ACTIVE_COMPLETE:
-		ep->during_connect = 0;
-		qedr_iw_issue_event(context,
-				    params,
-				    IW_CM_EVENT_CONNECT_REPLY);
-		if (params->status < 0) {
-			struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
-
-			ep->cm_id->rem_ref(ep->cm_id);
-			ep->cm_id = NULL;
-		}
+		qedr_iw_active_complete(context, params);
 		break;
 	case QED_IWARP_EVENT_DISCONNECT:
 		qedr_iw_disconnect_event(context, params);
 		break;
 	case QED_IWARP_EVENT_CLOSE:
-		ep->during_connect = 0;
 		qedr_iw_close_event(context, params);
 		break;
 	case QED_IWARP_EVENT_RQ_EMPTY:
@@ -451,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev,
 
 	if ((!dst) || dst->error) {
 		if (dst) {
-			dst_release(dst);
 			DP_ERR(dev,
 			       "ip6_route_output returned dst->error = %d\n",
 			       dst->error);
+			dst_release(dst);
 		}
 		return -EINVAL;
 	}
@@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev,
 	return rc;
 }
 
+static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn)
+{
+	struct qedr_qp *qp;
+
+	xa_lock(&dev->qps);
+	qp = xa_load(&dev->qps, qpn);
+	if (qp)
+		kref_get(&qp->refcnt);
+	xa_unlock(&dev->qps);
+
+	return qp;
+}
+
 int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
 	struct qedr_dev *dev = get_qedr_dev(cm_id->device);
@@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	int rc = 0;
 	int i;
 
-	qp = xa_load(&dev->qps, conn_param->qpn);
-	if (unlikely(!qp))
-		return -EINVAL;
-
 	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
 	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
@@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		return -ENOMEM;
 
 	ep->dev = dev;
+	kref_init(&ep->refcnt);
+
+	qp = qedr_iw_load_qp(dev, conn_param->qpn);
+	if (!qp) {
+		rc = -EINVAL;
+		goto err;
+	}
+
 	ep->qp = qp;
-	qp->ep = ep;
 	cm_id->add_ref(cm_id);
 	ep->cm_id = cm_id;
 
@@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	in_params.qp = qp->qed_qp;
 	memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN);
 
-	ep->during_connect = 1;
+	if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+			     &qp->iwarp_cm_flags))
+		goto err; /* QP already being destroyed */
+
 	rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params);
-	if (rc)
+	if (rc) {
+		complete(&qp->iwarp_cm_comp);
 		goto err;
+	}
 
 	return rc;
 
 err:
-	cm_id->rem_ref(cm_id);
-	kfree(ep);
+	kref_put(&ep->refcnt, qedr_iw_free_ep);
 	return rc;
 }
 
@@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct qedr_dev *dev = ep->dev;
 	struct qedr_qp *qp;
 	struct qed_iwarp_accept_in params;
-	int rc;
+	int rc = 0;
 
 	DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
 
-	qp = xa_load(&dev->qps, conn_param->qpn);
+	qp = qedr_iw_load_qp(dev, conn_param->qpn);
 	if (!qp) {
 		DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
 		return -EINVAL;
 	}
 
 	ep->qp = qp;
-	qp->ep = ep;
 	cm_id->add_ref(cm_id);
 	ep->cm_id = cm_id;
 
@@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	params.ird = conn_param->ird;
 	params.ord = conn_param->ord;
 
-	ep->during_connect = 1;
+	if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+			     &qp->iwarp_cm_flags))
+		goto err; /* QP already destroyed */
+
 	rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
-	if (rc)
+	if (rc) {
+		complete(&qp->iwarp_cm_comp);
 		goto err;
+	}
 
 	return rc;
+
 err:
-	ep->during_connect = 0;
-	cm_id->rem_ref(cm_id);
+	kref_put(&ep->refcnt, qedr_iw_free_ep);
+
 	return rc;
 }
 
@@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp)
 {
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
 
-	atomic_inc(&qp->refcnt);
+	kref_get(&qp->refcnt);
 }
 
 void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
 {
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
 
-	if (atomic_dec_and_test(&qp->refcnt)) {
-		xa_erase_irq(&qp->dev->qps, qp->qp_id);
-		kfree(qp);
-	}
+	kref_put(&qp->refcnt, qedr_iw_free_qp);
 }
 
 struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 6f3ce86019b7..4cd292966aa9 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,7 @@
 #include "verbs.h"
 #include <rdma/qedr-abi.h>
 #include "qedr_roce_cm.h"
+#include "qedr_iw_cm.h"
 
 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
 #define	RDMA_MAX_SGE_PER_SRQ	(4)
@@ -58,6 +59,11 @@
 
 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
 
+enum {
+	QEDR_USER_MMAP_IO_WC = 0,
+	QEDR_USER_MMAP_PHYS_PAGE,
+};
+
 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
 					size_t len)
 {
@@ -250,78 +256,31 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	return 0;
 }
 
-int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
-		     struct ib_port_modify *props)
-{
-	return 0;
-}
-
-static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
-			 unsigned long len)
-{
-	struct qedr_mm *mm;
-
-	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
-	if (!mm)
-		return -ENOMEM;
-
-	mm->key.phy_addr = phy_addr;
-	/* This function might be called with a length which is not a multiple
-	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
-	 * forces this granularity by increasing the requested size if needed.
-	 * When qedr_mmap is called, it will search the list with the updated
-	 * length as a key. To prevent search failures, the length is rounded up
-	 * in advance to PAGE_SIZE.
-	 */
-	mm->key.len = roundup(len, PAGE_SIZE);
-	INIT_LIST_HEAD(&mm->entry);
-
-	mutex_lock(&uctx->mm_list_lock);
-	list_add(&mm->entry, &uctx->mm_head);
-	mutex_unlock(&uctx->mm_list_lock);
-
-	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
-		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
-		 (unsigned long long)mm->key.phy_addr,
-		 (unsigned long)mm->key.len, uctx);
-
-	return 0;
-}
-
-static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
-			     unsigned long len)
-{
-	bool found = false;
-	struct qedr_mm *mm;
-
-	mutex_lock(&uctx->mm_list_lock);
-	list_for_each_entry(mm, &uctx->mm_head, entry) {
-		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
-			continue;
-
-		found = true;
-		break;
-	}
-	mutex_unlock(&uctx->mm_list_lock);
-	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
-		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
-		 mm->key.phy_addr, mm->key.len, uctx, found);
-
-	return found;
-}
-
 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
 	struct ib_device *ibdev = uctx->device;
 	int rc;
 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
 	struct qedr_alloc_ucontext_resp uresp = {};
+	struct qedr_alloc_ucontext_req ureq = {};
 	struct qedr_dev *dev = get_qedr_dev(ibdev);
 	struct qed_rdma_add_user_out_params oparams;
+	struct qedr_user_mmap_entry *entry;
 
 	if (!udata)
 		return -EFAULT;
 
+	if (udata->inlen) {
+		rc = ib_copy_from_udata(&ureq, udata,
+					min(sizeof(ureq), udata->inlen));
+		if (rc) {
+			DP_ERR(dev, "Problem copying data from user space\n");
+			return -EFAULT;
+		}
+
+		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
+	}
+
 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
 	if (rc) {
 		DP_ERR(dev,
@@ -334,13 +293,29 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 	ctx->dpi_addr = oparams.dpi_addr;
 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
 	ctx->dpi_size = oparams.dpi_size;
-	INIT_LIST_HEAD(&ctx->mm_head);
-	mutex_init(&ctx->mm_list_lock);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	entry->io_address = ctx->dpi_phys_addr;
+	entry->length = ctx->dpi_size;
+	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
+	entry->dpi = ctx->dpi;
+	entry->dev = dev;
+	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
+					 ctx->dpi_size);
+	if (rc) {
+		kfree(entry);
+		goto err;
+	}
+	ctx->db_mmap_entry = &entry->rdma_entry;
 
 	uresp.dpm_enabled = dev->user_dpm_enabled;
 	uresp.wids_enabled = 1;
 	uresp.wid_count = oparams.wid_count;
-	uresp.db_pa = ctx->dpi_phys_addr;
+	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
 	uresp.db_size = ctx->dpi_size;
 	uresp.max_send_wr = dev->attr.max_sqe;
 	uresp.max_recv_wr = dev->attr.max_rqe;
@@ -352,82 +327,92 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 
 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
-		return rc;
+		goto err;
 
 	ctx->dev = dev;
 
-	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
-	if (rc)
-		return rc;
-
 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
 		 &ctx->ibucontext);
 	return 0;
+
+err:
+	if (!ctx->db_mmap_entry)
+		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
+	else
+		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
+
+	return rc;
 }
 
 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
 {
 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
-	struct qedr_mm *mm, *tmp;
 
 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
 		 uctx);
-	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
 
-	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
-		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
-			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
-			 mm->key.phy_addr, mm->key.len, uctx);
-		list_del(&mm->entry);
-		kfree(mm);
-	}
+	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
 }
 
-int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 {
-	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
-	struct qedr_dev *dev = get_qedr_dev(context->device);
-	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long len = (vma->vm_end - vma->vm_start);
-	unsigned long dpi_start;
+	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
+	struct qedr_dev *dev = entry->dev;
 
-	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
+	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
+		free_page((unsigned long)entry->address);
+	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
+		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
 
-	DP_DEBUG(dev, QEDR_MSG_INIT,
-		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
-		 (void *)vma->vm_start, (void *)vma->vm_end,
-		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+	kfree(entry);
+}
 
-	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
-		DP_ERR(dev,
-		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
-		       (void *)vma->vm_start, (void *)vma->vm_end);
-		return -EINVAL;
-	}
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
+{
+	struct ib_device *dev = ucontext->device;
+	size_t length = vma->vm_end - vma->vm_start;
+	struct rdma_user_mmap_entry *rdma_entry;
+	struct qedr_user_mmap_entry *entry;
+	int rc = 0;
+	u64 pfn;
 
-	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
-		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
-		       vma->vm_pgoff);
-		return -EINVAL;
-	}
+	ibdev_dbg(dev,
+		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
 
-	if (phys_addr < dpi_start ||
-	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
-		DP_ERR(dev,
-		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
-		       (void *)phys_addr, (void *)dpi_start,
-		       ucontext->dpi_size);
+	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
+	if (!rdma_entry) {
+		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
+			  vma->vm_pgoff);
 		return -EINVAL;
 	}
-
-	if (vma->vm_flags & VM_READ) {
-		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
-		return -EINVAL;
+	entry = get_qedr_mmap_entry(rdma_entry);
+	ibdev_dbg(dev,
+		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+		  entry->io_address, length, entry->mmap_flag);
+
+	switch (entry->mmap_flag) {
+	case QEDR_USER_MMAP_IO_WC:
+		pfn = entry->io_address >> PAGE_SHIFT;
+		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
+				       pgprot_writecombine(vma->vm_page_prot),
+				       rdma_entry);
+		break;
+	case QEDR_USER_MMAP_PHYS_PAGE:
+		rc = vm_insert_page(vma, vma->vm_start,
+				    virt_to_page(entry->address));
+		break;
+	default:
+		rc = -EINVAL;
 	}
 
-	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
-				  vma->vm_page_prot);
+	if (rc)
+		ibdev_dbg(dev,
+			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+			  entry->io_address, length, entry->mmap_flag, rc);
+
+	rdma_user_mmap_entry_put(rdma_entry);
+	return rc;
 }
 
 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -657,16 +642,50 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 	}
 }
 
+static int qedr_db_recovery_add(struct qedr_dev *dev,
+				void __iomem *db_addr,
+				void *db_data,
+				enum qed_db_rec_width db_width,
+				enum qed_db_rec_space db_space)
+{
+	if (!db_data) {
+		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+		return 0;
+	}
+
+	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
+						 db_width, db_space);
+}
+
+static void qedr_db_recovery_del(struct qedr_dev *dev,
+				 void __iomem *db_addr,
+				 void *db_data)
+{
+	if (!db_data) {
+		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+		return;
+	}
+
+	/* Ignore return code as there is not much we can do about it. Error
+	 * log will be printed inside.
+	 */
+	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
+}
+
 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
-			      struct qedr_cq *cq, struct ib_udata *udata)
+			      struct qedr_cq *cq, struct ib_udata *udata,
+			      u32 db_offset)
 {
 	struct qedr_create_cq_uresp uresp;
 	int rc;
 
 	memset(&uresp, 0, sizeof(uresp));
 
-	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+	uresp.db_offset = db_offset;
 	uresp.icid = cq->icid;
+	if (cq->q.db_mmap_entry)
+		uresp.db_rec_addr =
+			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
 
 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
@@ -694,10 +713,58 @@ static inline int qedr_align_cq_entries(int entries)
 	return aligned_size / QEDR_CQE_SIZE;
 }
 
+static int qedr_init_user_db_rec(struct ib_udata *udata,
+				 struct qedr_dev *dev, struct qedr_userq *q,
+				 bool requires_db_rec)
+{
+	struct qedr_ucontext *uctx =
+		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+					  ibucontext);
+	struct qedr_user_mmap_entry *entry;
+	int rc;
+
+	/* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
+	if (requires_db_rec == 0 || !uctx->db_rec)
+		return 0;
+
+	/* Allocate a page for doorbell recovery, add to mmap */
+	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
+	if (!q->db_rec_data) {
+		DP_ERR(dev, "get_zeroed_page failed\n");
+		return -ENOMEM;
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto err_free_db_data;
+
+	entry->address = q->db_rec_data;
+	entry->length = PAGE_SIZE;
+	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
+	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
+					 &entry->rdma_entry,
+					 PAGE_SIZE);
+	if (rc)
+		goto err_free_entry;
+
+	q->db_mmap_entry = &entry->rdma_entry;
+
+	return 0;
+
+err_free_entry:
+	kfree(entry);
+
+err_free_db_data:
+	free_page((unsigned long)q->db_rec_data);
+	q->db_rec_data = NULL;
+	return -ENOMEM;
+}
+
 static inline int qedr_init_user_queue(struct ib_udata *udata,
 				       struct qedr_dev *dev,
 				       struct qedr_userq *q, u64 buf_addr,
-				       size_t buf_len, int access, int dmasync,
+				       size_t buf_len, bool requires_db_rec,
+				       int access,
 				       int alloc_and_init)
 {
 	u32 fw_pages;
@@ -705,7 +772,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
 
 	q->buf_addr = buf_addr;
 	q->buf_len = buf_len;
-	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
+	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access);
 	if (IS_ERR(q->umem)) {
 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
 		       PTR_ERR(q->umem));
@@ -735,7 +802,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata,
 		}
 	}
 
-	return 0;
+	/* mmap the user address used to store doorbell data for recovery */
+	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
 
 err0:
 	ib_umem_release(q->umem);
@@ -821,6 +889,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	int entries = attr->cqe;
 	struct qedr_cq *cq = get_qedr_cq(ibcq);
 	int chain_entries;
+	u32 db_offset;
 	int page_cnt;
 	u64 pbl_ptr;
 	u16 icid;
@@ -840,8 +909,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	chain_entries = qedr_align_cq_entries(entries);
 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
 
+	/* calc db offset. user will add DPI base, kernel will add db addr */
+	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+
 	if (udata) {
-		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+							 udata->inlen))) {
 			DP_ERR(dev,
 			       "create cq: problem copying data from user space\n");
 			goto err0;
@@ -856,7 +929,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		cq->cq_type = QEDR_CQ_TYPE_USER;
 
 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
-					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
+					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
 					  1);
 		if (rc)
 			goto err0;
@@ -865,6 +938,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		page_cnt = cq->q.pbl_info.num_pbes;
 
 		cq->ibcq.cqe = chain_entries;
+		cq->q.db_addr = ctx->dpi_addr + db_offset;
 	} else {
 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
 
@@ -876,7 +950,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 						   sizeof(union rdma_cqe),
 						   &cq->pbl, NULL);
 		if (rc)
-			goto err1;
+			goto err0;
 
 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
@@ -888,21 +962,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
 	if (rc)
-		goto err2;
+		goto err1;
 
 	cq->icid = icid;
 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
 	spin_lock_init(&cq->cq_lock);
 
 	if (udata) {
-		rc = qedr_copy_cq_uresp(dev, cq, udata);
+		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
+		if (rc)
+			goto err2;
+
+		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
+					  &cq->q.db_rec_data->db_data,
+					  DB_REC_WIDTH_64B,
+					  DB_REC_USER);
 		if (rc)
-			goto err3;
+			goto err2;
+
 	} else {
 		/* Generate doorbell address. */
-		cq->db_addr = dev->db_addr +
-		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
 		cq->db.data.icid = cq->icid;
+		cq->db_addr = dev->db_addr + db_offset;
 		cq->db.data.params = DB_AGG_CMD_SET <<
 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
 
@@ -912,6 +993,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		cq->latest_cqe = NULL;
 		consume_cqe(cq);
 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+
+		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
+					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
+		if (rc)
+			goto err2;
 	}
 
 	DP_DEBUG(dev, QEDR_MSG_CQ,
@@ -920,18 +1006,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	return 0;
 
-err3:
+err2:
 	destroy_iparams.icid = cq->icid;
 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
 				  &destroy_oparams);
-err2:
-	if (udata)
-		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
-	else
-		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
 err1:
-	if (udata)
+	if (udata) {
+		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
 		ib_umem_release(cq->q.umem);
+		if (cq->q.db_mmap_entry)
+			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+	} else {
+		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+	}
 err0:
 	return -EINVAL;
 }
@@ -962,8 +1049,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	cq->destroyed = 1;
 
 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
-	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
+		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
 		return;
+	}
 
 	iparams.icid = cq->icid;
 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
@@ -972,6 +1061,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	if (udata) {
 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
 		ib_umem_release(cq->q.umem);
+
+		if (cq->q.db_rec_data) {
+			qedr_db_recovery_del(dev, cq->q.db_addr,
+					     &cq->q.db_rec_data->db_data);
+			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+		}
+	} else {
+		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
 	}
 
 	/* We don't want the IRQ handler to handle a non-existing CQ so we
@@ -1136,8 +1233,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev,
 }
 
 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
-			       struct qedr_create_qp_uresp *uresp,
-			       struct qedr_qp *qp)
+			      struct qedr_create_qp_uresp *uresp,
+			      struct qedr_qp *qp)
 {
 	/* iWARP requires two doorbells per RQ. */
 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
@@ -1150,6 +1247,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev,
 	}
 
 	uresp->rq_icid = qp->icid;
+	if (qp->urq.db_mmap_entry)
+		uresp->rq_db_rec_addr =
+			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
 }
 
 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
@@ -1163,22 +1263,26 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev,
 		uresp->sq_icid = qp->icid;
 	else
 		uresp->sq_icid = qp->icid + 1;
+
+	if (qp->usq.db_mmap_entry)
+		uresp->sq_db_rec_addr =
+			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
 }
 
 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
-			      struct qedr_qp *qp, struct ib_udata *udata)
+			      struct qedr_qp *qp, struct ib_udata *udata,
+			      struct qedr_create_qp_uresp *uresp)
 {
-	struct qedr_create_qp_uresp uresp;
 	int rc;
 
-	memset(&uresp, 0, sizeof(uresp));
-	qedr_copy_sq_uresp(dev, &uresp, qp);
-	qedr_copy_rq_uresp(dev, &uresp, qp);
+	memset(uresp, 0, sizeof(*uresp));
+	qedr_copy_sq_uresp(dev, uresp, qp);
+	qedr_copy_rq_uresp(dev, uresp, qp);
 
-	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
-	uresp.qp_id = qp->qp_id;
+	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+	uresp->qp_id = qp->qp_id;
 
-	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
 	if (rc)
 		DP_ERR(dev,
 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
@@ -1193,7 +1297,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 				      struct ib_qp_init_attr *attrs)
 {
 	spin_lock_init(&qp->q_lock);
-	atomic_set(&qp->refcnt, 1);
+	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+		kref_init(&qp->refcnt);
+		init_completion(&qp->iwarp_cm_comp);
+	}
 	qp->pd = pd;
 	qp->qp_type = attrs->qp_type;
 	qp->max_inline_data = attrs->cap.max_inline_data;
@@ -1222,16 +1329,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 		 qp->sq.max_sges, qp->sq_cq->icid);
 }
 
-static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
 {
+	int rc;
+
 	qp->sq.db = dev->db_addr +
 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
 	qp->sq.db_data.data.icid = qp->icid + 1;
+	rc = qedr_db_recovery_add(dev, qp->sq.db,
+				  &qp->sq.db_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_KERNEL);
+	if (rc)
+		return rc;
+
 	if (!qp->srq) {
 		qp->rq.db = dev->db_addr +
 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
 		qp->rq.db_data.data.icid = qp->icid;
+
+		rc = qedr_db_recovery_add(dev, qp->rq.db,
+					  &qp->rq.db_data,
+					  DB_REC_WIDTH_32B,
+					  DB_REC_KERNEL);
+		if (rc)
+			qedr_db_recovery_del(dev, qp->sq.db,
+					     &qp->sq.db_data);
 	}
+
+	return rc;
 }
 
 static int qedr_check_srq_params(struct qedr_dev *dev,
@@ -1279,19 +1405,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
 static int qedr_init_srq_user_params(struct ib_udata *udata,
 				     struct qedr_srq *srq,
 				     struct qedr_create_srq_ureq *ureq,
-				     int access, int dmasync)
+				     int access)
 {
 	struct scatterlist *sg;
 	int rc;
 
 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
-				  ureq->srq_len, access, dmasync, 1);
+				  ureq->srq_len, false, access, 1);
 	if (rc)
 		return rc;
 
 	srq->prod_umem =
 		ib_umem_get(udata, ureq->prod_pair_addr,
-			    sizeof(struct rdma_srq_producers), access, dmasync);
+			    sizeof(struct rdma_srq_producers), access);
 	if (IS_ERR(srq->prod_umem)) {
 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
 		ib_umem_release(srq->usrq.umem);
@@ -1381,13 +1507,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
 	hw_srq->max_sges = init_attr->attr.max_sge;
 
 	if (udata) {
-		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+							 udata->inlen))) {
 			DP_ERR(dev,
 			       "create srq: problem copying data from user space\n");
 			goto err0;
 		}
 
-		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
+		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
 		if (rc)
 			goto err0;
 
@@ -1570,13 +1697,39 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
 }
 
-static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
+static void qedr_cleanup_user(struct qedr_dev *dev,
+			      struct qedr_ucontext *ctx,
+			      struct qedr_qp *qp)
 {
 	ib_umem_release(qp->usq.umem);
 	qp->usq.umem = NULL;
 
 	ib_umem_release(qp->urq.umem);
 	qp->urq.umem = NULL;
+
+	if (rdma_protocol_roce(&dev->ibdev, 1)) {
+		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+	} else {
+		kfree(qp->usq.pbl_tbl);
+		kfree(qp->urq.pbl_tbl);
+	}
+
+	if (qp->usq.db_rec_data) {
+		qedr_db_recovery_del(dev, qp->usq.db_addr,
+				     &qp->usq.db_rec_data->db_data);
+		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
+	}
+
+	if (qp->urq.db_rec_data) {
+		qedr_db_recovery_del(dev, qp->urq.db_addr,
+				     &qp->urq.db_rec_data->db_data);
+		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
+	}
+
+	if (rdma_protocol_iwarp(&dev->ibdev, 1))
+		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
+				     &qp->urq.db_rec_db2_data);
 }
 
 static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1588,27 +1741,30 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	struct qed_rdma_create_qp_in_params in_params;
 	struct qed_rdma_create_qp_out_params out_params;
 	struct qedr_pd *pd = get_qedr_pd(ibpd);
+	struct qedr_create_qp_uresp uresp;
+	struct qedr_ucontext *ctx = NULL;
 	struct qedr_create_qp_ureq ureq;
 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
 	int rc = -EINVAL;
 
+	qp->create_type = QEDR_QP_CREATE_USER;
 	memset(&ureq, 0, sizeof(ureq));
-	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+	rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
 	if (rc) {
 		DP_ERR(dev, "Problem copying data from user space\n");
 		return rc;
 	}
 
-	/* SQ - read access only (0), dma sync not required (0) */
+	/* SQ - read access only (0) */
 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
-				  ureq.sq_len, 0, 0, alloc_and_init);
+				  ureq.sq_len, true, 0, alloc_and_init);
 	if (rc)
 		return rc;
 
 	if (!qp->srq) {
-		/* RQ - read access only (0), dma sync not required (0) */
+		/* RQ - read access only (0) */
 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
-					  ureq.rq_len, 0, 0, alloc_and_init);
+					  ureq.rq_len, true, 0, alloc_and_init);
 		if (rc)
 			return rc;
 	}
@@ -1638,29 +1794,76 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	qp->qp_id = out_params.qp_id;
 	qp->icid = out_params.icid;
 
-	rc = qedr_copy_qp_uresp(dev, qp, udata);
+	rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
+	if (rc)
+		goto err;
+
+	/* db offset was calculated in copy_qp_uresp, now set in the user q */
+	ctx = pd->uctx;
+	qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+	qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+
+	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
+
+		/* calculate the db_rec_db2 data since it is constant so no
+		 *  need to reflect from user
+		 */
+		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
+		qp->urq.db_rec_db2_data.data.value =
+			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+	}
+
+	rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+				  &qp->usq.db_rec_data->db_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_USER);
 	if (rc)
 		goto err;
 
+	rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+				  &qp->urq.db_rec_data->db_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_USER);
+	if (rc)
+		goto err;
+
+	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
+					  &qp->urq.db_rec_db2_data,
+					  DB_REC_WIDTH_32B,
+					  DB_REC_USER);
+		if (rc)
+			goto err;
+	}
 	qedr_qp_user_print(dev, qp);
 
-	return 0;
+	return rc;
 err:
 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
 	if (rc)
 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
 
 err1:
-	qedr_cleanup_user(dev, qp);
+	qedr_cleanup_user(dev, ctx, qp);
 	return rc;
 }
 
-static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
 {
+	int rc;
+
 	qp->sq.db = dev->db_addr +
 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
 	qp->sq.db_data.data.icid = qp->icid;
 
+	rc = qedr_db_recovery_add(dev, qp->sq.db,
+				  &qp->sq.db_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_KERNEL);
+	if (rc)
+		return rc;
+
 	qp->rq.db = dev->db_addr +
 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
 	qp->rq.db_data.data.icid = qp->icid;
@@ -1668,6 +1871,19 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
+
+	rc = qedr_db_recovery_add(dev, qp->rq.db,
+				  &qp->rq.db_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_KERNEL);
+	if (rc)
+		return rc;
+
+	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
+				  &qp->rq.iwarp_db2_data,
+				  DB_REC_WIDTH_32B,
+				  DB_REC_KERNEL);
+	return rc;
 }
 
 static int
@@ -1715,8 +1931,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
 	qp->qp_id = out_params.qp_id;
 	qp->icid = out_params.icid;
 
-	qedr_set_roce_db_info(dev, qp);
-	return rc;
+	return qedr_set_roce_db_info(dev, qp);
 }
 
 static int
@@ -1774,8 +1989,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
 	qp->qp_id = out_params.qp_id;
 	qp->icid = out_params.icid;
 
-	qedr_set_iwarp_db_info(dev, qp);
-	return rc;
+	return qedr_set_iwarp_db_info(dev, qp);
 
 err:
 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -1790,6 +2004,20 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
 
 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
 	kfree(qp->rqe_wr_id);
+
+	/* GSI qp is not registered to db mechanism so no need to delete */
+	if (qp->qp_type == IB_QPT_GSI)
+		return;
+
+	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
+
+	if (!qp->srq) {
+		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
+
+		if (rdma_protocol_iwarp(&dev->ibdev, 1))
+			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
+					     &qp->rq.iwarp_db2_data);
+	}
 }
 
 static int qedr_create_kernel_qp(struct qedr_dev *dev,
@@ -1805,6 +2033,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
 	u32 n_sq_entries;
 
 	memset(&in_params, 0, sizeof(in_params));
+	qp->create_type = QEDR_QP_CREATE_KERNEL;
 
 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
 	 * the ring. The ring should allow at least a single WR, even if the
@@ -1918,7 +2147,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 	qp->ibqp.qp_num = qp->qp_id;
 
 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
-		rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
+		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
 		if (rc)
 			goto err;
 	}
@@ -2429,7 +2658,10 @@ err:
 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
 				  struct ib_udata *udata)
 {
-	int rc = 0;
+	struct qedr_ucontext *ctx =
+		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+					  ibucontext);
+	int rc;
 
 	if (qp->qp_type != IB_QPT_GSI) {
 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
@@ -2437,8 +2669,8 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
 			return rc;
 	}
 
-	if (udata)
-		qedr_cleanup_user(dev, qp);
+	if (qp->create_type == QEDR_QP_CREATE_USER)
+		qedr_cleanup_user(dev, ctx, qp);
 	else
 		qedr_cleanup_kernel(dev, qp);
 
@@ -2467,34 +2699,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
 		}
 	} else {
-		/* Wait for the connect/accept to complete */
-		if (qp->ep) {
-			int wait_count = 1;
-
-			while (qp->ep->during_connect) {
-				DP_DEBUG(dev, QEDR_MSG_QP,
-					 "Still in during connect/accept\n");
-
-				msleep(100);
-				if (wait_count++ > 200) {
-					DP_NOTICE(dev,
-						  "during connect timeout\n");
-					break;
-				}
-			}
-		}
+		/* If connection establishment started the WAIT_FOR_CONNECT
+		 * bit will be on and we need to Wait for the establishment
+		 * to complete before destroying the qp.
+		 */
+		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+				     &qp->iwarp_cm_flags))
+			wait_for_completion(&qp->iwarp_cm_comp);
+
+		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
+		 * bit will be on, and we need to wait for the disconnect to
+		 * complete before continuing. We can use the same completion,
+		 * iwarp_cm_comp, since this is the only place that waits for
+		 * this completion and it is sequential. In addition,
+		 * disconnect can't occur before the connection is fully
+		 * established, therefore if WAIT_FOR_DISCONNECT is on it
+		 * means WAIT_FOR_CONNECT is also on and the completion for
+		 * CONNECT already occurred.
+		 */
+		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+				     &qp->iwarp_cm_flags))
+			wait_for_completion(&qp->iwarp_cm_comp);
 	}
 
 	if (qp->qp_type == IB_QPT_GSI)
 		qedr_destroy_gsi_qp(dev);
 
+	/* We need to remove the entry from the xarray before we release the
+	 * qp_id to avoid a race of the qp_id being reallocated and failing
+	 * on xa_insert
+	 */
+	if (rdma_protocol_iwarp(&dev->ibdev, 1))
+		xa_erase(&dev->qps, qp->qp_id);
+
 	qedr_free_qp_resources(dev, qp, udata);
 
-	if (atomic_dec_and_test(&qp->refcnt) &&
-	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
-		xa_erase_irq(&dev->qps, qp->qp_id);
-		kfree(qp);
-	}
+	if (rdma_protocol_iwarp(&dev->ibdev, 1))
+		qedr_iw_qp_rem_ref(&qp->ibqp);
+
 	return 0;
 }
 
@@ -2597,7 +2839,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 
 	mr->type = QEDR_MR_USER;
 
-	mr->umem = ib_umem_get(udata, start, len, acc, 0);
+	mr->umem = ib_umem_get(udata, start, len, acc);
 	if (IS_ERR(mr->umem)) {
 		rc = -EFAULT;
 		goto err0;
@@ -2673,8 +2915,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
 
 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
 
-	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
-		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+	if (mr->type != QEDR_MR_DMA)
+		free_mr_info(dev, &mr->info);
 
 	/* it could be user registered memory. */
 	ib_umem_release(mr->umem);
@@ -4106,19 +4348,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 }
 
 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
-		     u8 port_num,
-		     const struct ib_wc *in_wc,
-		     const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *mad_hdr,
-		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
-		     size_t *out_mad_size, u16 *out_mad_pkey_index)
+		     u8 port_num, const struct ib_wc *in_wc,
+		     const struct ib_grh *in_grh, const struct ib_mad *in,
+		     struct ib_mad *out_mad, size_t *out_mad_size,
+		     u16 *out_mad_pkey_index)
 {
-	struct qedr_dev *dev = get_qedr_dev(ibdev);
-
-	DP_DEBUG(dev, QEDR_MSG_GSI,
-		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
-		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
-		 mad_hdr->class_specific, mad_hdr->class_version,
-		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
 	return IB_MAD_RESULT_SUCCESS;
 }
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 9aaa90283d6e..18027844eb87 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -35,8 +35,6 @@
 int qedr_query_device(struct ib_device *ibdev,
 		      struct ib_device_attr *attr, struct ib_udata *udata);
 int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int qedr_modify_port(struct ib_device *, u8 port, int mask,
-		     struct ib_port_modify *props);
 
 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
 		      int index, union ib_gid *gid);
@@ -46,7 +44,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
 void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
 
-int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
 
@@ -93,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
 		   const struct ib_recv_wr **bad_wr);
 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
 		     u8 port_num, const struct ib_wc *in_wc,
-		     const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in_mad,
-		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
-		     size_t *out_mad_size, u16 *out_mad_pkey_index);
+		     const struct ib_grh *in_grh, const struct ib_mad *in_mad,
+		     struct ib_mad *out_mad, size_t *out_mad_size,
+		     u16 *out_mad_pkey_index);
 
 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
 			struct ib_port_immutable *immutable);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 531d8a1db2c3..ca5ea734e3d0 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd)
  *
  * The exact combo of LEDs if on is true is determined by looking
  * at the ibcstatus.
-
  * These LEDs indicate the physical and logical state of IB link.
  * For this chip (at least with recommended board pinouts), LED1
  * is Yellow (logical state) and LED2 is Green (physical state),
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index f92faf5ec369..79bb83222e8d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp,
 	struct ib_cc_classportinfo_attr *p =
 		(struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
 
-	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
 	p->base_version = 1;
 	p->class_version = 1;
 	p->cap_mask = 0;
@@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp,
 	struct qib_ibport *ibp = to_iport(ibdev, port);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
-	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
 	p->congestion_info = 0;
 	p->control_table_cap = ppd->cc_max_table_entries;
 
@@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	struct ib_cc_congestion_entry_shadow *entries;
 
-	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
 	spin_lock(&ppd->cc_shadow_lock);
 
 	entries = ppd->congestion_entries_shadow->entries;
@@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
 	if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
 		goto bail;
 
-	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
-
 	spin_lock(&ppd->cc_shadow_lock);
 
 	max_cct_block =
@@ -2296,18 +2288,11 @@ bail:
 	return reply_failure((struct ib_smp *) ccp);
 }
 
-static int check_cc_key(struct qib_ibport *ibp,
-			struct ib_cc_mad *ccp, int mad_flags)
-{
-	return 0;
-}
-
 static int process_cc(struct ib_device *ibdev, int mad_flags,
 			u8 port, const struct ib_mad *in_mad,
 			struct ib_mad *out_mad)
 {
 	struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
-	struct qib_ibport *ibp = to_iport(ibdev, port);
 	int ret;
 
 	*out_mad = *in_mad;
@@ -2318,10 +2303,6 @@ static int process_cc(struct ib_device *ibdev, int mad_flags,
 		goto bail;
 	}
 
-	ret = check_cc_key(ibp, ccp, mad_flags);
-	if (ret)
-		goto bail;
-
 	switch (ccp->method) {
 	case IB_MGMT_METHOD_GET:
 		switch (ccp->attr_id) {
@@ -2405,28 +2386,21 @@ bail:
  */
 int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		    const struct ib_mad_hdr *in, size_t in_mad_size,
-		    struct ib_mad_hdr *out, size_t *out_mad_size,
-		    u16 *out_mad_pkey_index)
+		    const struct ib_mad *in, struct ib_mad *out,
+		    size_t *out_mad_size, u16 *out_mad_pkey_index)
 {
 	int ret;
 	struct qib_ibport *ibp = to_iport(ibdev, port);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
 
-	switch (in_mad->mad_hdr.mgmt_class) {
+	switch (in->mad_hdr.mgmt_class) {
 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
+		ret = process_subn(ibdev, mad_flags, port, in, out);
 		goto bail;
 
 	case IB_MGMT_CLASS_PERF_MGMT:
-		ret = process_perf(ibdev, port, in_mad, out_mad);
+		ret = process_perf(ibdev, port, in, out);
 		goto bail;
 
 	case IB_MGMT_CLASS_CONG_MGMT:
@@ -2435,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 			ret = IB_MAD_RESULT_SUCCESS;
 			goto bail;
 		}
-		ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+		ret = process_cc(ibdev, mad_flags, port, in, out);
 		goto bail;
 
 	default:
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 3926be78036e..568b21eb6ea1 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj,
 	struct qib_pportdata *ppd =
 		container_of(kobj, struct qib_pportdata, pport_kobj);
 
+	if (!pattr->show)
+		return -EIO;
+
 	return pattr->show(ppd, buf);
 }
 
@@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
 	struct qib_pportdata *ppd =
 		container_of(kobj, struct qib_pportdata, pport_kobj);
 
+	if (!pattr->store)
+		return -EIO;
+
 	return pattr->store(ppd, buf, len);
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 17bdf8acee2f..8bf414b47b96 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp);
 void qib_node_desc_chg(struct qib_ibport *ibp);
 int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		    const struct ib_mad_hdr *in, size_t in_mad_size,
-		    struct ib_mad_hdr *out, size_t *out_mad_size,
-		    u16 *out_mad_pkey_index);
+		    const struct ib_mad *in, struct ib_mad *out,
+		    size_t *out_mad_size, u16 *out_mad_pkey_index);
 void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
 void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 7800e6930502..a26a4fd86bf4 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		}
 
 		cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size,
-				       IB_ACCESS_LOCAL_WRITE, 1);
+				       IB_ACCESS_LOCAL_WRITE);
 		if (IS_ERR(cq->umem)) {
 			ret = PTR_ERR(cq->umem);
 			goto err_cq;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
index 8f9749d54688..86a6c054ea26 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -58,7 +58,8 @@
 #define PVRDMA_ROCEV1_VERSION		17
 #define PVRDMA_ROCEV2_VERSION		18
 #define PVRDMA_PPN64_VERSION		19
-#define PVRDMA_VERSION			PVRDMA_PPN64_VERSION
+#define PVRDMA_QPHANDLE_VERSION		20
+#define PVRDMA_VERSION			PVRDMA_QPHANDLE_VERSION
 
 #define PVRDMA_BOARD_ID			1
 #define PVRDMA_REV_ID			1
@@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp {
 	u32 max_inline_data;
 };
 
+struct pvrdma_cmd_create_qp_resp_v2 {
+	struct pvrdma_cmd_resp_hdr hdr;
+	u32 qpn;
+	u32 qp_handle;
+	u32 max_send_wr;
+	u32 max_recv_wr;
+	u32 max_send_sge;
+	u32 max_recv_sge;
+	u32 max_inline_data;
+};
+
 struct pvrdma_cmd_modify_qp {
 	struct pvrdma_cmd_hdr hdr;
 	u32 qp_handle;
@@ -663,6 +675,7 @@ union pvrdma_cmd_resp {
 	struct pvrdma_cmd_create_cq_resp create_cq_resp;
 	struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
 	struct pvrdma_cmd_create_qp_resp create_qp_resp;
+	struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2;
 	struct pvrdma_cmd_query_qp_resp query_qp_resp;
 	struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
 	struct pvrdma_cmd_create_srq_resp create_srq_resp;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index f3a3d22ee8d7..c61e665ff261 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_PTR(-EINVAL);
 	}
 
-	umem = ib_umem_get(udata, start, length, access_flags, 0);
+	umem = ib_umem_get(udata, start, length, access_flags);
 	if (IS_ERR(umem)) {
 		dev_warn(&dev->pdev->dev,
 			 "could not get umem for mem region\n");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index bca6a58a442e..f15809c28f67 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -52,6 +52,9 @@
 
 #include "pvrdma.h"
 
+static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
+				struct pvrdma_qp *qp);
+
 static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
 			   struct pvrdma_cq **recv_cq)
 {
@@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 	union pvrdma_cmd_resp rsp;
 	struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
 	struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
+	struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
 	struct pvrdma_create_qp ucmd;
+	struct pvrdma_create_qp_resp qp_resp = {};
 	unsigned long flags;
 	int ret;
 	bool is_srq = !!init_attr->srq;
@@ -260,10 +265,19 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 				goto err_qp;
 			}
 
+			/* Userspace supports qpn and qp handles? */
+			if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
+			    udata->outlen < sizeof(qp_resp)) {
+				dev_warn(&dev->pdev->dev,
+					 "create queuepair not supported\n");
+				ret = -EOPNOTSUPP;
+				goto err_qp;
+			}
+
 			if (!is_srq) {
 				/* set qp->sq.wqe_cnt, shift, buf_size.. */
 				qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr,
-							ucmd.rbuf_size, 0, 0);
+							ucmd.rbuf_size, 0);
 				if (IS_ERR(qp->rumem)) {
 					ret = PTR_ERR(qp->rumem);
 					goto err_qp;
@@ -275,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 			}
 
 			qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr,
-						ucmd.sbuf_size, 0, 0);
+						ucmd.sbuf_size, 0);
 			if (IS_ERR(qp->sumem)) {
 				if (!is_srq)
 					ib_umem_release(qp->rumem);
@@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 	}
 
 	/* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
-	qp->qp_handle = resp->qpn;
 	qp->port = init_attr->port_num;
-	qp->ibqp.qp_num = resp->qpn;
+
+	if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
+		qp->ibqp.qp_num = resp_v2->qpn;
+		qp->qp_handle = resp_v2->qp_handle;
+	} else {
+		qp->ibqp.qp_num = resp->qpn;
+		qp->qp_handle = resp->qpn;
+	}
+
 	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
 	dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
 	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
 
+	if (udata) {
+		qp_resp.qpn = qp->ibqp.qp_num;
+		qp_resp.qp_handle = qp->qp_handle;
+
+		if (ib_copy_to_udata(udata, &qp_resp,
+				     min(udata->outlen, sizeof(qp_resp)))) {
+			dev_warn(&dev->pdev->dev,
+				 "failed to copy back udata\n");
+			__pvrdma_destroy_qp(dev, qp);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
 	return &qp->ibqp;
 
 err_pdir:
@@ -400,27 +434,15 @@ err_qp:
 	return ERR_PTR(ret);
 }
 
-static void pvrdma_free_qp(struct pvrdma_qp *qp)
+static void _pvrdma_free_qp(struct pvrdma_qp *qp)
 {
+	unsigned long flags;
 	struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
-	struct pvrdma_cq *scq;
-	struct pvrdma_cq *rcq;
-	unsigned long flags, scq_flags, rcq_flags;
-
-	/* In case cq is polling */
-	get_cqs(qp, &scq, &rcq);
-	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
-
-	_pvrdma_flush_cqe(qp, scq);
-	if (scq != rcq)
-		_pvrdma_flush_cqe(qp, rcq);
 
 	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
 	dev->qp_tbl[qp->qp_handle] = NULL;
 	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
 
-	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
-
 	if (refcount_dec_and_test(&qp->refcnt))
 		complete(&qp->free);
 	wait_for_completion(&qp->free);
@@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
 	atomic_dec(&dev->num_qps);
 }
 
-/**
- * pvrdma_destroy_qp - destroy a queue pair
- * @qp: the queue pair to destroy
- * @udata: user data or null for kernel object
- *
- * @return: 0 on success.
- */
-int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
+static void pvrdma_free_qp(struct pvrdma_qp *qp)
+{
+	struct pvrdma_cq *scq;
+	struct pvrdma_cq *rcq;
+	unsigned long scq_flags, rcq_flags;
+
+	/* In case cq is polling */
+	get_cqs(qp, &scq, &rcq);
+	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+	_pvrdma_flush_cqe(qp, scq);
+	if (scq != rcq)
+		_pvrdma_flush_cqe(qp, rcq);
+
+	/*
+	 * We're now unlocking the CQs before clearing out the qp handle this
+	 * should still be safe. We have destroyed the backend QP and flushed
+	 * the CQEs so there should be no other completions for this QP.
+	 */
+	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+	_pvrdma_free_qp(qp);
+}
+
+static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
+					   u32 qp_handle)
 {
-	struct pvrdma_qp *vqp = to_vqp(qp);
 	union pvrdma_cmd_req req;
 	struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
 	int ret;
 
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
-	cmd->qp_handle = vqp->qp_handle;
+	cmd->qp_handle = qp_handle;
 
-	ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
+	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
 	if (ret < 0)
-		dev_warn(&to_vdev(qp->device)->pdev->dev,
+		dev_warn(&dev->pdev->dev,
 			 "destroy queuepair failed, error: %d\n", ret);
+}
 
+/**
+ * pvrdma_destroy_qp - destroy a queue pair
+ * @qp: the queue pair to destroy
+ * @udata: user data or null for kernel object
+ *
+ * @return: always 0.
+ */
+int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
+{
+	struct pvrdma_qp *vqp = to_vqp(qp);
+
+	_pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
 	pvrdma_free_qp(vqp);
 
 	return 0;
 }
 
+static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
+				struct pvrdma_qp *qp)
+{
+	_pvrdma_destroy_qp_work(dev, qp->qp_handle);
+	_pvrdma_free_qp(qp);
+}
+
 /**
  * pvrdma_modify_qp - modify queue pair attributes
  * @ibqp: the queue pair
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 36cdfbdbd325..98c8be71d91d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
 		goto err_srq;
 	}
 
-	srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0);
 	if (IS_ERR(srq->umem)) {
 		ret = PTR_ERR(srq->umem);
 		goto err_srq;
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index fe99da0ff060..ee02c6176007 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
  * rvt_destory_ah - Destory an address handle
  * @ibah: address handle
  * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
- * @udata: user data or NULL for kernel object
  *
  * Return: 0 on success
  */
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index a85571a4cf57..13d7f66eadab 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -552,7 +552,6 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 
 /**
  * rvt_driver_cq_init - Init cq resources on behalf of driver
- * @rdi: rvt dev structure
  *
  * Return: 0 on success
  */
@@ -568,7 +567,6 @@ int rvt_driver_cq_init(void)
 
 /**
  * rvt_cq_exit - tear down cq reources
- * @rdi: rvt dev structure
  */
 void rvt_cq_exit(void)
 {
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index a6a39f01dca3..b9a76bf74857 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -390,7 +390,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (length == 0)
 		return ERR_PTR(-EINVAL);
 
-	umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+	umem = ib_umem_get(udata, start, length, mr_access_flags);
 	if (IS_ERR(umem))
 		return (void *)umem;
 
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 0b0a241c57ff..3cdf75d0c7a4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -2563,10 +2563,9 @@ void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
 EXPORT_SYMBOL(rvt_add_retry_timer_ext);
 
 /**
- * rvt_add_rnr_timer - add/start an rnr timer
- * @qp - the QP
- * @aeth - aeth of RNR timeout, simulated aeth for loopback
- * add an rnr timer on the QP
+ * rvt_add_rnr_timer - add/start an rnr timer on the QP
+ * @qp: the QP
+ * @aeth: aeth of RNR timeout, simulated aeth for loopback
  */
 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
 {
@@ -2583,7 +2582,7 @@ EXPORT_SYMBOL(rvt_add_rnr_timer);
 
 /**
  * rvt_stop_rc_timers - stop all timers
- * @qp - the QP
+ * @qp: the QP
  * stop any pending timers
  */
 void rvt_stop_rc_timers(struct rvt_qp *qp)
@@ -2617,7 +2616,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp)
 
 /**
  * rvt_del_timers_sync - wait for any timeout routines to exit
- * @qp - the QP
+ * @qp: the QP
  */
 void rvt_del_timers_sync(struct rvt_qp *qp)
 {
@@ -2626,7 +2625,7 @@ void rvt_del_timers_sync(struct rvt_qp *qp)
 }
 EXPORT_SYMBOL(rvt_del_timers_sync);
 
-/**
+/*
  * This is called from s_timer for missing responses.
  */
 static void rvt_rc_timeout(struct timer_list *t)
@@ -2676,12 +2675,13 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry);
  * rvt_qp_iter_init - initial for QP iteration
  * @rdi: rvt devinfo
  * @v: u64 value
+ * @cb: user-defined callback
  *
  * This returns an iterator suitable for iterating QPs
  * in the system.
  *
- * The @cb is a user defined callback and @v is a 64
- * bit value passed to and relevant for processing in the
+ * The @cb is a user-defined callback and @v is a 64-bit
+ * value passed to and relevant for processing in the
  * @cb.  An example use case would be to alter QP processing
  * based on criteria not part of the rvt_qp.
  *
@@ -2712,7 +2712,7 @@ EXPORT_SYMBOL(rvt_qp_iter_init);
 
 /**
  * rvt_qp_iter_next - return the next QP in iter
- * @iter - the iterator
+ * @iter: the iterator
  *
  * Fine grained QP iterator suitable for use
  * with debugfs seq_file mechanisms.
@@ -2775,14 +2775,14 @@ EXPORT_SYMBOL(rvt_qp_iter_next);
 
 /**
  * rvt_qp_iter - iterate all QPs
- * @rdi - rvt devinfo
- * @v - a 64 bit value
- * @cb - a callback
+ * @rdi: rvt devinfo
+ * @v: a 64-bit value
+ * @cb: a callback
  *
  * This provides a way for iterating all QPs.
  *
- * The @cb is a user defined callback and @v is a 64
- * bit value passed to and relevant for processing in the
+ * The @cb is a user-defined callback and @v is a 64-bit
+ * value passed to and relevant for processing in the
  * cb.  An example use case would be to alter QP processing
  * based on criteria not part of the rvt_qp.
  *
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 18da1e1ea979..986265ad6e79 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -683,9 +683,10 @@ EXPORT_SYMBOL(rvt_unregister_device);
 
 /**
  * rvt_init_port - init internal data for driver port
- * @rdi: rvt dev strut
+ * @rdi: rvt_dev_info struct
  * @port: rvt port
  * @port_index: 0 based index of ports, different from IB core port num
+ * @pkey_table: pkey_table for @port
  *
  * Keep track of a list of ports. No need to have a detach port.
  * They persist until the driver goes away.
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index a8c11b5e1e94..0946a301a5c5 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -77,12 +77,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 {
 	rxe->max_inline_data			= RXE_MAX_INLINE_DATA;
 
-	rxe->attr.fw_ver			= RXE_FW_VER;
 	rxe->attr.max_mr_size			= RXE_MAX_MR_SIZE;
 	rxe->attr.page_size_cap			= RXE_PAGE_SIZE_CAP;
-	rxe->attr.vendor_id			= RXE_VENDOR_ID;
-	rxe->attr.vendor_part_id		= RXE_VENDOR_PART_ID;
-	rxe->attr.hw_ver			= RXE_HW_VER;
 	rxe->attr.max_qp			= RXE_MAX_QP;
 	rxe->attr.max_qp_wr			= RXE_MAX_QP_WR;
 	rxe->attr.device_cap_flags		= RXE_DEVICE_CAP_FLAGS;
@@ -94,22 +90,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	rxe->attr.max_mr			= RXE_MAX_MR;
 	rxe->attr.max_pd			= RXE_MAX_PD;
 	rxe->attr.max_qp_rd_atom		= RXE_MAX_QP_RD_ATOM;
-	rxe->attr.max_ee_rd_atom		= RXE_MAX_EE_RD_ATOM;
 	rxe->attr.max_res_rd_atom		= RXE_MAX_RES_RD_ATOM;
 	rxe->attr.max_qp_init_rd_atom		= RXE_MAX_QP_INIT_RD_ATOM;
-	rxe->attr.max_ee_init_rd_atom		= RXE_MAX_EE_INIT_RD_ATOM;
 	rxe->attr.atomic_cap			= IB_ATOMIC_HCA;
-	rxe->attr.max_ee			= RXE_MAX_EE;
-	rxe->attr.max_rdd			= RXE_MAX_RDD;
-	rxe->attr.max_mw			= RXE_MAX_MW;
-	rxe->attr.max_raw_ipv6_qp		= RXE_MAX_RAW_IPV6_QP;
-	rxe->attr.max_raw_ethy_qp		= RXE_MAX_RAW_ETHY_QP;
 	rxe->attr.max_mcast_grp			= RXE_MAX_MCAST_GRP;
 	rxe->attr.max_mcast_qp_attach		= RXE_MAX_MCAST_QP_ATTACH;
 	rxe->attr.max_total_mcast_qp_attach	= RXE_MAX_TOT_MCAST_QP_ATTACH;
 	rxe->attr.max_ah			= RXE_MAX_AH;
-	rxe->attr.max_fmr			= RXE_MAX_FMR;
-	rxe->attr.max_map_per_fmr		= RXE_MAX_MAP_PER_FMR;
 	rxe->attr.max_srq			= RXE_MAX_SRQ;
 	rxe->attr.max_srq_wr			= RXE_MAX_SRQ_WR;
 	rxe->attr.max_srq_sge			= RXE_MAX_SRQ_SGE;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ea6a819b7167..35a2baf2f364 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -169,7 +169,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
 	void			*vaddr;
 	int err;
 
-	umem = ib_umem_get(udata, start, length, access, 0);
+	umem = ib_umem_get(udata, start, length, access);
 	if (IS_ERR(umem)) {
 		pr_warn("err %d from rxe_umem_get\n",
 			(int)PTR_ERR(umem));
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index fe5207386700..353c6668249e 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -60,12 +60,8 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
 
 /* default/initial rxe device parameter settings */
 enum rxe_device_param {
-	RXE_FW_VER			= 0,
 	RXE_MAX_MR_SIZE			= -1ull,
 	RXE_PAGE_SIZE_CAP		= 0xfffff000,
-	RXE_VENDOR_ID			= 0,
-	RXE_VENDOR_PART_ID		= 0,
-	RXE_HW_VER			= 0,
 	RXE_MAX_QP			= 0x10000,
 	RXE_MAX_QP_WR			= 0x4000,
 	RXE_MAX_INLINE_DATA		= 400,
@@ -87,21 +83,12 @@ enum rxe_device_param {
 	RXE_MAX_MR			= 256 * 1024,
 	RXE_MAX_PD			= 0x7ffc,
 	RXE_MAX_QP_RD_ATOM		= 128,
-	RXE_MAX_EE_RD_ATOM		= 0,
 	RXE_MAX_RES_RD_ATOM		= 0x3f000,
 	RXE_MAX_QP_INIT_RD_ATOM		= 128,
-	RXE_MAX_EE_INIT_RD_ATOM		= 0,
-	RXE_MAX_EE			= 0,
-	RXE_MAX_RDD			= 0,
-	RXE_MAX_MW			= 0,
-	RXE_MAX_RAW_IPV6_QP		= 0,
-	RXE_MAX_RAW_ETHY_QP		= 0,
 	RXE_MAX_MCAST_GRP		= 8192,
 	RXE_MAX_MCAST_QP_ATTACH		= 56,
 	RXE_MAX_TOT_MCAST_QP_ATTACH	= 0x70000,
 	RXE_MAX_AH			= 100,
-	RXE_MAX_FMR			= 0,
-	RXE_MAX_MAP_PER_FMR		= 0,
 	RXE_MAX_SRQ			= 960,
 	RXE_MAX_SRQ_WR			= 0x4000,
 	RXE_MIN_SRQ_WR			= 1,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 623129f27f5a..9dd4bd7aea92 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -106,6 +106,10 @@ static int rxe_modify_device(struct ib_device *dev,
 {
 	struct rxe_dev *rxe = to_rdev(dev);
 
+	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+		     IB_DEVICE_MODIFY_NODE_DESC))
+		return -EOPNOTSUPP;
+
 	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
 		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
 
@@ -1171,6 +1175,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
 			    rxe->ndev->dev_addr);
 	dev->dev.dma_ops = &dma_virt_ops;
+	dev->dev.dma_parms = &rxe->dma_parms;
+	rxe->dma_parms = (struct device_dma_parameters)
+		{ .max_segment_size = SZ_2G };
 	dma_coerce_mask_and_coherent(&dev->dev,
 				     dma_get_required_mask(&dev->dev));
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5c4b2239129c..95834206c80c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -384,6 +384,7 @@ struct rxe_port {
 struct rxe_dev {
 	struct ib_device	ib_dev;
 	struct ib_device_attr	attr;
+	struct device_dma_parameters dma_parms;
 	int			max_ucontext;
 	int			max_inline_data;
 	struct mutex	usdev_lock;
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index dba4535494ab..b939f489cd46 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -70,6 +70,7 @@ struct siw_pd {
 
 struct siw_device {
 	struct ib_device base_dev;
+	struct device_dma_parameters dma_parms;
 	struct net_device *netdev;
 	struct siw_dev_cap attrs;
 
@@ -98,18 +99,9 @@ struct siw_device {
 	struct work_struct netdev_down;
 };
 
-struct siw_uobj {
-	void *addr;
-	u32 size;
-};
-
 struct siw_ucontext {
 	struct ib_ucontext base_ucontext;
 	struct siw_device *sdev;
-
-	/* xarray of user mappable objects */
-	struct xarray xa;
-	u32 uobj_nextkey;
 };
 
 /*
@@ -149,8 +141,6 @@ struct siw_pbl {
 	struct siw_pble pbe[1];
 };
 
-struct siw_mr;
-
 /*
  * Generic memory representation for registered siw memory.
  * Memory lookup always via higher 24 bit of STag (STag index).
@@ -220,7 +210,7 @@ struct siw_cq {
 	u32 cq_get;
 	u32 num_cqe;
 	bool kernel_verbs;
-	u32 xa_cq_index; /* mmap information for CQE array */
+	struct rdma_user_mmap_entry *cq_entry; /* mmap info for CQE array */
 	u32 id; /* For debugging only */
 };
 
@@ -263,7 +253,7 @@ struct siw_srq {
 	u32 rq_put;
 	u32 rq_get;
 	u32 num_rqe; /* max # of wqe's allowed */
-	u32 xa_srq_index; /* mmap information for SRQ array */
+	struct rdma_user_mmap_entry *srq_entry; /* mmap info for SRQ array */
 	char armed; /* inform user if limit hit */
 	char kernel_verbs; /* '1' if kernel client */
 };
@@ -477,8 +467,8 @@ struct siw_qp {
 		u8 layer : 4, etype : 4;
 		u8 ecode;
 	} term_info;
-	u32 xa_sq_index; /* mmap information for SQE array */
-	u32 xa_rq_index; /* mmap information for RQE array */
+	struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */
+	struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */
 	struct rcu_head rcu;
 };
 
@@ -503,6 +493,11 @@ struct iwarp_msg_info {
 	int (*rx_data)(struct siw_qp *qp);
 };
 
+struct siw_user_mmap_entry {
+	struct rdma_user_mmap_entry rdma_entry;
+	void *address;
+};
+
 /* Global siw parameters. Currently set in siw_main.c */
 extern const bool zcopy_tx;
 extern const bool try_gso;
@@ -607,6 +602,12 @@ static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr)
 	return container_of(base_mr, struct siw_mr, base_mr);
 }
 
+static inline struct siw_user_mmap_entry *
+to_siw_mmap_entry(struct rdma_user_mmap_entry *rdma_mmap)
+{
+	return container_of(rdma_mmap, struct siw_user_mmap_entry, rdma_entry);
+}
+
 static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id)
 {
 	struct siw_qp *qp;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 8c1931a57f4a..3bccfef40e7e 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1373,22 +1373,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
 		rv = -EINVAL;
 		goto error;
 	}
-	if (v4)
-		siw_dbg_qp(qp,
-			   "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n",
-			   pd_len,
-			   &((struct sockaddr_in *)(laddr))->sin_addr,
-			   ntohs(((struct sockaddr_in *)(laddr))->sin_port),
-			   &((struct sockaddr_in *)(raddr))->sin_addr,
-			   ntohs(((struct sockaddr_in *)(raddr))->sin_port));
-	else
-		siw_dbg_qp(qp,
-			   "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n",
-			   pd_len,
-			   &((struct sockaddr_in6 *)(laddr))->sin6_addr,
-			   ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port),
-			   &((struct sockaddr_in6 *)(raddr))->sin6_addr,
-			   ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port));
+	siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr,
+		   raddr);
 
 	rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s);
 	if (rv < 0)
@@ -1867,14 +1853,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog,
 	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
 	cep->state = SIW_EPSTATE_LISTENING;
 
-	if (addr_family == AF_INET)
-		siw_dbg(id->device, "Listen at laddr %pI4 %u\n",
-			&(((struct sockaddr_in *)laddr)->sin_addr),
-			((struct sockaddr_in *)laddr)->sin_port);
-	else
-		siw_dbg(id->device, "Listen at laddr %pI6 %u\n",
-			&(((struct sockaddr_in6 *)laddr)->sin6_addr),
-			((struct sockaddr_in6 *)laddr)->sin6_port);
+	siw_dbg(id->device, "Listen at laddr %pISp\n", laddr);
 
 	return 0;
 
@@ -1935,7 +1914,7 @@ static void siw_drop_listeners(struct iw_cm_id *id)
 /*
  * siw_create_listen - Create resources for a listener's IWCM ID @id
  *
- * Listens on the socket addresses id->local_addr and id->remote_addr.
+ * Listens on the socket address id->local_addr.
  *
  * If the listener's @id provides a specific local IP address, at most one
  * listening socket is created and associated with @id.
@@ -1959,7 +1938,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 	 */
 	if (id->local_addr.ss_family == AF_INET) {
 		struct in_device *in_dev = in_dev_get(dev);
-		struct sockaddr_in s_laddr, *s_raddr;
+		struct sockaddr_in s_laddr;
 		const struct in_ifaddr *ifa;
 
 		if (!in_dev) {
@@ -1967,12 +1946,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 			goto out;
 		}
 		memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
-		s_raddr = (struct sockaddr_in *)&id->remote_addr;
 
-		siw_dbg(id->device,
-			"laddr %pI4:%d, raddr %pI4:%d\n",
-			&s_laddr.sin_addr, ntohs(s_laddr.sin_port),
-			&s_raddr->sin_addr, ntohs(s_raddr->sin_port));
+		siw_dbg(id->device, "laddr %pISp\n", &s_laddr);
 
 		rtnl_lock();
 		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
@@ -1992,17 +1967,13 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 	} else if (id->local_addr.ss_family == AF_INET6) {
 		struct inet6_dev *in6_dev = in6_dev_get(dev);
 		struct inet6_ifaddr *ifp;
-		struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr),
-			*s_raddr = &to_sockaddr_in6(id->remote_addr);
+		struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr);
 
 		if (!in6_dev) {
 			rv = -ENODEV;
 			goto out;
 		}
-		siw_dbg(id->device,
-			"laddr %pI6:%d, raddr %pI6:%d\n",
-			&s_laddr->sin6_addr, ntohs(s_laddr->sin6_port),
-			&s_raddr->sin6_addr, ntohs(s_raddr->sin6_port));
+		siw_dbg(id->device, "laddr %pISp\n", &s_laddr);
 
 		rtnl_lock();
 		list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 05a92f997f60..c147f0613d95 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 
+#include <net/addrconf.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/rdma_netlink.h>
@@ -248,24 +249,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
 	return NULL;
 }
 
-static void siw_verbs_sq_flush(struct ib_qp *base_qp)
-{
-	struct siw_qp *qp = to_siw_qp(base_qp);
-
-	down_write(&qp->state_lock);
-	siw_sq_flush(qp);
-	up_write(&qp->state_lock);
-}
-
-static void siw_verbs_rq_flush(struct ib_qp *base_qp)
-{
-	struct siw_qp *qp = to_siw_qp(base_qp);
-
-	down_write(&qp->state_lock);
-	siw_rq_flush(qp);
-	up_write(&qp->state_lock);
-}
-
 static const struct ib_device_ops siw_device_ops = {
 	.owner = THIS_MODULE,
 	.uverbs_abi_ver = SIW_ABI_VERSION,
@@ -284,8 +267,6 @@ static const struct ib_device_ops siw_device_ops = {
 	.destroy_cq = siw_destroy_cq,
 	.destroy_qp = siw_destroy_qp,
 	.destroy_srq = siw_destroy_srq,
-	.drain_rq = siw_verbs_rq_flush,
-	.drain_sq = siw_verbs_sq_flush,
 	.get_dma_mr = siw_get_dma_mr,
 	.get_port_immutable = siw_get_port_immutable,
 	.iw_accept = siw_accept,
@@ -298,6 +279,7 @@ static const struct ib_device_ops siw_device_ops = {
 	.iw_rem_ref = siw_qp_put_ref,
 	.map_mr_sg = siw_map_mr_sg,
 	.mmap = siw_mmap,
+	.mmap_free = siw_mmap_free,
 	.modify_qp = siw_verbs_modify_qp,
 	.modify_srq = siw_modify_srq,
 	.poll_cq = siw_poll_cq,
@@ -350,15 +332,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
 	sdev->netdev = netdev;
 
 	if (netdev->type != ARPHRD_LOOPBACK) {
-		memcpy(&base_dev->node_guid, netdev->dev_addr, 6);
+		addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
+				    netdev->dev_addr);
 	} else {
 		/*
 		 * The loopback device does not have a HW address,
 		 * but connection mangagement lib expects gid != 0
 		 */
-		size_t gidlen = min_t(size_t, strlen(base_dev->name), 6);
+		size_t len = min_t(size_t, strlen(base_dev->name), 6);
+		char addr[6] = { };
 
-		memcpy(&base_dev->node_guid, base_dev->name, gidlen);
+		memcpy(addr, base_dev->name, len);
+		addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
+				    addr);
 	}
 	base_dev->uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -397,6 +383,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
 	base_dev->phys_port_cnt = 1;
 	base_dev->dev.parent = parent;
 	base_dev->dev.dma_ops = &dma_virt_ops;
+	base_dev->dev.dma_parms = &sdev->dma_parms;
+	sdev->dma_parms = (struct device_dma_parameters)
+		{ .max_segment_size = SZ_2G };
 	base_dev->num_comp_vectors = num_possible_cpus();
 
 	ib_set_device_ops(base_dev, &siw_device_ops);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index b18a677832e1..5fd6d6499b3d 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -34,44 +34,19 @@ static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = {
 	[IB_QPS_ERR] = "ERR"
 };
 
-static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size)
+void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 {
-	struct siw_uobj *uobj;
-	struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY);
-	u32 key;
+	struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry);
 
-	uobj = kzalloc(sizeof(*uobj), GFP_KERNEL);
-	if (!uobj)
-		return SIW_INVAL_UOBJ_KEY;
-
-	if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey,
-			    GFP_KERNEL) < 0) {
-		kfree(uobj);
-		return SIW_INVAL_UOBJ_KEY;
-	}
-	uobj->size = PAGE_ALIGN(size);
-	uobj->addr = vaddr;
-
-	return key;
-}
-
-static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx,
-				     unsigned long off, u32 size)
-{
-	struct siw_uobj *uobj = xa_load(&uctx->xa, off);
-
-	if (uobj && uobj->size == size)
-		return uobj;
-
-	return NULL;
+	kfree(entry);
 }
 
 int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
 {
 	struct siw_ucontext *uctx = to_siw_ctx(ctx);
-	struct siw_uobj *uobj;
-	unsigned long off = vma->vm_pgoff;
-	int size = vma->vm_end - vma->vm_start;
+	size_t size = vma->vm_end - vma->vm_start;
+	struct rdma_user_mmap_entry *rdma_entry;
+	struct siw_user_mmap_entry *entry;
 	int rv = -EINVAL;
 
 	/*
@@ -79,18 +54,25 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
 	 */
 	if (vma->vm_start & (PAGE_SIZE - 1)) {
 		pr_warn("siw: mmap not page aligned\n");
-		goto out;
+		return -EINVAL;
 	}
-	uobj = siw_get_uobj(uctx, off, size);
-	if (!uobj) {
-		siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n",
-			off, size);
+	rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma);
+	if (!rdma_entry) {
+		siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n",
+			vma->vm_pgoff, size);
+		return -EINVAL;
+	}
+	entry = to_siw_mmap_entry(rdma_entry);
+
+	rv = remap_vmalloc_range(vma, entry->address, 0);
+	if (rv) {
+		pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff,
+			size);
 		goto out;
 	}
-	rv = remap_vmalloc_range(vma, uobj->addr, 0);
-	if (rv)
-		pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size);
 out:
+	rdma_user_mmap_entry_put(rdma_entry);
+
 	return rv;
 }
 
@@ -105,8 +87,6 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata)
 		rv = -ENOMEM;
 		goto err_out;
 	}
-	xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC);
-	ctx->uobj_nextkey = 0;
 	ctx->sdev = sdev;
 
 	uresp.dev_id = sdev->vendor_part_id;
@@ -135,19 +115,7 @@ err_out:
 void siw_dealloc_ucontext(struct ib_ucontext *base_ctx)
 {
 	struct siw_ucontext *uctx = to_siw_ctx(base_ctx);
-	void *entry;
-	unsigned long index;
 
-	/*
-	 * Make sure all user mmap objects are gone. Since QP, CQ
-	 * and SRQ destroy routines destroy related objects, nothing
-	 * should be found here.
-	 */
-	xa_for_each(&uctx->xa, index, entry) {
-		kfree(xa_erase(&uctx->xa, index));
-		pr_warn("siw: dropping orphaned uobj at %lu\n", index);
-	}
-	xa_destroy(&uctx->xa);
 	atomic_dec(&uctx->sdev->num_ctx);
 }
 
@@ -293,6 +261,33 @@ void siw_qp_put_ref(struct ib_qp *base_qp)
 	siw_qp_put(to_siw_qp(base_qp));
 }
 
+static struct rdma_user_mmap_entry *
+siw_mmap_entry_insert(struct siw_ucontext *uctx,
+		      void *address, size_t length,
+		      u64 *offset)
+{
+	struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	int rv;
+
+	*offset = SIW_INVAL_UOBJ_KEY;
+	if (!entry)
+		return NULL;
+
+	entry->address = address;
+
+	rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext,
+					 &entry->rdma_entry,
+					 length);
+	if (rv) {
+		kfree(entry);
+		return NULL;
+	}
+
+	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+	return &entry->rdma_entry;
+}
+
 /*
  * siw_create_qp()
  *
@@ -317,6 +312,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	struct siw_cq *scq = NULL, *rcq = NULL;
 	unsigned long flags;
 	int num_sqe, num_rqe, rv = 0;
+	size_t length;
 
 	siw_dbg(base_dev, "create new QP\n");
 
@@ -380,8 +376,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	spin_lock_init(&qp->orq_lock);
 
 	qp->kernel_verbs = !udata;
-	qp->xa_sq_index = SIW_INVAL_UOBJ_KEY;
-	qp->xa_rq_index = SIW_INVAL_UOBJ_KEY;
 
 	rv = siw_qp_add(sdev, qp);
 	if (rv)
@@ -458,22 +452,27 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		uresp.qp_id = qp_id(qp);
 
 		if (qp->sendq) {
-			qp->xa_sq_index =
-				siw_create_uobj(uctx, qp->sendq,
-					num_sqe * sizeof(struct siw_sqe));
+			length = num_sqe * sizeof(struct siw_sqe);
+			qp->sq_entry =
+				siw_mmap_entry_insert(uctx, qp->sendq,
+						      length, &uresp.sq_key);
+			if (!qp->sq_entry) {
+				rv = -ENOMEM;
+				goto err_out_xa;
+			}
 		}
+
 		if (qp->recvq) {
-			qp->xa_rq_index =
-				 siw_create_uobj(uctx, qp->recvq,
-					num_rqe * sizeof(struct siw_rqe));
-		}
-		if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY ||
-		    qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) {
-			rv = -ENOMEM;
-			goto err_out_xa;
+			length = num_rqe * sizeof(struct siw_rqe);
+			qp->rq_entry =
+				siw_mmap_entry_insert(uctx, qp->recvq,
+						      length, &uresp.rq_key);
+			if (!qp->rq_entry) {
+				uresp.sq_key = SIW_INVAL_UOBJ_KEY;
+				rv = -ENOMEM;
+				goto err_out_xa;
+			}
 		}
-		uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT;
-		uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT;
 
 		if (udata->outlen < sizeof(uresp)) {
 			rv = -EINVAL;
@@ -501,11 +500,10 @@ err_out:
 	kfree(siw_base_qp);
 
 	if (qp) {
-		if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY)
-			kfree(xa_erase(&uctx->xa, qp->xa_sq_index));
-		if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY)
-			kfree(xa_erase(&uctx->xa, qp->xa_rq_index));
-
+		if (uctx) {
+			rdma_user_mmap_entry_remove(qp->sq_entry);
+			rdma_user_mmap_entry_remove(qp->rq_entry);
+		}
 		vfree(qp->sendq);
 		vfree(qp->recvq);
 		kfree(qp);
@@ -618,10 +616,10 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
 	qp->attrs.flags |= SIW_QP_IN_DESTROY;
 	qp->rx_stream.rx_suspend = 1;
 
-	if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY)
-		kfree(xa_erase(&uctx->xa, qp->xa_sq_index));
-	if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY)
-		kfree(xa_erase(&uctx->xa, qp->xa_rq_index));
+	if (uctx) {
+		rdma_user_mmap_entry_remove(qp->sq_entry);
+		rdma_user_mmap_entry_remove(qp->rq_entry);
+	}
 
 	down_write(&qp->state_lock);
 
@@ -685,6 +683,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
 	return bytes;
 }
 
+/* Complete SQ WR's without processing */
+static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
+			   const struct ib_send_wr **bad_wr)
+{
+	struct siw_sqe sqe = {};
+	int rv = 0;
+
+	while (wr) {
+		sqe.id = wr->wr_id;
+		sqe.opcode = wr->opcode;
+		rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+		if (rv) {
+			if (bad_wr)
+				*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	return rv;
+}
+
+/* Complete RQ WR's without processing */
+static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr,
+			   const struct ib_recv_wr **bad_wr)
+{
+	struct siw_rqe rqe = {};
+	int rv = 0;
+
+	while (wr) {
+		rqe.id = wr->wr_id;
+		rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
+		if (rv) {
+			if (bad_wr)
+				*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	return rv;
+}
+
 /*
  * siw_post_send()
  *
@@ -703,26 +742,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
 	unsigned long flags;
 	int rv = 0;
 
+	if (wr && !qp->kernel_verbs) {
+		siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
 	/*
 	 * Try to acquire QP state lock. Must be non-blocking
 	 * to accommodate kernel clients needs.
 	 */
 	if (!down_read_trylock(&qp->state_lock)) {
-		*bad_wr = wr;
-		siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state);
-		return -ENOTCONN;
+		if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+			/*
+			 * ERROR state is final, so we can be sure
+			 * this state will not change as long as the QP
+			 * exists.
+			 *
+			 * This handles an ib_drain_sq() call with
+			 * a concurrent request to set the QP state
+			 * to ERROR.
+			 */
+			rv = siw_sq_flush_wr(qp, wr, bad_wr);
+		} else {
+			siw_dbg_qp(qp, "QP locked, state %d\n",
+				   qp->attrs.state);
+			*bad_wr = wr;
+			rv = -ENOTCONN;
+		}
+		return rv;
 	}
 	if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
+		if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+			/*
+			 * Immediately flush this WR to CQ, if QP
+			 * is in ERROR state. SQ is guaranteed to
+			 * be empty, so WR complets in-order.
+			 *
+			 * Typically triggered by ib_drain_sq().
+			 */
+			rv = siw_sq_flush_wr(qp, wr, bad_wr);
+		} else {
+			siw_dbg_qp(qp, "QP out of state %d\n",
+				   qp->attrs.state);
+			*bad_wr = wr;
+			rv = -ENOTCONN;
+		}
 		up_read(&qp->state_lock);
-		*bad_wr = wr;
-		siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state);
-		return -ENOTCONN;
-	}
-	if (wr && !qp->kernel_verbs) {
-		siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
-		up_read(&qp->state_lock);
-		*bad_wr = wr;
-		return -EINVAL;
+		return rv;
 	}
 	spin_lock_irqsave(&qp->sq_lock, flags);
 
@@ -917,24 +984,54 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
 		*bad_wr = wr;
 		return -EOPNOTSUPP; /* what else from errno.h? */
 	}
+	if (!qp->kernel_verbs) {
+		siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
 	/*
 	 * Try to acquire QP state lock. Must be non-blocking
 	 * to accommodate kernel clients needs.
 	 */
 	if (!down_read_trylock(&qp->state_lock)) {
-		*bad_wr = wr;
-		return -ENOTCONN;
-	}
-	if (!qp->kernel_verbs) {
-		siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
-		up_read(&qp->state_lock);
-		*bad_wr = wr;
-		return -EINVAL;
+		if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+			/*
+			 * ERROR state is final, so we can be sure
+			 * this state will not change as long as the QP
+			 * exists.
+			 *
+			 * This handles an ib_drain_rq() call with
+			 * a concurrent request to set the QP state
+			 * to ERROR.
+			 */
+			rv = siw_rq_flush_wr(qp, wr, bad_wr);
+		} else {
+			siw_dbg_qp(qp, "QP locked, state %d\n",
+				   qp->attrs.state);
+			*bad_wr = wr;
+			rv = -ENOTCONN;
+		}
+		return rv;
 	}
 	if (qp->attrs.state > SIW_QP_STATE_RTS) {
+		if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+			/*
+			 * Immediately flush this WR to CQ, if QP
+			 * is in ERROR state. RQ is guaranteed to
+			 * be empty, so WR complets in-order.
+			 *
+			 * Typically triggered by ib_drain_rq().
+			 */
+			rv = siw_rq_flush_wr(qp, wr, bad_wr);
+		} else {
+			siw_dbg_qp(qp, "QP out of state %d\n",
+				   qp->attrs.state);
+			*bad_wr = wr;
+			rv = -ENOTCONN;
+		}
 		up_read(&qp->state_lock);
-		*bad_wr = wr;
-		return -EINVAL;
+		return rv;
 	}
 	/*
 	 * Serialize potentially multiple producers.
@@ -991,8 +1088,8 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
 
 	siw_cq_flush(cq);
 
-	if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY)
-		kfree(xa_erase(&ctx->xa, cq->xa_cq_index));
+	if (ctx)
+		rdma_user_mmap_entry_remove(cq->cq_entry);
 
 	atomic_dec(&sdev->num_cq);
 
@@ -1029,7 +1126,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 	size = roundup_pow_of_two(size);
 	cq->base_cq.cqe = size;
 	cq->num_cqe = size;
-	cq->xa_cq_index = SIW_INVAL_UOBJ_KEY;
 
 	if (!udata) {
 		cq->kernel_verbs = 1;
@@ -1055,16 +1151,17 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 		struct siw_ucontext *ctx =
 			rdma_udata_to_drv_context(udata, struct siw_ucontext,
 						  base_ucontext);
+		size_t length = size * sizeof(struct siw_cqe) +
+			sizeof(struct siw_cq_ctrl);
 
-		cq->xa_cq_index =
-			siw_create_uobj(ctx, cq->queue,
-					size * sizeof(struct siw_cqe) +
-						sizeof(struct siw_cq_ctrl));
-		if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) {
+		cq->cq_entry =
+			siw_mmap_entry_insert(ctx, cq->queue,
+					      length, &uresp.cq_key);
+		if (!cq->cq_entry) {
 			rv = -ENOMEM;
 			goto err_out;
 		}
-		uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT;
+
 		uresp.cq_id = cq->id;
 		uresp.num_cqe = size;
 
@@ -1085,8 +1182,8 @@ err_out:
 		struct siw_ucontext *ctx =
 			rdma_udata_to_drv_context(udata, struct siw_ucontext,
 						  base_ucontext);
-		if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY)
-			kfree(xa_erase(&ctx->xa, cq->xa_cq_index));
+		if (ctx)
+			rdma_user_mmap_entry_remove(cq->cq_entry);
 		vfree(cq->queue);
 	}
 	atomic_dec(&sdev->num_cq);
@@ -1490,7 +1587,6 @@ int siw_create_srq(struct ib_srq *base_srq,
 	}
 	srq->max_sge = attrs->max_sge;
 	srq->num_rqe = roundup_pow_of_two(attrs->max_wr);
-	srq->xa_srq_index = SIW_INVAL_UOBJ_KEY;
 	srq->limit = attrs->srq_limit;
 	if (srq->limit)
 		srq->armed = 1;
@@ -1509,15 +1605,16 @@ int siw_create_srq(struct ib_srq *base_srq,
 	}
 	if (udata) {
 		struct siw_uresp_create_srq uresp = {};
+		size_t length = srq->num_rqe * sizeof(struct siw_rqe);
 
-		srq->xa_srq_index = siw_create_uobj(
-			ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe));
-
-		if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) {
+		srq->srq_entry =
+			siw_mmap_entry_insert(ctx, srq->recvq,
+					      length, &uresp.srq_key);
+		if (!srq->srq_entry) {
 			rv = -ENOMEM;
 			goto err_out;
 		}
-		uresp.srq_key = srq->xa_srq_index;
+
 		uresp.num_rqe = srq->num_rqe;
 
 		if (udata->outlen < sizeof(uresp)) {
@@ -1536,8 +1633,8 @@ int siw_create_srq(struct ib_srq *base_srq,
 
 err_out:
 	if (srq->recvq) {
-		if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY)
-			kfree(xa_erase(&ctx->xa, srq->xa_srq_index));
+		if (ctx)
+			rdma_user_mmap_entry_remove(srq->srq_entry);
 		vfree(srq->recvq);
 	}
 	atomic_dec(&sdev->num_srq);
@@ -1623,9 +1720,8 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
 		rdma_udata_to_drv_context(udata, struct siw_ucontext,
 					  base_ucontext);
 
-	if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY)
-		kfree(xa_erase(&ctx->xa, srq->xa_srq_index));
-
+	if (ctx)
+		rdma_user_mmap_entry_remove(srq->srq_entry);
 	vfree(srq->recvq);
 	atomic_dec(&sdev->num_srq);
 }
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index 1910869281cb..1a731989fad6 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -83,6 +83,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
 int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
 		      const struct ib_recv_wr **bad_wr);
 int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
+void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 void siw_qp_event(struct siw_qp *qp, enum ib_event_type type);
 void siw_cq_event(struct siw_cq *cq, enum ib_event_type type);
 void siw_srq_event(struct siw_srq *srq, enum ib_event_type type);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ac0583ff280d..e5f438ab716c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2019,6 +2019,15 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
 	return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
 }
 
+static int ipoib_get_vf_guid(struct net_device *dev, int vf,
+			     struct ifla_vf_guid *node_guid,
+			     struct ifla_vf_guid *port_guid)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+	return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid);
+}
+
 static int ipoib_get_vf_stats(struct net_device *dev, int vf,
 			      struct ifla_vf_stats *vf_stats)
 {
@@ -2045,6 +2054,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
 	.ndo_set_vf_link_state	 = ipoib_set_vf_link_state,
 	.ndo_get_vf_config	 = ipoib_get_vf_config,
 	.ndo_get_vf_stats	 = ipoib_get_vf_stats,
+	.ndo_get_vf_guid	 = ipoib_get_vf_guid,
 	.ndo_set_vf_guid	 = ipoib_set_vf_guid,
 	.ndo_set_mac_address	 = ipoib_set_mac,
 	.ndo_get_stats64	 = ipoib_get_stats,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2e72fc5af157..3690e28cc7ea 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -646,13 +646,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 		if (ib_conn->pi_support) {
 			u32 sig_caps = ib_dev->attrs.sig_prot_cap;
 
+			shost->sg_prot_tablesize = shost->sg_tablesize;
 			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
 			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
 						   SHOST_DIX_GUARD_CRC);
 		}
 
 		if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
-			shost->virt_boundary_mask = ~MASK_4K;
+			shost->virt_boundary_mask = SZ_4K - 1;
 
 		if (iscsi_host_add(shost, ib_dev->dev.parent)) {
 			mutex_unlock(&iser_conn->state_mutex);
@@ -785,7 +786,7 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
  * iscsi_iser_ep_connect() - Initiate iSER connection establishment
  * @shost:          scsi_host
  * @dst_addr:       destination address
- * @non-blocking:   indicate if routine can block
+ * @non_blocking:   indicate if routine can block
  *
  * Allocate an iscsi endpoint, an iser_conn structure and bind them.
  * After that start RDMA connection establishment via rdma_cm. We
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 52ce63592dcf..029c00163442 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -96,16 +96,12 @@
 #define iser_err(fmt, arg...) \
 	pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
-#define SHIFT_4K	12
-#define SIZE_4K	(1ULL << SHIFT_4K)
-#define MASK_4K	(~(SIZE_4K-1))
-
 /* Default support is 512KB I/O size */
 #define ISER_DEF_MAX_SECTORS		1024
 #define ISCSI_ISER_DEF_SG_TABLESIZE                                            \
-	((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> SHIFT_4K)
+	((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K))
 /* Maximum support is 16MB I/O size */
-#define ISCSI_ISER_MAX_SG_TABLESIZE	((32768 * SECTOR_SIZE) >> SHIFT_4K)
+#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K))
 
 #define ISER_DEF_XMIT_CMDS_DEFAULT		512
 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
@@ -232,15 +228,16 @@ enum iser_desc_type {
  * @iser_header:   iser header
  * @iscsi_header:  iscsi header
  * @type:          command/control/dataout
- * @dam_addr:      header buffer dma_address
+ * @dma_addr:      header buffer dma_address
  * @tx_sg:         sg[0] points to iser/iscsi headers
  *                 sg[1] optionally points to either of immediate data
  *                 unsolicited data-out or control
  * @num_sge:       number sges used on this TX task
+ * @cqe:           completion handler
  * @mapped:        Is the task header mapped
- * reg_wr:         registration WR
- * send_wr:        send WR
- * inv_wr:         invalidate WR
+ * @reg_wr:        registration WR
+ * @send_wr:       send WR
+ * @inv_wr:        invalidate WR
  */
 struct iser_tx_desc {
 	struct iser_ctrl             iser_header;
@@ -267,6 +264,7 @@ struct iser_tx_desc {
  * @data:          received data segment
  * @dma_addr:      receive buffer dma address
  * @rx_sg:         ib_sge of receive buffer
+ * @cqe:           completion handler
  * @pad:           for sense data TODO: Modify to maximum sense length supported
  */
 struct iser_rx_desc {
@@ -283,9 +281,9 @@ struct iser_rx_desc {
  * struct iser_login_desc - iSER login descriptor
  *
  * @req:           pointer to login request buffer
- * @resp:          pointer to login response buffer
+ * @rsp:           pointer to login response buffer
  * @req_dma:       DMA address of login request buffer
- * @rsp_dma:      DMA address of login response buffer
+ * @rsp_dma:       DMA address of login response buffer
  * @sge:           IB sge for login post recv
  * @cqe:           completion handler
  */
@@ -315,12 +313,12 @@ struct iser_comp {
 };
 
 /**
- * struct iser_device - Memory registration operations
+ * struct iser_reg_ops - Memory registration operations
  *     per-device registration schemes
  *
  * @alloc_reg_res:     Allocate registration resources
  * @free_reg_res:      Free registration resources
- * @fast_reg_mem:      Register memory buffers
+ * @reg_mem:           Register memory buffers
  * @unreg_mem:         Un-register memory buffers
  * @reg_desc_get:      Get a registration descriptor for pool
  * @reg_desc_put:      Get a registration descriptor to pool
@@ -369,7 +367,7 @@ struct iser_device {
 };
 
 /**
- * struct iser_reg_resources - Fast registration recources
+ * struct iser_reg_resources - Fast registration resources
  *
  * @mr:         memory region
  * @fmr_pool:   pool of fmrs
@@ -402,7 +400,7 @@ struct iser_fr_desc {
 };
 
 /**
- * struct iser_fr_pool: connection fast registration pool
+ * struct iser_fr_pool - connection fast registration pool
  *
  * @list:                list of fastreg descriptors
  * @lock:                protects fmr/fastreg pool
@@ -427,6 +425,7 @@ struct iser_fr_pool {
  * @comp:                iser completion context
  * @fr_pool:             connection fast registration poool
  * @pi_support:          Indicate device T10-PI support
+ * @reg_cqe:             completion handler
  */
 struct ib_conn {
 	struct rdma_cm_id           *cma_id;
@@ -467,6 +466,7 @@ struct ib_conn {
  * @num_rx_descs:     number of rx descriptors
  * @scsi_sg_tablesize: scsi host sg_tablesize
  * @pages_per_mr:     maximum pages available for registration
+ * @snd_w_inv:        connection uses remote invalidation
  */
 struct iser_conn {
 	struct ib_conn		     ib_conn;
@@ -525,7 +525,7 @@ struct iser_page_vec {
 };
 
 /**
- * struct iser_global: iSER global context
+ * struct iser_global - iSER global context
  *
  * @device_list_mutex:    protects device_list
  * @device_list:          iser devices global list
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5cbb4b3a0566..4a7045bb0831 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -358,6 +358,8 @@ static inline bool iser_signal_comp(u8 sig_count)
 
 /**
  * iser_send_command - send command PDU
+ * @conn: link to matching iscsi connection
+ * @task: SCSI command task
  */
 int iser_send_command(struct iscsi_conn *conn,
 		      struct iscsi_task *task)
@@ -429,6 +431,9 @@ send_command_error:
 
 /**
  * iser_send_data_out - send data out PDU
+ * @conn: link to matching iscsi connection
+ * @task: SCSI command task
+ * @hdr: pointer to the LLD's iSCSI message header
  */
 int iser_send_data_out(struct iscsi_conn *conn,
 		       struct iscsi_task *task,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2cc89a9b9e9b..0f74dc6d12fa 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -170,7 +170,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 	dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
 	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
-	if (data->dma_nents == 0) {
+	if (unlikely(data->dma_nents == 0)) {
 		iser_err("dma_map_sg failed!!!\n");
 		return -EINVAL;
 	}
@@ -237,7 +237,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
 	int ret, plen;
 
 	page_vec->npages = 0;
-	page_vec->fake_mr.page_size = SIZE_4K;
+	page_vec->fake_mr.page_size = SZ_4K;
 	plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
 			      mem->dma_nents, NULL, iser_set_page);
 	if (unlikely(plen < mem->dma_nents)) {
@@ -451,7 +451,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 
 	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
-	n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K);
+	n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SZ_4K);
 	if (unlikely(n != mem->dma_nents)) {
 		iser_err("failed to map sg (%d/%d)\n",
 			 n, mem->dma_nents);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a6548de0e218..1f4a37a3c2b3 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -58,12 +58,12 @@ static void iser_event_handler(struct ib_event_handler *handler,
 		dev_name(&event->device->dev), event->element.port_num);
 }
 
-/**
+/*
  * iser_create_device_ib_res - creates Protection Domain (PD), Completion
  * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
- * the adapator.
+ * the adaptor.
  *
- * returns 0 on success, -1 on failure
+ * Return: 0 on success, -1 on failure
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
@@ -124,9 +124,9 @@ comps_err:
 	return -1;
 }
 
-/**
+/*
  * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
- * CQ and PD created with the device associated with the adapator.
+ * CQ and PD created with the device associated with the adaptor.
  */
 static void iser_free_device_ib_res(struct iser_device *device)
 {
@@ -149,8 +149,11 @@ static void iser_free_device_ib_res(struct iser_device *device)
 
 /**
  * iser_alloc_fmr_pool - Creates FMR pool and page_vector
+ * @ib_conn: connection RDMA resources
+ * @cmds_max: max number of SCSI commands for this connection
+ * @size: max number of pages per map request
  *
- * returns 0 on success, or errno code on failure
+ * Return: 0 on success, or errno code on failure
  */
 int iser_alloc_fmr_pool(struct ib_conn *ib_conn,
 			unsigned cmds_max,
@@ -180,7 +183,7 @@ int iser_alloc_fmr_pool(struct ib_conn *ib_conn,
 
 	page_vec->pages = (u64 *)(page_vec + 1);
 
-	params.page_shift        = SHIFT_4K;
+	params.page_shift        = ilog2(SZ_4K);
 	params.max_pages_per_fmr = size;
 	/* make the pool size twice the max number of SCSI commands *
 	 * the ML is expected to queue, watermark for unmap at 50%  */
@@ -215,6 +218,7 @@ err_frpl:
 
 /**
  * iser_free_fmr_pool - releases the FMR pool and page vec
+ * @ib_conn: connection RDMA resources
  */
 void iser_free_fmr_pool(struct ib_conn *ib_conn)
 {
@@ -295,7 +299,11 @@ static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc)
 /**
  * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors
  * for fast registration work requests.
- * returns 0 on success, or errno code on failure
+ * @ib_conn: connection RDMA resources
+ * @cmds_max: max number of SCSI commands for this connection
+ * @size: max number of pages per map request
+ *
+ * Return: 0 on success, or errno code on failure
  */
 int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
 			    unsigned cmds_max,
@@ -332,6 +340,7 @@ err:
 
 /**
  * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
+ * @ib_conn: connection RDMA resources
  */
 void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 {
@@ -355,10 +364,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 			  fr_pool->size - i);
 }
 
-/**
+/*
  * iser_create_ib_conn_res - Queue-Pair (QP)
  *
- * returns 0 on success, -1 on failure
+ * Return: 0 on success, -1 on failure
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
@@ -436,7 +445,7 @@ out_err:
 	return ret;
 }
 
-/**
+/*
  * based on the resolved device node GUID see if there already allocated
  * device for this device. If there's no such, create one.
  */
@@ -487,9 +496,9 @@ static void iser_device_try_release(struct iser_device *device)
 	mutex_unlock(&ig.device_list_mutex);
 }
 
-/**
+/*
  * Called with state mutex held
- **/
+ */
 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
 				     enum iser_conn_state comp,
 				     enum iser_conn_state exch)
@@ -561,7 +570,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
 }
 
 /**
- * Frees all conn objects and deallocs conn descriptor
+ * iser_conn_release - Frees all conn objects and deallocs conn descriptor
+ * @iser_conn: iSER connection context
  */
 void iser_conn_release(struct iser_conn *iser_conn)
 {
@@ -595,7 +605,10 @@ void iser_conn_release(struct iser_conn *iser_conn)
 }
 
 /**
- * triggers start of the disconnect procedures and wait for them to be done
+ * iser_conn_terminate - triggers start of the disconnect procedures and
+ * waits for them to be done
+ * @iser_conn: iSER connection context
+ *
  * Called with state mutex held
  */
 int iser_conn_terminate(struct iser_conn *iser_conn)
@@ -632,9 +645,9 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
 	return 1;
 }
 
-/**
+/*
  * Called with state mutex held
- **/
+ */
 static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
 	struct iser_conn *iser_conn;
@@ -670,7 +683,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 	else
 		max_num_sg = attr->max_fast_reg_page_list_len;
 
-	sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
+	sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K);
 	if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
 		sup_sg_tablesize =
 			min_t(
@@ -684,9 +697,9 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 		iser_conn->scsi_sg_tablesize + reserved_mr_pages;
 }
 
-/**
+/*
  * Called with state mutex held
- **/
+ */
 static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
 	struct iser_device *device;
@@ -732,9 +745,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 	}
 }
 
-/**
+/*
  * Called with state mutex held
- **/
+ */
 static void iser_route_handler(struct rdma_cm_id *cma_id)
 {
 	struct rdma_conn_param conn_param;
@@ -1019,7 +1032,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 
 	ib_conn->post_recv_buf_count += count;
 	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL);
-	if (ib_ret) {
+	if (unlikely(ib_ret)) {
 		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
 		ib_conn->post_recv_buf_count -= count;
 	} else
@@ -1030,9 +1043,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 
 
 /**
- * iser_start_send - Initiate a Send DTO operation
+ * iser_post_send - Initiate a Send DTO operation
+ * @ib_conn: connection RDMA resources
+ * @tx_desc: iSER TX descriptor
+ * @signal: true to send work request as SIGNALED
  *
- * returns 0 on success, -1 on failure
+ * Return: 0 on success, -1 on failure
  */
 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 		   bool signal)
@@ -1060,7 +1076,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 		first_wr = wr;
 
 	ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL);
-	if (ib_ret)
+	if (unlikely(ib_ret))
 		iser_err("ib_post_send failed, ret:%d opcode:%d\n",
 			 ib_ret, wr->opcode);
 
@@ -1081,7 +1097,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 		ret = ib_check_mr_status(desc->rsc.sig_mr,
 					 IB_MR_CHECK_SIG_STATUS, &mr_status);
 		if (ret) {
-			pr_err("ib_check_mr_status failed, ret %d\n", ret);
+			iser_err("ib_check_mr_status failed, ret %d\n", ret);
 			/* Not a lot we can do, return ambiguous guard error */
 			*sector = 0;
 			return 0x1;
@@ -1093,7 +1109,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 			sector_div(sector_off, sector_size + 8);
 			*sector = scsi_get_lba(iser_task->sc) + sector_off;
 
-			pr_err("PI error found type %d at sector %llx "
+			iser_err("PI error found type %d at sector %llx "
 			       "expected %x vs actual %x\n",
 			       mr_status.sig_err.err_type,
 			       (unsigned long long)*sector,
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
index 43ac61ffef4a..6dbc08e1a6a6 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -70,7 +70,7 @@
 
 struct opa_vnic_adapter;
 
-/**
+/*
  * struct __opa_vesw_info - OPA vnic virtual switch info
  *
  * Same as opa_vesw_info without bitwise attribute.
@@ -96,7 +96,7 @@ struct __opa_vesw_info {
 	u8   rsvd4[2];
 } __packed;
 
-/**
+/*
  * struct __opa_per_veswport_info - OPA vnic per port info
  *
  * Same as opa_per_veswport_info without bitwise attribute.
@@ -136,7 +136,7 @@ struct __opa_per_veswport_info {
 	u8   rsvd3[8];
 } __packed;
 
-/**
+/*
  * struct __opa_veswport_info - OPA vnic port info
  *
  * Same as opa_veswport_info without bitwise attribute.
@@ -146,7 +146,7 @@ struct __opa_veswport_info {
 	struct __opa_per_veswport_info    vport;
 };
 
-/**
+/*
  * struct __opa_veswport_trap - OPA vnic trap info
  *
  * Same as opa_veswport_trap without bitwise attribute.
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b5960351bec0..b7f7a5f7bd98 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -174,9 +174,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
 	int tmo = *(int *)kp->arg;
 
 	if (tmo >= 0)
-		return sprintf(buffer, "%d", tmo);
+		return sprintf(buffer, "%d\n", tmo);
 	else
-		return sprintf(buffer, "off");
+		return sprintf(buffer, "off\n");
 }
 
 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
@@ -352,11 +352,11 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
 
 	init_completion(&ch->done);
 	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
-				(struct sockaddr *)&target->rdma_cm.src : NULL,
-				(struct sockaddr *)&target->rdma_cm.dst,
+				&target->rdma_cm.src.sa : NULL,
+				&target->rdma_cm.dst.sa,
 				SRP_PATH_REC_TIMEOUT_MS);
 	if (ret) {
-		pr_err("No route available from %pIS to %pIS (%d)\n",
+		pr_err("No route available from %pISpsc to %pISpsc (%d)\n",
 		       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
 		goto out;
 	}
@@ -366,7 +366,7 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
 
 	ret = ch->status;
 	if (ret) {
-		pr_err("Resolving address %pIS failed (%d)\n",
+		pr_err("Resolving address %pISpsc failed (%d)\n",
 		       &target->rdma_cm.dst, ret);
 		goto out;
 	}
@@ -552,6 +552,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
 {
 	struct srp_target_port *target = ch->target;
 	struct srp_device *dev = target->srp_host->srp_dev;
+	const struct ib_device_attr *attr = &dev->dev->attrs;
 	struct ib_qp_init_attr *init_attr;
 	struct ib_cq *recv_cq, *send_cq;
 	struct ib_qp *qp;
@@ -583,12 +584,14 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
 	init_attr->cap.max_send_wr     = m * target->queue_size;
 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
 	init_attr->cap.max_recv_sge    = 1;
-	init_attr->cap.max_send_sge    = SRP_MAX_SGE;
+	init_attr->cap.max_send_sge    = min(SRP_MAX_SGE, attr->max_send_sge);
 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
 	init_attr->qp_type             = IB_QPT_RC;
 	init_attr->send_cq             = send_cq;
 	init_attr->recv_cq             = recv_cq;
 
+	ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U);
+
 	if (target->using_rdma_cm) {
 		ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
 		qp = ch->rdma_cm.cm_id->qp;
@@ -1362,7 +1365,8 @@ static void srp_terminate_io(struct srp_rport *rport)
 }
 
 /* Calculate maximum initiator to target information unit length. */
-static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
+static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data,
+				  uint32_t max_it_iu_size)
 {
 	uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
 		sizeof(struct srp_indirect_buf) +
@@ -1372,6 +1376,11 @@ static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
 		max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
 				 srp_max_imm_data);
 
+	if (max_it_iu_size)
+		max_iu_len = min(max_iu_len, max_it_iu_size);
+
+	pr_debug("max_iu_len = %d\n", max_iu_len);
+
 	return max_iu_len;
 }
 
@@ -1389,7 +1398,8 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 	struct srp_target_port *target = rport->lld_data;
 	struct srp_rdma_ch *ch;
 	uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
-						srp_use_imm_data);
+						srp_use_imm_data,
+						target->max_it_iu_size);
 	int i, j, ret = 0;
 	bool multich = false;
 
@@ -1838,7 +1848,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 		return -EIO;
 
 	if (ch->use_imm_data &&
-	    count <= SRP_MAX_IMM_SGE &&
+	    count <= ch->max_imm_sge &&
 	    SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
 	    scmnd->sc_data_direction == DMA_TO_DEVICE) {
 		struct srp_imm_buf *buf;
@@ -2538,7 +2548,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
 		ch->use_imm_data  = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
 		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
-						      ch->use_imm_data);
+						      ch->use_imm_data,
+						      target->max_it_iu_size);
 		WARN_ON_ONCE(ch->max_it_iu_len >
 			     be32_to_cpu(lrsp->max_it_iu_len));
 
@@ -3411,6 +3422,7 @@ enum {
 	SRP_OPT_IP_SRC		= 1 << 15,
 	SRP_OPT_IP_DEST		= 1 << 16,
 	SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
+	SRP_OPT_MAX_IT_IU_SIZE  = 1 << 18,
 };
 
 static unsigned int srp_opt_mandatory[] = {
@@ -3443,6 +3455,7 @@ static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
 	{ SRP_OPT_IP_SRC,		"src=%s"		},
 	{ SRP_OPT_IP_DEST,		"dest=%s"		},
+	{ SRP_OPT_MAX_IT_IU_SIZE,	"max_it_iu_size=%d"	},
 	{ SRP_OPT_ERR,			NULL 			}
 };
 
@@ -3736,6 +3749,14 @@ static int srp_parse_options(struct net *net, const char *buf,
 			target->tl_retry_count = token;
 			break;
 
+		case SRP_OPT_MAX_IT_IU_SIZE:
+			if (match_int(args, &token) || token < 0) {
+				pr_warn("bad maximum initiator to target IU size '%s'\n", p);
+				goto out;
+			}
+			target->max_it_iu_size = token;
+			break;
+
 		default:
 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
 				p);
@@ -3887,7 +3908,9 @@ static ssize_t srp_create_target(struct device *dev,
 	target->mr_per_cmd = mr_per_cmd;
 	target->indirect_size = target->sg_tablesize *
 				sizeof (struct srp_direct_buf);
-	max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
+	max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+				       srp_use_imm_data,
+				       target->max_it_iu_size);
 
 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
 	INIT_WORK(&target->remove_work, srp_remove_work);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index b2861cd2087a..5359ece561ca 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -161,6 +161,7 @@ struct srp_rdma_ch {
 	};
 	uint32_t		max_it_iu_len;
 	uint32_t		max_ti_iu_len;
+	u8			max_imm_sge;
 	bool			use_imm_data;
 
 	/* Everything above this point is used in the hot path of
@@ -209,6 +210,7 @@ struct srp_target_port {
 	u32			ch_count;
 	u32			lkey;
 	enum srp_target_state	state;
+	uint32_t		max_it_iu_size;
 	unsigned int		cmd_sg_cnt;
 	unsigned int		indirect_size;
 	bool			allow_ext_sg;
@@ -245,11 +247,13 @@ struct srp_target_port {
 			union {
 				struct sockaddr_in	ip4;
 				struct sockaddr_in6	ip6;
+				struct sockaddr		sa;
 				struct sockaddr_storage ss;
 			} src;
 			union {
 				struct sockaddr_in	ip4;
 				struct sockaddr_in6	ip6;
+				struct sockaddr		sa;
 				struct sockaddr_storage ss;
 			} dst;
 			bool src_specified;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e25c70a56be6..23c782e3d49a 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -556,34 +556,41 @@ static int srpt_refresh_port(struct srpt_port *sport)
 	struct ib_port_attr port_attr;
 	int ret;
 
-	memset(&port_modify, 0, sizeof(port_modify));
-	port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
-	port_modify.clr_port_cap_mask = 0;
-
-	ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
-	if (ret)
-		goto err_mod_port;
-
 	ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
 	if (ret)
-		goto err_query_port;
+		return ret;
 
 	sport->sm_lid = port_attr.sm_lid;
 	sport->lid = port_attr.lid;
 
 	ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
 	if (ret)
-		goto err_query_port;
+		return ret;
 
-	sport->port_guid_wwn.priv = sport;
-	srpt_format_guid(sport->port_guid, sizeof(sport->port_guid),
+	sport->port_guid_id.wwn.priv = sport;
+	srpt_format_guid(sport->port_guid_id.name,
+			 sizeof(sport->port_guid_id.name),
 			 &sport->gid.global.interface_id);
-	sport->port_gid_wwn.priv = sport;
-	snprintf(sport->port_gid, sizeof(sport->port_gid),
+	sport->port_gid_id.wwn.priv = sport;
+	snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name),
 		 "0x%016llx%016llx",
 		 be64_to_cpu(sport->gid.global.subnet_prefix),
 		 be64_to_cpu(sport->gid.global.interface_id));
 
+	if (rdma_protocol_iwarp(sport->sdev->device, sport->port))
+		return 0;
+
+	memset(&port_modify, 0, sizeof(port_modify));
+	port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
+	port_modify.clr_port_cap_mask = 0;
+
+	ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
+	if (ret) {
+		pr_warn("%s-%d: enabling device management failed (%d). Note: this is expected if SR-IOV is enabled.\n",
+			dev_name(&sport->sdev->device->dev), sport->port, ret);
+		return 0;
+	}
+
 	if (!sport->mad_agent) {
 		memset(&reg_req, 0, sizeof(reg_req));
 		reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@ -599,23 +606,14 @@ static int srpt_refresh_port(struct srpt_port *sport)
 							 srpt_mad_recv_handler,
 							 sport, 0);
 		if (IS_ERR(sport->mad_agent)) {
-			ret = PTR_ERR(sport->mad_agent);
+			pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n",
+			       dev_name(&sport->sdev->device->dev), sport->port,
+			       PTR_ERR(sport->mad_agent));
 			sport->mad_agent = NULL;
-			goto err_query_port;
 		}
 	}
 
 	return 0;
-
-err_query_port:
-
-	port_modify.set_port_cap_mask = 0;
-	port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
-	ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
-
-err_mod_port:
-
-	return ret;
 }
 
 /**
@@ -1364,9 +1362,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
 			      struct srpt_send_ioctx *ioctx, u64 tag,
 			      int status)
 {
+	struct se_cmd *cmd = &ioctx->cmd;
 	struct srp_rsp *srp_rsp;
 	const u8 *sense_data;
 	int sense_data_len, max_sense_len;
+	u32 resid = cmd->residual_count;
 
 	/*
 	 * The lowest bit of all SAM-3 status codes is zero (see also
@@ -1388,6 +1388,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
 	srp_rsp->tag = tag;
 	srp_rsp->status = status;
 
+	if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			/* residual data from an underflow write */
+			srp_rsp->flags = SRP_RSP_FLAG_DOUNDER;
+			srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+		} else if (cmd->data_direction == DMA_FROM_DEVICE) {
+			/* residual data from an underflow read */
+			srp_rsp->flags = SRP_RSP_FLAG_DIUNDER;
+			srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+		}
+	} else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) {
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			/* residual data from an overflow write */
+			srp_rsp->flags = SRP_RSP_FLAG_DOOVER;
+			srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+		} else if (cmd->data_direction == DMA_FROM_DEVICE) {
+			/* residual data from an overflow read */
+			srp_rsp->flags = SRP_RSP_FLAG_DIOVER;
+			srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+		}
+	}
+
 	if (sense_data_len) {
 		BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
 		max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
@@ -1931,41 +1953,22 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
 	return ret;
 }
 
-static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch)
-{
-	struct srpt_nexus *nexus;
-	struct srpt_rdma_ch *ch2;
-	bool res = true;
-
-	rcu_read_lock();
-	list_for_each_entry(nexus, &sport->nexus_list, entry) {
-		list_for_each_entry(ch2, &nexus->ch_list, list) {
-			if (ch2 == ch) {
-				res = false;
-				goto done;
-			}
-		}
-	}
-done:
-	rcu_read_unlock();
-
-	return res;
-}
-
 /* Send DREQ and wait for DREP. */
 static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
 {
+	DECLARE_COMPLETION_ONSTACK(closed);
 	struct srpt_port *sport = ch->sport;
 
 	pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
 		 ch->state);
 
+	ch->closed = &closed;
+
 	mutex_lock(&sport->mutex);
 	srpt_disconnect_ch(ch);
 	mutex_unlock(&sport->mutex);
 
-	while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch),
-				  5 * HZ) == 0)
+	while (wait_for_completion_timeout(&closed, 5 * HZ) == 0)
 		pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
 			ch->sess_name, ch->qp->qp_num, ch->state);
 
@@ -2045,10 +2048,17 @@ static void srpt_set_enabled(struct srpt_port *sport, bool enabled)
 		__srpt_close_all_ch(sport);
 }
 
+static void srpt_drop_sport_ref(struct srpt_port *sport)
+{
+	if (atomic_dec_return(&sport->refcount) == 0 && sport->freed_channels)
+		complete(sport->freed_channels);
+}
+
 static void srpt_free_ch(struct kref *kref)
 {
 	struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
 
+	srpt_drop_sport_ref(ch->sport);
 	kfree_rcu(ch, rcu);
 }
 
@@ -2092,6 +2102,9 @@ static void srpt_release_channel_work(struct work_struct *w)
 	list_del_rcu(&ch->list);
 	mutex_unlock(&sport->mutex);
 
+	if (ch->closed)
+		complete(ch->closed);
+
 	srpt_destroy_ch_ib(ch);
 
 	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
@@ -2106,8 +2119,6 @@ static void srpt_release_channel_work(struct work_struct *w)
 
 	kmem_cache_destroy(ch->req_buf_cache);
 
-	wake_up(&sport->ch_releaseQ);
-
 	kref_put(&ch->kref, srpt_free_ch);
 }
 
@@ -2144,6 +2155,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 	char i_port_id[36];
 	u32 it_iu_len;
 	int i, tag_num, tag_size, ret;
+	struct srpt_tpg *stpg;
 
 	WARN_ON_ONCE(irqs_disabled());
 
@@ -2296,23 +2308,38 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 			be64_to_cpu(*(__be64 *)nexus->i_port_id),
 			be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8)));
 
-	pr_debug("registering session %s\n", ch->sess_name);
+	pr_debug("registering src addr %s or i_port_id %s\n", ch->sess_name,
+		 i_port_id);
 
 	tag_num = ch->rq_size;
 	tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */
-	if (sport->port_guid_tpg.se_tpg_wwn)
-		ch->sess = target_setup_session(&sport->port_guid_tpg, tag_num,
+
+	mutex_lock(&sport->port_guid_id.mutex);
+	list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) {
+		if (!IS_ERR_OR_NULL(ch->sess))
+			break;
+		ch->sess = target_setup_session(&stpg->tpg, tag_num,
 						tag_size, TARGET_PROT_NORMAL,
 						ch->sess_name, ch, NULL);
-	if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
-		ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num,
+	}
+	mutex_unlock(&sport->port_guid_id.mutex);
+
+	mutex_lock(&sport->port_gid_id.mutex);
+	list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) {
+		if (!IS_ERR_OR_NULL(ch->sess))
+			break;
+		ch->sess = target_setup_session(&stpg->tpg, tag_num,
 					tag_size, TARGET_PROT_NORMAL, i_port_id,
 					ch, NULL);
-	/* Retry without leading "0x" */
-	if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
-		ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num,
+		if (!IS_ERR_OR_NULL(ch->sess))
+			break;
+		/* Retry without leading "0x" */
+		ch->sess = target_setup_session(&stpg->tpg, tag_num,
 						tag_size, TARGET_PROT_NORMAL,
 						i_port_id + 2, ch, NULL);
+	}
+	mutex_unlock(&sport->port_gid_id.mutex);
+
 	if (IS_ERR_OR_NULL(ch->sess)) {
 		WARN_ON_ONCE(ch->sess == NULL);
 		ret = PTR_ERR(ch->sess);
@@ -2325,6 +2352,12 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 		goto destroy_ib;
 	}
 
+	/*
+	 * Once a session has been created destruction of srpt_rdma_ch objects
+	 * will decrement sport->refcount. Hence increment sport->refcount now.
+	 */
+	atomic_inc(&sport->refcount);
+
 	mutex_lock(&sport->mutex);
 
 	if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
@@ -2505,6 +2538,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id,
 	struct srpt_device *sdev;
 	struct srp_login_req req;
 	const struct srp_login_req_rdma *req_rdma;
+	struct sa_path_rec *path_rec = cm_id->route.path_rec;
 	char src_addr[40];
 
 	sdev = ib_get_client_data(cm_id->device, &srpt_client);
@@ -2530,7 +2564,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id,
 		 &cm_id->route.addr.src_addr);
 
 	return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num,
-				cm_id->route.path_rec->pkey, &req, src_addr);
+				path_rec ? path_rec->pkey : 0, &req, src_addr);
 }
 
 static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
@@ -2906,39 +2940,29 @@ static void srpt_refresh_port_work(struct work_struct *work)
 	srpt_refresh_port(sport);
 }
 
-static bool srpt_ch_list_empty(struct srpt_port *sport)
-{
-	struct srpt_nexus *nexus;
-	bool res = true;
-
-	rcu_read_lock();
-	list_for_each_entry(nexus, &sport->nexus_list, entry)
-		if (!list_empty(&nexus->ch_list))
-			res = false;
-	rcu_read_unlock();
-
-	return res;
-}
-
 /**
  * srpt_release_sport - disable login and wait for associated channels
  * @sport: SRPT HCA port.
  */
 static int srpt_release_sport(struct srpt_port *sport)
 {
+	DECLARE_COMPLETION_ONSTACK(c);
 	struct srpt_nexus *nexus, *next_n;
 	struct srpt_rdma_ch *ch;
 
 	WARN_ON_ONCE(irqs_disabled());
 
+	sport->freed_channels = &c;
+
 	mutex_lock(&sport->mutex);
 	srpt_set_enabled(sport, false);
 	mutex_unlock(&sport->mutex);
 
-	while (wait_event_timeout(sport->ch_releaseQ,
-				  srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
-		pr_info("%s_%d: waiting for session unregistration ...\n",
-			dev_name(&sport->sdev->device->dev), sport->port);
+	while (atomic_read(&sport->refcount) > 0 &&
+	       wait_for_completion_timeout(&c, 5 * HZ) <= 0) {
+		pr_info("%s_%d: waiting for unregistration of %d sessions ...\n",
+			dev_name(&sport->sdev->device->dev), sport->port,
+			atomic_read(&sport->refcount));
 		rcu_read_lock();
 		list_for_each_entry(nexus, &sport->nexus_list, entry) {
 			list_for_each_entry(ch, &nexus->ch_list, list) {
@@ -2975,10 +2999,10 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name)
 		for (i = 0; i < dev->phys_port_cnt; i++) {
 			sport = &sdev->port[i];
 
-			if (strcmp(sport->port_guid, name) == 0)
-				return &sport->port_guid_wwn;
-			if (strcmp(sport->port_gid, name) == 0)
-				return &sport->port_gid_wwn;
+			if (strcmp(sport->port_guid_id.name, name) == 0)
+				return &sport->port_guid_id.wwn;
+			if (strcmp(sport->port_gid_id.name, name) == 0)
+				return &sport->port_gid_id.wwn;
 		}
 	}
 
@@ -3147,7 +3171,6 @@ static void srpt_add_one(struct ib_device *device)
 	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
 		sport = &sdev->port[i - 1];
 		INIT_LIST_HEAD(&sport->nexus_list);
-		init_waitqueue_head(&sport->ch_releaseQ);
 		mutex_init(&sport->mutex);
 		sport->sdev = sdev;
 		sport->port = i;
@@ -3156,6 +3179,10 @@ static void srpt_add_one(struct ib_device *device)
 		sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
 		sport->port_attrib.use_srq = false;
 		INIT_WORK(&sport->work, srpt_refresh_port_work);
+		mutex_init(&sport->port_guid_id.mutex);
+		INIT_LIST_HEAD(&sport->port_guid_id.tpg_list);
+		mutex_init(&sport->port_gid_id.mutex);
+		INIT_LIST_HEAD(&sport->port_gid_id.tpg_list);
 
 		if (srpt_refresh_port(sport)) {
 			pr_err("MAD registration failed for %s-%d.\n",
@@ -3258,14 +3285,23 @@ static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
 	return tpg->se_tpg_wwn->priv;
 }
 
+static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn)
+{
+	struct srpt_port *sport = wwn->priv;
+
+	if (wwn == &sport->port_guid_id.wwn)
+		return &sport->port_guid_id;
+	if (wwn == &sport->port_gid_id.wwn)
+		return &sport->port_gid_id;
+	WARN_ON_ONCE(true);
+	return NULL;
+}
+
 static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
 {
-	struct srpt_port *sport = srpt_tpg_to_sport(tpg);
+	struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg);
 
-	WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
-		     tpg != &sport->port_gid_tpg);
-	return tpg == &sport->port_guid_tpg ? sport->port_guid :
-		sport->port_gid;
+	return stpg->sport_id->name;
 }
 
 static u16 srpt_get_tag(struct se_portal_group *tpg)
@@ -3721,19 +3757,25 @@ static struct configfs_attribute *srpt_tpg_attrs[] = {
 static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
 					     const char *name)
 {
-	struct srpt_port *sport = wwn->priv;
-	struct se_portal_group *tpg;
-	int res;
-
-	WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
-		     wwn != &sport->port_gid_wwn);
-	tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
-		&sport->port_gid_tpg;
-	res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
-	if (res)
+	struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn);
+	struct srpt_tpg *stpg;
+	int res = -ENOMEM;
+
+	stpg = kzalloc(sizeof(*stpg), GFP_KERNEL);
+	if (!stpg)
 		return ERR_PTR(res);
+	stpg->sport_id = sport_id;
+	res = core_tpg_register(wwn, &stpg->tpg, SCSI_PROTOCOL_SRP);
+	if (res) {
+		kfree(stpg);
+		return ERR_PTR(res);
+	}
 
-	return tpg;
+	mutex_lock(&sport_id->mutex);
+	list_add_tail(&stpg->entry, &sport_id->tpg_list);
+	mutex_unlock(&sport_id->mutex);
+
+	return &stpg->tpg;
 }
 
 /**
@@ -3742,10 +3784,17 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
  */
 static void srpt_drop_tpg(struct se_portal_group *tpg)
 {
+	struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg);
+	struct srpt_port_id *sport_id = stpg->sport_id;
 	struct srpt_port *sport = srpt_tpg_to_sport(tpg);
 
+	mutex_lock(&sport_id->mutex);
+	list_del(&stpg->entry);
+	mutex_unlock(&sport_id->mutex);
+
 	sport->enabled = false;
 	core_tpg_deregister(tpg);
+	kfree(stpg);
 }
 
 /**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index ee9f20e9177a..2e1a69840857 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -264,6 +264,8 @@ enum rdma_ch_state {
  * @zw_cqe:	   Zero-length write CQE.
  * @rcu:           RCU head.
  * @kref:	   kref for this channel.
+ * @closed:	   Completion object that will be signaled as soon as a new
+ *		   channel object with the same identity can be created.
  * @rq_size:       IB receive queue size.
  * @max_rsp_size:  Maximum size of an RSP response message in bytes.
  * @sq_wr_avail:   number of work requests available in the send queue.
@@ -306,6 +308,7 @@ struct srpt_rdma_ch {
 	struct ib_cqe		zw_cqe;
 	struct rcu_head		rcu;
 	struct kref		kref;
+	struct completion	*closed;
 	int			rq_size;
 	u32			max_rsp_size;
 	atomic_t		sq_wr_avail;
@@ -361,24 +364,52 @@ struct srpt_port_attrib {
 };
 
 /**
+ * struct srpt_tpg - information about a single "target portal group"
+ * @entry:	Entry in @sport_id->tpg_list.
+ * @sport_id:	Port name this TPG is associated with.
+ * @tpg:	LIO TPG data structure.
+ *
+ * Zero or more target portal groups are associated with each port name
+ * (srpt_port_id). With each TPG an ACL list is associated.
+ */
+struct srpt_tpg {
+	struct list_head	entry;
+	struct srpt_port_id	*sport_id;
+	struct se_portal_group	tpg;
+};
+
+/**
+ * struct srpt_port_id - information about an RDMA port name
+ * @mutex:	Protects @tpg_list changes.
+ * @tpg_list:	TPGs associated with the RDMA port name.
+ * @wwn:	WWN associated with the RDMA port name.
+ * @name:	ASCII representation of the port name.
+ *
+ * Multiple sysfs directories can be associated with a single RDMA port. This
+ * data structure represents a single (port, name) pair.
+ */
+struct srpt_port_id {
+	struct mutex		mutex;
+	struct list_head	tpg_list;
+	struct se_wwn		wwn;
+	char			name[64];
+};
+
+/**
  * struct srpt_port - information associated by SRPT with a single IB port
  * @sdev:      backpointer to the HCA information.
  * @mad_agent: per-port management datagram processing information.
  * @enabled:   Whether or not this target port is enabled.
- * @port_guid: ASCII representation of Port GUID
- * @port_gid:  ASCII representation of Port GID
  * @port:      one-based port number.
  * @sm_lid:    cached value of the port's sm_lid.
  * @lid:       cached value of the port's lid.
  * @gid:       cached value of the port's gid.
- * @port_acl_lock spinlock for port_acl_list:
  * @work:      work structure for refreshing the aforementioned cached values.
- * @port_guid_tpg: TPG associated with target port GUID.
- * @port_guid_wwn: WWN associated with target port GUID.
- * @port_gid_tpg:  TPG associated with target port GID.
- * @port_gid_wwn:  WWN associated with target port GID.
+ * @port_guid_id: target port GUID
+ * @port_gid_id: target port GID
  * @port_attrib:   Port attributes that can be accessed through configfs.
- * @ch_releaseQ:   Enables waiting for removal from nexus_list.
+ * @refcount:	   Number of objects associated with this port.
+ * @freed_channels: Completion that will be signaled once @refcount becomes 0.
  * @mutex:	   Protects nexus_list.
  * @nexus_list:	   Nexus list. See also srpt_nexus.entry.
  */
@@ -386,19 +417,16 @@ struct srpt_port {
 	struct srpt_device	*sdev;
 	struct ib_mad_agent	*mad_agent;
 	bool			enabled;
-	u8			port_guid[24];
-	u8			port_gid[64];
 	u8			port;
 	u32			sm_lid;
 	u32			lid;
 	union ib_gid		gid;
 	struct work_struct	work;
-	struct se_portal_group	port_guid_tpg;
-	struct se_wwn		port_guid_wwn;
-	struct se_portal_group	port_gid_tpg;
-	struct se_wwn		port_gid_wwn;
+	struct srpt_port_id	port_guid_id;
+	struct srpt_port_id	port_gid_id;
 	struct srpt_port_attrib port_attrib;
-	wait_queue_head_t	ch_releaseQ;
+	atomic_t		refcount;
+	struct completion	*freed_channels;
 	struct mutex		mutex;
 	struct list_head	nexus_list;
 };