summary refs log tree commit diff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-13 12:57:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-13 12:57:21 -0700
commitc55244137306b626bc64023fd7160985443205a7 (patch)
tree459acfb5c9b41e3e1616fb36aafda68a07ddbf54 /drivers
parent858655116bfc722837e3aec0909b8e9d08f96996 (diff)
parente04abfa2436e3ab016b23eb1afb2c5578b8dc2cf (diff)
downloadlinux-c55244137306b626bc64023fd7160985443205a7.tar.gz
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull InfiniBand/RDMA changes from Roland Dreier:
 - AF_IB (native IB addressing) for CMA from Sean Hefty
 - new mlx5 driver for Mellanox Connect-IB adapters (including post
   merge request fixes)
 - SRP fixes from Bart Van Assche (including fix to first merge request)
 - qib HW driver updates
 - resurrection of ocrdma HW driver development
 - uverbs conversion to create fds with O_CLOEXEC set
 - other small changes and fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
  mlx5: Return -EFAULT instead of -EPERM
  IB/qib: Log all SDMA errors unconditionally
  IB/qib: Fix module-level leak
  mlx5_core: Adjust hca_cap.uar_page_sz to conform to Connect-IB spec
  IB/srp: Let srp_abort() return FAST_IO_FAIL if TL offline
  IB/uverbs: Use get_unused_fd_flags(O_CLOEXEC) instead of get_unused_fd()
  mlx5_core: Fixes for sparse warnings
  IB/mlx5: Make profile[] static in main.c
  mlx5: Fix parameter type of health_handler_t
  mlx5: Add driver for Mellanox Connect-IB adapters
  IB/core: Add reserved values to enums for low-level driver use
  IB/srp: Bump driver version and release date
  IB/srp: Make HCA completion vector configurable
  IB/srp: Maintain a single connection per I_T nexus
  IB/srp: Fail I/O fast if target offline
  IB/srp: Skip host settle delay
  IB/srp: Avoid skipping srp_reset_host() after a transport error
  IB/srp: Fix remove_one crash due to resource exhaustion
  IB/qib: New transmitter tunning settings for Dell 1.1 backplane
  IB/core: Fix error return code in add_port()
  ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/addr.c20
-rw-r--r--drivers/infiniband/core/cma.c906
-rw-r--r--drivers/infiniband/core/sa_query.c6
-rw-r--r--drivers/infiniband/core/sysfs.c8
-rw-r--r--drivers/infiniband/core/ucma.c321
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig10
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile3
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c92
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c843
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c100
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c139
-rw-r--r--drivers/infiniband/hw/mlx5/main.c1504
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c162
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h545
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c1007
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c2524
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c473
-rw-r--r--drivers/infiniband/hw/mlx5/user.h121
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h63
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c86
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c6
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h35
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c135
-rw-r--r--drivers/infiniband/hw/qib/Kconfig8
-rw-r--r--drivers/infiniband/hw/qib/Makefile1
-rw-r--r--drivers/infiniband/hw/qib/qib.h63
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_cq.c67
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.c283
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.h45
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c176
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c507
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c145
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c123
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c56
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h33
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c89
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h1
-rw-r--r--drivers/net/ethernet/mellanox/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c238
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c1515
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c224
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c583
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c521
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c185
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c227
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mad.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c475
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mcg.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c136
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c435
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pd.c101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c301
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c223
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c223
69 files changed, 15734 insertions, 786 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c85b56c28099..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
+source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
 source "drivers/infiniband/hw/ocrdma/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index b126fefe0b1c..1fe69888515f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)		+= hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)		+= hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index eaec8d7a3b73..e90f2b2eabd7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -70,6 +71,21 @@ static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
+int rdma_addr_size(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	case AF_IB:
+		return sizeof(struct sockaddr_ib);
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -369,12 +385,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 			goto err;
 		}
 
-		memcpy(src_in, src_addr, ip_addr_size(src_addr));
+		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 	} else {
 		src_in->sa_family = dst_addr->sa_family;
 	}
 
-	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 34fbc2f60a09..f1c279fabe64 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -50,6 +50,7 @@
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
 #include <rdma/rdma_netlink.h>
+#include <rdma/ib.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
@@ -79,7 +80,6 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
 static DEFINE_IDR(ipoib_ps);
@@ -195,24 +195,7 @@ struct cma_hdr {
 	union cma_ip_addr dst_addr;
 };
 
-struct sdp_hh {
-	u8 bsdh[16];
-	u8 sdp_version; /* Major version: 7:4 */
-	u8 ip_version;	/* IP version: 7:4 */
-	u8 sdp_specific1[10];
-	__be16 port;
-	__be16 sdp_specific2;
-	union cma_ip_addr src_addr;
-	union cma_ip_addr dst_addr;
-};
-
-struct sdp_hah {
-	u8 bsdh[16];
-	u8 sdp_version;
-};
-
 #define CMA_VERSION 0x00
-#define SDP_MAJ_VERSION 0x2
 
 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 {
@@ -261,21 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
-static inline u8 sdp_get_majv(u8 sdp_version)
-{
-	return sdp_version >> 4;
-}
-
-static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
-{
-	return hh->ip_version >> 4;
-}
-
-static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
-{
-	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
-}
-
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
@@ -310,16 +278,40 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
-static int cma_set_qkey(struct rdma_id_private *id_priv)
+static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+}
+
+static inline unsigned short cma_family(struct rdma_id_private *id_priv)
+{
+	return id_priv->id.route.addr.src_addr.ss_family;
+}
+
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 {
 	struct ib_sa_mcmember_rec rec;
 	int ret = 0;
 
-	if (id_priv->qkey)
+	if (id_priv->qkey) {
+		if (qkey && id_priv->qkey != qkey)
+			return -EINVAL;
+		return 0;
+	}
+
+	if (qkey) {
+		id_priv->qkey = qkey;
 		return 0;
+	}
 
 	switch (id_priv->id.ps) {
 	case RDMA_PS_UDP:
+	case RDMA_PS_IB:
 		id_priv->qkey = RDMA_UDP_QKEY;
 		break;
 	case RDMA_PS_IPOIB:
@@ -358,6 +350,27 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
 	return -EADDRNOTAVAIL;
 }
 
+static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
+{
+	dev_addr->dev_type = ARPHRD_INFINIBAND;
+	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
+	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
+}
+
+static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+	int ret;
+
+	if (addr->sa_family != AF_IB) {
+		ret = rdma_translate_ip(addr, dev_addr);
+	} else {
+		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -401,6 +414,61 @@ out:
 	return ret;
 }
 
+/*
+ * Select the source IB device and address to reach the destination IB address.
+ */
+static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev, *cur_dev;
+	struct sockaddr_ib *addr;
+	union ib_gid gid, sgid, *dgid;
+	u16 pkey, index;
+	u8 port, p;
+	int i;
+
+	cma_dev = NULL;
+	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
+	dgid = (union ib_gid *) &addr->sib_addr;
+	pkey = ntohs(addr->sib_pkey);
+
+	list_for_each_entry(cur_dev, &dev_list, list) {
+		if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+			continue;
+
+		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
+				continue;
+
+			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+				if (!memcmp(&gid, dgid, sizeof(gid))) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+					goto found;
+				}
+
+				if (!cma_dev && (gid.global.subnet_prefix ==
+						 dgid->global.subnet_prefix)) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+				}
+			}
+		}
+	}
+
+	if (!cma_dev)
+		return -ENODEV;
+
+found:
+	cma_attach_to_dev(id_priv, cma_dev);
+	id_priv->id.port_num = port;
+	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
+	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
+	return 0;
+}
+
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -630,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
 
 	if (id_priv->id.qp_type == IB_QPT_UD) {
-		ret = cma_set_qkey(id_priv);
+		ret = cma_set_qkey(id_priv, 0);
 		if (ret)
 			return ret;
 
@@ -679,26 +747,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr);
 
 static inline int cma_zero_addr(struct sockaddr *addr)
 {
-	struct in6_addr *ip6;
-
-	if (addr->sa_family == AF_INET)
-		return ipv4_is_zeronet(
-			((struct sockaddr_in *)addr)->sin_addr.s_addr);
-	else {
-		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
-		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
-			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+	switch (addr->sa_family) {
+	case AF_INET:
+		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
+	case AF_INET6:
+		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+	case AF_IB:
+		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+	default:
+		return 0;
 	}
 }
 
 static inline int cma_loopback_addr(struct sockaddr *addr)
 {
-	if (addr->sa_family == AF_INET)
-		return ipv4_is_loopback(
-			((struct sockaddr_in *) addr)->sin_addr.s_addr);
-	else
-		return ipv6_addr_loopback(
-			&((struct sockaddr_in6 *) addr)->sin6_addr);
+	switch (addr->sa_family) {
+	case AF_INET:
+		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+	case AF_INET6:
+		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+	case AF_IB:
+		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+	default:
+		return 0;
+	}
 }
 
 static inline int cma_any_addr(struct sockaddr *addr)
@@ -715,18 +787,31 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 	case AF_INET:
 		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
 		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
-	default:
+	case AF_INET6:
 		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
 				     &((struct sockaddr_in6 *) dst)->sin6_addr);
+	default:
+		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
+				   &((struct sockaddr_ib *) dst)->sib_addr);
 	}
 }
 
-static inline __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(struct sockaddr *addr)
 {
-	if (addr->sa_family == AF_INET)
+	struct sockaddr_ib *sib;
+
+	switch (addr->sa_family) {
+	case AF_INET:
 		return ((struct sockaddr_in *) addr)->sin_port;
-	else
+	case AF_INET6:
 		return ((struct sockaddr_in6 *) addr)->sin6_port;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) addr;
+		return htons((u16) (be64_to_cpu(sib->sib_sid) &
+				    be64_to_cpu(sib->sib_sid_mask)));
+	default:
+		return 0;
+	}
 }
 
 static inline int cma_any_port(struct sockaddr *addr)
@@ -734,83 +819,92 @@ static inline int cma_any_port(struct sockaddr *addr)
 	return !cma_port(addr);
 }
 
-static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
-			    u8 *ip_ver, __be16 *port,
-			    union cma_ip_addr **src, union cma_ip_addr **dst)
+static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			     struct ib_sa_path_rec *path)
 {
-	switch (ps) {
-	case RDMA_PS_SDP:
-		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
-		    SDP_MAJ_VERSION)
-			return -EINVAL;
+	struct sockaddr_ib *listen_ib, *ib;
 
-		*ip_ver	= sdp_get_ip_ver(hdr);
-		*port	= ((struct sdp_hh *) hdr)->port;
-		*src	= &((struct sdp_hh *) hdr)->src_addr;
-		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
-		break;
-	default:
-		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
-			return -EINVAL;
+	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
+	ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
+	ib->sib_family = listen_ib->sib_family;
+	ib->sib_pkey = path->pkey;
+	ib->sib_flowinfo = path->flow_label;
+	memcpy(&ib->sib_addr, &path->sgid, 16);
+	ib->sib_sid = listen_ib->sib_sid;
+	ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
+	ib->sib_scope_id = listen_ib->sib_scope_id;
 
-		*ip_ver	= cma_get_ip_ver(hdr);
-		*port	= ((struct cma_hdr *) hdr)->port;
-		*src	= &((struct cma_hdr *) hdr)->src_addr;
-		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
-		break;
-	}
-
-	if (*ip_ver != 4 && *ip_ver != 6)
-		return -EINVAL;
-	return 0;
+	ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
+	ib->sib_family = listen_ib->sib_family;
+	ib->sib_pkey = path->pkey;
+	ib->sib_flowinfo = path->flow_label;
+	memcpy(&ib->sib_addr, &path->dgid, 16);
 }
 
-static void cma_save_net_info(struct rdma_addr *addr,
-			      struct rdma_addr *listen_addr,
-			      u8 ip_ver, __be16 port,
-			      union cma_ip_addr *src, union cma_ip_addr *dst)
+static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			      struct cma_hdr *hdr)
 {
 	struct sockaddr_in *listen4, *ip4;
+
+	listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
+	ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
+	ip4->sin_family = listen4->sin_family;
+	ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
+	ip4->sin_port = listen4->sin_port;
+
+	ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
+	ip4->sin_family = listen4->sin_family;
+	ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
+	ip4->sin_port = hdr->port;
+}
+
+static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			      struct cma_hdr *hdr)
+{
 	struct sockaddr_in6 *listen6, *ip6;
 
-	switch (ip_ver) {
+	listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
+	ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
+	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_addr = hdr->dst_addr.ip6;
+	ip6->sin6_port = listen6->sin6_port;
+
+	ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
+	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_addr = hdr->src_addr.ip6;
+	ip6->sin6_port = hdr->port;
+}
+
+static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			     struct ib_cm_event *ib_event)
+{
+	struct cma_hdr *hdr;
+
+	if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
+		cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+		return 0;
+	}
+
+	hdr = ib_event->private_data;
+	if (hdr->cma_version != CMA_VERSION)
+		return -EINVAL;
+
+	switch (cma_get_ip_ver(hdr)) {
 	case 4:
-		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
-		ip4 = (struct sockaddr_in *) &addr->src_addr;
-		ip4->sin_family = listen4->sin_family;
-		ip4->sin_addr.s_addr = dst->ip4.addr;
-		ip4->sin_port = listen4->sin_port;
-
-		ip4 = (struct sockaddr_in *) &addr->dst_addr;
-		ip4->sin_family = listen4->sin_family;
-		ip4->sin_addr.s_addr = src->ip4.addr;
-		ip4->sin_port = port;
+		cma_save_ip4_info(id, listen_id, hdr);
 		break;
 	case 6:
-		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
-		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
-		ip6->sin6_family = listen6->sin6_family;
-		ip6->sin6_addr = dst->ip6;
-		ip6->sin6_port = listen6->sin6_port;
-
-		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
-		ip6->sin6_family = listen6->sin6_family;
-		ip6->sin6_addr = src->ip6;
-		ip6->sin6_port = port;
+		cma_save_ip6_info(id, listen_id, hdr);
 		break;
 	default:
-		break;
+		return -EINVAL;
 	}
+	return 0;
 }
 
-static inline int cma_user_data_offset(enum rdma_port_space ps)
+static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
 {
-	switch (ps) {
-	case RDMA_PS_SDP:
-		return 0;
-	default:
-		return sizeof(struct cma_hdr);
-	}
+	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
 }
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -861,8 +955,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
 		cma_cancel_route(id_priv);
 		break;
 	case RDMA_CM_LISTEN:
-		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
-				&& !id_priv->cma_dev)
+		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
 			cma_cancel_listens(id_priv);
 		break;
 	default:
@@ -977,16 +1070,6 @@ reject:
 	return ret;
 }
 
-static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
-{
-	if (id_priv->id.ps == RDMA_PS_SDP &&
-	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
-	    SDP_MAJ_VERSION)
-		return -EINVAL;
-
-	return 0;
-}
-
 static void cma_set_rep_event_data(struct rdma_cm_event *event,
 				   struct ib_cm_rep_event_param *rep_data,
 				   void *private_data)
@@ -1021,15 +1104,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.status = -ETIMEDOUT;
 		break;
 	case IB_CM_REP_RECEIVED:
-		event.status = cma_verify_rep(id_priv, ib_event->private_data);
-		if (event.status)
-			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
-		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
+		if (id_priv->id.qp) {
 			event.status = cma_rep_recv(id_priv);
 			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
 						     RDMA_CM_EVENT_ESTABLISHED;
-		} else
+		} else {
 			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+		}
 		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
 				       ib_event->private_data);
 		break;
@@ -1085,22 +1166,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
 	struct rdma_route *rt;
-	union cma_ip_addr *src, *dst;
-	__be16 port;
-	u8 ip_ver;
 	int ret;
 
-	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
-			     &ip_ver, &port, &src, &dst))
-		return NULL;
-
 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
 			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
 	if (IS_ERR(id))
 		return NULL;
 
-	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
-			  ip_ver, port, src, dst);
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (cma_save_net_info(id, listen_id, ib_event))
+		goto err;
 
 	rt = &id->route;
 	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
@@ -1113,19 +1188,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	if (rt->num_paths == 2)
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+	if (cma_any_addr(cma_src_addr(id_priv))) {
 		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
 		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 		ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
 	} else {
-		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
-					&rt->addr.dev_addr);
+		ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
 		if (ret)
 			goto err;
 	}
 	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
-	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 
@@ -1139,9 +1212,6 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 {
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
-	union cma_ip_addr *src, *dst;
-	__be16 port;
-	u8 ip_ver;
 	int ret;
 
 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
@@ -1149,22 +1219,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	if (IS_ERR(id))
 		return NULL;
 
-
-	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
-			     &ip_ver, &port, &src, &dst))
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (cma_save_net_info(id, listen_id, ib_event))
 		goto err;
 
-	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
-			  ip_ver, port, src, dst);
-
 	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
-		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-					&id->route.addr.dev_addr);
+		ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
 		if (ret)
 			goto err;
 	}
 
-	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 err:
@@ -1210,7 +1274,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
-	offset = cma_user_data_offset(listen_id->id.ps);
+	offset = cma_user_data_offset(listen_id);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
 		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
@@ -1272,58 +1336,44 @@ err1:
 	return ret;
 }
 
-static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
 {
-	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
+	if (addr->sa_family == AF_IB)
+		return ((struct sockaddr_ib *) addr)->sib_sid;
+
+	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
 }
+EXPORT_SYMBOL(rdma_get_service_id);
 
 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
 				 struct ib_cm_compare_data *compare)
 {
 	struct cma_hdr *cma_data, *cma_mask;
-	struct sdp_hh *sdp_data, *sdp_mask;
 	__be32 ip4_addr;
 	struct in6_addr ip6_addr;
 
 	memset(compare, 0, sizeof *compare);
 	cma_data = (void *) compare->data;
 	cma_mask = (void *) compare->mask;
-	sdp_data = (void *) compare->data;
-	sdp_mask = (void *) compare->mask;
 
 	switch (addr->sa_family) {
 	case AF_INET:
 		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
-		if (ps == RDMA_PS_SDP) {
-			sdp_set_ip_ver(sdp_data, 4);
-			sdp_set_ip_ver(sdp_mask, 0xF);
-			sdp_data->dst_addr.ip4.addr = ip4_addr;
-			sdp_mask->dst_addr.ip4.addr = htonl(~0);
-		} else {
-			cma_set_ip_ver(cma_data, 4);
-			cma_set_ip_ver(cma_mask, 0xF);
-			if (!cma_any_addr(addr)) {
-				cma_data->dst_addr.ip4.addr = ip4_addr;
-				cma_mask->dst_addr.ip4.addr = htonl(~0);
-			}
+		cma_set_ip_ver(cma_data, 4);
+		cma_set_ip_ver(cma_mask, 0xF);
+		if (!cma_any_addr(addr)) {
+			cma_data->dst_addr.ip4.addr = ip4_addr;
+			cma_mask->dst_addr.ip4.addr = htonl(~0);
 		}
 		break;
 	case AF_INET6:
 		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
-		if (ps == RDMA_PS_SDP) {
-			sdp_set_ip_ver(sdp_data, 6);
-			sdp_set_ip_ver(sdp_mask, 0xF);
-			sdp_data->dst_addr.ip6 = ip6_addr;
-			memset(&sdp_mask->dst_addr.ip6, 0xFF,
-			       sizeof sdp_mask->dst_addr.ip6);
-		} else {
-			cma_set_ip_ver(cma_data, 6);
-			cma_set_ip_ver(cma_mask, 0xF);
-			if (!cma_any_addr(addr)) {
-				cma_data->dst_addr.ip6 = ip6_addr;
-				memset(&cma_mask->dst_addr.ip6, 0xFF,
-				       sizeof cma_mask->dst_addr.ip6);
-			}
+		cma_set_ip_ver(cma_data, 6);
+		cma_set_ip_ver(cma_mask, 0xF);
+		if (!cma_any_addr(addr)) {
+			cma_data->dst_addr.ip6 = ip6_addr;
+			memset(&cma_mask->dst_addr.ip6, 0xFF,
+			       sizeof cma_mask->dst_addr.ip6);
 		}
 		break;
 	default:
@@ -1347,9 +1397,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
 	case IW_CM_EVENT_CONNECT_REPLY:
-		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		sin = (struct sockaddr_in *) cma_src_addr(id_priv);
 		*sin = iw_event->local_addr;
-		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
 		*sin = iw_event->remote_addr;
 		switch (iw_event->status) {
 		case 0:
@@ -1447,9 +1497,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	cm_id->context = conn_id;
 	cm_id->cm_handler = cma_iw_handler;
 
-	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	sin = (struct sockaddr_in *) cma_src_addr(conn_id);
 	*sin = iw_event->local_addr;
-	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
 	*sin = iw_event->remote_addr;
 
 	ret = ib_query_device(conn_id->id.device, &attr);
@@ -1506,8 +1556,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 
 	id_priv->cm_id.ib = id;
 
-	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	svc_id = cma_get_service_id(id_priv->id.ps, addr);
+	addr = cma_src_addr(id_priv);
+	svc_id = rdma_get_service_id(&id_priv->id, addr);
 	if (cma_any_addr(addr) && !id_priv->afonly)
 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
 	else {
@@ -1537,7 +1587,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 
 	id_priv->cm_id.iw = id;
 
-	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	sin = (struct sockaddr_in *) cma_src_addr(id_priv);
 	id_priv->cm_id.iw->local_addr = *sin;
 
 	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -1567,6 +1617,10 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	struct rdma_cm_id *id;
 	int ret;
 
+	if (cma_family(id_priv) == AF_IB &&
+	    rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
 			    id_priv->id.qp_type);
 	if (IS_ERR(id))
@@ -1575,8 +1629,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
 
 	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
-	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
-	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
+	       rdma_addr_size(cma_src_addr(id_priv)));
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1634,31 +1688,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 			      struct cma_work *work)
 {
-	struct rdma_addr *addr = &id_priv->id.route.addr;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct ib_sa_path_rec path_rec;
 	ib_sa_comp_mask comp_mask;
 	struct sockaddr_in6 *sin6;
+	struct sockaddr_ib *sib;
 
 	memset(&path_rec, 0, sizeof path_rec);
-	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
-	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
-	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
+	rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
+	rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
+	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	path_rec.numb_path = 1;
 	path_rec.reversible = 1;
-	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
-							(struct sockaddr *) &addr->dst_addr);
+	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 
 	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
 		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
 
-	if (addr->src_addr.ss_family == AF_INET) {
+	switch (cma_family(id_priv)) {
+	case AF_INET:
 		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
 		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
-	} else {
-		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+		break;
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
 		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
 		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+		break;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
+		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+		break;
 	}
 
 	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
@@ -1800,14 +1862,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	struct rdma_addr *addr = &route->addr;
 	struct cma_work *work;
 	int ret;
-	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
-	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
 	struct net_device *ndev = NULL;
 	u16 vid;
 
-	if (src_addr->sin_family != dst_addr->sin_family)
-		return -EINVAL;
-
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (!work)
 		return -ENOMEM;
@@ -1913,28 +1970,57 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_route);
 
+static void cma_set_loopback(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		break;
+	case AF_INET6:
+		ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
+			      0, 0, 0, htonl(1));
+		break;
+	default:
+		ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
+			    0, 0, 0, htonl(1));
+		break;
+	}
+}
+
 static int cma_bind_loopback(struct rdma_id_private *id_priv)
 {
-	struct cma_device *cma_dev;
+	struct cma_device *cma_dev, *cur_dev;
 	struct ib_port_attr port_attr;
 	union ib_gid gid;
 	u16 pkey;
 	int ret;
 	u8 p;
 
+	cma_dev = NULL;
 	mutex_lock(&lock);
-	if (list_empty(&dev_list)) {
+	list_for_each_entry(cur_dev, &dev_list, list) {
+		if (cma_family(id_priv) == AF_IB &&
+		    rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+			continue;
+
+		if (!cma_dev)
+			cma_dev = cur_dev;
+
+		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+			if (!ib_query_port(cur_dev->device, p, &port_attr) &&
+			    port_attr.state == IB_PORT_ACTIVE) {
+				cma_dev = cur_dev;
+				goto port_found;
+			}
+		}
+	}
+
+	if (!cma_dev) {
 		ret = -ENODEV;
 		goto out;
 	}
-	list_for_each_entry(cma_dev, &dev_list, list)
-		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
-			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
-			    port_attr.state == IB_PORT_ACTIVE)
-				goto port_found;
 
 	p = 1;
-	cma_dev = list_entry(dev_list.next, struct cma_device, list);
 
 port_found:
 	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
@@ -1953,6 +2039,7 @@ port_found:
 	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
 	id_priv->id.port_num = p;
 	cma_attach_to_dev(id_priv, cma_dev);
+	cma_set_loopback(cma_src_addr(id_priv));
 out:
 	mutex_unlock(&lock);
 	return ret;
@@ -1980,8 +2067,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
 		event.status = status;
 	} else {
-		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
-		       ip_addr_size(src_addr));
+		memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	}
 
@@ -2000,7 +2086,6 @@ out:
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 {
 	struct cma_work *work;
-	struct sockaddr *src, *dst;
 	union ib_gid gid;
 	int ret;
 
@@ -2017,18 +2102,36 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
 	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
-	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	if (cma_zero_addr(src)) {
-		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-		if ((src->sa_family = dst->sa_family) == AF_INET) {
-			((struct sockaddr_in *)src)->sin_addr =
-				((struct sockaddr_in *)dst)->sin_addr;
-		} else {
-			((struct sockaddr_in6 *)src)->sin6_addr =
-				((struct sockaddr_in6 *)dst)->sin6_addr;
-		}
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler);
+	work->old_state = RDMA_CM_ADDR_QUERY;
+	work->new_state = RDMA_CM_ADDR_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+err:
+	kfree(work);
+	return ret;
+}
+
+static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
+{
+	struct cma_work *work;
+	int ret;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	if (!id_priv->cma_dev) {
+		ret = cma_resolve_ib_dev(id_priv);
+		if (ret)
+			goto err;
 	}
 
+	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
+		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
 	work->old_state = RDMA_CM_ADDR_QUERY;
@@ -2046,9 +2149,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 {
 	if (!src_addr || !src_addr->sa_family) {
 		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
-		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+		src_addr->sa_family = dst_addr->sa_family;
+		if (dst_addr->sa_family == AF_INET6) {
 			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
 				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+		} else if (dst_addr->sa_family == AF_IB) {
+			((struct sockaddr_ib *) src_addr)->sib_pkey =
+				((struct sockaddr_ib *) dst_addr)->sib_pkey;
 		}
 	}
 	return rdma_bind_addr(id, src_addr);
@@ -2067,17 +2174,25 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 			return ret;
 	}
 
+	if (cma_family(id_priv) != dst_addr->sa_family)
+		return -EINVAL;
+
 	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
-	if (cma_any_addr(dst_addr))
+	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
-	else
-		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
-				      dst_addr, &id->route.addr.dev_addr,
-				      timeout_ms, addr_handler, id_priv);
+	} else {
+		if (dst_addr->sa_family == AF_IB) {
+			ret = cma_resolve_ib_addr(id_priv);
+		} else {
+			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+					      dst_addr, &id->route.addr.dev_addr,
+					      timeout_ms, addr_handler, id_priv);
+		}
+	}
 	if (ret)
 		goto err;
 
@@ -2097,7 +2212,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irqsave(&id_priv->lock, flags);
-	if (id_priv->state == RDMA_CM_IDLE) {
+	if (reuse || id_priv->state == RDMA_CM_IDLE) {
 		id_priv->reuseaddr = reuse;
 		ret = 0;
 	} else {
@@ -2131,10 +2246,29 @@ EXPORT_SYMBOL(rdma_set_afonly);
 static void cma_bind_port(struct rdma_bind_list *bind_list,
 			  struct rdma_id_private *id_priv)
 {
-	struct sockaddr_in *sin;
+	struct sockaddr *addr;
+	struct sockaddr_ib *sib;
+	u64 sid, mask;
+	__be16 port;
 
-	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
-	sin->sin_port = htons(bind_list->port);
+	addr = cma_src_addr(id_priv);
+	port = htons(bind_list->port);
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *) addr)->sin_port = port;
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *) addr)->sin6_port = port;
+		break;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) addr;
+		sid = be64_to_cpu(sib->sib_sid);
+		mask = be64_to_cpu(sib->sib_sid_mask);
+		sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
+		sib->sib_sid_mask = cpu_to_be64(~0ULL);
+		break;
+	}
 	id_priv->bind_list = bind_list;
 	hlist_add_head(&id_priv->node, &bind_list->owners);
 }
@@ -2205,7 +2339,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 	struct rdma_id_private *cur_id;
 	struct sockaddr *addr, *cur_addr;
 
-	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+	addr = cma_src_addr(id_priv);
 	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
 		if (id_priv == cur_id)
 			continue;
@@ -2214,7 +2348,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 		    cur_id->reuseaddr)
 			continue;
 
-		cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+		cur_addr = cma_src_addr(cur_id);
 		if (id_priv->afonly && cur_id->afonly &&
 		    (addr->sa_family != cur_addr->sa_family))
 			continue;
@@ -2234,7 +2368,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
 	unsigned short snum;
 	int ret;
 
-	snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+	snum = ntohs(cma_port(cma_src_addr(id_priv)));
 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
@@ -2261,33 +2395,67 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
 	return ret;
 }
 
-static int cma_get_port(struct rdma_id_private *id_priv)
+static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
 {
-	struct idr *ps;
-	int ret;
-
 	switch (id_priv->id.ps) {
-	case RDMA_PS_SDP:
-		ps = &sdp_ps;
-		break;
 	case RDMA_PS_TCP:
-		ps = &tcp_ps;
-		break;
+		return &tcp_ps;
 	case RDMA_PS_UDP:
-		ps = &udp_ps;
-		break;
+		return &udp_ps;
 	case RDMA_PS_IPOIB:
-		ps = &ipoib_ps;
-		break;
+		return &ipoib_ps;
 	case RDMA_PS_IB:
-		ps = &ib_ps;
-		break;
+		return &ib_ps;
 	default:
-		return -EPROTONOSUPPORT;
+		return NULL;
+	}
+}
+
+static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
+{
+	struct idr *ps = NULL;
+	struct sockaddr_ib *sib;
+	u64 sid_ps, mask, sid;
+
+	sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+	mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
+	sid = be64_to_cpu(sib->sib_sid) & mask;
+
+	if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
+		sid_ps = RDMA_IB_IP_PS_IB;
+		ps = &ib_ps;
+	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
+		   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
+		sid_ps = RDMA_IB_IP_PS_TCP;
+		ps = &tcp_ps;
+	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
+		   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
+		sid_ps = RDMA_IB_IP_PS_UDP;
+		ps = &udp_ps;
 	}
 
+	if (ps) {
+		sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
+		sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
+						be64_to_cpu(sib->sib_sid_mask));
+	}
+	return ps;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+	struct idr *ps;
+	int ret;
+
+	if (cma_family(id_priv) != AF_IB)
+		ps = cma_select_inet_ps(id_priv);
+	else
+		ps = cma_select_ib_ps(id_priv);
+	if (!ps)
+		return -EPROTONOSUPPORT;
+
 	mutex_lock(&lock);
-	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
+	if (cma_any_port(cma_src_addr(id_priv)))
 		ret = cma_alloc_any_port(ps, id_priv);
 	else
 		ret = cma_use_port(ps, id_priv);
@@ -2322,8 +2490,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	if (id_priv->state == RDMA_CM_IDLE) {
-		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
-		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
+		id->route.addr.src_addr.ss_family = AF_INET;
+		ret = rdma_bind_addr(id, cma_src_addr(id_priv));
 		if (ret)
 			return ret;
 	}
@@ -2370,7 +2538,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	struct rdma_id_private *id_priv;
 	int ret;
 
-	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
+	    addr->sa_family != AF_IB)
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
@@ -2382,7 +2551,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		goto err1;
 
 	if (!cma_any_addr(addr)) {
-		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
 		if (ret)
 			goto err1;
 
@@ -2391,7 +2560,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 			goto err1;
 	}
 
-	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
 	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
 		if (addr->sa_family == AF_INET)
 			id_priv->afonly = 1;
@@ -2414,62 +2583,32 @@ err1:
 }
 EXPORT_SYMBOL(rdma_bind_addr);
 
-static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
-			  struct rdma_route *route)
+static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
 {
 	struct cma_hdr *cma_hdr;
-	struct sdp_hh *sdp_hdr;
 
-	if (route->addr.src_addr.ss_family == AF_INET) {
+	cma_hdr = hdr;
+	cma_hdr->cma_version = CMA_VERSION;
+	if (cma_family(id_priv) == AF_INET) {
 		struct sockaddr_in *src4, *dst4;
 
-		src4 = (struct sockaddr_in *) &route->addr.src_addr;
-		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
-
-		switch (ps) {
-		case RDMA_PS_SDP:
-			sdp_hdr = hdr;
-			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-				return -EINVAL;
-			sdp_set_ip_ver(sdp_hdr, 4);
-			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-			sdp_hdr->port = src4->sin_port;
-			break;
-		default:
-			cma_hdr = hdr;
-			cma_hdr->cma_version = CMA_VERSION;
-			cma_set_ip_ver(cma_hdr, 4);
-			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-			cma_hdr->port = src4->sin_port;
-			break;
-		}
-	} else {
+		src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
+		dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
+
+		cma_set_ip_ver(cma_hdr, 4);
+		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+		cma_hdr->port = src4->sin_port;
+	} else if (cma_family(id_priv) == AF_INET6) {
 		struct sockaddr_in6 *src6, *dst6;
 
-		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
-		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
-
-		switch (ps) {
-		case RDMA_PS_SDP:
-			sdp_hdr = hdr;
-			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-				return -EINVAL;
-			sdp_set_ip_ver(sdp_hdr, 6);
-			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
-			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
-			sdp_hdr->port = src6->sin6_port;
-			break;
-		default:
-			cma_hdr = hdr;
-			cma_hdr->cma_version = CMA_VERSION;
-			cma_set_ip_ver(cma_hdr, 6);
-			cma_hdr->src_addr.ip6 = src6->sin6_addr;
-			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
-			cma_hdr->port = src6->sin6_port;
-			break;
-		}
+		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
+		dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
+
+		cma_set_ip_ver(cma_hdr, 6);
+		cma_hdr->src_addr.ip6 = src6->sin6_addr;
+		cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+		cma_hdr->port = src6->sin6_port;
 	}
 	return 0;
 }
@@ -2499,15 +2638,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 			event.status = ib_event->param.sidr_rep_rcvd.status;
 			break;
 		}
-		ret = cma_set_qkey(id_priv);
+		ret = cma_set_qkey(id_priv, rep->qkey);
 		if (ret) {
 			event.event = RDMA_CM_EVENT_ADDR_ERROR;
-			event.status = -EINVAL;
-			break;
-		}
-		if (id_priv->qkey != rep->qkey) {
-			event.event = RDMA_CM_EVENT_UNREACHABLE;
-			event.status = -EINVAL;
+			event.status = ret;
 			break;
 		}
 		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
@@ -2542,27 +2676,31 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 			      struct rdma_conn_param *conn_param)
 {
 	struct ib_cm_sidr_req_param req;
-	struct rdma_route *route;
 	struct ib_cm_id	*id;
-	int ret;
+	int offset, ret;
 
-	req.private_data_len = sizeof(struct cma_hdr) +
-			       conn_param->private_data_len;
+	offset = cma_user_data_offset(id_priv);
+	req.private_data_len = offset + conn_param->private_data_len;
 	if (req.private_data_len < conn_param->private_data_len)
 		return -EINVAL;
 
-	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
-	if (!req.private_data)
-		return -ENOMEM;
+	if (req.private_data_len) {
+		req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+		if (!req.private_data)
+			return -ENOMEM;
+	} else {
+		req.private_data = NULL;
+	}
 
 	if (conn_param->private_data && conn_param->private_data_len)
-		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
+		memcpy((void *) req.private_data + offset,
 		       conn_param->private_data, conn_param->private_data_len);
 
-	route = &id_priv->id.route;
-	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
-	if (ret)
-		goto out;
+	if (req.private_data) {
+		ret = cma_format_hdr((void *) req.private_data, id_priv);
+		if (ret)
+			goto out;
+	}
 
 	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
 			     id_priv);
@@ -2572,9 +2710,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 	}
 	id_priv->cm_id.ib = id;
 
-	req.path = route->path_rec;
-	req.service_id = cma_get_service_id(id_priv->id.ps,
-					    (struct sockaddr *) &route->addr.dst_addr);
+	req.path = id_priv->id.route.path_rec;
+	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
 
@@ -2598,14 +2735,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	int offset, ret;
 
 	memset(&req, 0, sizeof req);
-	offset = cma_user_data_offset(id_priv->id.ps);
+	offset = cma_user_data_offset(id_priv);
 	req.private_data_len = offset + conn_param->private_data_len;
 	if (req.private_data_len < conn_param->private_data_len)
 		return -EINVAL;
 
-	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
-	if (!private_data)
-		return -ENOMEM;
+	if (req.private_data_len) {
+		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+		if (!private_data)
+			return -ENOMEM;
+	} else {
+		private_data = NULL;
+	}
 
 	if (conn_param->private_data && conn_param->private_data_len)
 		memcpy(private_data + offset, conn_param->private_data,
@@ -2619,17 +2760,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	id_priv->cm_id.ib = id;
 
 	route = &id_priv->id.route;
-	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
-	if (ret)
-		goto out;
-	req.private_data = private_data;
+	if (private_data) {
+		ret = cma_format_hdr(private_data, id_priv);
+		if (ret)
+			goto out;
+		req.private_data = private_data;
+	}
 
 	req.primary_path = &route->path_rec[0];
 	if (route->num_paths == 2)
 		req.alternate_path = &route->path_rec[1];
 
-	req.service_id = cma_get_service_id(id_priv->id.ps,
-					    (struct sockaddr *) &route->addr.dst_addr);
+	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.qp_num = id_priv->qp_num;
 	req.qp_type = id_priv->id.qp_type;
 	req.starting_psn = id_priv->seq_num;
@@ -2668,10 +2810,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 
 	id_priv->cm_id.iw = cm_id;
 
-	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+	sin = (struct sockaddr_in *) cma_src_addr(id_priv);
 	cm_id->local_addr = *sin;
 
-	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+	sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
 	cm_id->remote_addr = *sin;
 
 	ret = cma_modify_qp_rtr(id_priv, conn_param);
@@ -2789,7 +2931,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 }
 
 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
-			     enum ib_cm_sidr_status status,
+			     enum ib_cm_sidr_status status, u32 qkey,
 			     const void *private_data, int private_data_len)
 {
 	struct ib_cm_sidr_rep_param rep;
@@ -2798,7 +2940,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	memset(&rep, 0, sizeof rep);
 	rep.status = status;
 	if (status == IB_SIDR_SUCCESS) {
-		ret = cma_set_qkey(id_priv);
+		ret = cma_set_qkey(id_priv, qkey);
 		if (ret)
 			return ret;
 		rep.qp_num = id_priv->qp_num;
@@ -2832,11 +2974,12 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 		if (id->qp_type == IB_QPT_UD) {
 			if (conn_param)
 				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+							conn_param->qkey,
 							conn_param->private_data,
 							conn_param->private_data_len);
 			else
 				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
-							NULL, 0);
+							0, NULL, 0);
 		} else {
 			if (conn_param)
 				ret = cma_accept_ib(id_priv, conn_param);
@@ -2897,7 +3040,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
 		if (id->qp_type == IB_QPT_UD)
-			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
+			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
 						private_data, private_data_len);
 		else
 			ret = ib_send_cm_rej(id_priv->cm_id.ib,
@@ -2958,6 +3101,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
 		return 0;
 
+	if (!status)
+		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
 	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -3004,6 +3149,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
 								 0xFF10A01B)) {
 		/* IPv6 address is an SA assigned MGID. */
 		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else if (addr->sa_family == AF_IB) {
+		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
 	} else if ((addr->sa_family == AF_INET6)) {
 		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
 		if (id_priv->id.ps == RDMA_PS_UDP)
@@ -3031,9 +3178,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	if (ret)
 		return ret;
 
+	ret = cma_set_qkey(id_priv, 0);
+	if (ret)
+		return ret;
+
 	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
-	if (id_priv->id.ps == RDMA_PS_UDP)
-		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+	rec.qkey = cpu_to_be32(id_priv->qkey);
 	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
 	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	rec.join_state = 1;
@@ -3170,7 +3320,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	if (!mc)
 		return -ENOMEM;
 
-	memcpy(&mc->addr, addr, ip_addr_size(addr));
+	memcpy(&mc->addr, addr, rdma_addr_size(addr));
 	mc->context = context;
 	mc->id_priv = id_priv;
 
@@ -3215,7 +3365,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irq(&id_priv->lock);
 	list_for_each_entry(mc, &id_priv->mc_list, list) {
-		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
 			list_del(&mc->list);
 			spin_unlock_irq(&id_priv->lock);
 
@@ -3436,33 +3586,16 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
 			id_stats->bound_dev_if =
 				id->route.addr.dev_addr.bound_dev_if;
 
-			if (id->route.addr.src_addr.ss_family == AF_INET) {
-				if (ibnl_put_attr(skb, nlh,
-						  sizeof(struct sockaddr_in),
-						  &id->route.addr.src_addr,
-						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
-					goto out;
-				}
-				if (ibnl_put_attr(skb, nlh,
-						  sizeof(struct sockaddr_in),
-						  &id->route.addr.dst_addr,
-						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
-					goto out;
-				}
-			} else if (id->route.addr.src_addr.ss_family == AF_INET6) {
-				if (ibnl_put_attr(skb, nlh,
-						  sizeof(struct sockaddr_in6),
-						  &id->route.addr.src_addr,
-						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
-					goto out;
-				}
-				if (ibnl_put_attr(skb, nlh,
-						  sizeof(struct sockaddr_in6),
-						  &id->route.addr.dst_addr,
-						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
-					goto out;
-				}
-			}
+			if (ibnl_put_attr(skb, nlh,
+					  rdma_addr_size(cma_src_addr(id_priv)),
+					  cma_src_addr(id_priv),
+					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
+				goto out;
+			if (ibnl_put_attr(skb, nlh,
+					  rdma_addr_size(cma_src_addr(id_priv)),
+					  cma_dst_addr(id_priv),
+					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
+				goto out;
 
 			id_stats->pid		= id_priv->owner;
 			id_stats->port_space	= id->ps;
@@ -3527,7 +3660,6 @@ static void __exit cma_cleanup(void)
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
-	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
 	idr_destroy(&udp_ps);
 	idr_destroy(&ipoib_ps);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 934f45e79e5e..9838ca484389 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -652,6 +652,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
 }
 EXPORT_SYMBOL(ib_sa_unpack_path);
 
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+{
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_path);
+
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 				    int status,
 				    struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 99904f7d59e3..cde1e7b5b85d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -545,8 +545,10 @@ static int add_port(struct ib_device *device, int port_num,
 
 	p->gid_group.name  = "gids";
 	p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
-	if (!p->gid_group.attrs)
+	if (!p->gid_group.attrs) {
+		ret = -ENOMEM;
 		goto err_remove_pma;
+	}
 
 	ret = sysfs_create_group(&p->kobj, &p->gid_group);
 	if (ret)
@@ -555,8 +557,10 @@ static int add_port(struct ib_device *device, int port_num,
 	p->pkey_group.name  = "pkeys";
 	p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
 						attr.pkey_tbl_len);
-	if (!p->pkey_group.attrs)
+	if (!p->pkey_group.attrs) {
+		ret = -ENOMEM;
 		goto err_remove_gid;
+	}
 
 	ret = sysfs_create_group(&p->kobj, &p->pkey_group);
 	if (ret)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 5ca44cd9b00c..b0f189be543b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -47,6 +47,8 @@
 #include <rdma/ib_marshall.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -510,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
-static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
+static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
 			      int in_len, int out_len)
 {
-	struct rdma_ucm_bind_addr cmd;
+	struct rdma_ucm_bind_ip cmd;
 	struct ucma_context *ctx;
 	int ret;
 
@@ -529,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
+static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
+			 int in_len, int out_len)
+{
+	struct rdma_ucm_bind cmd;
+	struct sockaddr *addr;
+	struct ucma_context *ctx;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	addr = (struct sockaddr *) &cmd.addr;
+	if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
+		return -EINVAL;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	ret = rdma_bind_addr(ctx->cm_id, addr);
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
+static ssize_t ucma_resolve_ip(struct ucma_file *file,
+			       const char __user *inbuf,
+			       int in_len, int out_len)
+{
+	struct rdma_ucm_resolve_ip cmd;
+	struct ucma_context *ctx;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
+				(struct sockaddr *) &cmd.dst_addr,
+				cmd.timeout_ms);
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
 static ssize_t ucma_resolve_addr(struct ucma_file *file,
 				 const char __user *inbuf,
 				 int in_len, int out_len)
 {
 	struct rdma_ucm_resolve_addr cmd;
+	struct sockaddr *src, *dst;
 	struct ucma_context *ctx;
 	int ret;
 
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
+	src = (struct sockaddr *) &cmd.src_addr;
+	dst = (struct sockaddr *) &cmd.dst_addr;
+	if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
+	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
+		return -EINVAL;
+
 	ctx = ucma_get_ctx(file, cmd.id);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
-				(struct sockaddr *) &cmd.dst_addr,
-				cmd.timeout_ms);
+	ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
 	ucma_put_ctx(ctx);
 	return ret;
 }
@@ -649,7 +702,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
 				const char __user *inbuf,
 				int in_len, int out_len)
 {
-	struct rdma_ucm_query_route cmd;
+	struct rdma_ucm_query cmd;
 	struct rdma_ucm_query_route_resp resp;
 	struct ucma_context *ctx;
 	struct sockaddr *addr;
@@ -709,7 +762,162 @@ out:
 	return ret;
 }
 
-static void ucma_copy_conn_param(struct rdma_conn_param *dst,
+static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
+				   struct rdma_ucm_query_addr_resp *resp)
+{
+	if (!cm_id->device)
+		return;
+
+	resp->node_guid = (__force __u64) cm_id->device->node_guid;
+	resp->port_num = cm_id->port_num;
+	resp->pkey = (__force __u16) cpu_to_be16(
+		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
+}
+
+static ssize_t ucma_query_addr(struct ucma_context *ctx,
+			       void __user *response, int out_len)
+{
+	struct rdma_ucm_query_addr_resp resp;
+	struct sockaddr *addr;
+	int ret = 0;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	memset(&resp, 0, sizeof resp);
+
+	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
+	resp.src_size = rdma_addr_size(addr);
+	memcpy(&resp.src_addr, addr, resp.src_size);
+
+	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
+	resp.dst_size = rdma_addr_size(addr);
+	memcpy(&resp.dst_addr, addr, resp.dst_size);
+
+	ucma_query_device_addr(ctx->cm_id, &resp);
+
+	if (copy_to_user(response, &resp, sizeof(resp)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static ssize_t ucma_query_path(struct ucma_context *ctx,
+			       void __user *response, int out_len)
+{
+	struct rdma_ucm_query_path_resp *resp;
+	int i, ret = 0;
+
+	if (out_len < sizeof(*resp))
+		return -ENOSPC;
+
+	resp = kzalloc(out_len, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	resp->num_paths = ctx->cm_id->route.num_paths;
+	for (i = 0, out_len -= sizeof(*resp);
+	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
+	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
+
+		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
+					   IB_PATH_BIDIRECTIONAL;
+		ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
+				&resp->path_data[i].path_rec);
+	}
+
+	if (copy_to_user(response, resp,
+			 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+		ret = -EFAULT;
+
+	kfree(resp);
+	return ret;
+}
+
+static ssize_t ucma_query_gid(struct ucma_context *ctx,
+			      void __user *response, int out_len)
+{
+	struct rdma_ucm_query_addr_resp resp;
+	struct sockaddr_ib *addr;
+	int ret = 0;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	memset(&resp, 0, sizeof resp);
+
+	ucma_query_device_addr(ctx->cm_id, &resp);
+
+	addr = (struct sockaddr_ib *) &resp.src_addr;
+	resp.src_size = sizeof(*addr);
+	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
+		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
+	} else {
+		addr->sib_family = AF_IB;
+		addr->sib_pkey = (__force __be16) resp.pkey;
+		rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
+				   (union ib_gid *) &addr->sib_addr);
+		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+						    &ctx->cm_id->route.addr.src_addr);
+	}
+
+	addr = (struct sockaddr_ib *) &resp.dst_addr;
+	resp.dst_size = sizeof(*addr);
+	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
+		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
+	} else {
+		addr->sib_family = AF_IB;
+		addr->sib_pkey = (__force __be16) resp.pkey;
+		rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
+				   (union ib_gid *) &addr->sib_addr);
+		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+						    &ctx->cm_id->route.addr.dst_addr);
+	}
+
+	if (copy_to_user(response, &resp, sizeof(resp)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static ssize_t ucma_query(struct ucma_file *file,
+			  const char __user *inbuf,
+			  int in_len, int out_len)
+{
+	struct rdma_ucm_query cmd;
+	struct ucma_context *ctx;
+	void __user *response;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	response = (void __user *)(unsigned long) cmd.response;
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	switch (cmd.option) {
+	case RDMA_USER_CM_QUERY_ADDR:
+		ret = ucma_query_addr(ctx, response, out_len);
+		break;
+	case RDMA_USER_CM_QUERY_PATH:
+		ret = ucma_query_path(ctx, response, out_len);
+		break;
+	case RDMA_USER_CM_QUERY_GID:
+		ret = ucma_query_gid(ctx, response, out_len);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
+static void ucma_copy_conn_param(struct rdma_cm_id *id,
+				 struct rdma_conn_param *dst,
 				 struct rdma_ucm_conn_param *src)
 {
 	dst->private_data = src->private_data;
@@ -721,6 +929,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst,
 	dst->rnr_retry_count = src->rnr_retry_count;
 	dst->srq = src->srq;
 	dst->qp_num = src->qp_num;
+	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
 }
 
 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -741,7 +950,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
 	ret = rdma_connect(ctx->cm_id, &conn_param);
 	ucma_put_ctx(ctx);
 	return ret;
@@ -784,7 +993,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
 		return PTR_ERR(ctx);
 
 	if (cmd.conn_param.valid) {
-		ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
 		mutex_lock(&file->mut);
 		ret = rdma_accept(ctx->cm_id, &conn_param);
 		if (!ret)
@@ -1020,23 +1229,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
-static ssize_t ucma_join_multicast(struct ucma_file *file,
-				   const char __user *inbuf,
-				   int in_len, int out_len)
+static ssize_t ucma_process_join(struct ucma_file *file,
+				 struct rdma_ucm_join_mcast *cmd,  int out_len)
 {
-	struct rdma_ucm_join_mcast cmd;
 	struct rdma_ucm_create_id_resp resp;
 	struct ucma_context *ctx;
 	struct ucma_multicast *mc;
+	struct sockaddr *addr;
 	int ret;
 
 	if (out_len < sizeof(resp))
 		return -ENOSPC;
 
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
+	addr = (struct sockaddr *) &cmd->addr;
+	if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+		return -EINVAL;
 
-	ctx = ucma_get_ctx(file, cmd.id);
+	ctx = ucma_get_ctx(file, cmd->id);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
@@ -1047,14 +1256,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
 		goto err1;
 	}
 
-	mc->uid = cmd.uid;
-	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+	mc->uid = cmd->uid;
+	memcpy(&mc->addr, addr, cmd->addr_size);
 	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
 	if (ret)
 		goto err2;
 
 	resp.id = mc->id;
-	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+	if (copy_to_user((void __user *)(unsigned long) cmd->response,
 			 &resp, sizeof(resp))) {
 		ret = -EFAULT;
 		goto err3;
@@ -1079,6 +1288,38 @@ err1:
 	return ret;
 }
 
+static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
+				      const char __user *inbuf,
+				      int in_len, int out_len)
+{
+	struct rdma_ucm_join_ip_mcast cmd;
+	struct rdma_ucm_join_mcast join_cmd;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	join_cmd.response = cmd.response;
+	join_cmd.uid = cmd.uid;
+	join_cmd.id = cmd.id;
+	join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+	join_cmd.reserved = 0;
+	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
+
+	return ucma_process_join(file, &join_cmd, out_len);
+}
+
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+				   const char __user *inbuf,
+				   int in_len, int out_len)
+{
+	struct rdma_ucm_join_mcast cmd;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	return ucma_process_join(file, &cmd, out_len);
+}
+
 static ssize_t ucma_leave_multicast(struct ucma_file *file,
 				    const char __user *inbuf,
 				    int in_len, int out_len)
@@ -1221,25 +1462,29 @@ file_put:
 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
 				   const char __user *inbuf,
 				   int in_len, int out_len) = {
-	[RDMA_USER_CM_CMD_CREATE_ID]	= ucma_create_id,
-	[RDMA_USER_CM_CMD_DESTROY_ID]	= ucma_destroy_id,
-	[RDMA_USER_CM_CMD_BIND_ADDR]	= ucma_bind_addr,
-	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	= ucma_resolve_addr,
-	[RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
-	[RDMA_USER_CM_CMD_QUERY_ROUTE]	= ucma_query_route,
-	[RDMA_USER_CM_CMD_CONNECT]	= ucma_connect,
-	[RDMA_USER_CM_CMD_LISTEN]	= ucma_listen,
-	[RDMA_USER_CM_CMD_ACCEPT]	= ucma_accept,
-	[RDMA_USER_CM_CMD_REJECT]	= ucma_reject,
-	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect,
-	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr,
-	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event,
-	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
-	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option,
-	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
-	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
-	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
-	[RDMA_USER_CM_CMD_MIGRATE_ID]	= ucma_migrate_id
+	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id,
+	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
+	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
+	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
+	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
+	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
+	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
+	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
+	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
+	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
+	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
+	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
+	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
+	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
+	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
+	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
+	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
+	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
+	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
+	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
+	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
+	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
+	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
 };
 
 static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a7d00f6b3bc1..b3c07b0c9f26 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -334,7 +334,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
-	ret = get_unused_fd();
+	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
 		goto err_free;
 	resp.async_fd = ret;
@@ -1184,7 +1184,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	ret = get_unused_fd();
+	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
 		return ret;
 	resp.fd = ret;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e5649e8b215d..b57c0befd962 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -883,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
 {
 	union t3_wr *wqe = qhp->wq.queue;
 	u16 count = 0;
-	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+
+	while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
 		count++;
 		wqe++;
 	}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 982e3efd98d3..cd8d290a09fc 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void)
 	if (!ctblk_cache) {
 		ehca_gen_err("Cannot create ctblk SLAB cache.");
 		ehca_cleanup_small_qp_cache();
+		ret = -ENOMEM;
 		goto create_slab_caches6;
 	}
 #endif
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644
index 000000000000..8e6aebfaf8a4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -0,0 +1,10 @@
+config MLX5_INFINIBAND
+	tristate "Mellanox Connect-IB HCA support"
+	depends on NETDEVICES && ETHERNET && PCI && X86
+	select NET_VENDOR_MELLANOX
+	select MLX5_CORE
+	---help---
+	  This driver provides low-level InfiniBand support for
+	  Mellanox Connect-IB PCI Express host channel adapters (HCAs).
+	  This is required to use InfiniBand protocols such as
+	  IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644
index 000000000000..4ea0135af484
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
+
+mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644
index 000000000000..39ab0caefdf9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx5_ib.h"
+
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+			   struct mlx5_ib_ah *ah)
+{
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
+		ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
+						(1 << 30) |
+						ah_attr->grh.sgid_index << 20);
+		ah->av.hop_limit = ah_attr->grh.hop_limit;
+		ah->av.tclass = ah_attr->grh.traffic_class;
+	}
+
+	ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+	ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+	ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+
+	return &ah->ibah;
+}
+
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	struct mlx5_ib_ah *ah;
+
+	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	return create_ib_ah(ah_attr, ah); /* never fails */
+}
+
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct mlx5_ib_ah *ah = to_mah(ibah);
+	u32 tmp;
+
+	memset(ah_attr, 0, sizeof(*ah_attr));
+
+	tmp = be32_to_cpu(ah->av.grh_gid_fl);
+	if (tmp & (1 << 30)) {
+		ah_attr->ah_flags = IB_AH_GRH;
+		ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
+		ah_attr->grh.flow_label = tmp & 0xfffff;
+		memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
+		ah_attr->grh.hop_limit = ah->av.hop_limit;
+		ah_attr->grh.traffic_class = ah->av.tclass;
+	}
+	ah_attr->dlid = be16_to_cpu(ah->av.rlid);
+	ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
+	ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+
+	return 0;
+}
+
+int mlx5_ib_destroy_ah(struct ib_ah *ah)
+{
+	kfree(to_mah(ah));
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644
index 000000000000..344ab03948a3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -0,0 +1,843 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+{
+	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
+
+	ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
+{
+	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct ib_cq *ibcq = &cq->ibcq;
+	struct ib_event event;
+
+	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
+		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
+			     type, mcq->cqn);
+		return;
+	}
+
+	if (ibcq->event_handler) {
+		event.device     = &dev->ib_dev;
+		event.event      = IB_EVENT_CQ_ERR;
+		event.element.cq = ibcq;
+		ibcq->event_handler(&event, ibcq->cq_context);
+	}
+}
+
+static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
+{
+	return mlx5_buf_offset(&buf->buf, n * size);
+}
+
+static void *get_cqe(struct mlx5_ib_cq *cq, int n)
+{
+	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+}
+
+static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
+{
+	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx5_cqe64 *cqe64;
+
+	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+	return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
+		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+}
+
+static void *next_cqe_sw(struct mlx5_ib_cq *cq)
+{
+	return get_sw_cqe(cq, cq->mcq.cons_index);
+}
+
+static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
+{
+	switch (wq->wr_data[idx]) {
+	case MLX5_IB_WR_UMR:
+		return 0;
+
+	case IB_WR_LOCAL_INV:
+		return IB_WC_LOCAL_INV;
+
+	case IB_WR_FAST_REG_MR:
+		return IB_WC_FAST_REG_MR;
+
+	default:
+		pr_warn("unknown completion status\n");
+		return 0;
+	}
+}
+
+static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+			    struct mlx5_ib_wq *wq, int idx)
+{
+	wc->wc_flags = 0;
+	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
+	case MLX5_OPCODE_RDMA_WRITE_IMM:
+		wc->wc_flags |= IB_WC_WITH_IMM;
+	case MLX5_OPCODE_RDMA_WRITE:
+		wc->opcode    = IB_WC_RDMA_WRITE;
+		break;
+	case MLX5_OPCODE_SEND_IMM:
+		wc->wc_flags |= IB_WC_WITH_IMM;
+	case MLX5_OPCODE_SEND:
+	case MLX5_OPCODE_SEND_INVAL:
+		wc->opcode    = IB_WC_SEND;
+		break;
+	case MLX5_OPCODE_RDMA_READ:
+		wc->opcode    = IB_WC_RDMA_READ;
+		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
+		break;
+	case MLX5_OPCODE_ATOMIC_CS:
+		wc->opcode    = IB_WC_COMP_SWAP;
+		wc->byte_len  = 8;
+		break;
+	case MLX5_OPCODE_ATOMIC_FA:
+		wc->opcode    = IB_WC_FETCH_ADD;
+		wc->byte_len  = 8;
+		break;
+	case MLX5_OPCODE_ATOMIC_MASKED_CS:
+		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
+		wc->byte_len  = 8;
+		break;
+	case MLX5_OPCODE_ATOMIC_MASKED_FA:
+		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
+		wc->byte_len  = 8;
+		break;
+	case MLX5_OPCODE_BIND_MW:
+		wc->opcode    = IB_WC_BIND_MW;
+		break;
+	case MLX5_OPCODE_UMR:
+		wc->opcode = get_umr_comp(wq, idx);
+		break;
+	}
+}
+
+enum {
+	MLX5_GRH_IN_BUFFER = 1,
+	MLX5_GRH_IN_CQE	   = 2,
+};
+
+static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+			     struct mlx5_ib_qp *qp)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+	struct mlx5_ib_srq *srq;
+	struct mlx5_ib_wq *wq;
+	u16 wqe_ctr;
+	u8 g;
+
+	if (qp->ibqp.srq || qp->ibqp.xrcd) {
+		struct mlx5_core_srq *msrq = NULL;
+
+		if (qp->ibqp.xrcd) {
+			msrq = mlx5_core_get_srq(&dev->mdev,
+						 be32_to_cpu(cqe->srqn));
+			srq = to_mibsrq(msrq);
+		} else {
+			srq = to_msrq(qp->ibqp.srq);
+		}
+		if (srq) {
+			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
+			wc->wr_id = srq->wrid[wqe_ctr];
+			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+			if (msrq && atomic_dec_and_test(&msrq->refcount))
+				complete(&msrq->free);
+		}
+	} else {
+		wq	  = &qp->rq;
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		++wq->tail;
+	}
+	wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+
+	switch (cqe->op_own >> 4) {
+	case MLX5_CQE_RESP_WR_IMM:
+		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
+		wc->wc_flags	= IB_WC_WITH_IMM;
+		wc->ex.imm_data = cqe->imm_inval_pkey;
+		break;
+	case MLX5_CQE_RESP_SEND:
+		wc->opcode   = IB_WC_RECV;
+		wc->wc_flags = 0;
+		break;
+	case MLX5_CQE_RESP_SEND_IMM:
+		wc->opcode	= IB_WC_RECV;
+		wc->wc_flags	= IB_WC_WITH_IMM;
+		wc->ex.imm_data = cqe->imm_inval_pkey;
+		break;
+	case MLX5_CQE_RESP_SEND_INV:
+		wc->opcode	= IB_WC_RECV;
+		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
+		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+		break;
+	}
+	wc->slid	   = be16_to_cpu(cqe->slid);
+	wc->sl		   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
+	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
+	wc->dlid_path_bits = cqe->ml_path;
+	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
+	wc->wc_flags |= g ? IB_WC_GRH : 0;
+	wc->pkey_index     = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+}
+
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+{
+	__be32 *p = (__be32 *)cqe;
+	int i;
+
+	mlx5_ib_warn(dev, "dump error cqe\n");
+	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
+		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
+			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
+			be32_to_cpu(p[3]));
+}
+
+static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
+				  struct mlx5_err_cqe *cqe,
+				  struct ib_wc *wc)
+{
+	int dump = 1;
+
+	switch (cqe->syndrome) {
+	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+		wc->status = IB_WC_LOC_LEN_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+		wc->status = IB_WC_LOC_QP_OP_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+		wc->status = IB_WC_LOC_PROT_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
+		dump = 0;
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
+		wc->status = IB_WC_MW_BIND_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
+		wc->status = IB_WC_BAD_RESP_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+		wc->status = IB_WC_LOC_ACCESS_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+		wc->status = IB_WC_REM_INV_REQ_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+		wc->status = IB_WC_REM_ACCESS_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+		wc->status = IB_WC_REM_OP_ERR;
+		break;
+	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+		wc->status = IB_WC_RETRY_EXC_ERR;
+		dump = 0;
+		break;
+	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+		dump = 0;
+		break;
+	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+		wc->status = IB_WC_REM_ABORT_ERR;
+		break;
+	default:
+		wc->status = IB_WC_GENERAL_ERR;
+		break;
+	}
+
+	wc->vendor_err = cqe->vendor_err_synd;
+	if (dump)
+		dump_cqe(dev, cqe);
+}
+
+static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+	/* TBD: waiting decision
+	*/
+	return 0;
+}
+
+static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+	struct mlx5_wqe_data_seg *dpseg;
+	void *addr;
+
+	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
+		sizeof(struct mlx5_wqe_raddr_seg) +
+		sizeof(struct mlx5_wqe_atomic_seg);
+	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
+	return addr;
+}
+
+static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+			  uint16_t idx)
+{
+	void *addr;
+	int byte_count;
+	int i;
+
+	if (!is_atomic_response(qp, idx))
+		return;
+
+	byte_count = be32_to_cpu(cqe64->byte_cnt);
+	addr = mlx5_get_atomic_laddr(qp, idx);
+
+	if (byte_count == 4) {
+		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
+	} else {
+		for (i = 0; i < byte_count; i += 8) {
+			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
+			addr += 8;
+		}
+	}
+
+	return;
+}
+
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+			   u16 tail, u16 head)
+{
+	int idx;
+
+	do {
+		idx = tail & (qp->sq.wqe_cnt - 1);
+		handle_atomic(qp, cqe64, idx);
+		if (idx == head)
+			break;
+
+		tail = qp->sq.w_list[idx].next;
+	} while (1);
+	tail = qp->sq.w_list[idx].next;
+	qp->sq.last_poll = tail;
+}
+
+static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+			 struct mlx5_ib_qp **cur_qp,
+			 struct ib_wc *wc)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct mlx5_err_cqe *err_cqe;
+	struct mlx5_cqe64 *cqe64;
+	struct mlx5_core_qp *mqp;
+	struct mlx5_ib_wq *wq;
+	uint8_t opcode;
+	uint32_t qpn;
+	u16 wqe_ctr;
+	void *cqe;
+	int idx;
+
+	cqe = next_cqe_sw(cq);
+	if (!cqe)
+		return -EAGAIN;
+
+	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
+	++cq->mcq.cons_index;
+
+	/* Make sure we read CQ entry contents after we've checked the
+	 * ownership bit.
+	 */
+	rmb();
+
+	/* TBD: resize CQ */
+
+	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
+	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+		/* We do not have to take the QP table lock here,
+		 * because CQs will be locked while QPs are removed
+		 * from the table.
+		 */
+		mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
+		if (unlikely(!mqp)) {
+			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
+				     cq->mcq.cqn, qpn);
+			return -EINVAL;
+		}
+
+		*cur_qp = to_mibqp(mqp);
+	}
+
+	wc->qp  = &(*cur_qp)->ibqp;
+	opcode = cqe64->op_own >> 4;
+	switch (opcode) {
+	case MLX5_CQE_REQ:
+		wq = &(*cur_qp)->sq;
+		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+		idx = wqe_ctr & (wq->wqe_cnt - 1);
+		handle_good_req(wc, cqe64, wq, idx);
+		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
+		wc->wr_id = wq->wrid[idx];
+		wq->tail = wq->wqe_head[idx] + 1;
+		wc->status = IB_WC_SUCCESS;
+		break;
+	case MLX5_CQE_RESP_WR_IMM:
+	case MLX5_CQE_RESP_SEND:
+	case MLX5_CQE_RESP_SEND_IMM:
+	case MLX5_CQE_RESP_SEND_INV:
+		handle_responder(wc, cqe64, *cur_qp);
+		wc->status = IB_WC_SUCCESS;
+		break;
+	case MLX5_CQE_RESIZE_CQ:
+		break;
+	case MLX5_CQE_REQ_ERR:
+	case MLX5_CQE_RESP_ERR:
+		err_cqe = (struct mlx5_err_cqe *)cqe64;
+		mlx5_handle_error_cqe(dev, err_cqe, wc);
+		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
+			    opcode == MLX5_CQE_REQ_ERR ?
+			    "Requestor" : "Responder", cq->mcq.cqn);
+		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
+			    err_cqe->syndrome, err_cqe->vendor_err_synd);
+		if (opcode == MLX5_CQE_REQ_ERR) {
+			wq = &(*cur_qp)->sq;
+			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+			idx = wqe_ctr & (wq->wqe_cnt - 1);
+			wc->wr_id = wq->wrid[idx];
+			wq->tail = wq->wqe_head[idx] + 1;
+		} else {
+			struct mlx5_ib_srq *srq;
+
+			if ((*cur_qp)->ibqp.srq) {
+				srq = to_msrq((*cur_qp)->ibqp.srq);
+				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+				wc->wr_id = srq->wrid[wqe_ctr];
+				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+			} else {
+				wq = &(*cur_qp)->rq;
+				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+				++wq->tail;
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct mlx5_ib_cq *cq = to_mcq(ibcq);
+	struct mlx5_ib_qp *cur_qp = NULL;
+	unsigned long flags;
+	int npolled;
+	int err = 0;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	for (npolled = 0; npolled < num_entries; npolled++) {
+		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
+		if (err)
+			break;
+	}
+
+	if (npolled)
+		mlx5_cq_set_ci(&cq->mcq);
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	if (err == 0 || err == -EAGAIN)
+		return npolled;
+	else
+		return err;
+}
+
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
+		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
+		    to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
+		    MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));
+
+	return 0;
+}
+
+static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+			int nent, int cqe_size)
+{
+	int err;
+
+	err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
+			     PAGE_SIZE * 2, &buf->buf);
+	if (err)
+		return err;
+
+	buf->cqe_size = cqe_size;
+
+	return 0;
+}
+
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+	mlx5_buf_free(&dev->mdev, &buf->buf);
+}
+
+static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
+			  int entries, struct mlx5_create_cq_mbox_in **cqb,
+			  int *cqe_size, int *index, int *inlen)
+{
+	struct mlx5_ib_create_cq ucmd;
+	int page_shift;
+	int npages;
+	int ncont;
+	int err;
+
+	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+		return -EFAULT;
+
+	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+		return -EINVAL;
+
+	*cqe_size = ucmd.cqe_size;
+
+	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
+				   entries * ucmd.cqe_size,
+				   IB_ACCESS_LOCAL_WRITE, 1);
+	if (IS_ERR(cq->buf.umem)) {
+		err = PTR_ERR(cq->buf.umem);
+		return err;
+	}
+
+	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+				  &cq->db);
+	if (err)
+		goto err_umem;
+
+	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+			   &ncont, NULL);
+	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
+		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+
+	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+	*cqb = mlx5_vzalloc(*inlen);
+	if (!*cqb) {
+		err = -ENOMEM;
+		goto err_db;
+	}
+	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
+	(*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+
+	*index = to_mucontext(context)->uuari.uars[0].index;
+
+	return 0;
+
+err_db:
+	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+
+err_umem:
+	ib_umem_release(cq->buf.umem);
+	return err;
+}
+
+static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+{
+	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+	ib_umem_release(cq->buf.umem);
+}
+
+static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
+{
+	int i;
+	void *cqe;
+	struct mlx5_cqe64 *cqe64;
+
+	for (i = 0; i < nent; i++) {
+		cqe = get_cqe(cq, i);
+		cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
+		cqe64->op_own = 0xf1;
+	}
+}
+
+static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+			    int entries, int cqe_size,
+			    struct mlx5_create_cq_mbox_in **cqb,
+			    int *index, int *inlen)
+{
+	int err;
+
+	err = mlx5_db_alloc(&dev->mdev, &cq->db);
+	if (err)
+		return err;
+
+	cq->mcq.set_ci_db  = cq->db.db;
+	cq->mcq.arm_db     = cq->db.db + 1;
+	*cq->mcq.set_ci_db = 0;
+	*cq->mcq.arm_db    = 0;
+	cq->mcq.cqe_sz = cqe_size;
+
+	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+	if (err)
+		goto err_db;
+
+	init_cq_buf(cq, entries);
+
+	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+	*cqb = mlx5_vzalloc(*inlen);
+	if (!*cqb) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+	*index = dev->mdev.priv.uuari.uars[0].index;
+
+	return 0;
+
+err_buf:
+	free_cq_buf(dev, &cq->buf);
+
+err_db:
+	mlx5_db_free(&dev->mdev, &cq->db);
+	return err;
+}
+
+static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+	free_cq_buf(dev, &cq->buf);
+	mlx5_db_free(&dev->mdev, &cq->db);
+}
+
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+				int vector, struct ib_ucontext *context,
+				struct ib_udata *udata)
+{
+	struct mlx5_create_cq_mbox_in *cqb = NULL;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_cq *cq;
+	int uninitialized_var(index);
+	int uninitialized_var(inlen);
+	int cqe_size;
+	int irqn;
+	int eqn;
+	int err;
+
+	entries = roundup_pow_of_two(entries + 1);
+	if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+		return ERR_PTR(-EINVAL);
+
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return ERR_PTR(-ENOMEM);
+
+	cq->ibcq.cqe = entries - 1;
+	mutex_init(&cq->resize_mutex);
+	spin_lock_init(&cq->lock);
+	cq->resize_buf = NULL;
+	cq->resize_umem = NULL;
+
+	if (context) {
+		err = create_cq_user(dev, udata, context, cq, entries,
+				     &cqb, &cqe_size, &index, &inlen);
+		if (err)
+			goto err_create;
+	} else {
+		/* for now choose 64 bytes till we have a proper interface */
+		cqe_size = 64;
+		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
+				       &index, &inlen);
+		if (err)
+			goto err_create;
+	}
+
+	cq->cqe_size = cqe_size;
+	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
+	err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
+	if (err)
+		goto err_cqb;
+
+	cqb->ctx.c_eqn = cpu_to_be16(eqn);
+	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+
+	err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
+	if (err)
+		goto err_cqb;
+
+	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
+	cq->mcq.irqn = irqn;
+	cq->mcq.comp  = mlx5_ib_cq_comp;
+	cq->mcq.event = mlx5_ib_cq_event;
+
+	if (context)
+		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
+			err = -EFAULT;
+			goto err_cmd;
+		}
+
+
+	mlx5_vfree(cqb);
+	return &cq->ibcq;
+
+err_cmd:
+	mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
+
+err_cqb:
+	mlx5_vfree(cqb);
+	if (context)
+		destroy_cq_user(cq, context);
+	else
+		destroy_cq_kernel(dev, cq);
+
+err_create:
+	kfree(cq);
+
+	return ERR_PTR(err);
+}
+
+
+int mlx5_ib_destroy_cq(struct ib_cq *cq)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->device);
+	struct mlx5_ib_cq *mcq = to_mcq(cq);
+	struct ib_ucontext *context = NULL;
+
+	if (cq->uobject)
+		context = cq->uobject->context;
+
+	mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
+	if (context)
+		destroy_cq_user(mcq, context);
+	else
+		destroy_cq_kernel(dev, mcq);
+
+	kfree(mcq);
+
+	return 0;
+}
+
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
+			u32 rsn)
+{
+	u32 lrsn;
+
+	if (srq)
+		lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
+	else
+		lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
+
+	return rsn == lrsn;
+}
+
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+{
+	struct mlx5_cqe64 *cqe64, *dest64;
+	void *cqe, *dest;
+	u32 prod_index;
+	int nfreed = 0;
+	u8 owner_bit;
+
+	if (!cq)
+		return;
+
+	/* First we need to find the current producer index, so we
+	 * know where to start cleaning from.  It doesn't matter if HW
+	 * adds new entries after this loop -- the QP we're worried
+	 * about is already in RESET, so the new entries won't come
+	 * from our QP and therefore don't need to be checked.
+	 */
+	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
+		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
+			break;
+
+	/* Now sweep backwards through the CQ, removing CQ entries
+	 * that match our QP by copying older entries on top of them.
+	 */
+	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+		if (is_equal_rsn(cqe64, srq, rsn)) {
+			if (srq)
+				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
+			++nfreed;
+		} else if (nfreed) {
+			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
+			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
+			memcpy(dest, cqe, cq->mcq.cqe_sz);
+			dest64->op_own = owner_bit |
+				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
+		}
+	}
+
+	if (nfreed) {
+		cq->mcq.cons_index += nfreed;
+		/* Make sure update of buffer contents is done before
+		 * updating consumer index.
+		 */
+		wmb();
+		mlx5_cq_set_ci(&cq->mcq);
+	}
+}
+
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
+{
+	if (!cq)
+		return;
+
+	spin_lock_irq(&cq->lock);
+	__mlx5_ib_cq_clean(cq, qpn, srq);
+	spin_unlock_irq(&cq->lock);
+}
+
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+	return -ENOSYS;
+}
+
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+	return -ENOSYS;
+}
+
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+{
+	struct mlx5_ib_cq *cq;
+
+	if (!ibcq)
+		return 128;
+
+	cq = to_mcq(ibcq);
+	return cq->cqe_size;
+}
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644
index 000000000000..256a23344f28
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+
+#include "mlx5_ib.h"
+
+struct mlx5_ib_user_db_page {
+	struct list_head	list;
+	struct ib_umem	       *umem;
+	unsigned long		user_virt;
+	int			refcnt;
+};
+
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+			struct mlx5_db *db)
+{
+	struct mlx5_ib_user_db_page *page;
+	struct ib_umem_chunk *chunk;
+	int err = 0;
+
+	mutex_lock(&context->db_page_mutex);
+
+	list_for_each_entry(page, &context->db_page_list, list)
+		if (page->user_virt == (virt & PAGE_MASK))
+			goto found;
+
+	page = kmalloc(sizeof(*page), GFP_KERNEL);
+	if (!page) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	page->user_virt = (virt & PAGE_MASK);
+	page->refcnt    = 0;
+	page->umem      = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+				      PAGE_SIZE, 0, 0);
+	if (IS_ERR(page->umem)) {
+		err = PTR_ERR(page->umem);
+		kfree(page);
+		goto out;
+	}
+
+	list_add(&page->list, &context->db_page_list);
+
+found:
+	chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
+	db->dma		= sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+	db->u.user_page = page;
+	++page->refcnt;
+
+out:
+	mutex_unlock(&context->db_page_mutex);
+
+	return err;
+}
+
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
+{
+	mutex_lock(&context->db_page_mutex);
+
+	if (!--db->u.user_page->refcnt) {
+		list_del(&db->u.user_page->list);
+		ib_umem_release(db->u.user_page->umem);
+		kfree(db->u.user_page);
+	}
+
+	mutex_unlock(&context->db_page_mutex);
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644
index 000000000000..5c8938be0e08
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include "mlx5_ib.h"
+
+enum {
+	MLX5_IB_VENDOR_CLASS1 = 0x9,
+	MLX5_IB_VENDOR_CLASS2 = 0xa
+};
+
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+		 void *in_mad, void *response_mad)
+{
+	u8 op_modifier = 0;
+
+	/* Key check traps can't be generated unless we have in_wc to
+	 * tell us where to send the trap.
+	 */
+	if (ignore_mkey || !in_wc)
+		op_modifier |= 0x1;
+	if (ignore_bkey || !in_wc)
+		op_modifier |= 0x2;
+
+	return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
+}
+
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			struct ib_wc *in_wc, struct ib_grh *in_grh,
+			struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	u16 slid;
+	int err;
+
+	slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+		if (in_mad->mad_hdr.method   != IB_MGMT_METHOD_GET &&
+		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_SET &&
+		    in_mad->mad_hdr.method   != IB_MGMT_METHOD_TRAP_REPRESS)
+			return IB_MAD_RESULT_SUCCESS;
+
+		/* Don't process SMInfo queries -- the SMA can't handle them.
+		 */
+		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+			return IB_MAD_RESULT_SUCCESS;
+	} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+		   in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1   ||
+		   in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2   ||
+		   in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
+		if (in_mad->mad_hdr.method  != IB_MGMT_METHOD_GET &&
+		    in_mad->mad_hdr.method  != IB_MGMT_METHOD_SET)
+			return IB_MAD_RESULT_SUCCESS;
+	} else {
+		return IB_MAD_RESULT_SUCCESS;
+	}
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev),
+			   mad_flags & IB_MAD_IGNORE_MKEY,
+			   mad_flags & IB_MAD_IGNORE_BKEY,
+			   port_num, in_wc, in_grh, in_mad, out_mad);
+	if (err)
+		return IB_MAD_RESULT_FAILURE;
+
+	/* set return bit in status of directed route responses */
+	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+	if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+		/* no response for trap repress */
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+	u16 packet_error;
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	init_query_mad(in_mad);
+	in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+	in_mad->attr_mod = cpu_to_be32(port);
+
+	err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+
+	packet_error = be16_to_cpu(out_mad->status);
+
+	dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
+		MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644
index 000000000000..8000fff4d444
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -0,0 +1,1504 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/sched.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include "user.h"
+#include "mlx5_ib.h"
+
+#define DRIVER_NAME "mlx5_ib"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_RELDATE	"June 2013"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+static int prof_sel = 2;
+module_param_named(prof_sel, prof_sel, int, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+static char mlx5_version[] =
+	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
+	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+
+static struct mlx5_profile profile[] = {
+	[0] = {
+		.mask		= 0,
+	},
+	[1] = {
+		.mask		= MLX5_PROF_MASK_QP_SIZE,
+		.log_max_qp	= 12,
+	},
+	[2] = {
+		.mask		= MLX5_PROF_MASK_QP_SIZE |
+				  MLX5_PROF_MASK_MR_CACHE,
+		.log_max_qp	= 17,
+		.mr_cache[0]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[1]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[2]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[3]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[4]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[5]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[6]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[7]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[8]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[9]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[10]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[11]	= {
+			.size	= 500,
+			.limit	= 250
+		},
+		.mr_cache[12]	= {
+			.size	= 64,
+			.limit	= 32
+		},
+		.mr_cache[13]	= {
+			.size	= 32,
+			.limit	= 16
+		},
+		.mr_cache[14]	= {
+			.size	= 16,
+			.limit	= 8
+		},
+		.mr_cache[15]	= {
+			.size	= 8,
+			.limit	= 4
+		},
+	},
+};
+
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
+{
+	struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+	struct mlx5_eq *eq, *n;
+	int err = -ENOENT;
+
+	spin_lock(&table->lock);
+	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+		if (eq->index == vector) {
+			*eqn = eq->eqn;
+			*irqn = eq->irqn;
+			err = 0;
+			break;
+		}
+	}
+	spin_unlock(&table->lock);
+
+	return err;
+}
+
+static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+	struct mlx5_eq *eq, *n;
+	int ncomp_vec;
+	int nent;
+	int err;
+	int i;
+
+	INIT_LIST_HEAD(&dev->eqs_list);
+	ncomp_vec = table->num_comp_vectors;
+	nent = MLX5_COMP_EQ_SIZE;
+	for (i = 0; i < ncomp_vec; i++) {
+		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+		if (!eq) {
+			err = -ENOMEM;
+			goto clean;
+		}
+
+		snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+		err = mlx5_create_map_eq(&dev->mdev, eq,
+					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+					 eq->name,
+					 &dev->mdev.priv.uuari.uars[0]);
+		if (err) {
+			kfree(eq);
+			goto clean;
+		}
+		mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+		eq->index = i;
+		spin_lock(&table->lock);
+		list_add_tail(&eq->list, &dev->eqs_list);
+		spin_unlock(&table->lock);
+	}
+
+	dev->num_comp_vectors = ncomp_vec;
+	return 0;
+
+clean:
+	spin_lock(&table->lock);
+	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+		list_del(&eq->list);
+		spin_unlock(&table->lock);
+		if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+			mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+		kfree(eq);
+		spin_lock(&table->lock);
+	}
+	spin_unlock(&table->lock);
+	return err;
+}
+
+static void free_comp_eqs(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+	struct mlx5_eq *eq, *n;
+
+	spin_lock(&table->lock);
+	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+		list_del(&eq->list);
+		spin_unlock(&table->lock);
+		if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+			mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+		kfree(eq);
+		spin_lock(&table->lock);
+	}
+	spin_unlock(&table->lock);
+}
+
+static int mlx5_ib_query_device(struct ib_device *ibdev,
+				struct ib_device_attr *props)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+	int max_rq_sg;
+	int max_sq_sg;
+	u64 flags;
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	init_query_mad(in_mad);
+	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	memset(props, 0, sizeof(*props));
+
+	props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
+		(fw_rev_min(&dev->mdev) << 16) |
+		fw_rev_sub(&dev->mdev);
+	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
+		IB_DEVICE_PORT_ACTIVE_EVENT		|
+		IB_DEVICE_SYS_IMAGE_GUID		|
+		IB_DEVICE_RC_RNR_NAK_GEN		|
+		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+	flags = dev->mdev.caps.flags;
+	if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
+		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+	if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
+		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+	if (flags & MLX5_DEV_CAP_FLAG_APM)
+		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+	if (flags & MLX5_DEV_CAP_FLAG_XRC)
+		props->device_cap_flags |= IB_DEVICE_XRC;
+	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+	props->vendor_id	   = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
+		0xffffff;
+	props->vendor_part_id	   = be16_to_cpup((__be16 *)(out_mad->data + 30));
+	props->hw_ver		   = be32_to_cpup((__be32 *)(out_mad->data + 32));
+	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
+
+	props->max_mr_size	   = ~0ull;
+	props->page_size_cap	   = dev->mdev.caps.min_page_sz;
+	props->max_qp		   = 1 << dev->mdev.caps.log_max_qp;
+	props->max_qp_wr	   = dev->mdev.caps.max_wqes;
+	max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+	max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+		sizeof(struct mlx5_wqe_data_seg);
+	props->max_sge = min(max_rq_sg, max_sq_sg);
+	props->max_cq		   = 1 << dev->mdev.caps.log_max_cq;
+	props->max_cqe		   = dev->mdev.caps.max_cqes - 1;
+	props->max_mr		   = 1 << dev->mdev.caps.log_max_mkey;
+	props->max_pd		   = 1 << dev->mdev.caps.log_max_pd;
+	props->max_qp_rd_atom	   = dev->mdev.caps.max_ra_req_qp;
+	props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
+	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
+	props->max_srq		   = 1 << dev->mdev.caps.log_max_srq;
+	props->max_srq_wr	   = dev->mdev.caps.max_srq_wqes - 1;
+	props->max_srq_sge	   = max_rq_sg - 1;
+	props->max_fast_reg_page_list_len = (unsigned int)-1;
+	props->local_ca_ack_delay  = dev->mdev.caps.local_ca_ack_delay;
+	props->atomic_cap	   = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
+		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+	props->masked_atomic_cap   = IB_ATOMIC_HCA;
+	props->max_pkeys	   = be16_to_cpup((__be16 *)(out_mad->data + 28));
+	props->max_mcast_grp	   = 1 << dev->mdev.caps.log_max_mcg;
+	props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
+	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+					   props->max_mcast_grp;
+	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+
+	return err;
+}
+
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+		       struct ib_port_attr *props)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int ext_active_speed;
+	int err = -ENOMEM;
+
+	if (port < 1 || port > dev->mdev.caps.num_ports) {
+		mlx5_ib_warn(dev, "invalid port number %d\n", port);
+		return -EINVAL;
+	}
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	memset(props, 0, sizeof(*props));
+
+	init_query_mad(in_mad);
+	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+	in_mad->attr_mod = cpu_to_be32(port);
+
+	err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
+	if (err) {
+		mlx5_ib_warn(dev, "err %d\n", err);
+		goto out;
+	}
+
+
+	props->lid		= be16_to_cpup((__be16 *)(out_mad->data + 16));
+	props->lmc		= out_mad->data[34] & 0x7;
+	props->sm_lid		= be16_to_cpup((__be16 *)(out_mad->data + 18));
+	props->sm_sl		= out_mad->data[36] & 0xf;
+	props->state		= out_mad->data[32] & 0xf;
+	props->phys_state	= out_mad->data[33] >> 4;
+	props->port_cap_flags	= be32_to_cpup((__be32 *)(out_mad->data + 20));
+	props->gid_tbl_len	= out_mad->data[50];
+	props->max_msg_sz	= 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
+	props->pkey_tbl_len	= to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
+	props->bad_pkey_cntr	= be16_to_cpup((__be16 *)(out_mad->data + 46));
+	props->qkey_viol_cntr	= be16_to_cpup((__be16 *)(out_mad->data + 48));
+	props->active_width	= out_mad->data[31] & 0xf;
+	props->active_speed	= out_mad->data[35] >> 4;
+	props->max_mtu		= out_mad->data[41] & 0xf;
+	props->active_mtu	= out_mad->data[36] >> 4;
+	props->subnet_timeout	= out_mad->data[51] & 0x1f;
+	props->max_vl_num	= out_mad->data[37] >> 4;
+	props->init_type_reply	= out_mad->data[41] >> 4;
+
+	/* Check if extended speeds (EDR/FDR/...) are supported */
+	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+		ext_active_speed = out_mad->data[62] >> 4;
+
+		switch (ext_active_speed) {
+		case 1:
+			props->active_speed = 16; /* FDR */
+			break;
+		case 2:
+			props->active_speed = 32; /* EDR */
+			break;
+		}
+	}
+
+	/* If reported active speed is QDR, check if is FDR-10 */
+	if (props->active_speed == 4) {
+		if (dev->mdev.caps.ext_port_cap[port - 1] &
+		    MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
+			init_query_mad(in_mad);
+			in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+			in_mad->attr_mod = cpu_to_be32(port);
+
+			err = mlx5_MAD_IFC(dev, 1, 1, port,
+					   NULL, NULL, in_mad, out_mad);
+			if (err)
+				goto out;
+
+			/* Checking LinkSpeedActive for FDR-10 */
+			if (out_mad->data[15] & 0x1)
+				props->active_speed = 8;
+		}
+	}
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+
+	return err;
+}
+
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+			     union ib_gid *gid)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	init_query_mad(in_mad);
+	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+	in_mad->attr_mod = cpu_to_be32(port);
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	memcpy(gid->raw, out_mad->data + 8, 8);
+
+	init_query_mad(in_mad);
+	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
+	in_mad->attr_mod = cpu_to_be32(index / 8);
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+			      u16 *pkey)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	init_query_mad(in_mad);
+	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
+	in_mad->attr_mod = cpu_to_be32(index / 32);
+
+	err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	*pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]);
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+struct mlx5_reg_node_desc {
+	u8	desc[64];
+};
+
+static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
+				 struct ib_device_modify *props)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_reg_node_desc in;
+	struct mlx5_reg_node_desc out;
+	int err;
+
+	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+		return -EOPNOTSUPP;
+
+	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
+		return 0;
+
+	/*
+	 * If possible, pass node desc to FW, so it can generate
+	 * a 144 trap.  If cmd fails, just ignore.
+	 */
+	memcpy(&in, props->node_desc, 64);
+	err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out,
+				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
+	if (err)
+		return err;
+
+	memcpy(ibdev->node_desc, props->node_desc, 64);
+
+	return err;
+}
+
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+			       struct ib_port_modify *props)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct ib_port_attr attr;
+	u32 tmp;
+	int err;
+
+	mutex_lock(&dev->cap_mask_mutex);
+
+	err = mlx5_ib_query_port(ibdev, port, &attr);
+	if (err)
+		goto out;
+
+	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
+		~props->clr_port_cap_mask;
+
+	err = mlx5_set_port_caps(&dev->mdev, port, tmp);
+
+out:
+	mutex_unlock(&dev->cap_mask_mutex);
+	return err;
+}
+
+static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+						  struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_alloc_ucontext_req req;
+	struct mlx5_ib_alloc_ucontext_resp resp;
+	struct mlx5_ib_ucontext *context;
+	struct mlx5_uuar_info *uuari;
+	struct mlx5_uar *uars;
+	int num_uars;
+	int uuarn;
+	int err;
+	int i;
+
+	if (!dev->ib_active)
+		return ERR_PTR(-EAGAIN);
+
+	err = ib_copy_from_udata(&req, udata, sizeof(req));
+	if (err)
+		return ERR_PTR(err);
+
+	if (req.total_num_uuars > MLX5_MAX_UUARS)
+		return ERR_PTR(-ENOMEM);
+
+	if (req.total_num_uuars == 0)
+		return ERR_PTR(-EINVAL);
+
+	req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
+	if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+		return ERR_PTR(-EINVAL);
+
+	num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
+	resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;
+	resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;
+	resp.cache_line_size  = L1_CACHE_BYTES;
+	resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
+	resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
+	resp.max_send_wqebb = dev->mdev.caps.max_wqes;
+	resp.max_recv_wr = dev->mdev.caps.max_wqes;
+	resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	uuari = &context->uuari;
+	mutex_init(&uuari->lock);
+	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
+	if (!uars) {
+		err = -ENOMEM;
+		goto out_ctx;
+	}
+
+	uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
+				sizeof(*uuari->bitmap),
+				GFP_KERNEL);
+	if (!uuari->bitmap) {
+		err = -ENOMEM;
+		goto out_uar_ctx;
+	}
+	/*
+	 * clear all fast path uuars
+	 */
+	for (i = 0; i < req.total_num_uuars; i++) {
+		uuarn = i & 3;
+		if (uuarn == 2 || uuarn == 3)
+			set_bit(i, uuari->bitmap);
+	}
+
+	uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
+	if (!uuari->count) {
+		err = -ENOMEM;
+		goto out_bitmap;
+	}
+
+	for (i = 0; i < num_uars; i++) {
+		err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
+		if (err)
+			goto out_count;
+	}
+
+	INIT_LIST_HEAD(&context->db_page_list);
+	mutex_init(&context->db_page_mutex);
+
+	resp.tot_uuars = req.total_num_uuars;
+	resp.num_ports = dev->mdev.caps.num_ports;
+	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (err)
+		goto out_uars;
+
+	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
+	uuari->uars = uars;
+	uuari->num_uars = num_uars;
+	return &context->ibucontext;
+
+out_uars:
+	for (i--; i >= 0; i--)
+		mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
+out_count:
+	kfree(uuari->count);
+
+out_bitmap:
+	kfree(uuari->bitmap);
+
+out_uar_ctx:
+	kfree(uars);
+
+out_ctx:
+	kfree(context);
+	return ERR_PTR(err);
+}
+
+static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+	struct mlx5_uuar_info *uuari = &context->uuari;
+	int i;
+
+	for (i = 0; i < uuari->num_uars; i++) {
+		if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index))
+			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
+	}
+
+	kfree(uuari->count);
+	kfree(uuari->bitmap);
+	kfree(uuari->uars);
+	kfree(context);
+
+	return 0;
+}
+
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+{
+	return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index;
+}
+
+static int get_command(unsigned long offset)
+{
+	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
+}
+
+static int get_arg(unsigned long offset)
+{
+	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
+}
+
+static int get_index(unsigned long offset)
+{
+	return get_arg(offset);
+}
+
+static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+	struct mlx5_uuar_info *uuari = &context->uuari;
+	unsigned long command;
+	unsigned long idx;
+	phys_addr_t pfn;
+
+	command = get_command(vma->vm_pgoff);
+	switch (command) {
+	case MLX5_IB_MMAP_REGULAR_PAGE:
+		if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+			return -EINVAL;
+
+		idx = get_index(vma->vm_pgoff);
+		pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+		mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
+			    (unsigned long long)pfn);
+
+		if (idx >= uuari->num_uars)
+			return -EINVAL;
+
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+				       PAGE_SIZE, vma->vm_page_prot))
+			return -EAGAIN;
+
+		mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
+			    vma->vm_start,
+			    (unsigned long long)pfn << PAGE_SHIFT);
+		break;
+
+	case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
+		return -ENOSYS;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
+{
+	struct mlx5_create_mkey_mbox_in *in;
+	struct mlx5_mkey_seg *seg;
+	struct mlx5_core_mr mr;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	seg = &in->seg;
+	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
+	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
+	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	seg->start_addr = 0;
+
+	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+	if (err) {
+		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
+		goto err_in;
+	}
+
+	kfree(in);
+	*key = mr.key;
+
+	return 0;
+
+err_in:
+	kfree(in);
+
+	return err;
+}
+
+static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
+{
+	struct mlx5_core_mr mr;
+	int err;
+
+	memset(&mr, 0, sizeof(mr));
+	mr.key = key;
+	err = mlx5_core_destroy_mkey(&dev->mdev, &mr);
+	if (err)
+		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
+}
+
+static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
+				      struct ib_ucontext *context,
+				      struct ib_udata *udata)
+{
+	struct mlx5_ib_alloc_pd_resp resp;
+	struct mlx5_ib_pd *pd;
+	int err;
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	if (context) {
+		resp.pdn = pd->pdn;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
+			kfree(pd);
+			return ERR_PTR(-EFAULT);
+		}
+	} else {
+		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
+		if (err) {
+			mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
+			kfree(pd);
+			return ERR_PTR(err);
+		}
+	}
+
+	return &pd->ibpd;
+}
+
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+{
+	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
+	struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+	if (!pd->uobject)
+		free_pa_mkey(mdev, mpd->pa_lkey);
+
+	mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn);
+	kfree(mpd);
+
+	return 0;
+}
+
+static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	int err;
+
+	err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
+	if (err)
+		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
+			     ibqp->qp_num, gid->raw);
+
+	return err;
+}
+
+static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	int err;
+
+	err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
+	if (err)
+		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
+			     ibqp->qp_num, gid->raw);
+
+	return err;
+}
+
+static int init_node_data(struct mlx5_ib_dev *dev)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+
+	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	init_query_mad(in_mad);
+	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+	err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+	err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32));
+	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+	kfree(in_mad);
+	kfree(out_mad);
+	return err;
+}
+
+static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
+			     char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+
+	return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages);
+}
+
+static ssize_t show_reg_pages(struct device *device,
+			      struct device_attribute *attr, char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+
+	return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+	return sprintf(buf, "MT%d\n", dev->mdev.pdev->device);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+	return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev),
+		       fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev));
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+	return sprintf(buf, "%x\n", dev->mdev.rev_id);
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+			  char *buf)
+{
+	struct mlx5_ib_dev *dev =
+		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+		       dev->mdev.board_id);
+}
+
+static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
+static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
+static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
+static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+
+static struct device_attribute *mlx5_class_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_fw_ver,
+	&dev_attr_hca_type,
+	&dev_attr_board_id,
+	&dev_attr_fw_pages,
+	&dev_attr_reg_pages,
+};
+
+static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+			  void *data)
+{
+	struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
+	struct ib_event ibev;
+	u8 port = 0;
+
+	switch (event) {
+	case MLX5_DEV_EVENT_SYS_ERROR:
+		ibdev->ib_active = false;
+		ibev.event = IB_EVENT_DEVICE_FATAL;
+		break;
+
+	case MLX5_DEV_EVENT_PORT_UP:
+		ibev.event = IB_EVENT_PORT_ACTIVE;
+		port = *(u8 *)data;
+		break;
+
+	case MLX5_DEV_EVENT_PORT_DOWN:
+		ibev.event = IB_EVENT_PORT_ERR;
+		port = *(u8 *)data;
+		break;
+
+	case MLX5_DEV_EVENT_PORT_INITIALIZED:
+		/* not used by ULPs */
+		return;
+
+	case MLX5_DEV_EVENT_LID_CHANGE:
+		ibev.event = IB_EVENT_LID_CHANGE;
+		port = *(u8 *)data;
+		break;
+
+	case MLX5_DEV_EVENT_PKEY_CHANGE:
+		ibev.event = IB_EVENT_PKEY_CHANGE;
+		port = *(u8 *)data;
+		break;
+
+	case MLX5_DEV_EVENT_GUID_CHANGE:
+		ibev.event = IB_EVENT_GID_CHANGE;
+		port = *(u8 *)data;
+		break;
+
+	case MLX5_DEV_EVENT_CLIENT_REREG:
+		ibev.event = IB_EVENT_CLIENT_REREGISTER;
+		port = *(u8 *)data;
+		break;
+	}
+
+	ibev.device	      = &ibdev->ib_dev;
+	ibev.element.port_num = port;
+
+	if (ibdev->ib_active)
+		ib_dispatch_event(&ibev);
+}
+
+static void get_ext_port_caps(struct mlx5_ib_dev *dev)
+{
+	int port;
+
+	for (port = 1; port <= dev->mdev.caps.num_ports; port++)
+		mlx5_query_ext_port_caps(dev, port);
+}
+
+static int get_port_caps(struct mlx5_ib_dev *dev)
+{
+	struct ib_device_attr *dprops = NULL;
+	struct ib_port_attr *pprops = NULL;
+	int err = 0;
+	int port;
+
+	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+	if (!pprops)
+		goto out;
+
+	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
+	if (!dprops)
+		goto out;
+
+	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
+	if (err) {
+		mlx5_ib_warn(dev, "query_device failed %d\n", err);
+		goto out;
+	}
+
+	for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
+		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
+		if (err) {
+			mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
+			break;
+		}
+		dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
+		dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
+			    dprops->max_pkeys, pprops->gid_tbl_len);
+	}
+
+out:
+	kfree(pprops);
+	kfree(dprops);
+
+	return err;
+}
+
+static void destroy_umrc_res(struct mlx5_ib_dev *dev)
+{
+	int err;
+
+	err = mlx5_mr_cache_cleanup(dev);
+	if (err)
+		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+	mlx5_ib_destroy_qp(dev->umrc.qp);
+	ib_destroy_cq(dev->umrc.cq);
+	ib_dereg_mr(dev->umrc.mr);
+	ib_dealloc_pd(dev->umrc.pd);
+}
+
+enum {
+	MAX_UMR_WR = 128,
+};
+
+static int create_umr_res(struct mlx5_ib_dev *dev)
+{
+	struct ib_qp_init_attr *init_attr = NULL;
+	struct ib_qp_attr *attr = NULL;
+	struct ib_pd *pd;
+	struct ib_cq *cq;
+	struct ib_qp *qp;
+	struct ib_mr *mr;
+	int ret;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+	if (!attr || !init_attr) {
+		ret = -ENOMEM;
+		goto error_0;
+	}
+
+	pd = ib_alloc_pd(&dev->ib_dev);
+	if (IS_ERR(pd)) {
+		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+		ret = PTR_ERR(pd);
+		goto error_0;
+	}
+
+	mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(mr)) {
+		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
+		ret = PTR_ERR(mr);
+		goto error_1;
+	}
+
+	cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
+			  0);
+	if (IS_ERR(cq)) {
+		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+		ret = PTR_ERR(cq);
+		goto error_2;
+	}
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+	init_attr->send_cq = cq;
+	init_attr->recv_cq = cq;
+	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+	init_attr->cap.max_send_wr = MAX_UMR_WR;
+	init_attr->cap.max_send_sge = 1;
+	init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+	init_attr->port_num = 1;
+	qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+	if (IS_ERR(qp)) {
+		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+		ret = PTR_ERR(qp);
+		goto error_3;
+	}
+	qp->device     = &dev->ib_dev;
+	qp->real_qp    = qp;
+	qp->uobject    = NULL;
+	qp->qp_type    = MLX5_IB_QPT_REG_UMR;
+
+	attr->qp_state = IB_QPS_INIT;
+	attr->port_num = 1;
+	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+				IB_QP_PORT, NULL);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+		goto error_4;
+	}
+
+	memset(attr, 0, sizeof(*attr));
+	attr->qp_state = IB_QPS_RTR;
+	attr->path_mtu = IB_MTU_256;
+
+	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+		goto error_4;
+	}
+
+	memset(attr, 0, sizeof(*attr));
+	attr->qp_state = IB_QPS_RTS;
+	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+	if (ret) {
+		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+		goto error_4;
+	}
+
+	dev->umrc.qp = qp;
+	dev->umrc.cq = cq;
+	dev->umrc.mr = mr;
+	dev->umrc.pd = pd;
+
+	sema_init(&dev->umrc.sem, MAX_UMR_WR);
+	ret = mlx5_mr_cache_init(dev);
+	if (ret) {
+		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+		goto error_4;
+	}
+
+	kfree(attr);
+	kfree(init_attr);
+
+	return 0;
+
+error_4:
+	mlx5_ib_destroy_qp(qp);
+
+error_3:
+	ib_destroy_cq(cq);
+
+error_2:
+	ib_dereg_mr(mr);
+
+error_1:
+	ib_dealloc_pd(pd);
+
+error_0:
+	kfree(attr);
+	kfree(init_attr);
+	return ret;
+}
+
+static int create_dev_resources(struct mlx5_ib_resources *devr)
+{
+	struct ib_srq_init_attr attr;
+	struct mlx5_ib_dev *dev;
+	int ret = 0;
+
+	dev = container_of(devr, struct mlx5_ib_dev, devr);
+
+	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
+	if (IS_ERR(devr->p0)) {
+		ret = PTR_ERR(devr->p0);
+		goto error0;
+	}
+	devr->p0->device  = &dev->ib_dev;
+	devr->p0->uobject = NULL;
+	atomic_set(&devr->p0->usecnt, 0);
+
+	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
+	if (IS_ERR(devr->c0)) {
+		ret = PTR_ERR(devr->c0);
+		goto error1;
+	}
+	devr->c0->device        = &dev->ib_dev;
+	devr->c0->uobject       = NULL;
+	devr->c0->comp_handler  = NULL;
+	devr->c0->event_handler = NULL;
+	devr->c0->cq_context    = NULL;
+	atomic_set(&devr->c0->usecnt, 0);
+
+	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+	if (IS_ERR(devr->x0)) {
+		ret = PTR_ERR(devr->x0);
+		goto error2;
+	}
+	devr->x0->device = &dev->ib_dev;
+	devr->x0->inode = NULL;
+	atomic_set(&devr->x0->usecnt, 0);
+	mutex_init(&devr->x0->tgt_qp_mutex);
+	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
+
+	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+	if (IS_ERR(devr->x1)) {
+		ret = PTR_ERR(devr->x1);
+		goto error3;
+	}
+	devr->x1->device = &dev->ib_dev;
+	devr->x1->inode = NULL;
+	atomic_set(&devr->x1->usecnt, 0);
+	mutex_init(&devr->x1->tgt_qp_mutex);
+	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.attr.max_sge = 1;
+	attr.attr.max_wr = 1;
+	attr.srq_type = IB_SRQT_XRC;
+	attr.ext.xrc.cq = devr->c0;
+	attr.ext.xrc.xrcd = devr->x0;
+
+	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
+	if (IS_ERR(devr->s0)) {
+		ret = PTR_ERR(devr->s0);
+		goto error4;
+	}
+	devr->s0->device	= &dev->ib_dev;
+	devr->s0->pd		= devr->p0;
+	devr->s0->uobject       = NULL;
+	devr->s0->event_handler = NULL;
+	devr->s0->srq_context   = NULL;
+	devr->s0->srq_type      = IB_SRQT_XRC;
+	devr->s0->ext.xrc.xrcd	= devr->x0;
+	devr->s0->ext.xrc.cq	= devr->c0;
+	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
+	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
+	atomic_inc(&devr->p0->usecnt);
+	atomic_set(&devr->s0->usecnt, 0);
+
+	return 0;
+
+error4:
+	mlx5_ib_dealloc_xrcd(devr->x1);
+error3:
+	mlx5_ib_dealloc_xrcd(devr->x0);
+error2:
+	mlx5_ib_destroy_cq(devr->c0);
+error1:
+	mlx5_ib_dealloc_pd(devr->p0);
+error0:
+	return ret;
+}
+
+static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+{
+	mlx5_ib_destroy_srq(devr->s0);
+	mlx5_ib_dealloc_xrcd(devr->x0);
+	mlx5_ib_dealloc_xrcd(devr->x1);
+	mlx5_ib_destroy_cq(devr->c0);
+	mlx5_ib_dealloc_pd(devr->p0);
+}
+
+static int init_one(struct pci_dev *pdev,
+		    const struct pci_device_id *id)
+{
+	struct mlx5_core_dev *mdev;
+	struct mlx5_ib_dev *dev;
+	int err;
+	int i;
+
+	printk_once(KERN_INFO "%s", mlx5_version);
+
+	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+	if (!dev)
+		return -ENOMEM;
+
+	mdev = &dev->mdev;
+	mdev->event = mlx5_ib_event;
+	if (prof_sel >= ARRAY_SIZE(profile)) {
+		pr_warn("selected pofile out of range, selceting default\n");
+		prof_sel = 0;
+	}
+	mdev->profile = &profile[prof_sel];
+	err = mlx5_dev_init(mdev, pdev);
+	if (err)
+		goto err_free;
+
+	err = get_port_caps(dev);
+	if (err)
+		goto err_cleanup;
+
+	get_ext_port_caps(dev);
+
+	err = alloc_comp_eqs(dev);
+	if (err)
+		goto err_cleanup;
+
+	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
+
+	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
+	dev->ib_dev.owner		= THIS_MODULE;
+	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
+	dev->ib_dev.local_dma_lkey	= mdev->caps.reserved_lkey;
+	dev->num_ports		= mdev->caps.num_ports;
+	dev->ib_dev.phys_port_cnt     = dev->num_ports;
+	dev->ib_dev.num_comp_vectors	= dev->num_comp_vectors;
+	dev->ib_dev.dma_device	= &mdev->pdev->dev;
+
+	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
+	dev->ib_dev.uverbs_cmd_mask	=
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
+		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+	dev->ib_dev.query_device	= mlx5_ib_query_device;
+	dev->ib_dev.query_port		= mlx5_ib_query_port;
+	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
+	dev->ib_dev.query_pkey		= mlx5_ib_query_pkey;
+	dev->ib_dev.modify_device	= mlx5_ib_modify_device;
+	dev->ib_dev.modify_port		= mlx5_ib_modify_port;
+	dev->ib_dev.alloc_ucontext	= mlx5_ib_alloc_ucontext;
+	dev->ib_dev.dealloc_ucontext	= mlx5_ib_dealloc_ucontext;
+	dev->ib_dev.mmap		= mlx5_ib_mmap;
+	dev->ib_dev.alloc_pd		= mlx5_ib_alloc_pd;
+	dev->ib_dev.dealloc_pd		= mlx5_ib_dealloc_pd;
+	dev->ib_dev.create_ah		= mlx5_ib_create_ah;
+	dev->ib_dev.query_ah		= mlx5_ib_query_ah;
+	dev->ib_dev.destroy_ah		= mlx5_ib_destroy_ah;
+	dev->ib_dev.create_srq		= mlx5_ib_create_srq;
+	dev->ib_dev.modify_srq		= mlx5_ib_modify_srq;
+	dev->ib_dev.query_srq		= mlx5_ib_query_srq;
+	dev->ib_dev.destroy_srq		= mlx5_ib_destroy_srq;
+	dev->ib_dev.post_srq_recv	= mlx5_ib_post_srq_recv;
+	dev->ib_dev.create_qp		= mlx5_ib_create_qp;
+	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
+	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
+	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
+	dev->ib_dev.post_send		= mlx5_ib_post_send;
+	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
+	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
+	dev->ib_dev.modify_cq		= mlx5_ib_modify_cq;
+	dev->ib_dev.resize_cq		= mlx5_ib_resize_cq;
+	dev->ib_dev.destroy_cq		= mlx5_ib_destroy_cq;
+	dev->ib_dev.poll_cq		= mlx5_ib_poll_cq;
+	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
+	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
+	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
+	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
+	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
+	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
+	dev->ib_dev.process_mad		= mlx5_ib_process_mad;
+	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;
+	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
+	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+
+	if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
+		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
+		dev->ib_dev.uverbs_cmd_mask |=
+			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+	}
+
+	err = init_node_data(dev);
+	if (err)
+		goto err_eqs;
+
+	mutex_init(&dev->cap_mask_mutex);
+	spin_lock_init(&dev->mr_lock);
+
+	err = create_dev_resources(&dev->devr);
+	if (err)
+		goto err_eqs;
+
+	if (ib_register_device(&dev->ib_dev, NULL))
+		goto err_rsrc;
+
+	err = create_umr_res(dev);
+	if (err)
+		goto err_dev;
+
+	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
+		if (device_create_file(&dev->ib_dev.dev,
+				       mlx5_class_attributes[i]))
+			goto err_umrc;
+	}
+
+	dev->ib_active = true;
+
+	return 0;
+
+err_umrc:
+	destroy_umrc_res(dev);
+
+err_dev:
+	ib_unregister_device(&dev->ib_dev);
+
+err_rsrc:
+	destroy_dev_resources(&dev->devr);
+
+err_eqs:
+	free_comp_eqs(dev);
+
+err_cleanup:
+	mlx5_dev_cleanup(mdev);
+
+err_free:
+	ib_dealloc_device((struct ib_device *)dev);
+
+	return err;
+}
+
+static void remove_one(struct pci_dev *pdev)
+{
+	struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
+
+	destroy_umrc_res(dev);
+	ib_unregister_device(&dev->ib_dev);
+	destroy_dev_resources(&dev->devr);
+	free_comp_eqs(dev);
+	mlx5_dev_cleanup(&dev->mdev);
+	ib_dealloc_device(&dev->ib_dev);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = {
+	{ PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table);
+
+static struct pci_driver mlx5_ib_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= mlx5_ib_pci_table,
+	.probe		= init_one,
+	.remove		= remove_one
+};
+
+static int __init mlx5_ib_init(void)
+{
+	return pci_register_driver(&mlx5_ib_driver);
+}
+
+static void __exit mlx5_ib_cleanup(void)
+{
+	pci_unregister_driver(&mlx5_ib_driver);
+}
+
+module_init(mlx5_ib_init);
+module_exit(mlx5_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644
index 000000000000..3a5322870b96
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+/* @umem: umem object to scan
+ * @addr: ib virtual address requested by the user
+ * @count: number of PAGE_SIZE pages covered by umem
+ * @shift: page shift for the compound pages found in the region
+ * @ncont: number of compund pages
+ * @order: log2 of the number of compound pages
+ */
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+			int *ncont, int *order)
+{
+	struct ib_umem_chunk *chunk;
+	unsigned long tmp;
+	unsigned long m;
+	int i, j, k;
+	u64 base = 0;
+	int p = 0;
+	int skip;
+	int mask;
+	u64 len;
+	u64 pfn;
+
+	addr = addr >> PAGE_SHIFT;
+	tmp = (unsigned long)addr;
+	m = find_first_bit(&tmp, sizeof(tmp));
+	skip = 1 << m;
+	mask = skip - 1;
+	i = 0;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (j = 0; j < chunk->nmap; j++) {
+			len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
+			pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
+			for (k = 0; k < len; k++) {
+				if (!(i & mask)) {
+					tmp = (unsigned long)pfn;
+					m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+					skip = 1 << m;
+					mask = skip - 1;
+					base = pfn;
+					p = 0;
+				} else {
+					if (base + p != pfn) {
+						tmp = (unsigned long)p;
+						m = find_first_bit(&tmp, sizeof(tmp));
+						skip = 1 << m;
+						mask = skip - 1;
+						base = pfn;
+						p = 0;
+					}
+				}
+				p++;
+				i++;
+			}
+		}
+
+	if (i) {
+		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
+
+		if (order)
+			*order = ilog2(roundup_pow_of_two(i) >> m);
+
+		*ncont = DIV_ROUND_UP(i, (1 << m));
+	} else {
+		m  = 0;
+
+		if (order)
+			*order = 0;
+
+		*ncont = 0;
+	}
+	*shift = PAGE_SHIFT + m;
+	*count = i;
+}
+
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int umr)
+{
+	int shift = page_shift - PAGE_SHIFT;
+	int mask = (1 << shift) - 1;
+	struct ib_umem_chunk *chunk;
+	int i, j, k;
+	u64 cur = 0;
+	u64 base;
+	int len;
+
+	i = 0;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (j = 0; j < chunk->nmap; j++) {
+			len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
+			base = sg_dma_address(&chunk->page_list[j]);
+			for (k = 0; k < len; k++) {
+				if (!(i & mask)) {
+					cur = base + (k << PAGE_SHIFT);
+					if (umr)
+						cur |= 3;
+
+					pas[i >> shift] = cpu_to_be64(cur);
+					mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+						    i >> shift, be64_to_cpu(pas[i >> shift]));
+				}  else
+					mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+						    base + (k << PAGE_SHIFT));
+				i++;
+			}
+		}
+}
+
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
+{
+	u64 page_size;
+	u64 page_mask;
+	u64 off_size;
+	u64 off_mask;
+	u64 buf_off;
+
+	page_size = 1 << page_shift;
+	page_mask = page_size - 1;
+	buf_off = addr & page_mask;
+	off_size = page_size >> 6;
+	off_mask = off_size - 1;
+
+	if (buf_off & off_mask)
+		return -EINVAL;
+
+	*offset = buf_off >> ilog2(off_size);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644
index 000000000000..836be9157242
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_H
+#define MLX5_IB_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/types.h>
+
+#define mlx5_ib_dbg(dev, format, arg...)				\
+pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
+	 __LINE__, current->pid, ##arg)
+
+#define mlx5_ib_err(dev, format, arg...)				\
+pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
+	__LINE__, current->pid, ##arg)
+
+#define mlx5_ib_warn(dev, format, arg...)				\
+pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
+	__LINE__, current->pid, ##arg)
+
+enum {
+	MLX5_IB_MMAP_CMD_SHIFT	= 8,
+	MLX5_IB_MMAP_CMD_MASK	= 0xff,
+};
+
+enum mlx5_ib_mmap_cmd {
+	MLX5_IB_MMAP_REGULAR_PAGE		= 0,
+	MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES	= 1, /* always last */
+};
+
+enum {
+	MLX5_RES_SCAT_DATA32_CQE	= 0x1,
+	MLX5_RES_SCAT_DATA64_CQE	= 0x2,
+	MLX5_REQ_SCAT_DATA32_CQE	= 0x11,
+	MLX5_REQ_SCAT_DATA64_CQE	= 0x22,
+};
+
+enum mlx5_ib_latency_class {
+	MLX5_IB_LATENCY_CLASS_LOW,
+	MLX5_IB_LATENCY_CLASS_MEDIUM,
+	MLX5_IB_LATENCY_CLASS_HIGH,
+	MLX5_IB_LATENCY_CLASS_FAST_PATH
+};
+
+enum mlx5_ib_mad_ifc_flags {
+	MLX5_MAD_IFC_IGNORE_MKEY	= 1,
+	MLX5_MAD_IFC_IGNORE_BKEY	= 2,
+	MLX5_MAD_IFC_NET_VIEW		= 4,
+};
+
+struct mlx5_ib_ucontext {
+	struct ib_ucontext	ibucontext;
+	struct list_head	db_page_list;
+
+	/* protect doorbell record alloc/free
+	 */
+	struct mutex		db_page_mutex;
+	struct mlx5_uuar_info	uuari;
+};
+
+static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
+}
+
+struct mlx5_ib_pd {
+	struct ib_pd		ibpd;
+	u32			pdn;
+	u32			pa_lkey;
+};
+
+/* Use macros here so that don't have to duplicate
+ * enum ib_send_flags and enum ib_qp_type for low-level driver
+ */
+
+#define MLX5_IB_SEND_UMR_UNREG	IB_SEND_RESERVED_START
+#define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
+#define MLX5_IB_WR_UMR		IB_WR_RESERVED1
+
+struct wr_list {
+	u16	opcode;
+	u16	next;
+};
+
+struct mlx5_ib_wq {
+	u64		       *wrid;
+	u32		       *wr_data;
+	struct wr_list	       *w_list;
+	unsigned	       *wqe_head;
+	u16		        unsig_count;
+
+	/* serialize post to the work queue
+	 */
+	spinlock_t		lock;
+	int			wqe_cnt;
+	int			max_post;
+	int			max_gs;
+	int			offset;
+	int			wqe_shift;
+	unsigned		head;
+	unsigned		tail;
+	u16			cur_post;
+	u16			last_poll;
+	void		       *qend;
+};
+
+enum {
+	MLX5_QP_USER,
+	MLX5_QP_KERNEL,
+	MLX5_QP_EMPTY
+};
+
+struct mlx5_ib_qp {
+	struct ib_qp		ibqp;
+	struct mlx5_core_qp	mqp;
+	struct mlx5_buf		buf;
+
+	struct mlx5_db		db;
+	struct mlx5_ib_wq	rq;
+
+	u32			doorbell_qpn;
+	u8			sq_signal_bits;
+	u8			fm_cache;
+	int			sq_max_wqes_per_wr;
+	int			sq_spare_wqes;
+	struct mlx5_ib_wq	sq;
+
+	struct ib_umem	       *umem;
+	int			buf_size;
+
+	/* serialize qp state modifications
+	 */
+	struct mutex		mutex;
+	u16			xrcdn;
+	u32			flags;
+	u8			port;
+	u8			alt_port;
+	u8			atomic_rd_en;
+	u8			resp_depth;
+	u8			state;
+	int			mlx_type;
+	int			wq_sig;
+	int			scat_cqe;
+	int			max_inline_data;
+	struct mlx5_bf	       *bf;
+	int			has_rq;
+
+	/* only for user space QPs. For kernel
+	 * we have it from the bf object
+	 */
+	int			uuarn;
+
+	int			create_type;
+	u32			pa_lkey;
+};
+
+struct mlx5_ib_cq_buf {
+	struct mlx5_buf		buf;
+	struct ib_umem		*umem;
+	int			cqe_size;
+};
+
+enum mlx5_ib_qp_flags {
+	MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
+	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
+};
+
+struct mlx5_shared_mr_info {
+	int mr_id;
+	struct ib_umem		*umem;
+};
+
+struct mlx5_ib_cq {
+	struct ib_cq		ibcq;
+	struct mlx5_core_cq	mcq;
+	struct mlx5_ib_cq_buf	buf;
+	struct mlx5_db		db;
+
+	/* serialize access to the CQ
+	 */
+	spinlock_t		lock;
+
+	/* protect resize cq
+	 */
+	struct mutex		resize_mutex;
+	struct mlx5_ib_cq_resize *resize_buf;
+	struct ib_umem	       *resize_umem;
+	int			cqe_size;
+};
+
+struct mlx5_ib_srq {
+	struct ib_srq		ibsrq;
+	struct mlx5_core_srq	msrq;
+	struct mlx5_buf		buf;
+	struct mlx5_db		db;
+	u64		       *wrid;
+	/* protect SRQ hanlding
+	 */
+	spinlock_t		lock;
+	int			head;
+	int			tail;
+	u16			wqe_ctr;
+	struct ib_umem	       *umem;
+	/* serialize arming a SRQ
+	 */
+	struct mutex		mutex;
+	int			wq_sig;
+};
+
+struct mlx5_ib_xrcd {
+	struct ib_xrcd		ibxrcd;
+	u32			xrcdn;
+};
+
+struct mlx5_ib_mr {
+	struct ib_mr		ibmr;
+	struct mlx5_core_mr	mmr;
+	struct ib_umem	       *umem;
+	struct mlx5_shared_mr_info	*smr_info;
+	struct list_head	list;
+	int			order;
+	int			umred;
+	__be64			*pas;
+	dma_addr_t		dma;
+	int			npages;
+	struct completion	done;
+	enum ib_wc_status	status;
+};
+
+struct mlx5_ib_fast_reg_page_list {
+	struct ib_fast_reg_page_list	ibfrpl;
+	__be64			       *mapped_page_list;
+	dma_addr_t			map;
+};
+
+struct umr_common {
+	struct ib_pd	*pd;
+	struct ib_cq	*cq;
+	struct ib_qp	*qp;
+	struct ib_mr	*mr;
+	/* control access to UMR QP
+	 */
+	struct semaphore	sem;
+};
+
+enum {
+	MLX5_FMR_INVALID,
+	MLX5_FMR_VALID,
+	MLX5_FMR_BUSY,
+};
+
+struct mlx5_ib_fmr {
+	struct ib_fmr			ibfmr;
+	struct mlx5_core_mr		mr;
+	int				access_flags;
+	int				state;
+	/* protect fmr state
+	 */
+	spinlock_t			lock;
+	u64				wrid;
+	struct ib_send_wr		wr[2];
+	u8				page_shift;
+	struct ib_fast_reg_page_list	page_list;
+};
+
+struct mlx5_cache_ent {
+	struct list_head	head;
+	/* sync access to the cahce entry
+	 */
+	spinlock_t		lock;
+
+
+	struct dentry	       *dir;
+	char                    name[4];
+	u32                     order;
+	u32			size;
+	u32                     cur;
+	u32                     miss;
+	u32			limit;
+
+	struct dentry          *fsize;
+	struct dentry          *fcur;
+	struct dentry          *fmiss;
+	struct dentry          *flimit;
+
+	struct mlx5_ib_dev     *dev;
+	struct work_struct	work;
+	struct delayed_work	dwork;
+};
+
+struct mlx5_mr_cache {
+	struct workqueue_struct *wq;
+	struct mlx5_cache_ent	ent[MAX_MR_CACHE_ENTRIES];
+	int			stopped;
+	struct dentry		*root;
+	unsigned long		last_add;
+};
+
+struct mlx5_ib_resources {
+	struct ib_cq	*c0;
+	struct ib_xrcd	*x0;
+	struct ib_xrcd	*x1;
+	struct ib_pd	*p0;
+	struct ib_srq	*s0;
+};
+
+struct mlx5_ib_dev {
+	struct ib_device		ib_dev;
+	struct mlx5_core_dev		mdev;
+	MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
+	struct list_head		eqs_list;
+	int				num_ports;
+	int				num_comp_vectors;
+	/* serialize update of capability mask
+	 */
+	struct mutex			cap_mask_mutex;
+	bool				ib_active;
+	struct umr_common		umrc;
+	/* sync used page count stats
+	 */
+	spinlock_t			mr_lock;
+	struct mlx5_ib_resources	devr;
+	struct mlx5_mr_cache		cache;
+};
+
+static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
+{
+	return container_of(mcq, struct mlx5_ib_cq, mcq);
+}
+
+static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
+{
+	return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
+}
+
+static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
+}
+
+static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
+}
+
+static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct mlx5_ib_cq, ibcq);
+}
+
+static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
+{
+	return container_of(mqp, struct mlx5_ib_qp, mqp);
+}
+
+static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct mlx5_ib_pd, ibpd);
+}
+
+static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
+}
+
+static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct mlx5_ib_qp, ibqp);
+}
+
+static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
+{
+	return container_of(msrq, struct mlx5_ib_srq, msrq);
+}
+
+static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct mlx5_ib_mr, ibmr);
+}
+
+static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+	return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
+}
+
+struct mlx5_ib_ah {
+	struct ib_ah		ibah;
+	struct mlx5_av		av;
+};
+
+static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct mlx5_ib_ah, ibah);
+}
+
+static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)
+{
+	return container_of(dev, struct mlx5_ib_dev, mdev);
+}
+
+static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)
+{
+	return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
+}
+
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+			struct mlx5_db *db);
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+		 void *in_mad, void *response_mad);
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+			   struct mlx5_ib_ah *ah);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx5_ib_destroy_ah(struct ib_ah *ah);
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+				  struct ib_srq_init_attr *init_attr,
+				  struct ib_udata *udata);
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
+int mlx5_ib_destroy_srq(struct ib_srq *srq);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			  struct ib_recv_wr **bad_wr);
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+				struct ib_qp_init_attr *init_attr,
+				struct ib_udata *udata);
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		     struct ib_qp_init_attr *qp_init_attr);
+int mlx5_ib_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr);
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+				int vector, struct ib_ucontext *context,
+				struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq);
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 virt_addr, int access_flags,
+				  struct ib_udata *udata);
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len);
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len);
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
+				 struct ib_fmr_attr *fmr_attr);
+int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		      int npages, u64 iova);
+int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
+int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			struct ib_wc *in_wc, struct ib_grh *in_grh,
+			struct ib_mad *in_mad, struct ib_mad *out_mad);
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+					  struct ib_ucontext *context,
+					  struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+		       struct ib_port_attr *props);
+int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+			int *ncont, int *order);
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int umr);
+void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+
+static inline void init_query_mad(struct ib_smp *mad)
+{
+	mad->base_version  = 1;
+	mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->class_version = 1;
+	mad->method	   = IB_MGMT_METHOD_GET;
+}
+
+static inline u8 convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
+	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
+	       MLX5_PERM_LOCAL_READ;
+}
+
+#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644
index 000000000000..bd41df95b6f0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/kref.h>
+#include <linux/random.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+enum {
+	DEF_CACHE_SIZE	= 10,
+};
+
+static __be64 *mr_align(__be64 *ptr, int align)
+{
+	unsigned long mask = align - 1;
+
+	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+}
+
+static int order2idx(struct mlx5_ib_dev *dev, int order)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+
+	if (order < cache->ent[0].order)
+		return 0;
+	else
+		return order - cache->ent[0].order;
+}
+
+static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	struct mlx5_create_mkey_mbox_in *in;
+	struct mlx5_ib_mr *mr;
+	int npages = 1 << ent->order;
+	int size = sizeof(u64) * npages;
+	int err = 0;
+	int i;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+		if (!mr) {
+			err = -ENOMEM;
+			goto out;
+		}
+		mr->order = ent->order;
+		mr->umred = 1;
+		mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
+		if (!mr->pas) {
+			kfree(mr);
+			err = -ENOMEM;
+			goto out;
+		}
+		mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
+					 DMA_TO_DEVICE);
+		if (dma_mapping_error(ddev, mr->dma)) {
+			kfree(mr->pas);
+			kfree(mr);
+			err = -ENOMEM;
+			goto out;
+		}
+
+		in->seg.status = 1 << 6;
+		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
+		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
+		in->seg.log2_page_size = 12;
+
+		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+					    sizeof(*in));
+		if (err) {
+			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+			kfree(mr->pas);
+			kfree(mr);
+			goto out;
+		}
+		cache->last_add = jiffies;
+
+		spin_lock(&ent->lock);
+		list_add_tail(&mr->list, &ent->head);
+		ent->cur++;
+		ent->size++;
+		spin_unlock(&ent->lock);
+	}
+
+out:
+	kfree(in);
+	return err;
+}
+
+static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	struct mlx5_ib_mr *mr;
+	int size;
+	int err;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		spin_lock(&ent->lock);
+		if (list_empty(&ent->head)) {
+			spin_unlock(&ent->lock);
+			return;
+		}
+		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+		list_del(&mr->list);
+		ent->cur--;
+		ent->size--;
+		spin_unlock(&ent->lock);
+		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+		if (err) {
+			mlx5_ib_warn(dev, "failed destroy mkey\n");
+		} else {
+			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
+			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+			kfree(mr->pas);
+			kfree(mr);
+		}
+	}
+}
+
+static ssize_t size_write(struct file *filp, const char __user *buf,
+			  size_t count, loff_t *pos)
+{
+	struct mlx5_cache_ent *ent = filp->private_data;
+	struct mlx5_ib_dev *dev = ent->dev;
+	char lbuf[20];
+	u32 var;
+	int err;
+	int c;
+
+	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+		return -EFAULT;
+
+	c = order2idx(dev, ent->order);
+	lbuf[sizeof(lbuf) - 1] = 0;
+
+	if (sscanf(lbuf, "%u", &var) != 1)
+		return -EINVAL;
+
+	if (var < ent->limit)
+		return -EINVAL;
+
+	if (var > ent->size) {
+		err = add_keys(dev, c, var - ent->size);
+		if (err)
+			return err;
+	} else if (var < ent->size) {
+		remove_keys(dev, c, ent->size - var);
+	}
+
+	return count;
+}
+
+static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
+			 loff_t *pos)
+{
+	struct mlx5_cache_ent *ent = filp->private_data;
+	char lbuf[20];
+	int err;
+
+	if (*pos)
+		return 0;
+
+	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(buf, lbuf, err))
+		return -EFAULT;
+
+	*pos += err;
+
+	return err;
+}
+
+static const struct file_operations size_fops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.write	= size_write,
+	.read	= size_read,
+};
+
+static ssize_t limit_write(struct file *filp, const char __user *buf,
+			   size_t count, loff_t *pos)
+{
+	struct mlx5_cache_ent *ent = filp->private_data;
+	struct mlx5_ib_dev *dev = ent->dev;
+	char lbuf[20];
+	u32 var;
+	int err;
+	int c;
+
+	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+		return -EFAULT;
+
+	c = order2idx(dev, ent->order);
+	lbuf[sizeof(lbuf) - 1] = 0;
+
+	if (sscanf(lbuf, "%u", &var) != 1)
+		return -EINVAL;
+
+	if (var > ent->size)
+		return -EINVAL;
+
+	ent->limit = var;
+
+	if (ent->cur < ent->limit) {
+		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
+		if (err)
+			return err;
+	}
+
+	return count;
+}
+
+static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
+			  loff_t *pos)
+{
+	struct mlx5_cache_ent *ent = filp->private_data;
+	char lbuf[20];
+	int err;
+
+	if (*pos)
+		return 0;
+
+	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(buf, lbuf, err))
+		return -EFAULT;
+
+	*pos += err;
+
+	return err;
+}
+
+static const struct file_operations limit_fops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.write	= limit_write,
+	.read	= limit_read,
+};
+
+static int someone_adding(struct mlx5_mr_cache *cache)
+{
+	int i;
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		if (cache->ent[i].cur < cache->ent[i].limit)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void __cache_work_func(struct mlx5_cache_ent *ent)
+{
+	struct mlx5_ib_dev *dev = ent->dev;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int i = order2idx(dev, ent->order);
+
+	if (cache->stopped)
+		return;
+
+	ent = &dev->cache.ent[i];
+	if (ent->cur < 2 * ent->limit) {
+		add_keys(dev, i, 1);
+		if (ent->cur < 2 * ent->limit)
+			queue_work(cache->wq, &ent->work);
+	} else if (ent->cur > 2 * ent->limit) {
+		if (!someone_adding(cache) &&
+		    time_after(jiffies, cache->last_add + 60 * HZ)) {
+			remove_keys(dev, i, 1);
+			if (ent->cur > ent->limit)
+				queue_work(cache->wq, &ent->work);
+		} else {
+			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+		}
+	}
+}
+
+static void delayed_cache_work_func(struct work_struct *work)
+{
+	struct mlx5_cache_ent *ent;
+
+	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
+	__cache_work_func(ent);
+}
+
+static void cache_work_func(struct work_struct *work)
+{
+	struct mlx5_cache_ent *ent;
+
+	ent = container_of(work, struct mlx5_cache_ent, work);
+	__cache_work_func(ent);
+}
+
+static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_ib_mr *mr = NULL;
+	struct mlx5_cache_ent *ent;
+	int c;
+	int i;
+
+	c = order2idx(dev, order);
+	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
+		return NULL;
+	}
+
+	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+		ent = &cache->ent[i];
+
+		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+
+		spin_lock(&ent->lock);
+		if (!list_empty(&ent->head)) {
+			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+					      list);
+			list_del(&mr->list);
+			ent->cur--;
+			spin_unlock(&ent->lock);
+			if (ent->cur < ent->limit)
+				queue_work(cache->wq, &ent->work);
+			break;
+		}
+		spin_unlock(&ent->lock);
+
+		queue_work(cache->wq, &ent->work);
+
+		if (mr)
+			break;
+	}
+
+	if (!mr)
+		cache->ent[c].miss++;
+
+	return mr;
+}
+
+static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent;
+	int shrink = 0;
+	int c;
+
+	c = order2idx(dev, mr->order);
+	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
+		return;
+	}
+	ent = &cache->ent[c];
+	spin_lock(&ent->lock);
+	list_add_tail(&mr->list, &ent->head);
+	ent->cur++;
+	if (ent->cur > 2 * ent->limit)
+		shrink = 1;
+	spin_unlock(&ent->lock);
+
+	if (shrink)
+		queue_work(cache->wq, &ent->work);
+}
+
+static void clean_keys(struct mlx5_ib_dev *dev, int c)
+{
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	struct mlx5_ib_mr *mr;
+	int size;
+	int err;
+
+	while (1) {
+		spin_lock(&ent->lock);
+		if (list_empty(&ent->head)) {
+			spin_unlock(&ent->lock);
+			return;
+		}
+		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+		list_del(&mr->list);
+		ent->cur--;
+		ent->size--;
+		spin_unlock(&ent->lock);
+		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+		if (err) {
+			mlx5_ib_warn(dev, "failed destroy mkey\n");
+		} else {
+			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
+			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+			kfree(mr->pas);
+			kfree(mr);
+		}
+	}
+}
+
+static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent;
+	int i;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
+	if (!cache->root)
+		return -ENOMEM;
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		ent = &cache->ent[i];
+		sprintf(ent->name, "%d", ent->order);
+		ent->dir = debugfs_create_dir(ent->name,  cache->root);
+		if (!ent->dir)
+			return -ENOMEM;
+
+		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
+						 &size_fops);
+		if (!ent->fsize)
+			return -ENOMEM;
+
+		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
+						  &limit_fops);
+		if (!ent->flimit)
+			return -ENOMEM;
+
+		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
+					       &ent->cur);
+		if (!ent->fcur)
+			return -ENOMEM;
+
+		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
+						&ent->miss);
+		if (!ent->fmiss)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->cache.root);
+}
+
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent;
+	int limit;
+	int size;
+	int err;
+	int i;
+
+	cache->wq = create_singlethread_workqueue("mkey_cache");
+	if (!cache->wq) {
+		mlx5_ib_warn(dev, "failed to create work queue\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		INIT_LIST_HEAD(&cache->ent[i].head);
+		spin_lock_init(&cache->ent[i].lock);
+
+		ent = &cache->ent[i];
+		INIT_LIST_HEAD(&ent->head);
+		spin_lock_init(&ent->lock);
+		ent->order = i + 2;
+		ent->dev = dev;
+
+		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
+			size = dev->mdev.profile->mr_cache[i].size;
+			limit = dev->mdev.profile->mr_cache[i].limit;
+		} else {
+			size = DEF_CACHE_SIZE;
+			limit = 0;
+		}
+		INIT_WORK(&ent->work, cache_work_func);
+		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+		ent->limit = limit;
+		queue_work(cache->wq, &ent->work);
+	}
+
+	err = mlx5_mr_cache_debugfs_init(dev);
+	if (err)
+		mlx5_ib_warn(dev, "cache debugfs failure\n");
+
+	return 0;
+}
+
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+{
+	int i;
+
+	dev->cache.stopped = 1;
+	destroy_workqueue(dev->cache.wq);
+
+	mlx5_mr_cache_debugfs_cleanup(dev);
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+		clean_keys(dev, i);
+
+	return 0;
+}
+
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_core_dev *mdev = &dev->mdev;
+	struct mlx5_create_mkey_mbox_in *in;
+	struct mlx5_mkey_seg *seg;
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	seg = &in->seg;
+	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
+	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
+	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	seg->start_addr = 0;
+
+	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+	if (err)
+		goto err_in;
+
+	kfree(in);
+	mr->ibmr.lkey = mr->mmr.key;
+	mr->ibmr.rkey = mr->mmr.key;
+	mr->umem = NULL;
+
+	return &mr->ibmr;
+
+err_in:
+	kfree(in);
+
+err_free:
+	kfree(mr);
+
+	return ERR_PTR(err);
+}
+
+static int get_octo_len(u64 addr, u64 len, int page_size)
+{
+	u64 offset;
+	int npages;
+
+	offset = addr & (page_size - 1);
+	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
+	return (npages + 1) / 2;
+}
+
+static int use_umr(int order)
+{
+	return order <= 17;
+}
+
+static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
+			     struct ib_sge *sg, u64 dma, int n, u32 key,
+			     int page_shift, u64 virt_addr, u64 len,
+			     int access_flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_mr *mr = dev->umrc.mr;
+
+	sg->addr = dma;
+	sg->length = ALIGN(sizeof(u64) * n, 64);
+	sg->lkey = mr->lkey;
+
+	wr->next = NULL;
+	wr->send_flags = 0;
+	wr->sg_list = sg;
+	if (n)
+		wr->num_sge = 1;
+	else
+		wr->num_sge = 0;
+
+	wr->opcode = MLX5_IB_WR_UMR;
+	wr->wr.fast_reg.page_list_len = n;
+	wr->wr.fast_reg.page_shift = page_shift;
+	wr->wr.fast_reg.rkey = key;
+	wr->wr.fast_reg.iova_start = virt_addr;
+	wr->wr.fast_reg.length = len;
+	wr->wr.fast_reg.access_flags = access_flags;
+	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+}
+
+static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
+			       struct ib_send_wr *wr, u32 key)
+{
+	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+	wr->opcode = MLX5_IB_WR_UMR;
+	wr->wr.fast_reg.rkey = key;
+}
+
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
+{
+	struct mlx5_ib_mr *mr;
+	struct ib_wc wc;
+	int err;
+
+	while (1) {
+		err = ib_poll_cq(cq, 1, &wc);
+		if (err < 0) {
+			pr_warn("poll cq error %d\n", err);
+			return;
+		}
+		if (err == 0)
+			break;
+
+		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
+		mr->status = wc.status;
+		complete(&mr->done);
+	}
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+}
+
+static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+				  u64 virt_addr, u64 len, int npages,
+				  int page_shift, int order, int access_flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct umr_common *umrc = &dev->umrc;
+	struct ib_send_wr wr, *bad;
+	struct mlx5_ib_mr *mr;
+	struct ib_sge sg;
+	int err;
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		mr = alloc_cached_mr(dev, order);
+		if (mr)
+			break;
+
+		err = add_keys(dev, order2idx(dev, order), 1);
+		if (err) {
+			mlx5_ib_warn(dev, "add_keys failed\n");
+			break;
+		}
+	}
+
+	if (!mr)
+		return ERR_PTR(-EAGAIN);
+
+	mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
+
+	memset(&wr, 0, sizeof(wr));
+	wr.wr_id = (u64)(unsigned long)mr;
+	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+
+	/* We serialize polls so one process does not kidnap another's
+	 * completion. This is not a problem since wr is completed in
+	 * around 1 usec
+	 */
+	down(&umrc->sem);
+	init_completion(&mr->done);
+	err = ib_post_send(umrc->qp, &wr, &bad);
+	if (err) {
+		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+		up(&umrc->sem);
+		goto error;
+	}
+	wait_for_completion(&mr->done);
+	up(&umrc->sem);
+
+	if (mr->status != IB_WC_SUCCESS) {
+		mlx5_ib_warn(dev, "reg umr failed\n");
+		err = -EFAULT;
+		goto error;
+	}
+
+	return mr;
+
+error:
+	free_cached_mr(dev, mr);
+	return ERR_PTR(err);
+}
+
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
+				     u64 length, struct ib_umem *umem,
+				     int npages, int page_shift,
+				     int access_flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_create_mkey_mbox_in *in;
+	struct mlx5_ib_mr *mr;
+	int inlen;
+	int err;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_1;
+	}
+	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+
+	in->seg.flags = convert_access(access_flags) |
+		MLX5_ACCESS_MODE_MTT;
+	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+	in->seg.start_addr = cpu_to_be64(virt_addr);
+	in->seg.len = cpu_to_be64(length);
+	in->seg.bsfs_octo_size = 0;
+	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+	in->seg.log2_page_size = page_shift;
+	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+	if (err) {
+		mlx5_ib_warn(dev, "create mkey failed\n");
+		goto err_2;
+	}
+	mr->umem = umem;
+	mlx5_vfree(in);
+
+	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
+
+	return mr;
+
+err_2:
+	mlx5_vfree(in);
+
+err_1:
+	kfree(mr);
+
+	return ERR_PTR(err);
+}
+
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 virt_addr, int access_flags,
+				  struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_mr *mr = NULL;
+	struct ib_umem *umem;
+	int page_shift;
+	int npages;
+	int ncont;
+	int order;
+	int err;
+
+	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
+		    start, virt_addr, length);
+	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
+			   0);
+	if (IS_ERR(umem)) {
+		mlx5_ib_dbg(dev, "umem get failed\n");
+		return (void *)umem;
+	}
+
+	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
+	if (!npages) {
+		mlx5_ib_warn(dev, "avoid zero region\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+		    npages, ncont, order, page_shift);
+
+	if (use_umr(order)) {
+		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
+			     order, access_flags);
+		if (PTR_ERR(mr) == -EAGAIN) {
+			mlx5_ib_dbg(dev, "cache empty for order %d", order);
+			mr = NULL;
+		}
+	}
+
+	if (!mr)
+		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
+				access_flags);
+
+	if (IS_ERR(mr)) {
+		err = PTR_ERR(mr);
+		goto error;
+	}
+
+	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
+
+	mr->umem = umem;
+	mr->npages = npages;
+	spin_lock(&dev->mr_lock);
+	dev->mdev.priv.reg_pages += npages;
+	spin_unlock(&dev->mr_lock);
+	mr->ibmr.lkey = mr->mmr.key;
+	mr->ibmr.rkey = mr->mmr.key;
+
+	return &mr->ibmr;
+
+error:
+	ib_umem_release(umem);
+	return ERR_PTR(err);
+}
+
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+	struct umr_common *umrc = &dev->umrc;
+	struct ib_send_wr wr, *bad;
+	int err;
+
+	memset(&wr, 0, sizeof(wr));
+	wr.wr_id = (u64)(unsigned long)mr;
+	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+
+	down(&umrc->sem);
+	init_completion(&mr->done);
+	err = ib_post_send(umrc->qp, &wr, &bad);
+	if (err) {
+		up(&umrc->sem);
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		goto error;
+	}
+	wait_for_completion(&mr->done);
+	up(&umrc->sem);
+	if (mr->status != IB_WC_SUCCESS) {
+		mlx5_ib_warn(dev, "unreg umr failed\n");
+		err = -EFAULT;
+		goto error;
+	}
+	return 0;
+
+error:
+	return err;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	struct ib_umem *umem = mr->umem;
+	int npages = mr->npages;
+	int umred = mr->umred;
+	int err;
+
+	if (!umred) {
+		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+		if (err) {
+			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+				     mr->mmr.key, err);
+			return err;
+		}
+	} else {
+		err = unreg_umr(dev, mr);
+		if (err) {
+			mlx5_ib_warn(dev, "failed unregister\n");
+			return err;
+		}
+		free_cached_mr(dev, mr);
+	}
+
+	if (umem) {
+		ib_umem_release(umem);
+		spin_lock(&dev->mr_lock);
+		dev->mdev.priv.reg_pages -= npages;
+		spin_unlock(&dev->mr_lock);
+	}
+
+	if (!umred)
+		kfree(mr);
+
+	return 0;
+}
+
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_create_mkey_mbox_in *in;
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	in->seg.status = 1 << 6; /* free */
+	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
+	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+	/*
+	 * TBD not needed - issue 197292 */
+	in->seg.log2_page_size = PAGE_SHIFT;
+
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+	kfree(in);
+	if (err)
+		goto err_free;
+
+	mr->ibmr.lkey = mr->mmr.key;
+	mr->ibmr.rkey = mr->mmr.key;
+	mr->umem = NULL;
+
+	return &mr->ibmr;
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len)
+{
+	struct mlx5_ib_fast_reg_page_list *mfrpl;
+	int size = page_list_len * sizeof(u64);
+
+	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
+	if (!mfrpl)
+		return ERR_PTR(-ENOMEM);
+
+	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
+	if (!mfrpl->ibfrpl.page_list)
+		goto err_free;
+
+	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
+						     size, &mfrpl->map,
+						     GFP_KERNEL);
+	if (!mfrpl->mapped_page_list)
+		goto err_free;
+
+	WARN_ON(mfrpl->map & 0x3f);
+
+	return &mfrpl->ibfrpl;
+
+err_free:
+	kfree(mfrpl->ibfrpl.page_list);
+	kfree(mfrpl);
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
+	int size = page_list->max_page_list_len * sizeof(u64);
+
+	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
+			  mfrpl->map);
+	kfree(mfrpl->ibfrpl.page_list);
+	kfree(mfrpl);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644
index 000000000000..16ac54c9819f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+/* not supported currently */
+static int wq_signature;
+
+enum {
+	MLX5_IB_ACK_REQ_FREQ	= 8,
+};
+
+enum {
+	MLX5_IB_DEFAULT_SCHED_QUEUE	= 0x83,
+	MLX5_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
+	MLX5_IB_LINK_TYPE_IB		= 0,
+	MLX5_IB_LINK_TYPE_ETH		= 1
+};
+
+enum {
+	MLX5_IB_SQ_STRIDE	= 6,
+	MLX5_IB_CACHE_LINE_SIZE	= 64,
+};
+
+static const u32 mlx5_ib_opcode[] = {
+	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
+	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
+	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
+	[IB_WR_RDMA_READ]			= MLX5_OPCODE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP]		= MLX5_OPCODE_ATOMIC_CS,
+	[IB_WR_ATOMIC_FETCH_AND_ADD]		= MLX5_OPCODE_ATOMIC_FA,
+	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
+	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
+	[IB_WR_FAST_REG_MR]			= MLX5_OPCODE_UMR,
+	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
+	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
+	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
+};
+
+struct umr_wr {
+	u64				virt_addr;
+	struct ib_pd		       *pd;
+	unsigned int			page_shift;
+	unsigned int			npages;
+	u32				length;
+	int				access_flags;
+	u32				mkey;
+};
+
+static int is_qp0(enum ib_qp_type qp_type)
+{
+	return qp_type == IB_QPT_SMI;
+}
+
+static int is_qp1(enum ib_qp_type qp_type)
+{
+	return qp_type == IB_QPT_GSI;
+}
+
+static int is_sqp(enum ib_qp_type qp_type)
+{
+	return is_qp0(qp_type) || is_qp1(qp_type);
+}
+
+static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
+{
+	return mlx5_buf_offset(&qp->buf, offset);
+}
+
+static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
+{
+	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+}
+
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
+{
+	return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
+}
+
+static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
+{
+	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+	struct ib_event event;
+
+	if (type == MLX5_EVENT_TYPE_PATH_MIG)
+		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+
+	if (ibqp->event_handler) {
+		event.device     = ibqp->device;
+		event.element.qp = ibqp;
+		switch (type) {
+		case MLX5_EVENT_TYPE_PATH_MIG:
+			event.event = IB_EVENT_PATH_MIG;
+			break;
+		case MLX5_EVENT_TYPE_COMM_EST:
+			event.event = IB_EVENT_COMM_EST;
+			break;
+		case MLX5_EVENT_TYPE_SQ_DRAINED:
+			event.event = IB_EVENT_SQ_DRAINED;
+			break;
+		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			break;
+		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+			event.event = IB_EVENT_QP_FATAL;
+			break;
+		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+			event.event = IB_EVENT_PATH_MIG_ERR;
+			break;
+		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+			event.event = IB_EVENT_QP_REQ_ERR;
+			break;
+		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+			event.event = IB_EVENT_QP_ACCESS_ERR;
+			break;
+		default:
+			pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
+			return;
+		}
+
+		ibqp->event_handler(&event, ibqp->qp_context);
+	}
+}
+
+static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
+		       int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
+{
+	int wqe_size;
+	int wq_size;
+
+	/* Sanity check RQ size before proceeding */
+	if (cap->max_recv_wr  > dev->mdev.caps.max_wqes)
+		return -EINVAL;
+
+	if (!has_rq) {
+		qp->rq.max_gs = 0;
+		qp->rq.wqe_cnt = 0;
+		qp->rq.wqe_shift = 0;
+	} else {
+		if (ucmd) {
+			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+			qp->rq.wqe_shift = ucmd->rq_wqe_shift;
+			qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+			qp->rq.max_post = qp->rq.wqe_cnt;
+		} else {
+			wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+			wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
+			wqe_size = roundup_pow_of_two(wqe_size);
+			wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
+			wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
+			qp->rq.wqe_cnt = wq_size / wqe_size;
+			if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
+				mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
+					    wqe_size,
+					    dev->mdev.caps.max_rq_desc_sz);
+				return -EINVAL;
+			}
+			qp->rq.wqe_shift = ilog2(wqe_size);
+			qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+			qp->rq.max_post = qp->rq.wqe_cnt;
+		}
+	}
+
+	return 0;
+}
+
+static int sq_overhead(enum ib_qp_type qp_type)
+{
+	int size;
+
+	switch (qp_type) {
+	case IB_QPT_XRC_INI:
+		size = sizeof(struct mlx5_wqe_xrc_seg);
+		/* fall through */
+	case IB_QPT_RC:
+		size += sizeof(struct mlx5_wqe_ctrl_seg) +
+			sizeof(struct mlx5_wqe_atomic_seg) +
+			sizeof(struct mlx5_wqe_raddr_seg);
+		break;
+
+	case IB_QPT_UC:
+		size = sizeof(struct mlx5_wqe_ctrl_seg) +
+			sizeof(struct mlx5_wqe_raddr_seg);
+		break;
+
+	case IB_QPT_UD:
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		size = sizeof(struct mlx5_wqe_ctrl_seg) +
+			sizeof(struct mlx5_wqe_datagram_seg);
+		break;
+
+	case MLX5_IB_QPT_REG_UMR:
+		size = sizeof(struct mlx5_wqe_ctrl_seg) +
+			sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+			sizeof(struct mlx5_mkey_seg);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return size;
+}
+
+static int calc_send_wqe(struct ib_qp_init_attr *attr)
+{
+	int inl_size = 0;
+	int size;
+
+	size = sq_overhead(attr->qp_type);
+	if (size < 0)
+		return size;
+
+	if (attr->cap.max_inline_data) {
+		inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
+			attr->cap.max_inline_data;
+	}
+
+	size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
+
+	return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
+}
+
+static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+			struct mlx5_ib_qp *qp)
+{
+	int wqe_size;
+	int wq_size;
+
+	if (!attr->cap.max_send_wr)
+		return 0;
+
+	wqe_size = calc_send_wqe(attr);
+	mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
+	if (wqe_size < 0)
+		return wqe_size;
+
+	if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
+		mlx5_ib_dbg(dev, "\n");
+		return -EINVAL;
+	}
+
+	qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
+		sizeof(struct mlx5_wqe_inline_seg);
+	attr->cap.max_inline_data = qp->max_inline_data;
+
+	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
+	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
+	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+	qp->sq.max_gs = attr->cap.max_send_sge;
+	qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
+
+	return wq_size;
+}
+
+static int set_user_buf_size(struct mlx5_ib_dev *dev,
+			    struct mlx5_ib_qp *qp,
+			    struct mlx5_ib_create_qp *ucmd)
+{
+	int desc_sz = 1 << qp->sq.wqe_shift;
+
+	if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
+		mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
+			     desc_sz, dev->mdev.caps.max_sq_desc_sz);
+		return -EINVAL;
+	}
+
+	if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
+		mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n",
+			     ucmd->sq_wqe_count, ucmd->sq_wqe_count);
+		return -EINVAL;
+	}
+
+	qp->sq.wqe_cnt = ucmd->sq_wqe_count;
+
+	if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+		mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
+			     qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+		return -EINVAL;
+	}
+
+	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+		(qp->sq.wqe_cnt << 6);
+
+	return 0;
+}
+
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+	if (attr->qp_type == IB_QPT_XRC_INI ||
+	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
+	    attr->qp_type == MLX5_IB_QPT_REG_UMR ||
+	    !attr->cap.max_recv_wr)
+		return 0;
+
+	return 1;
+}
+
+static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+{
+	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+	int start_uuar;
+	int i;
+
+	start_uuar = nuuars - uuari->num_low_latency_uuars;
+	for (i = start_uuar; i < nuuars; i++) {
+		if (!test_bit(i, uuari->bitmap)) {
+			set_bit(i, uuari->bitmap);
+			uuari->count[i]++;
+			return i;
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+{
+	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+	int minidx = 1;
+	int uuarn;
+	int end;
+	int i;
+
+	end = nuuars - uuari->num_low_latency_uuars;
+
+	for (i = 1; i < end; i++) {
+		uuarn = i & 3;
+		if (uuarn == 2 || uuarn == 3)
+			continue;
+
+		if (uuari->count[i] < uuari->count[minidx])
+			minidx = i;
+	}
+
+	uuari->count[minidx]++;
+	return minidx;
+}
+
+static int alloc_uuar(struct mlx5_uuar_info *uuari,
+		      enum mlx5_ib_latency_class lat)
+{
+	int uuarn = -EINVAL;
+
+	mutex_lock(&uuari->lock);
+	switch (lat) {
+	case MLX5_IB_LATENCY_CLASS_LOW:
+		uuarn = 0;
+		uuari->count[uuarn]++;
+		break;
+
+	case MLX5_IB_LATENCY_CLASS_MEDIUM:
+		uuarn = alloc_med_class_uuar(uuari);
+		break;
+
+	case MLX5_IB_LATENCY_CLASS_HIGH:
+		uuarn = alloc_high_class_uuar(uuari);
+		break;
+
+	case MLX5_IB_LATENCY_CLASS_FAST_PATH:
+		uuarn = 2;
+		break;
+	}
+	mutex_unlock(&uuari->lock);
+
+	return uuarn;
+}
+
+static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+	clear_bit(uuarn, uuari->bitmap);
+	--uuari->count[uuarn];
+}
+
+static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+	clear_bit(uuarn, uuari->bitmap);
+	--uuari->count[uuarn];
+}
+
+static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+	int high_uuar = nuuars - uuari->num_low_latency_uuars;
+
+	mutex_lock(&uuari->lock);
+	if (uuarn == 0) {
+		--uuari->count[uuarn];
+		goto out;
+	}
+
+	if (uuarn < high_uuar) {
+		free_med_class_uuar(uuari, uuarn);
+		goto out;
+	}
+
+	free_high_class_uuar(uuari, uuarn);
+
+out:
+	mutex_unlock(&uuari->lock);
+}
+
+static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
+{
+	switch (state) {
+	case IB_QPS_RESET:	return MLX5_QP_STATE_RST;
+	case IB_QPS_INIT:	return MLX5_QP_STATE_INIT;
+	case IB_QPS_RTR:	return MLX5_QP_STATE_RTR;
+	case IB_QPS_RTS:	return MLX5_QP_STATE_RTS;
+	case IB_QPS_SQD:	return MLX5_QP_STATE_SQD;
+	case IB_QPS_SQE:	return MLX5_QP_STATE_SQER;
+	case IB_QPS_ERR:	return MLX5_QP_STATE_ERR;
+	default:		return -1;
+	}
+}
+
+static int to_mlx5_st(enum ib_qp_type type)
+{
+	switch (type) {
+	case IB_QPT_RC:			return MLX5_QP_ST_RC;
+	case IB_QPT_UC:			return MLX5_QP_ST_UC;
+	case IB_QPT_UD:			return MLX5_QP_ST_UD;
+	case MLX5_IB_QPT_REG_UMR:	return MLX5_QP_ST_REG_UMR;
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:		return MLX5_QP_ST_XRC;
+	case IB_QPT_SMI:		return MLX5_QP_ST_QP0;
+	case IB_QPT_GSI:		return MLX5_QP_ST_QP1;
+	case IB_QPT_RAW_IPV6:		return MLX5_QP_ST_RAW_IPV6;
+	case IB_QPT_RAW_ETHERTYPE:	return MLX5_QP_ST_RAW_ETHERTYPE;
+	case IB_QPT_RAW_PACKET:
+	case IB_QPT_MAX:
+	default:		return -EINVAL;
+	}
+}
+
+static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+{
+	return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
+}
+
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+			  struct mlx5_ib_qp *qp, struct ib_udata *udata,
+			  struct mlx5_create_qp_mbox_in **in,
+			  struct mlx5_ib_create_qp_resp *resp, int *inlen)
+{
+	struct mlx5_ib_ucontext *context;
+	struct mlx5_ib_create_qp ucmd;
+	int page_shift;
+	int uar_index;
+	int npages;
+	u32 offset;
+	int uuarn;
+	int ncont;
+	int err;
+
+	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+	if (err) {
+		mlx5_ib_dbg(dev, "copy failed\n");
+		return err;
+	}
+
+	context = to_mucontext(pd->uobject->context);
+	/*
+	 * TBD: should come from the verbs when we have the API
+	 */
+	uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
+	if (uuarn < 0) {
+		mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+		mlx5_ib_dbg(dev, "reverting to high latency\n");
+		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+		if (uuarn < 0) {
+			mlx5_ib_dbg(dev, "uuar allocation failed\n");
+			return uuarn;
+		}
+	}
+
+	uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
+	mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+
+	err = set_user_buf_size(dev, qp, &ucmd);
+	if (err)
+		goto err_uuar;
+
+	qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+			       qp->buf_size, 0, 0);
+	if (IS_ERR(qp->umem)) {
+		mlx5_ib_dbg(dev, "umem_get failed\n");
+		err = PTR_ERR(qp->umem);
+		goto err_uuar;
+	}
+
+	mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+			   &ncont, NULL);
+	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+	if (err) {
+		mlx5_ib_warn(dev, "bad offset\n");
+		goto err_umem;
+	}
+	mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+		    ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+
+	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+	*in = mlx5_vzalloc(*inlen);
+	if (!*in) {
+		err = -ENOMEM;
+		goto err_umem;
+	}
+	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+	(*in)->ctx.log_pg_sz_remote_qpn =
+		cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
+
+	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+	resp->uuar_index = uuarn;
+	qp->uuarn = uuarn;
+
+	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+	if (err) {
+		mlx5_ib_dbg(dev, "map failed\n");
+		goto err_free;
+	}
+
+	err = ib_copy_to_udata(udata, resp, sizeof(*resp));
+	if (err) {
+		mlx5_ib_dbg(dev, "copy failed\n");
+		goto err_unmap;
+	}
+	qp->create_type = MLX5_QP_USER;
+
+	return 0;
+
+err_unmap:
+	mlx5_ib_db_unmap_user(context, &qp->db);
+
+err_free:
+	mlx5_vfree(*in);
+
+err_umem:
+	ib_umem_release(qp->umem);
+
+err_uuar:
+	free_uuar(&context->uuari, uuarn);
+	return err;
+}
+
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+	struct mlx5_ib_ucontext *context;
+
+	context = to_mucontext(pd->uobject->context);
+	mlx5_ib_db_unmap_user(context, &qp->db);
+	ib_umem_release(qp->umem);
+	free_uuar(&context->uuari, qp->uuarn);
+}
+
+static int create_kernel_qp(struct mlx5_ib_dev *dev,
+			    struct ib_qp_init_attr *init_attr,
+			    struct mlx5_ib_qp *qp,
+			    struct mlx5_create_qp_mbox_in **in, int *inlen)
+{
+	enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
+	struct mlx5_uuar_info *uuari;
+	int uar_index;
+	int uuarn;
+	int err;
+
+	uuari = &dev->mdev.priv.uuari;
+	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+		qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
+		lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
+
+	uuarn = alloc_uuar(uuari, lc);
+	if (uuarn < 0) {
+		mlx5_ib_dbg(dev, "\n");
+		return -ENOMEM;
+	}
+
+	qp->bf = &uuari->bfs[uuarn];
+	uar_index = qp->bf->uar->index;
+
+	err = calc_sq_size(dev, init_attr, qp);
+	if (err < 0) {
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		goto err_uuar;
+	}
+
+	qp->rq.offset = 0;
+	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+	qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+
+	err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
+	if (err) {
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		goto err_uuar;
+	}
+
+	qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+	*in = mlx5_vzalloc(*inlen);
+	if (!*in) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+	/* Set "fast registration enabled" for all kernel QPs */
+	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+
+	mlx5_fill_page_array(&qp->buf, (*in)->pas);
+
+	err = mlx5_db_alloc(&dev->mdev, &qp->db);
+	if (err) {
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		goto err_free;
+	}
+
+	qp->db.db[0] = 0;
+	qp->db.db[1] = 0;
+
+	qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
+	qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
+	qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
+	qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
+	qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
+
+	if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
+	    !qp->sq.w_list || !qp->sq.wqe_head) {
+		err = -ENOMEM;
+		goto err_wrid;
+	}
+	qp->create_type = MLX5_QP_KERNEL;
+
+	return 0;
+
+err_wrid:
+	mlx5_db_free(&dev->mdev, &qp->db);
+	kfree(qp->sq.wqe_head);
+	kfree(qp->sq.w_list);
+	kfree(qp->sq.wrid);
+	kfree(qp->sq.wr_data);
+	kfree(qp->rq.wrid);
+
+err_free:
+	mlx5_vfree(*in);
+
+err_buf:
+	mlx5_buf_free(&dev->mdev, &qp->buf);
+
+err_uuar:
+	free_uuar(&dev->mdev.priv.uuari, uuarn);
+	return err;
+}
+
+static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+	mlx5_db_free(&dev->mdev, &qp->db);
+	kfree(qp->sq.wqe_head);
+	kfree(qp->sq.w_list);
+	kfree(qp->sq.wrid);
+	kfree(qp->sq.wr_data);
+	kfree(qp->rq.wrid);
+	mlx5_buf_free(&dev->mdev, &qp->buf);
+	free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
+}
+
+static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+{
+	if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
+	    (attr->qp_type == IB_QPT_XRC_INI))
+		return cpu_to_be32(MLX5_SRQ_RQ);
+	else if (!qp->has_rq)
+		return cpu_to_be32(MLX5_ZERO_LEN_RQ);
+	else
+		return cpu_to_be32(MLX5_NON_ZERO_RQ);
+}
+
+static int is_connected(enum ib_qp_type qp_type)
+{
+	if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
+		return 1;
+
+	return 0;
+}
+
+static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata, struct mlx5_ib_qp *qp)
+{
+	struct mlx5_ib_resources *devr = &dev->devr;
+	struct mlx5_ib_create_qp_resp resp;
+	struct mlx5_create_qp_mbox_in *in;
+	struct mlx5_ib_create_qp ucmd;
+	int inlen = sizeof(*in);
+	int err;
+
+	mutex_init(&qp->mutex);
+	spin_lock_init(&qp->sq.lock);
+	spin_lock_init(&qp->rq.lock);
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+	if (pd && pd->uobject) {
+		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+			mlx5_ib_dbg(dev, "copy failed\n");
+			return -EFAULT;
+		}
+
+		qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
+		qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+	} else {
+		qp->wq_sig = !!wq_signature;
+	}
+
+	qp->has_rq = qp_has_rq(init_attr);
+	err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
+			  qp, (pd && pd->uobject) ? &ucmd : NULL);
+	if (err) {
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		return err;
+	}
+
+	if (pd) {
+		if (pd->uobject) {
+			mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
+			if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
+			    ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
+				mlx5_ib_dbg(dev, "invalid rq params\n");
+				return -EINVAL;
+			}
+			if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
+				mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
+					    ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
+				return -EINVAL;
+			}
+			err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+			if (err)
+				mlx5_ib_dbg(dev, "err %d\n", err);
+		} else {
+			err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+			if (err)
+				mlx5_ib_dbg(dev, "err %d\n", err);
+			else
+				qp->pa_lkey = to_mpd(pd)->pa_lkey;
+		}
+
+		if (err)
+			return err;
+	} else {
+		in = mlx5_vzalloc(sizeof(*in));
+		if (!in)
+			return -ENOMEM;
+
+		qp->create_type = MLX5_QP_EMPTY;
+	}
+
+	if (is_sqp(init_attr->qp_type))
+		qp->port = init_attr->port_num;
+
+	in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
+				    MLX5_QP_PM_MIGRATED << 11);
+
+	if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
+		in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+	else
+		in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+
+	if (qp->wq_sig)
+		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+
+	if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
+		int rcqe_sz;
+		int scqe_sz;
+
+		rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
+		scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
+
+		if (rcqe_sz == 128)
+			in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+		else
+			in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+
+		if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
+			if (scqe_sz == 128)
+				in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+			else
+				in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+		}
+	}
+
+	if (qp->rq.wqe_cnt) {
+		in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
+		in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+	}
+
+	in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+
+	if (qp->sq.wqe_cnt)
+		in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+	else
+		in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+
+	/* Set default resources */
+	switch (init_attr->qp_type) {
+	case IB_QPT_XRC_TGT:
+		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+		in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+		in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+		break;
+	case IB_QPT_XRC_INI:
+		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+		in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+		break;
+	default:
+		if (init_attr->srq) {
+			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
+			in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+		} else {
+			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+			in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+		}
+	}
+
+	if (init_attr->send_cq)
+		in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+
+	if (init_attr->recv_cq)
+		in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+	in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+
+	err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
+	if (err) {
+		mlx5_ib_dbg(dev, "create qp failed\n");
+		goto err_create;
+	}
+
+	mlx5_vfree(in);
+	/* Hardware wants QPN written in big-endian order (after
+	 * shifting) for send doorbell.  Precompute this value to save
+	 * a little bit when posting sends.
+	 */
+	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+	qp->mqp.event = mlx5_ib_qp_event;
+
+	return 0;
+
+err_create:
+	if (qp->create_type == MLX5_QP_USER)
+		destroy_qp_user(pd, qp);
+	else if (qp->create_type == MLX5_QP_KERNEL)
+		destroy_qp_kernel(dev, qp);
+
+	mlx5_vfree(in);
+	return err;
+}
+
+static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+	if (send_cq) {
+		if (recv_cq) {
+			if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
+				spin_lock_irq(&send_cq->lock);
+				spin_lock_nested(&recv_cq->lock,
+						 SINGLE_DEPTH_NESTING);
+			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+				spin_lock_irq(&send_cq->lock);
+				__acquire(&recv_cq->lock);
+			} else {
+				spin_lock_irq(&recv_cq->lock);
+				spin_lock_nested(&send_cq->lock,
+						 SINGLE_DEPTH_NESTING);
+			}
+		} else {
+			spin_lock_irq(&send_cq->lock);
+		}
+	} else if (recv_cq) {
+		spin_lock_irq(&recv_cq->lock);
+	}
+}
+
+static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+	__releases(&send_cq->lock) __releases(&recv_cq->lock)
+{
+	if (send_cq) {
+		if (recv_cq) {
+			if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
+				spin_unlock(&recv_cq->lock);
+				spin_unlock_irq(&send_cq->lock);
+			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+				__release(&recv_cq->lock);
+				spin_unlock_irq(&send_cq->lock);
+			} else {
+				spin_unlock(&send_cq->lock);
+				spin_unlock_irq(&recv_cq->lock);
+			}
+		} else {
+			spin_unlock_irq(&send_cq->lock);
+		}
+	} else if (recv_cq) {
+		spin_unlock_irq(&recv_cq->lock);
+	}
+}
+
+static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
+{
+	return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx5_ib_qp *qp,
+		    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
+{
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_XRC_TGT:
+		*send_cq = NULL;
+		*recv_cq = NULL;
+		break;
+	case MLX5_IB_QPT_REG_UMR:
+	case IB_QPT_XRC_INI:
+		*send_cq = to_mcq(qp->ibqp.send_cq);
+		*recv_cq = NULL;
+		break;
+
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+	case IB_QPT_UD:
+	case IB_QPT_RAW_IPV6:
+	case IB_QPT_RAW_ETHERTYPE:
+		*send_cq = to_mcq(qp->ibqp.send_cq);
+		*recv_cq = to_mcq(qp->ibqp.recv_cq);
+		break;
+
+	case IB_QPT_RAW_PACKET:
+	case IB_QPT_MAX:
+	default:
+		*send_cq = NULL;
+		*recv_cq = NULL;
+		break;
+	}
+}
+
+static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+	struct mlx5_ib_cq *send_cq, *recv_cq;
+	struct mlx5_modify_qp_mbox_in *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return;
+	if (qp->state != IB_QPS_RESET)
+		if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
+					MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
+			mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
+				     qp->mqp.qpn);
+
+	get_cqs(qp, &send_cq, &recv_cq);
+
+	if (qp->create_type == MLX5_QP_KERNEL) {
+		mlx5_ib_lock_cqs(send_cq, recv_cq);
+		__mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+				   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			__mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+		mlx5_ib_unlock_cqs(send_cq, recv_cq);
+	}
+
+	err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
+	if (err)
+		mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
+	kfree(in);
+
+
+	if (qp->create_type == MLX5_QP_KERNEL)
+		destroy_qp_kernel(dev, qp);
+	else if (qp->create_type == MLX5_QP_USER)
+		destroy_qp_user(&get_pd(qp)->ibpd, qp);
+}
+
+static const char *ib_qp_type_str(enum ib_qp_type type)
+{
+	switch (type) {
+	case IB_QPT_SMI:
+		return "IB_QPT_SMI";
+	case IB_QPT_GSI:
+		return "IB_QPT_GSI";
+	case IB_QPT_RC:
+		return "IB_QPT_RC";
+	case IB_QPT_UC:
+		return "IB_QPT_UC";
+	case IB_QPT_UD:
+		return "IB_QPT_UD";
+	case IB_QPT_RAW_IPV6:
+		return "IB_QPT_RAW_IPV6";
+	case IB_QPT_RAW_ETHERTYPE:
+		return "IB_QPT_RAW_ETHERTYPE";
+	case IB_QPT_XRC_INI:
+		return "IB_QPT_XRC_INI";
+	case IB_QPT_XRC_TGT:
+		return "IB_QPT_XRC_TGT";
+	case IB_QPT_RAW_PACKET:
+		return "IB_QPT_RAW_PACKET";
+	case MLX5_IB_QPT_REG_UMR:
+		return "MLX5_IB_QPT_REG_UMR";
+	case IB_QPT_MAX:
+	default:
+		return "Invalid QP type";
+	}
+}
+
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+				struct ib_qp_init_attr *init_attr,
+				struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev;
+	struct mlx5_ib_qp *qp;
+	u16 xrcdn = 0;
+	int err;
+
+	if (pd) {
+		dev = to_mdev(pd->device);
+	} else {
+		/* being cautious here */
+		if (init_attr->qp_type != IB_QPT_XRC_TGT &&
+		    init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
+			pr_warn("%s: no PD for transport %s\n", __func__,
+				ib_qp_type_str(init_attr->qp_type));
+			return ERR_PTR(-EINVAL);
+		}
+		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+	}
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_XRC_TGT:
+	case IB_QPT_XRC_INI:
+		if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+			mlx5_ib_dbg(dev, "XRC not supported\n");
+			return ERR_PTR(-ENOSYS);
+		}
+		init_attr->recv_cq = NULL;
+		if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+			xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+			init_attr->send_cq = NULL;
+		}
+
+		/* fall through */
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+	case IB_QPT_UD:
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case MLX5_IB_QPT_REG_UMR:
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+		if (!qp)
+			return ERR_PTR(-ENOMEM);
+
+		err = create_qp_common(dev, pd, init_attr, udata, qp);
+		if (err) {
+			mlx5_ib_dbg(dev, "create_qp_common failed\n");
+			kfree(qp);
+			return ERR_PTR(err);
+		}
+
+		if (is_qp0(init_attr->qp_type))
+			qp->ibqp.qp_num = 0;
+		else if (is_qp1(init_attr->qp_type))
+			qp->ibqp.qp_num = 1;
+		else
+			qp->ibqp.qp_num = qp->mqp.qpn;
+
+		mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
+			    qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+			    to_mcq(init_attr->send_cq)->mcq.cqn);
+
+		qp->xrcdn = xrcdn;
+
+		break;
+
+	case IB_QPT_RAW_IPV6:
+	case IB_QPT_RAW_ETHERTYPE:
+	case IB_QPT_RAW_PACKET:
+	case IB_QPT_MAX:
+	default:
+		mlx5_ib_dbg(dev, "unsupported qp type %d\n",
+			    init_attr->qp_type);
+		/* Don't support raw QPs */
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &qp->ibqp;
+}
+
+int mlx5_ib_destroy_qp(struct ib_qp *qp)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_ib_qp *mqp = to_mqp(qp);
+
+	destroy_qp_common(dev, mqp);
+
+	kfree(mqp);
+
+	return 0;
+}
+
+static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
+				   int attr_mask)
+{
+	u32 hw_access_flags = 0;
+	u8 dest_rd_atomic;
+	u32 access_flags;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		dest_rd_atomic = attr->max_dest_rd_atomic;
+	else
+		dest_rd_atomic = qp->resp_depth;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		access_flags = attr->qp_access_flags;
+	else
+		access_flags = qp->atomic_rd_en;
+
+	if (!dest_rd_atomic)
+		access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+	if (access_flags & IB_ACCESS_REMOTE_READ)
+		hw_access_flags |= MLX5_QP_BIT_RRE;
+	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+		hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+	if (access_flags & IB_ACCESS_REMOTE_WRITE)
+		hw_access_flags |= MLX5_QP_BIT_RWE;
+
+	return cpu_to_be32(hw_access_flags);
+}
+
+enum {
+	MLX5_PATH_FLAG_FL	= 1 << 0,
+	MLX5_PATH_FLAG_FREE_AR	= 1 << 1,
+	MLX5_PATH_FLAG_COUNTER	= 1 << 2,
+};
+
+static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+{
+	if (rate == IB_RATE_PORT_CURRENT) {
+		return 0;
+	} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
+		return -EINVAL;
+	} else {
+		while (rate != IB_RATE_2_5_GBPS &&
+		       !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
+			 dev->mdev.caps.stat_rate_support))
+			--rate;
+	}
+
+	return rate + MLX5_STAT_RATE_OFFSET;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+			 struct mlx5_qp_path *path, u8 port, int attr_mask,
+			 u32 path_flags, const struct ib_qp_attr *attr)
+{
+	int err;
+
+	path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+	path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		path->pkey_index = attr->pkey_index;
+
+	path->grh_mlid	= ah->src_path_bits & 0x7f;
+	path->rlid	= cpu_to_be16(ah->dlid);
+
+	if (ah->ah_flags & IB_AH_GRH) {
+		path->grh_mlid |= 1 << 7;
+		path->mgid_index = ah->grh.sgid_index;
+		path->hop_limit  = ah->grh.hop_limit;
+		path->tclass_flowlabel =
+			cpu_to_be32((ah->grh.traffic_class << 20) |
+				    (ah->grh.flow_label));
+		memcpy(path->rgid, ah->grh.dgid.raw, 16);
+	}
+
+	err = ib_rate_to_mlx5(dev, ah->static_rate);
+	if (err < 0)
+		return err;
+	path->static_rate = err;
+	path->port = port;
+
+	if (ah->ah_flags & IB_AH_GRH) {
+		if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
+			pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+			       ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
+			return -EINVAL;
+		}
+
+		path->grh_mlid |= 1 << 7;
+		path->mgid_index = ah->grh.sgid_index;
+		path->hop_limit  = ah->grh.hop_limit;
+		path->tclass_flowlabel =
+			cpu_to_be32((ah->grh.traffic_class << 20) |
+				    (ah->grh.flow_label));
+		memcpy(path->rgid, ah->grh.dgid.raw, 16);
+	}
+
+	if (attr_mask & IB_QP_TIMEOUT)
+		path->ackto_lt = attr->timeout << 3;
+
+	path->sl = ah->sl & 0xf;
+
+	return 0;
+}
+
+static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
+	[MLX5_QP_STATE_INIT] = {
+		[MLX5_QP_STATE_INIT] = {
+			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
+					  MLX5_QP_OPTPAR_RAE		|
+					  MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_PKEY_INDEX	|
+					  MLX5_QP_OPTPAR_PRI_PORT,
+			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_PKEY_INDEX	|
+					  MLX5_QP_OPTPAR_PRI_PORT,
+			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX	|
+					  MLX5_QP_OPTPAR_Q_KEY		|
+					  MLX5_QP_OPTPAR_PRI_PORT,
+		},
+		[MLX5_QP_STATE_RTR] = {
+			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+					  MLX5_QP_OPTPAR_RRE            |
+					  MLX5_QP_OPTPAR_RAE            |
+					  MLX5_QP_OPTPAR_RWE            |
+					  MLX5_QP_OPTPAR_PKEY_INDEX,
+			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
+					  MLX5_QP_OPTPAR_RWE            |
+					  MLX5_QP_OPTPAR_PKEY_INDEX,
+			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
+					  MLX5_QP_OPTPAR_Q_KEY,
+			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX	|
+					   MLX5_QP_OPTPAR_Q_KEY,
+		},
+	},
+	[MLX5_QP_STATE_RTR] = {
+		[MLX5_QP_STATE_RTS] = {
+			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
+					  MLX5_QP_OPTPAR_RRE		|
+					  MLX5_QP_OPTPAR_RAE		|
+					  MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_PM_STATE	|
+					  MLX5_QP_OPTPAR_RNR_TIMEOUT,
+			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
+					  MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_PM_STATE,
+			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+		},
+	},
+	[MLX5_QP_STATE_RTS] = {
+		[MLX5_QP_STATE_RTS] = {
+			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
+					  MLX5_QP_OPTPAR_RAE		|
+					  MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_RNR_TIMEOUT	|
+					  MLX5_QP_OPTPAR_PM_STATE,
+			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
+					  MLX5_QP_OPTPAR_PM_STATE,
+			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY		|
+					  MLX5_QP_OPTPAR_SRQN		|
+					  MLX5_QP_OPTPAR_CQN_RCV,
+		},
+	},
+	[MLX5_QP_STATE_SQER] = {
+		[MLX5_QP_STATE_RTS] = {
+			[MLX5_QP_ST_UD]	 = MLX5_QP_OPTPAR_Q_KEY,
+			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+		},
+	},
+};
+
+static int ib_nr_to_mlx5_nr(int ib_mask)
+{
+	switch (ib_mask) {
+	case IB_QP_STATE:
+		return 0;
+	case IB_QP_CUR_STATE:
+		return 0;
+	case IB_QP_EN_SQD_ASYNC_NOTIFY:
+		return 0;
+	case IB_QP_ACCESS_FLAGS:
+		return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
+			MLX5_QP_OPTPAR_RAE;
+	case IB_QP_PKEY_INDEX:
+		return MLX5_QP_OPTPAR_PKEY_INDEX;
+	case IB_QP_PORT:
+		return MLX5_QP_OPTPAR_PRI_PORT;
+	case IB_QP_QKEY:
+		return MLX5_QP_OPTPAR_Q_KEY;
+	case IB_QP_AV:
+		return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
+			MLX5_QP_OPTPAR_PRI_PORT;
+	case IB_QP_PATH_MTU:
+		return 0;
+	case IB_QP_TIMEOUT:
+		return MLX5_QP_OPTPAR_ACK_TIMEOUT;
+	case IB_QP_RETRY_CNT:
+		return MLX5_QP_OPTPAR_RETRY_COUNT;
+	case IB_QP_RNR_RETRY:
+		return MLX5_QP_OPTPAR_RNR_RETRY;
+	case IB_QP_RQ_PSN:
+		return 0;
+	case IB_QP_MAX_QP_RD_ATOMIC:
+		return MLX5_QP_OPTPAR_SRA_MAX;
+	case IB_QP_ALT_PATH:
+		return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
+	case IB_QP_MIN_RNR_TIMER:
+		return MLX5_QP_OPTPAR_RNR_TIMEOUT;
+	case IB_QP_SQ_PSN:
+		return 0;
+	case IB_QP_MAX_DEST_RD_ATOMIC:
+		return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
+			MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
+	case IB_QP_PATH_MIG_STATE:
+		return MLX5_QP_OPTPAR_PM_STATE;
+	case IB_QP_CAP:
+		return 0;
+	case IB_QP_DEST_QPN:
+		return 0;
+	}
+	return 0;
+}
+
+static int ib_mask_to_mlx5_opt(int ib_mask)
+{
+	int result = 0;
+	int i;
+
+	for (i = 0; i < 8 * sizeof(int); i++) {
+		if ((1 << i) & ib_mask)
+			result |= ib_nr_to_mlx5_nr(1 << i);
+	}
+
+	return result;
+}
+
+static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
+			       const struct ib_qp_attr *attr, int attr_mask,
+			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_ib_cq *send_cq, *recv_cq;
+	struct mlx5_qp_context *context;
+	struct mlx5_modify_qp_mbox_in *in;
+	struct mlx5_ib_pd *pd;
+	enum mlx5_qp_state mlx5_cur, mlx5_new;
+	enum mlx5_qp_optpar optpar;
+	int sqd_event;
+	int mlx5_st;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	context = &in->ctx;
+	err = to_mlx5_st(ibqp->qp_type);
+	if (err < 0)
+		goto out;
+
+	context->flags = cpu_to_be32(err << 16);
+
+	if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
+		context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+	} else {
+		switch (attr->path_mig_state) {
+		case IB_MIG_MIGRATED:
+			context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+			break;
+		case IB_MIG_REARM:
+			context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
+			break;
+		case IB_MIG_ARMED:
+			context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
+			break;
+		}
+	}
+
+	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+		context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
+	} else if (ibqp->qp_type == IB_QPT_UD ||
+		   ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
+		context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+	} else if (attr_mask & IB_QP_PATH_MTU) {
+		if (attr->path_mtu < IB_MTU_256 ||
+		    attr->path_mtu > IB_MTU_4096) {
+			mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
+			err = -EINVAL;
+			goto out;
+		}
+		context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
+	}
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		context->pri_path.pkey_index = attr->pkey_index;
+
+	/* todo implement counter_index functionality */
+
+	if (is_sqp(ibqp->qp_type))
+		context->pri_path.port = qp->port;
+
+	if (attr_mask & IB_QP_PORT)
+		context->pri_path.port = attr->port_num;
+
+	if (attr_mask & IB_QP_AV) {
+		err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+				    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
+				    attr_mask, 0, attr);
+		if (err)
+			goto out;
+	}
+
+	if (attr_mask & IB_QP_TIMEOUT)
+		context->pri_path.ackto_lt |= attr->timeout << 3;
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+				    attr->alt_port_num, attr_mask, 0, attr);
+		if (err)
+			goto out;
+	}
+
+	pd = get_pd(qp);
+	get_cqs(qp, &send_cq, &recv_cq);
+
+	context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
+	context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
+	context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
+	context->params1  = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
+
+	if (attr_mask & IB_QP_RNR_RETRY)
+		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+
+	if (attr_mask & IB_QP_RETRY_CNT)
+		context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		if (attr->max_rd_atomic)
+			context->params1 |=
+				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN)
+		context->next_send_psn = cpu_to_be32(attr->sq_psn);
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		if (attr->max_dest_rd_atomic)
+			context->params2 |=
+				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+	}
+
+	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+		context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+
+	if (attr_mask & IB_QP_RQ_PSN)
+		context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+	if (attr_mask & IB_QP_QKEY)
+		context->qkey = cpu_to_be32(attr->qkey);
+
+	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		context->db_rec_addr = cpu_to_be64(qp->db.dma);
+
+	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
+	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
+		sqd_event = 1;
+	else
+		sqd_event = 0;
+
+	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		context->sq_crq_size |= cpu_to_be16(1 << 4);
+
+
+	mlx5_cur = to_mlx5_state(cur_state);
+	mlx5_new = to_mlx5_state(new_state);
+	mlx5_st = to_mlx5_st(ibqp->qp_type);
+	if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
+		goto out;
+
+	optpar = ib_mask_to_mlx5_opt(attr_mask);
+	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
+	in->optparam = cpu_to_be32(optpar);
+	err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state),
+				  to_mlx5_state(new_state), in, sqd_event,
+				  &qp->mqp);
+	if (err)
+		goto out;
+
+	qp->state = new_state;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		qp->atomic_rd_en = attr->qp_access_flags;
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->resp_depth = attr->max_dest_rd_atomic;
+	if (attr_mask & IB_QP_PORT)
+		qp->port = attr->port_num;
+	if (attr_mask & IB_QP_ALT_PATH)
+		qp->alt_port = attr->alt_port_num;
+
+	/*
+	 * If we moved a kernel QP to RESET, clean up all old CQ
+	 * entries and reinitialize the QP.
+	 */
+	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+		mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+				 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+		if (send_cq != recv_cq)
+			mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+		qp->rq.head = 0;
+		qp->rq.tail = 0;
+		qp->sq.head = 0;
+		qp->sq.tail = 0;
+		qp->sq.cur_post = 0;
+		qp->sq.last_poll = 0;
+		qp->db.db[MLX5_RCV_DBR] = 0;
+		qp->db.db[MLX5_SND_DBR] = 0;
+	}
+
+out:
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	int err = -EINVAL;
+	int port;
+
+	mutex_lock(&qp->mutex);
+
+	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
+	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+		goto out;
+
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports))
+		goto out;
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+		if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
+			goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
+		goto out;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
+		goto out;
+
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
+	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+	mutex_unlock(&qp->mutex);
+	return err;
+}
+
+static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+	struct mlx5_ib_cq *cq;
+	unsigned cur;
+
+	cur = wq->head - wq->tail;
+	if (likely(cur + nreq < wq->max_post))
+		return 0;
+
+	cq = to_mcq(ib_cq);
+	spin_lock(&cq->lock);
+	cur = wq->head - wq->tail;
+	spin_unlock(&cq->lock);
+
+	return cur + nreq >= wq->max_post;
+}
+
+static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
+					  u64 remote_addr, u32 rkey)
+{
+	rseg->raddr    = cpu_to_be64(remote_addr);
+	rseg->rkey     = cpu_to_be32(rkey);
+	rseg->reserved = 0;
+}
+
+static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+	} else {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+		aseg->compare  = 0;
+	}
+}
+
+static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
+				  struct ib_send_wr *wr)
+{
+	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap);
+	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask);
+	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add);
+	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
+
+static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
+			     struct ib_send_wr *wr)
+{
+	memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
+	dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
+	dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+	dseg->byte_count = cpu_to_be32(sg->length);
+	dseg->lkey       = cpu_to_be32(sg->lkey);
+	dseg->addr       = cpu_to_be64(sg->addr);
+}
+
+static __be16 get_klm_octo(int npages)
+{
+	return cpu_to_be16(ALIGN(npages, 8) / 2);
+}
+
+static __be64 frwr_mkey_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LEN		|
+		MLX5_MKEY_MASK_PAGE_SIZE	|
+		MLX5_MKEY_MASK_START_ADDR	|
+		MLX5_MKEY_MASK_EN_RINVAL	|
+		MLX5_MKEY_MASK_KEY		|
+		MLX5_MKEY_MASK_LR		|
+		MLX5_MKEY_MASK_LW		|
+		MLX5_MKEY_MASK_RR		|
+		MLX5_MKEY_MASK_RW		|
+		MLX5_MKEY_MASK_A		|
+		MLX5_MKEY_MASK_SMALL_FENCE	|
+		MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+				 struct ib_send_wr *wr, int li)
+{
+	memset(umr, 0, sizeof(*umr));
+
+	if (li) {
+		umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+		umr->flags = 1 << 7;
+		return;
+	}
+
+	umr->flags = (1 << 5); /* fail if not free */
+	umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+	umr->mkey_mask = frwr_mkey_mask();
+}
+
+static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+				struct ib_send_wr *wr)
+{
+	struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
+	u64 mask;
+
+	memset(umr, 0, sizeof(*umr));
+
+	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
+		umr->flags = 1 << 5; /* fail if not free */
+		umr->klm_octowords = get_klm_octo(umrwr->npages);
+		mask =  MLX5_MKEY_MASK_LEN		|
+			MLX5_MKEY_MASK_PAGE_SIZE	|
+			MLX5_MKEY_MASK_START_ADDR	|
+			MLX5_MKEY_MASK_PD		|
+			MLX5_MKEY_MASK_LR		|
+			MLX5_MKEY_MASK_LW		|
+			MLX5_MKEY_MASK_RR		|
+			MLX5_MKEY_MASK_RW		|
+			MLX5_MKEY_MASK_A		|
+			MLX5_MKEY_MASK_FREE;
+		umr->mkey_mask = cpu_to_be64(mask);
+	} else {
+		umr->flags = 2 << 5; /* fail if free */
+		mask = MLX5_MKEY_MASK_FREE;
+		umr->mkey_mask = cpu_to_be64(mask);
+	}
+
+	if (!wr->num_sge)
+		umr->flags |= (1 << 7); /* inline */
+}
+
+static u8 get_umr_flags(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
+	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
+		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+}
+
+static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
+			     int li, int *writ)
+{
+	memset(seg, 0, sizeof(*seg));
+	if (li) {
+		seg->status = 1 << 6;
+		return;
+	}
+
+	seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
+	*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
+	seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+	seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
+	seg->log2_page_size = wr->wr.fast_reg.page_shift;
+}
+
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+{
+	memset(seg, 0, sizeof(*seg));
+	if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+		seg->status = 1 << 6;
+		return;
+	}
+
+	seg->flags = convert_access(wr->wr.fast_reg.access_flags);
+	seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
+	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+	seg->log2_page_size = wr->wr.fast_reg.page_shift;
+	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+}
+
+static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
+			   struct ib_send_wr *wr,
+			   struct mlx5_core_dev *mdev,
+			   struct mlx5_ib_pd *pd,
+			   int writ)
+{
+	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+	u64 *page_list = wr->wr.fast_reg.page_list->page_list;
+	u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
+	int i;
+
+	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
+		mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
+	dseg->addr = cpu_to_be64(mfrpl->map);
+	dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+	dseg->lkey = cpu_to_be32(pd->pa_lkey);
+}
+
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+	switch (wr->opcode) {
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		return wr->ex.imm_data;
+
+	case IB_WR_SEND_WITH_INV:
+		return cpu_to_be32(wr->ex.invalidate_rkey);
+
+	default:
+		return 0;
+	}
+}
+
+static u8 calc_sig(void *wqe, int size)
+{
+	u8 *p = wqe;
+	u8 res = 0;
+	int i;
+
+	for (i = 0; i < size; i++)
+		res ^= p[i];
+
+	return ~res;
+}
+
+static u8 wq_sig(void *wqe)
+{
+	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
+}
+
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+			    void *wqe, int *sz)
+{
+	struct mlx5_wqe_inline_seg *seg;
+	void *qend = qp->sq.qend;
+	void *addr;
+	int inl = 0;
+	int copy;
+	int len;
+	int i;
+
+	seg = wqe;
+	wqe += sizeof(*seg);
+	for (i = 0; i < wr->num_sge; i++) {
+		addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+		len  = wr->sg_list[i].length;
+		inl += len;
+
+		if (unlikely(inl > qp->max_inline_data))
+			return -ENOMEM;
+
+		if (unlikely(wqe + len > qend)) {
+			copy = qend - wqe;
+			memcpy(wqe, addr, copy);
+			addr += copy;
+			len -= copy;
+			wqe = mlx5_get_send_wqe(qp, 0);
+		}
+		memcpy(wqe, addr, len);
+		wqe += len;
+	}
+
+	seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
+
+	*sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+
+	return 0;
+}
+
+static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
+			  struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+	int writ = 0;
+	int li;
+
+	li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
+	if (unlikely(wr->send_flags & IB_SEND_INLINE))
+		return -EINVAL;
+
+	set_frwr_umr_segment(*seg, wr, li);
+	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+	if (unlikely((*seg == qp->sq.qend)))
+		*seg = mlx5_get_send_wqe(qp, 0);
+	set_mkey_segment(*seg, wr, li, &writ);
+	*seg += sizeof(struct mlx5_mkey_seg);
+	*size += sizeof(struct mlx5_mkey_seg) / 16;
+	if (unlikely((*seg == qp->sq.qend)))
+		*seg = mlx5_get_send_wqe(qp, 0);
+	if (!li) {
+		set_frwr_pages(*seg, wr, mdev, pd, writ);
+		*seg += sizeof(struct mlx5_wqe_data_seg);
+		*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+	}
+	return 0;
+}
+
+static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+{
+	__be32 *p = NULL;
+	int tidx = idx;
+	int i, j;
+
+	pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
+		if ((i & 0xf) == 0) {
+			void *buf = mlx5_get_send_wqe(qp, tidx);
+			tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
+			p = buf;
+			j = 0;
+		}
+		pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
+			 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
+			 be32_to_cpu(p[j + 3]));
+	}
+}
+
+static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
+			 unsigned bytecnt, struct mlx5_ib_qp *qp)
+{
+	while (bytecnt > 0) {
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		__iowrite64_copy(dst++, src++, 8);
+		bytecnt -= 64;
+		if (unlikely(src == qp->sq.qend))
+			src = mlx5_get_send_wqe(qp, 0);
+	}
+}
+
+static u8 get_fence(u8 fence, struct ib_send_wr *wr)
+{
+	if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
+		     wr->send_flags & IB_SEND_FENCE))
+		return MLX5_FENCE_MODE_STRONG_ORDERING;
+
+	if (unlikely(fence)) {
+		if (wr->send_flags & IB_SEND_FENCE)
+			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
+		else
+			return fence;
+
+	} else {
+		return 0;
+	}
+}
+
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr)
+{
+	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_core_dev *mdev = &dev->mdev;
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_wqe_data_seg *dpseg;
+	struct mlx5_wqe_xrc_seg *xrc;
+	struct mlx5_bf *bf = qp->bf;
+	int uninitialized_var(size);
+	void *qend = qp->sq.qend;
+	unsigned long flags;
+	u32 mlx5_opcode;
+	unsigned idx;
+	int err = 0;
+	int inl = 0;
+	int num_sge;
+	void *seg;
+	int nreq;
+	int i;
+	u8 next_fence = 0;
+	u8 opmod = 0;
+	u8 fence;
+
+	spin_lock_irqsave(&qp->sq.lock, flags);
+
+	for (nreq = 0; wr; nreq++, wr = wr->next) {
+		if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+			mlx5_ib_warn(dev, "\n");
+			err = -EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+			mlx5_ib_warn(dev, "\n");
+			err = -ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		fence = qp->fm_cache;
+		num_sge = wr->num_sge;
+		if (unlikely(num_sge > qp->sq.max_gs)) {
+			mlx5_ib_warn(dev, "\n");
+			err = -ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+		seg = mlx5_get_send_wqe(qp, idx);
+		ctrl = seg;
+		*(uint32_t *)(seg + 8) = 0;
+		ctrl->imm = send_ieth(wr);
+		ctrl->fm_ce_se = qp->sq_signal_bits |
+			(wr->send_flags & IB_SEND_SIGNALED ?
+			 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+			(wr->send_flags & IB_SEND_SOLICITED ?
+			 MLX5_WQE_CTRL_SOLICITED : 0);
+
+		seg += sizeof(*ctrl);
+		size = sizeof(*ctrl) / 16;
+
+		switch (ibqp->qp_type) {
+		case IB_QPT_XRC_INI:
+			xrc = seg;
+			xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
+			seg += sizeof(*xrc);
+			size += sizeof(*xrc) / 16;
+			/* fall through */
+		case IB_QPT_RC:
+			switch (wr->opcode) {
+			case IB_WR_RDMA_READ:
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+					      wr->wr.rdma.rkey);
+				seg  += sizeof(struct mlx5_wqe_raddr_seg);
+				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+				break;
+
+			case IB_WR_ATOMIC_CMP_AND_SWP:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+				set_raddr_seg(seg, wr->wr.atomic.remote_addr,
+					      wr->wr.atomic.rkey);
+				seg  += sizeof(struct mlx5_wqe_raddr_seg);
+
+				set_atomic_seg(seg, wr);
+				seg  += sizeof(struct mlx5_wqe_atomic_seg);
+
+				size += (sizeof(struct mlx5_wqe_raddr_seg) +
+					 sizeof(struct mlx5_wqe_atomic_seg)) / 16;
+				break;
+
+			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+				set_raddr_seg(seg, wr->wr.atomic.remote_addr,
+					      wr->wr.atomic.rkey);
+				seg  += sizeof(struct mlx5_wqe_raddr_seg);
+
+				set_masked_atomic_seg(seg, wr);
+				seg  += sizeof(struct mlx5_wqe_masked_atomic_seg);
+
+				size += (sizeof(struct mlx5_wqe_raddr_seg) +
+					 sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
+				break;
+
+			case IB_WR_LOCAL_INV:
+				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+				qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+				ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
+				err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+				if (err) {
+					mlx5_ib_warn(dev, "\n");
+					*bad_wr = wr;
+					goto out;
+				}
+				num_sge = 0;
+				break;
+
+			case IB_WR_FAST_REG_MR:
+				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+				qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
+				ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+				err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+				if (err) {
+					mlx5_ib_warn(dev, "\n");
+					*bad_wr = wr;
+					goto out;
+				}
+				num_sge = 0;
+				break;
+
+			default:
+				break;
+			}
+			break;
+
+		case IB_QPT_UC:
+			switch (wr->opcode) {
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+					      wr->wr.rdma.rkey);
+				seg  += sizeof(struct mlx5_wqe_raddr_seg);
+				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+				break;
+
+			default:
+				break;
+			}
+			break;
+
+		case IB_QPT_UD:
+		case IB_QPT_SMI:
+		case IB_QPT_GSI:
+			set_datagram_seg(seg, wr);
+			seg  += sizeof(struct mlx5_wqe_datagram_seg);
+			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+			if (unlikely((seg == qend)))
+				seg = mlx5_get_send_wqe(qp, 0);
+			break;
+
+		case MLX5_IB_QPT_REG_UMR:
+			if (wr->opcode != MLX5_IB_WR_UMR) {
+				err = -EINVAL;
+				mlx5_ib_warn(dev, "bad opcode\n");
+				goto out;
+			}
+			qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+			ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+			set_reg_umr_segment(seg, wr);
+			seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+			size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+			if (unlikely((seg == qend)))
+				seg = mlx5_get_send_wqe(qp, 0);
+			set_reg_mkey_segment(seg, wr);
+			seg += sizeof(struct mlx5_mkey_seg);
+			size += sizeof(struct mlx5_mkey_seg) / 16;
+			if (unlikely((seg == qend)))
+				seg = mlx5_get_send_wqe(qp, 0);
+			break;
+
+		default:
+			break;
+		}
+
+		if (wr->send_flags & IB_SEND_INLINE && num_sge) {
+			int uninitialized_var(sz);
+
+			err = set_data_inl_seg(qp, wr, seg, &sz);
+			if (unlikely(err)) {
+				mlx5_ib_warn(dev, "\n");
+				*bad_wr = wr;
+				goto out;
+			}
+			inl = 1;
+			size += sz;
+		} else {
+			dpseg = seg;
+			for (i = 0; i < num_sge; i++) {
+				if (unlikely(dpseg == qend)) {
+					seg = mlx5_get_send_wqe(qp, 0);
+					dpseg = seg;
+				}
+				if (likely(wr->sg_list[i].length)) {
+					set_data_ptr_seg(dpseg, wr->sg_list + i);
+					size += sizeof(struct mlx5_wqe_data_seg) / 16;
+					dpseg++;
+				}
+			}
+		}
+
+		mlx5_opcode = mlx5_ib_opcode[wr->opcode];
+		ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8)	|
+						     mlx5_opcode			|
+						     ((u32)opmod << 24));
+		ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+		ctrl->fm_ce_se |= get_fence(fence, wr);
+		qp->fm_cache = next_fence;
+		if (unlikely(qp->wq_sig))
+			ctrl->signature = wq_sig(ctrl);
+
+		qp->sq.wrid[idx] = wr->wr_id;
+		qp->sq.w_list[idx].opcode = mlx5_opcode;
+		qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+		qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+		qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+		if (0)
+			dump_wqe(qp, idx, size);
+	}
+
+out:
+	if (likely(nreq)) {
+		qp->sq.head += nreq;
+
+		/* Make sure that descriptors are written before
+		 * updating doorbell record and ringing the doorbell
+		 */
+		wmb();
+
+		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+		if (bf->need_lock)
+			spin_lock(&bf->lock);
+
+		/* TBD enable WC */
+		if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
+			mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
+			/* wc_wmb(); */
+		} else {
+			mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
+				     MLX5_GET_DOORBELL_LOCK(&bf->lock32));
+			/* Make sure doorbells don't leak out of SQ spinlock
+			 * and reach the HCA out of order.
+			 */
+			mmiowb();
+		}
+		bf->offset ^= bf->buf_size;
+		if (bf->need_lock)
+			spin_unlock(&bf->lock);
+	}
+
+	spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+	return err;
+}
+
+static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
+{
+	sig->signature = calc_sig(sig, size);
+}
+
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_wqe_data_seg *scat;
+	struct mlx5_rwqe_sig *sig;
+	unsigned long flags;
+	int err = 0;
+	int nreq;
+	int ind;
+	int i;
+
+	spin_lock_irqsave(&qp->rq.lock, flags);
+
+	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+	for (nreq = 0; wr; nreq++, wr = wr->next) {
+		if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		scat = get_recv_wqe(qp, ind);
+		if (qp->wq_sig)
+			scat++;
+
+		for (i = 0; i < wr->num_sge; i++)
+			set_data_ptr_seg(scat + i, wr->sg_list + i);
+
+		if (i < qp->rq.max_gs) {
+			scat[i].byte_count = 0;
+			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
+			scat[i].addr       = 0;
+		}
+
+		if (qp->wq_sig) {
+			sig = (struct mlx5_rwqe_sig *)scat;
+			set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
+		}
+
+		qp->rq.wrid[ind] = wr->wr_id;
+
+		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+	}
+
+out:
+	if (likely(nreq)) {
+		qp->rq.head += nreq;
+
+		/* Make sure that descriptors are written before
+		 * doorbell record.
+		 */
+		wmb();
+
+		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+	}
+
+	spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+	return err;
+}
+
+static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
+{
+	switch (mlx5_state) {
+	case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
+	case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
+	case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
+	case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
+	case MLX5_QP_STATE_SQ_DRAINING:
+	case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
+	case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
+	case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
+	default:		     return -1;
+	}
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
+{
+	switch (mlx5_mig_state) {
+	case MLX5_QP_PM_ARMED:		return IB_MIG_ARMED;
+	case MLX5_QP_PM_REARM:		return IB_MIG_REARM;
+	case MLX5_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
+	default: return -1;
+	}
+}
+
+static int to_ib_qp_access_flags(int mlx5_flags)
+{
+	int ib_flags = 0;
+
+	if (mlx5_flags & MLX5_QP_BIT_RRE)
+		ib_flags |= IB_ACCESS_REMOTE_READ;
+	if (mlx5_flags & MLX5_QP_BIT_RWE)
+		ib_flags |= IB_ACCESS_REMOTE_WRITE;
+	if (mlx5_flags & MLX5_QP_BIT_RAE)
+		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+	return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
+				struct mlx5_qp_path *path)
+{
+	struct mlx5_core_dev *dev = &ibdev->mdev;
+
+	memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
+	ib_ah_attr->port_num	  = path->port;
+
+	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+		return;
+
+	ib_ah_attr->sl = path->sl & 0xf;
+
+	ib_ah_attr->dlid	  = be16_to_cpu(path->rlid);
+	ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
+	ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
+	ib_ah_attr->ah_flags      = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
+	if (ib_ah_attr->ah_flags) {
+		ib_ah_attr->grh.sgid_index = path->mgid_index;
+		ib_ah_attr->grh.hop_limit  = path->hop_limit;
+		ib_ah_attr->grh.traffic_class =
+			(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+		ib_ah_attr->grh.flow_label =
+			be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+		memcpy(ib_ah_attr->grh.dgid.raw,
+		       path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+	}
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_query_qp_mbox_out *outb;
+	struct mlx5_qp_context *context;
+	int mlx5_state;
+	int err = 0;
+
+	mutex_lock(&qp->mutex);
+	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+	if (!outb) {
+		err = -ENOMEM;
+		goto out;
+	}
+	context = &outb->ctx;
+	err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
+	if (err)
+		goto out_free;
+
+	mlx5_state = be32_to_cpu(context->flags) >> 28;
+
+	qp->state		     = to_ib_qp_state(mlx5_state);
+	qp_attr->qp_state	     = qp->state;
+	qp_attr->path_mtu	     = context->mtu_msgmax >> 5;
+	qp_attr->path_mig_state	     =
+		to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+	qp_attr->qkey		     = be32_to_cpu(context->qkey);
+	qp_attr->rq_psn		     = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+	qp_attr->sq_psn		     = be32_to_cpu(context->next_send_psn) & 0xffffff;
+	qp_attr->dest_qp_num	     = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
+	qp_attr->qp_access_flags     =
+		to_ib_qp_access_flags(be32_to_cpu(context->params2));
+
+	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+		qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
+		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
+	}
+
+	qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
+	qp_attr->port_num = context->pri_path.port;
+
+	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+	qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
+
+	qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+	qp_attr->max_dest_rd_atomic =
+		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+	qp_attr->min_rnr_timer	    =
+		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+	qp_attr->timeout	    = context->pri_path.ackto_lt >> 3;
+	qp_attr->retry_cnt	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
+	qp_attr->rnr_retry	    = (be32_to_cpu(context->params1) >> 13) & 0x7;
+	qp_attr->alt_timeout	    = context->alt_path.ackto_lt >> 3;
+	qp_attr->cur_qp_state	     = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
+	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+
+	if (!ibqp->uobject) {
+		qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
+		qp_attr->cap.max_send_sge = qp->sq.max_gs;
+	} else {
+		qp_attr->cap.max_send_wr  = 0;
+		qp_attr->cap.max_send_sge = 0;
+	}
+
+	/* We don't support inline sends for kernel QPs (yet), and we
+	 * don't know what userspace's value should be.
+	 */
+	qp_attr->cap.max_inline_data = 0;
+
+	qp_init_attr->cap	     = qp_attr->cap;
+
+	qp_init_attr->create_flags = 0;
+	if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+		qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
+		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+
+out_free:
+	kfree(outb);
+
+out:
+	mutex_unlock(&qp->mutex);
+	return err;
+}
+
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+					  struct ib_ucontext *context,
+					  struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_xrcd *xrcd;
+	int err;
+
+	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+		return ERR_PTR(-ENOSYS);
+
+	xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
+	if (!xrcd)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
+	if (err) {
+		kfree(xrcd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return &xrcd->ibxrcd;
+}
+
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+	struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
+	u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+	int err;
+
+	err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
+	if (err) {
+		mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
+		return err;
+	}
+
+	kfree(xrcd);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644
index 000000000000..84d297afd6a9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+
+#include "mlx5_ib.h"
+#include "user.h"
+
+/* not supported currently */
+static int srq_signature;
+
+static void *get_wqe(struct mlx5_ib_srq *srq, int n)
+{
+	return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+}
+
+static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
+{
+	struct ib_event event;
+	struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
+
+	if (ibsrq->event_handler) {
+		event.device      = ibsrq->device;
+		event.element.srq = ibsrq;
+		switch (type) {
+		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			break;
+		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+			event.event = IB_EVENT_SRQ_ERR;
+			break;
+		default:
+			pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
+				type, srq->srqn);
+			return;
+		}
+
+		ibsrq->event_handler(&event, ibsrq->srq_context);
+	}
+}
+
+static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+			   struct mlx5_create_srq_mbox_in **in,
+			   struct ib_udata *udata, int buf_size, int *inlen)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_create_srq ucmd;
+	int err;
+	int npages;
+	int page_shift;
+	int ncont;
+	u32 offset;
+
+	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+		mlx5_ib_dbg(dev, "failed copy udata\n");
+		return -EFAULT;
+	}
+	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
+
+	srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
+				0, 0);
+	if (IS_ERR(srq->umem)) {
+		mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
+		err = PTR_ERR(srq->umem);
+		return err;
+	}
+
+	mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+			   &page_shift, &ncont, NULL);
+	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
+				     &offset);
+	if (err) {
+		mlx5_ib_warn(dev, "bad offset\n");
+		goto err_umem;
+	}
+
+	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+	*in = mlx5_vzalloc(*inlen);
+	if (!(*in)) {
+		err = -ENOMEM;
+		goto err_umem;
+	}
+
+	mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
+
+	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+				  ucmd.db_addr, &srq->db);
+	if (err) {
+		mlx5_ib_dbg(dev, "map doorbell failed\n");
+		goto err_in;
+	}
+
+	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+
+	return 0;
+
+err_in:
+	mlx5_vfree(*in);
+
+err_umem:
+	ib_umem_release(srq->umem);
+
+	return err;
+}
+
+static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+			     struct mlx5_create_srq_mbox_in **in, int buf_size,
+			     int *inlen)
+{
+	int err;
+	int i;
+	struct mlx5_wqe_srq_next_seg *next;
+	int page_shift;
+	int npages;
+
+	err = mlx5_db_alloc(&dev->mdev, &srq->db);
+	if (err) {
+		mlx5_ib_warn(dev, "alloc dbell rec failed\n");
+		return err;
+	}
+
+	*srq->db.db = 0;
+
+	if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+		mlx5_ib_dbg(dev, "buf alloc failed\n");
+		err = -ENOMEM;
+		goto err_db;
+	}
+	page_shift = srq->buf.page_shift;
+
+	srq->head    = 0;
+	srq->tail    = srq->msrq.max - 1;
+	srq->wqe_ctr = 0;
+
+	for (i = 0; i < srq->msrq.max; i++) {
+		next = get_wqe(srq, i);
+		next->next_wqe_index =
+			cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+	}
+
+	npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
+	mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
+		    buf_size, page_shift, srq->buf.npages, npages);
+	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
+	*in = mlx5_vzalloc(*inlen);
+	if (!*in) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	mlx5_fill_page_array(&srq->buf, (*in)->pas);
+
+	srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
+	if (!srq->wrid) {
+		mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
+			    (unsigned long)(srq->msrq.max * sizeof(u64)));
+		err = -ENOMEM;
+		goto err_in;
+	}
+	srq->wq_sig = !!srq_signature;
+
+	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+
+	return 0;
+
+err_in:
+	mlx5_vfree(*in);
+
+err_buf:
+	mlx5_buf_free(&dev->mdev, &srq->buf);
+
+err_db:
+	mlx5_db_free(&dev->mdev, &srq->db);
+	return err;
+}
+
+static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
+{
+	mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+	ib_umem_release(srq->umem);
+}
+
+
+static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
+{
+	kfree(srq->wrid);
+	mlx5_buf_free(&dev->mdev, &srq->buf);
+	mlx5_db_free(&dev->mdev, &srq->db);
+}
+
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+				  struct ib_srq_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_srq *srq;
+	int desc_size;
+	int buf_size;
+	int err;
+	struct mlx5_create_srq_mbox_in *uninitialized_var(in);
+	int uninitialized_var(inlen);
+	int is_xrc;
+	u32 flgs, xrcdn;
+
+	/* Sanity check SRQ size before proceeding */
+	if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
+		mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
+			    init_attr->attr.max_wr,
+			    dev->mdev.caps.max_srq_wqes);
+		return ERR_PTR(-EINVAL);
+	}
+
+	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&srq->mutex);
+	spin_lock_init(&srq->lock);
+	srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+	srq->msrq.max_gs = init_attr->attr.max_sge;
+
+	desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
+		    srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+	desc_size = roundup_pow_of_two(desc_size);
+	desc_size = max_t(int, 32, desc_size);
+	srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
+		sizeof(struct mlx5_wqe_data_seg);
+	srq->msrq.wqe_shift = ilog2(desc_size);
+	buf_size = srq->msrq.max * desc_size;
+	mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
+		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
+		    srq->msrq.max_avail_gather);
+
+	if (pd->uobject)
+		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+	else
+		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+
+	if (err) {
+		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
+			     pd->uobject ? "user" : "kernel", err);
+		goto err_srq;
+	}
+
+	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+	in->ctx.state_log_sz = ilog2(srq->msrq.max);
+	flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
+	xrcdn = 0;
+	if (is_xrc) {
+		xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
+		in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
+	} else if (init_attr->srq_type == IB_SRQT_BASIC) {
+		xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
+		in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
+	}
+
+	in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
+
+	in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
+	in->ctx.db_record = cpu_to_be64(srq->db.dma);
+	err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
+	mlx5_vfree(in);
+	if (err) {
+		mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
+		goto err_srq;
+	}
+
+	mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
+
+	srq->msrq.event = mlx5_ib_srq_event;
+	srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
+
+	if (pd->uobject)
+		if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+			mlx5_ib_dbg(dev, "copy to user failed\n");
+			err = -EFAULT;
+			goto err_core;
+		}
+
+	init_attr->attr.max_wr = srq->msrq.max - 1;
+
+	return &srq->ibsrq;
+
+err_core:
+	mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+	if (pd->uobject)
+		destroy_srq_user(pd, srq);
+	else
+		destroy_srq_kernel(dev, srq);
+
+err_srq:
+	kfree(srq);
+
+	return ERR_PTR(err);
+}
+
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+	int ret;
+
+	/* We don't support resizing SRQs yet */
+	if (attr_mask & IB_SRQ_MAX_WR)
+		return -EINVAL;
+
+	if (attr_mask & IB_SRQ_LIMIT) {
+		if (attr->srq_limit >= srq->msrq.max)
+			return -EINVAL;
+
+		mutex_lock(&srq->mutex);
+		ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
+		mutex_unlock(&srq->mutex);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+	int ret;
+	struct mlx5_query_srq_mbox_out *out;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
+	if (ret)
+		goto out_box;
+
+	srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
+	srq_attr->max_wr    = srq->msrq.max - 1;
+	srq_attr->max_sge   = srq->msrq.max_gs;
+
+out_box:
+	kfree(out);
+	return ret;
+}
+
+int mlx5_ib_destroy_srq(struct ib_srq *srq)
+{
+	struct mlx5_ib_dev *dev = to_mdev(srq->device);
+	struct mlx5_ib_srq *msrq = to_msrq(srq);
+
+	mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
+
+	if (srq->uobject) {
+		mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+		ib_umem_release(msrq->umem);
+	} else {
+		kfree(msrq->wrid);
+		mlx5_buf_free(&dev->mdev, &msrq->buf);
+		mlx5_db_free(&dev->mdev, &msrq->db);
+	}
+
+	kfree(srq);
+	return 0;
+}
+
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
+{
+	struct mlx5_wqe_srq_next_seg *next;
+
+	/* always called with interrupts disabled. */
+	spin_lock(&srq->lock);
+
+	next = get_wqe(srq, srq->tail);
+	next->next_wqe_index = cpu_to_be16(wqe_index);
+	srq->tail = wqe_index;
+
+	spin_unlock(&srq->lock);
+}
+
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			  struct ib_recv_wr **bad_wr)
+{
+	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+	struct mlx5_wqe_srq_next_seg *next;
+	struct mlx5_wqe_data_seg *scat;
+	unsigned long flags;
+	int err = 0;
+	int nreq;
+	int i;
+
+	spin_lock_irqsave(&srq->lock, flags);
+
+	for (nreq = 0; wr; nreq++, wr = wr->next) {
+		if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+
+		if (unlikely(srq->head == srq->tail)) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
+		srq->wrid[srq->head] = wr->wr_id;
+
+		next      = get_wqe(srq, srq->head);
+		srq->head = be16_to_cpu(next->next_wqe_index);
+		scat      = (struct mlx5_wqe_data_seg *)(next + 1);
+
+		for (i = 0; i < wr->num_sge; i++) {
+			scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+			scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
+			scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
+		}
+
+		if (i < srq->msrq.max_avail_gather) {
+			scat[i].byte_count = 0;
+			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
+			scat[i].addr       = 0;
+		}
+	}
+
+	if (likely(nreq)) {
+		srq->wqe_ctr += nreq;
+
+		/* Make sure that descriptors are written before
+		 * doorbell record.
+		 */
+		wmb();
+
+		*srq->db.db = cpu_to_be32(srq->wqe_ctr);
+	}
+
+	spin_unlock_irqrestore(&srq->lock, flags);
+
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644
index 000000000000..a886de3e593c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_USER_H
+#define MLX5_IB_USER_H
+
+#include <linux/types.h>
+
+enum {
+	MLX5_QP_FLAG_SIGNATURE		= 1 << 0,
+	MLX5_QP_FLAG_SCATTER_CQE	= 1 << 1,
+};
+
+enum {
+	MLX5_SRQ_FLAG_SIGNATURE		= 1 << 0,
+};
+
+
+/* Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX5_IB_UVERBS_ABI_VERSION	1
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx5_ib_alloc_ucontext_req {
+	__u32	total_num_uuars;
+	__u32	num_low_latency_uuars;
+};
+
+struct mlx5_ib_alloc_ucontext_resp {
+	__u32	qp_tab_size;
+	__u32	bf_reg_size;
+	__u32	tot_uuars;
+	__u32	cache_line_size;
+	__u16	max_sq_desc_sz;
+	__u16	max_rq_desc_sz;
+	__u32	max_send_wqebb;
+	__u32	max_recv_wr;
+	__u32	max_srq_recv_wr;
+	__u16	num_ports;
+	__u16	reserved;
+};
+
+struct mlx5_ib_alloc_pd_resp {
+	__u32	pdn;
+};
+
+struct mlx5_ib_create_cq {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	cqe_size;
+};
+
+struct mlx5_ib_create_cq_resp {
+	__u32	cqn;
+	__u32	reserved;
+};
+
+struct mlx5_ib_resize_cq {
+	__u64	buf_addr;
+};
+
+struct mlx5_ib_create_srq {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	flags;
+};
+
+struct mlx5_ib_create_srq_resp {
+	__u32	srqn;
+	__u32	reserved;
+};
+
+struct mlx5_ib_create_qp {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	sq_wqe_count;
+	__u32	rq_wqe_count;
+	__u32	rq_wqe_shift;
+	__u32	flags;
+};
+
+struct mlx5_ib_create_qp_resp {
+	__u32	uuar_index;
+};
+#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 48970af23679..d540180a8e42 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -42,8 +42,6 @@
 #define OCRDMA_ROCE_DEV_VERSION "1.0.0"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
 
-#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg)
-
 #define OCRDMA_MAX_AH 512
 
 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
@@ -97,7 +95,6 @@ struct ocrdma_queue_info {
 	u16 id;			/* qid, where to ring the doorbell. */
 	u16 head, tail;
 	bool created;
-	atomic_t used;		/* Number of valid elements in the queue */
 };
 
 struct ocrdma_eq {
@@ -198,7 +195,6 @@ struct ocrdma_cq {
 	struct ocrdma_ucontext *ucontext;
 	dma_addr_t pa;
 	u32 len;
-	atomic_t use_cnt;
 
 	/* head of all qp's sq and rq for which cqes need to be flushed
 	 * by the software.
@@ -210,7 +206,6 @@ struct ocrdma_pd {
 	struct ib_pd ibpd;
 	struct ocrdma_dev *dev;
 	struct ocrdma_ucontext *uctx;
-	atomic_t use_cnt;
 	u32 id;
 	int num_dpp_qp;
 	u32 dpp_page;
@@ -241,16 +236,16 @@ struct ocrdma_srq {
 	struct ib_srq ibsrq;
 	struct ocrdma_dev *dev;
 	u8 __iomem *db;
+	struct ocrdma_qp_hwq_info rq;
+	u64 *rqe_wr_id_tbl;
+	u32 *idx_bit_fields;
+	u32 bit_fields_len;
+
 	/* provide synchronization to multiple context(s) posting rqe */
 	spinlock_t q_lock ____cacheline_aligned;
 
-	struct ocrdma_qp_hwq_info rq;
 	struct ocrdma_pd *pd;
-	atomic_t use_cnt;
 	u32 id;
-	u64 *rqe_wr_id_tbl;
-	u32 *idx_bit_fields;
-	u32 bit_fields_len;
 };
 
 struct ocrdma_qp {
@@ -258,8 +253,6 @@ struct ocrdma_qp {
 	struct ocrdma_dev *dev;
 
 	u8 __iomem *sq_db;
-	/* provide synchronization to multiple context(s) posting wqe, rqe */
-	spinlock_t q_lock ____cacheline_aligned;
 	struct ocrdma_qp_hwq_info sq;
 	struct {
 		uint64_t wrid;
@@ -269,6 +262,9 @@ struct ocrdma_qp {
 		uint8_t  rsvd[3];
 	} *wqe_wr_id_tbl;
 	u32 max_inline_data;
+
+	/* provide synchronization to multiple context(s) posting wqe, rqe */
+	spinlock_t q_lock ____cacheline_aligned;
 	struct ocrdma_cq *sq_cq;
 	/* list maintained per CQ to flush SQ errors */
 	struct list_head sq_entry;
@@ -296,10 +292,6 @@ struct ocrdma_qp {
 	u8 *ird_q_va;
 };
 
-#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \
-	(((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \
-		(qp->id < 64)) ? 24 : 16)
-
 struct ocrdma_hw_mr {
 	struct ocrdma_dev *dev;
 	u32 lkey;
@@ -390,4 +382,43 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
 	return container_of(ibsrq, struct ocrdma_srq, ibsrq);
 }
 
+
+static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
+{
+	return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
+		 qp->id < 64) ? 24 : 16);
+}
+
+static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
+{
+	int cqe_valid;
+	cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
+	return ((cqe_valid == cq->phase) ? 1 : 0);
+}
+
+static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
+{
+	return (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_QTYPE) ? 0 : 1;
+}
+
+static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe)
+{
+	return (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_INVALIDATE) ? 1 : 0;
+}
+
+static inline int is_cqe_imm(struct ocrdma_cqe *cqe)
+{
+	return (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_IMM) ? 1 : 0;
+}
+
+static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
+{
+	return (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
+}
+
+
 #endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 71942af4fce9..0965278dd2ed 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
 static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
 {
 	dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
-	atomic_inc(&dev->mq.sq.used);
 }
 
 static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
@@ -564,32 +563,19 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
 	memset(cmd, 0, sizeof(*cmd));
 	num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
 
-	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
-		ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ,
-				OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
-		cmd->v0.pages = num_pages;
-		cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
-		cmd->v0.async_cqid_valid = (cq->id << 1);
-		cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
-					     OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
-		cmd->v0.cqid_ringsize |=
-			(cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT);
-		cmd->v0.valid = OCRDMA_CREATE_MQ_VALID;
-		pa = &cmd->v0.pa[0];
-	} else {
-		ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
-				OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
-		cmd->req.rsvd_version = 1;
-		cmd->v1.cqid_pages = num_pages;
-		cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
-		cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
-		cmd->v1.async_event_bitmap = Bit(20);
-		cmd->v1.async_cqid_ringsize = cq->id;
-		cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
-					     OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
-		cmd->v1.valid = OCRDMA_CREATE_MQ_VALID;
-		pa = &cmd->v1.pa[0];
-	}
+	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+	cmd->req.rsvd_version = 1;
+	cmd->cqid_pages = num_pages;
+	cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
+	cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
+	cmd->async_event_bitmap = Bit(20);
+	cmd->async_cqid_ringsize = cq->id;
+	cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
+				OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
+	cmd->valid = OCRDMA_CREATE_MQ_VALID;
+	pa = &cmd->pa[0];
+
 	ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
 	status = be_roce_mcc_cmd(dev->nic_info.netdev,
 				 cmd, sizeof(*cmd), NULL, NULL);
@@ -745,7 +731,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 		qp_event = 0;
 		srq_event = 0;
 		dev_event = 0;
-		ocrdma_err("%s() unknown type=0x%x\n", __func__, type);
+		pr_err("%s() unknown type=0x%x\n", __func__, type);
 		break;
 	}
 
@@ -775,8 +761,8 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 	if (evt_code == OCRDMA_ASYNC_EVE_CODE)
 		ocrdma_dispatch_ibevent(dev, cqe);
 	else
-		ocrdma_err("%s(%d) invalid evt code=0x%x\n",
-			   __func__, dev->id, evt_code);
+		pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
+		       dev->id, evt_code);
 }
 
 static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -790,8 +776,8 @@ static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
 		dev->mqe_ctx.cmd_done = true;
 		wake_up(&dev->mqe_ctx.cmd_wait);
 	} else
-		ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
-			   __func__, cqe->tag_lo, dev->mqe_ctx.tag);
+		pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
+		       __func__, cqe->tag_lo, dev->mqe_ctx.tag);
 }
 
 static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
@@ -810,7 +796,7 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
 		else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
 			ocrdma_process_mcqe(dev, cqe);
 		else
-			ocrdma_err("%s() cqe->compl is not set.\n", __func__);
+			pr_err("%s() cqe->compl is not set.\n", __func__);
 		memset(cqe, 0, sizeof(struct ocrdma_mcqe));
 		ocrdma_mcq_inc_tail(dev);
 	}
@@ -869,7 +855,7 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
 
 	cq = dev->cq_tbl[cq_idx];
 	if (cq == NULL) {
-		ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+		pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
 		return;
 	}
 	spin_lock_irqsave(&cq->cq_lock, flags);
@@ -971,7 +957,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
 	rsp = ocrdma_get_mqe_rsp(dev);
 	ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
 	if (cqe_status || ext_status) {
-		ocrdma_err
+		pr_err
 		    ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
 		     __func__,
 		     (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
@@ -1353,8 +1339,8 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
 	if (dpp_cq)
 		return -EINVAL;
 	if (entries > dev->attr.max_cqe) {
-		ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
-			   __func__, dev->id, dev->attr.max_cqe, entries);
+		pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
+		       __func__, dev->id, dev->attr.max_cqe, entries);
 		return -EINVAL;
 	}
 	if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
@@ -1621,7 +1607,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
 	status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
 				   cur_pbl_cnt, hwmr->pbe_size, last);
 	if (status) {
-		ocrdma_err("%s() status=%d\n", __func__, status);
+		pr_err("%s() status=%d\n", __func__, status);
 		return status;
 	}
 	/* if there is no more pbls to register then exit. */
@@ -1644,7 +1630,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
 			break;
 	}
 	if (status)
-		ocrdma_err("%s() err. status=%d\n", __func__, status);
+		pr_err("%s() err. status=%d\n", __func__, status);
 
 	return status;
 }
@@ -1841,8 +1827,8 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
 	status = ocrdma_build_q_conf(&max_wqe_allocated,
 		dev->attr.wqe_size, &hw_pages, &hw_page_size);
 	if (status) {
-		ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__,
-			   max_wqe_allocated);
+		pr_err("%s() req. max_send_wr=0x%x\n", __func__,
+		       max_wqe_allocated);
 		return -EINVAL;
 	}
 	qp->sq.max_cnt = max_wqe_allocated;
@@ -1891,8 +1877,8 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
 	status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
 				     &hw_pages, &hw_page_size);
 	if (status) {
-		ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__,
-			   attrs->cap.max_recv_wr + 1);
+		pr_err("%s() req. max_recv_wr=0x%x\n", __func__,
+		       attrs->cap.max_recv_wr + 1);
 		return status;
 	}
 	qp->rq.max_cnt = max_rqe_allocated;
@@ -1900,7 +1886,7 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
 
 	qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
 	if (!qp->rq.va)
-		return status;
+		return -ENOMEM;
 	memset(qp->rq.va, 0, len);
 	qp->rq.pa = pa;
 	qp->rq.len = len;
@@ -2087,10 +2073,10 @@ mbx_err:
 	if (qp->rq.va)
 		dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
 rq_err:
-	ocrdma_err("%s(%d) rq_err\n", __func__, dev->id);
+	pr_err("%s(%d) rq_err\n", __func__, dev->id);
 	dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
 sq_err:
-	ocrdma_err("%s(%d) sq_err\n", __func__, dev->id);
+	pr_err("%s(%d) sq_err\n", __func__, dev->id);
 	kfree(cmd);
 	return status;
 }
@@ -2127,7 +2113,7 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
 	else if (rdma_link_local_addr(&in6))
 		rdma_get_ll_mac(&in6, mac_addr);
 	else {
-		ocrdma_err("%s() fail to resolve mac_addr.\n", __func__);
+		pr_err("%s() fail to resolve mac_addr.\n", __func__);
 		return -EINVAL;
 	}
 	return 0;
@@ -2362,8 +2348,8 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
 				dev->attr.rqe_size,
 				&hw_pages, &hw_page_size);
 	if (status) {
-		ocrdma_err("%s() req. max_wr=0x%x\n", __func__,
-			   srq_attr->attr.max_wr);
+		pr_err("%s() req. max_wr=0x%x\n", __func__,
+		       srq_attr->attr.max_wr);
 		status = -EINVAL;
 		goto ret;
 	}
@@ -2614,7 +2600,7 @@ mq_err:
 	ocrdma_destroy_qp_eqs(dev);
 qpeq_err:
 	ocrdma_destroy_eq(dev, &dev->meq);
-	ocrdma_err("%s() status=%d\n", __func__, status);
+	pr_err("%s() status=%d\n", __func__, status);
 	return status;
 }
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 48928c8e7774..ded416f1adea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -378,7 +378,7 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
 	spin_lock_init(&dev->flush_q_lock);
 	return 0;
 alloc_err:
-	ocrdma_err("%s(%d) error.\n", __func__, dev->id);
+	pr_err("%s(%d) error.\n", __func__, dev->id);
 	return -ENOMEM;
 }
 
@@ -396,7 +396,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 
 	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
 	if (!dev) {
-		ocrdma_err("Unable to allocate ib device\n");
+		pr_err("Unable to allocate ib device\n");
 		return NULL;
 	}
 	dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
@@ -437,7 +437,7 @@ init_err:
 idr_err:
 	kfree(dev->mbx_cmd);
 	ib_dealloc_device(&dev->ibdev);
-	ocrdma_err("%s() leaving. ret=%d\n", __func__, status);
+	pr_err("%s() leaving. ret=%d\n", __func__, status);
 	return NULL;
 }
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index c75cbdfa87e7..36b062da2aea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -608,16 +608,8 @@ enum {
 	OCRDMA_CREATE_MQ_ASYNC_CQ_VALID		= Bit(0)
 };
 
-struct ocrdma_create_mq_v0 {
-	u32 pages;
-	u32 cqid_ringsize;
-	u32 valid;
-	u32 async_cqid_valid;
-	u32 rsvd;
-	struct ocrdma_pa pa[8];
-} __packed;
-
-struct ocrdma_create_mq_v1 {
+struct ocrdma_create_mq_req {
+	struct ocrdma_mbx_hdr req;
 	u32 cqid_pages;
 	u32 async_event_bitmap;
 	u32 async_cqid_ringsize;
@@ -627,14 +619,6 @@ struct ocrdma_create_mq_v1 {
 	struct ocrdma_pa pa[8];
 } __packed;
 
-struct ocrdma_create_mq_req {
-	struct ocrdma_mbx_hdr req;
-	union {
-		struct ocrdma_create_mq_v0 v0;
-		struct ocrdma_create_mq_v1 v1;
-	};
-} __packed;
-
 struct ocrdma_create_mq_rsp {
 	struct ocrdma_mbx_rsp rsp;
 	u32 id;
@@ -1550,21 +1534,6 @@ struct ocrdma_cqe {
 	u32 flags_status_srcqpn;	/* w3 */
 } __packed;
 
-#define is_cqe_valid(cq, cqe) \
-	(((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\
-	== cq->phase) ? 1 : 0)
-#define is_cqe_for_sq(cqe) \
-	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1)
-#define is_cqe_for_rq(cqe) \
-	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0)
-#define is_cqe_invalidated(cqe) \
-	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \
-	1 : 0)
-#define is_cqe_imm(cqe) \
-	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0)
-#define is_cqe_wr_imm(cqe) \
-	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0)
-
 struct ocrdma_sge {
 	u32 addr_hi;
 	u32 addr_lo;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b29a4246ef41..dcfbab177faa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -114,8 +114,8 @@ int ocrdma_query_port(struct ib_device *ibdev,
 
 	dev = get_ocrdma_dev(ibdev);
 	if (port > 1) {
-		ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
-			   dev->id, port);
+		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
+		       dev->id, port);
 		return -EINVAL;
 	}
 	netdev = dev->nic_info.netdev;
@@ -155,8 +155,7 @@ int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
 
 	dev = get_ocrdma_dev(ibdev);
 	if (port > 1) {
-		ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
-			   dev->id, port);
+		pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
 		return -EINVAL;
 	}
 	return 0;
@@ -398,7 +397,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
 		kfree(pd);
 		return ERR_PTR(status);
 	}
-	atomic_set(&pd->use_cnt, 0);
 
 	if (udata && context) {
 		status = ocrdma_copy_pd_uresp(pd, context, udata);
@@ -419,12 +417,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
 	int status;
 	u64 usr_db;
 
-	if (atomic_read(&pd->use_cnt)) {
-		ocrdma_err("%s(%d) pd=0x%x is in use.\n",
-			   __func__, dev->id, pd->id);
-		status = -EFAULT;
-		goto dealloc_err;
-	}
 	status = ocrdma_mbx_dealloc_pd(dev, pd);
 	if (pd->uctx) {
 		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
@@ -436,7 +428,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
 		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
 	}
 	kfree(pd);
-dealloc_err:
 	return status;
 }
 
@@ -450,8 +441,8 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
 	struct ocrdma_dev *dev = pd->dev;
 
 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
-		ocrdma_err("%s(%d) leaving err, invalid access rights\n",
-			   __func__, dev->id);
+		pr_err("%s(%d) leaving err, invalid access rights\n",
+		       __func__, dev->id);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -474,7 +465,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
 		return ERR_PTR(-ENOMEM);
 	}
 	mr->pd = pd;
-	atomic_inc(&pd->use_cnt);
 	mr->ibmr.lkey = mr->hwmr.lkey;
 	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
 		mr->ibmr.rkey = mr->hwmr.lkey;
@@ -664,7 +654,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 	if (status)
 		goto mbx_err;
 	mr->pd = pd;
-	atomic_inc(&pd->use_cnt);
 	mr->ibmr.lkey = mr->hwmr.lkey;
 	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
 		mr->ibmr.rkey = mr->hwmr.lkey;
@@ -689,7 +678,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 	if (mr->hwmr.fr_mr == 0)
 		ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 
-	atomic_dec(&mr->pd->use_cnt);
 	/* it could be user registered memory. */
 	if (mr->umem)
 		ib_umem_release(mr->umem);
@@ -714,8 +702,8 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
 	uresp.phase_change = cq->phase_change ? 1 : 0;
 	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (status) {
-		ocrdma_err("%s(%d) copy error cqid=0x%x.\n",
-			   __func__, cq->dev->id, cq->id);
+		pr_err("%s(%d) copy error cqid=0x%x.\n",
+		       __func__, cq->dev->id, cq->id);
 		goto err;
 	}
 	uctx = get_ocrdma_ucontext(ib_ctx);
@@ -752,7 +740,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
 
 	spin_lock_init(&cq->cq_lock);
 	spin_lock_init(&cq->comp_handler_lock);
-	atomic_set(&cq->use_cnt, 0);
 	INIT_LIST_HEAD(&cq->sq_head);
 	INIT_LIST_HEAD(&cq->rq_head);
 	cq->dev = dev;
@@ -799,9 +786,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
 	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
 	struct ocrdma_dev *dev = cq->dev;
 
-	if (atomic_read(&cq->use_cnt))
-		return -EINVAL;
-
 	status = ocrdma_mbx_destroy_cq(dev, cq);
 
 	if (cq->ucontext) {
@@ -837,57 +821,56 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
 	if (attrs->qp_type != IB_QPT_GSI &&
 	    attrs->qp_type != IB_QPT_RC &&
 	    attrs->qp_type != IB_QPT_UD) {
-		ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n",
-			   __func__, dev->id, attrs->qp_type);
+		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
+		       __func__, dev->id, attrs->qp_type);
 		return -EINVAL;
 	}
 	if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
-		ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n",
-			   __func__, dev->id, attrs->cap.max_send_wr);
-		ocrdma_err("%s(%d) supported send_wr=0x%x\n",
-			   __func__, dev->id, dev->attr.max_wqe);
+		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
+		       __func__, dev->id, attrs->cap.max_send_wr);
+		pr_err("%s(%d) supported send_wr=0x%x\n",
+		       __func__, dev->id, dev->attr.max_wqe);
 		return -EINVAL;
 	}
 	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
-		ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n",
-			   __func__, dev->id, attrs->cap.max_recv_wr);
-		ocrdma_err("%s(%d) supported recv_wr=0x%x\n",
-			   __func__, dev->id, dev->attr.max_rqe);
+		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
+		       __func__, dev->id, attrs->cap.max_recv_wr);
+		pr_err("%s(%d) supported recv_wr=0x%x\n",
+		       __func__, dev->id, dev->attr.max_rqe);
 		return -EINVAL;
 	}
 	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
-		ocrdma_err("%s(%d) unsupported inline data size=0x%x"
-			   " requested\n", __func__, dev->id,
-			   attrs->cap.max_inline_data);
-		ocrdma_err("%s(%d) supported inline data size=0x%x\n",
-			   __func__, dev->id, dev->attr.max_inline_data);
+		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
+		       __func__, dev->id, attrs->cap.max_inline_data);
+		pr_err("%s(%d) supported inline data size=0x%x\n",
+		       __func__, dev->id, dev->attr.max_inline_data);
 		return -EINVAL;
 	}
 	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
-		ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n",
-			   __func__, dev->id, attrs->cap.max_send_sge);
-		ocrdma_err("%s(%d) supported send_sge=0x%x\n",
-			   __func__, dev->id, dev->attr.max_send_sge);
+		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
+		       __func__, dev->id, attrs->cap.max_send_sge);
+		pr_err("%s(%d) supported send_sge=0x%x\n",
+		       __func__, dev->id, dev->attr.max_send_sge);
 		return -EINVAL;
 	}
 	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
-		ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n",
-			   __func__, dev->id, attrs->cap.max_recv_sge);
-		ocrdma_err("%s(%d) supported recv_sge=0x%x\n",
-			   __func__, dev->id, dev->attr.max_recv_sge);
+		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
+		       __func__, dev->id, attrs->cap.max_recv_sge);
+		pr_err("%s(%d) supported recv_sge=0x%x\n",
+		       __func__, dev->id, dev->attr.max_recv_sge);
 		return -EINVAL;
 	}
 	/* unprivileged user space cannot create special QP */
 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
-		ocrdma_err
+		pr_err
 		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
 		     __func__, dev->id, attrs->qp_type);
 		return -EINVAL;
 	}
 	/* allow creating only one GSI type of QP */
 	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
-		ocrdma_err("%s(%d) GSI special QPs already created.\n",
-			   __func__, dev->id);
+		pr_err("%s(%d) GSI special QPs already created.\n",
+		       __func__, dev->id);
 		return -EINVAL;
 	}
 	/* verify consumer QPs are not trying to use GSI QP's CQ */
@@ -896,8 +879,8 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
 		    (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
 		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
 		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
-			ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
-				   __func__, dev->id);
+			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
+			       __func__, dev->id);
 			return -EINVAL;
 		}
 	}
@@ -949,7 +932,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
 	}
 	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (status) {
-		ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id);
+		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
 		goto err;
 	}
 	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
@@ -1023,15 +1006,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
 	qp->state = OCRDMA_QPS_RST;
 }
 
-static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd)
-{
-	atomic_inc(&pd->use_cnt);
-	atomic_inc(&qp->sq_cq->use_cnt);
-	atomic_inc(&qp->rq_cq->use_cnt);
-	if (qp->srq)
-		atomic_inc(&qp->srq->use_cnt);
-	qp->ibqp.qp_num = qp->id;
-}
 
 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
 				   struct ib_qp_init_attr *attrs)
@@ -1099,7 +1073,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
 			goto cpy_err;
 	}
 	ocrdma_store_gsi_qp_cq(dev, attrs);
-	ocrdma_set_qp_use_cnt(qp, pd);
+	qp->ibqp.qp_num = qp->id;
 	mutex_unlock(&dev->dev_lock);
 	return &qp->ibqp;
 
@@ -1112,7 +1086,7 @@ mbx_err:
 	kfree(qp->wqe_wr_id_tbl);
 	kfree(qp->rqe_wr_id_tbl);
 	kfree(qp);
-	ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status);
+	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
 gen_err:
 	return ERR_PTR(status);
 }
@@ -1162,10 +1136,10 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
 	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
-		ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for "
-			   "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
-			   __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
-			   old_qps, new_qps);
+		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
+		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
+		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
+		       old_qps, new_qps);
 		goto param_err;
 	}
 
@@ -1475,11 +1449,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
 
 	ocrdma_del_flush_qp(qp);
 
-	atomic_dec(&qp->pd->use_cnt);
-	atomic_dec(&qp->sq_cq->use_cnt);
-	atomic_dec(&qp->rq_cq->use_cnt);
-	if (qp->srq)
-		atomic_dec(&qp->srq->use_cnt);
 	kfree(qp->wqe_wr_id_tbl);
 	kfree(qp->rqe_wr_id_tbl);
 	kfree(qp);
@@ -1565,14 +1534,12 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
 			goto arm_err;
 	}
 
-	atomic_set(&srq->use_cnt, 0);
 	if (udata) {
 		status = ocrdma_copy_srq_uresp(srq, udata);
 		if (status)
 			goto arm_err;
 	}
 
-	atomic_inc(&pd->use_cnt);
 	return &srq->ibsrq;
 
 arm_err:
@@ -1618,18 +1585,12 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
 
 	srq = get_ocrdma_srq(ibsrq);
 	dev = srq->dev;
-	if (atomic_read(&srq->use_cnt)) {
-		ocrdma_err("%s(%d) err, srq=0x%x in use\n",
-			   __func__, dev->id, srq->id);
-		return -EAGAIN;
-	}
 
 	status = ocrdma_mbx_destroy_srq(dev, srq);
 
 	if (srq->pd->uctx)
 		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
 
-	atomic_dec(&srq->pd->use_cnt);
 	kfree(srq->idx_bit_fields);
 	kfree(srq->rqe_wr_id_tbl);
 	kfree(srq);
@@ -1677,9 +1638,9 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
 {
 	if (wr->send_flags & IB_SEND_INLINE) {
 		if (wr->sg_list[0].length > qp->max_inline_data) {
-			ocrdma_err("%s() supported_len=0x%x,"
-				" unspported len req=0x%x\n", __func__,
-				qp->max_inline_data, wr->sg_list[0].length);
+			pr_err("%s() supported_len=0x%x,\n"
+			       " unspported len req=0x%x\n", __func__,
+			       qp->max_inline_data, wr->sg_list[0].length);
 			return -EINVAL;
 		}
 		memcpy(sge,
@@ -1773,12 +1734,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	spin_lock_irqsave(&qp->q_lock, flags);
 	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
 		spin_unlock_irqrestore(&qp->q_lock, flags);
+		*bad_wr = wr;
 		return -EINVAL;
 	}
 
 	while (wr) {
 		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
 		    wr->num_sge > qp->sq.max_sges) {
+			*bad_wr = wr;
 			status = -ENOMEM;
 			break;
 		}
@@ -1856,7 +1819,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 {
-	u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp));
+	u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
 
 	iowrite32(val, qp->rq_db);
 }
@@ -2094,8 +2057,8 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
 		break;
 	default:
 		ibwc->status = IB_WC_GENERAL_ERR;
-		ocrdma_err("%s() invalid opcode received = 0x%x\n",
-			   __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
+		pr_err("%s() invalid opcode received = 0x%x\n",
+		       __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
 		break;
 	};
 }
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 1e603a375069..d03ca4c1ff25 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -5,3 +5,11 @@ config INFINIBAND_QIB
 	This is a low-level driver for Intel PCIe QLE InfiniBand host
 	channel adapters.  This driver does not support the Intel
 	HyperTransport card (model QHT7140).
+
+config INFINIBAND_QIB_DCA
+	bool "QIB DCA support"
+	depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m)
+	default y
+	---help---
+	Setting this enables DCA support on some Intel chip sets
+	with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index f12d7bb8b39f..57f8103e51f8 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
 
 ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
 ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
+ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4d11575c2010..4a9af795b88f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
 #ifndef _QIB_KERNEL_H
 #define _QIB_KERNEL_H
 /*
- * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -51,6 +51,7 @@
 #include <linux/completion.h>
 #include <linux/kref.h>
 #include <linux/sched.h>
+#include <linux/kthread.h>
 
 #include "qib_common.h"
 #include "qib_verbs.h"
@@ -114,6 +115,11 @@ struct qib_eep_log_mask {
 /*
  * Below contains all data related to a single context (formerly called port).
  */
+
+#ifdef CONFIG_DEBUG_FS
+struct qib_opcode_stats_perctx;
+#endif
+
 struct qib_ctxtdata {
 	void **rcvegrbuf;
 	dma_addr_t *rcvegrbuf_phys;
@@ -154,6 +160,8 @@ struct qib_ctxtdata {
 	 */
 	/* instead of calculating it */
 	unsigned ctxt;
+	/* local node of context */
+	int node_id;
 	/* non-zero if ctxt is being shared. */
 	u16 subctxt_cnt;
 	/* non-zero if ctxt is being shared. */
@@ -222,12 +230,15 @@ struct qib_ctxtdata {
 	u8 redirect_seq_cnt;
 	/* ctxt rcvhdrq head offset */
 	u32 head;
-	u32 pkt_count;
 	/* lookaside fields */
 	struct qib_qp *lookaside_qp;
 	u32 lookaside_qpn;
 	/* QPs waiting for context processing */
 	struct list_head qp_wait_list;
+#ifdef CONFIG_DEBUG_FS
+	/* verbs stats per CTX */
+	struct qib_opcode_stats_perctx *opstats;
+#endif
 };
 
 struct qib_sge_state;
@@ -428,9 +439,19 @@ struct qib_verbs_txreq {
 #define ACTIVITY_TIMER 5
 
 #define MAX_NAME_SIZE 64
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify;
+#endif
+
 struct qib_msix_entry {
 	struct msix_entry msix;
 	void *arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	int dca;
+	int rcv;
+	struct qib_irq_notify *notifier;
+#endif
 	char name[MAX_NAME_SIZE];
 	cpumask_var_t mask;
 };
@@ -828,6 +849,9 @@ struct qib_devdata {
 		struct qib_ctxtdata *);
 	void (*f_writescratch)(struct qib_devdata *, u32);
 	int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
+#endif
 
 	char *boardname; /* human readable board info */
 
@@ -1075,6 +1099,10 @@ struct qib_devdata {
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors defered to tasklet */
 	struct tasklet_struct error_tasklet;
+	/* per device cq worker */
+	struct kthread_worker *worker;
+
+	int assigned_node_id; /* NUMA node closest to HCA */
 };
 
 /* hol_state values */
@@ -1154,7 +1182,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
 int qib_setup_eagerbufs(struct qib_ctxtdata *);
 void qib_set_ctxtcnt(struct qib_devdata *);
 int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
 void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
 void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
 
@@ -1320,7 +1348,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
 	return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
 }
 int qib_sdma_running(struct qib_pportdata *);
-
+void dump_sdma_state(struct qib_pportdata *ppd);
 void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
 void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
 
@@ -1445,6 +1473,7 @@ extern unsigned qib_n_krcv_queues;
 extern unsigned qib_sdma_fetch_arb;
 extern unsigned qib_compat_ddr_negotiate;
 extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
 
 extern struct mutex qib_mutex;
 
@@ -1474,27 +1503,23 @@ extern struct mutex qib_mutex;
  * first to avoid possible serial port delays from printk.
  */
 #define qib_early_err(dev, fmt, ...) \
-	do { \
-		dev_err(dev, fmt, ##__VA_ARGS__); \
-	} while (0)
+	dev_err(dev, fmt, ##__VA_ARGS__)
 
 #define qib_dev_err(dd, fmt, ...) \
-	do { \
-		dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
-			qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \
-	} while (0)
+	dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
+		qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+
+#define qib_dev_warn(dd, fmt, ...) \
+	dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
+		qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
 
 #define qib_dev_porterr(dd, port, fmt, ...) \
-	do { \
-		dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
-			qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
-			##__VA_ARGS__); \
-	} while (0)
+	dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
+		qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
+		##__VA_ARGS__)
 
 #define qib_devinfo(pcidev, fmt, ...) \
-	do { \
-		dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \
-	} while (0)
+	dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
 
 /*
  * this is used for formatting hw error messages...
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index d39e0183ff82..4f255b723ffd 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define QIB_USER_SWMINOR 11
+#define QIB_USER_SWMINOR 12
 
 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
 
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index 5246aa486bbe..ab4e11cfab15 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2013 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -34,8 +35,10 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kthread.h>
 
 #include "qib_verbs.h"
+#include "qib.h"
 
 /**
  * qib_cq_enter - add a new entry to the completion queue
@@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED &&
 	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		cq->notify = IB_CQ_NONE;
-		cq->triggered++;
+		struct kthread_worker *worker;
 		/*
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		queue_work(qib_cq_wq, &cq->comptask);
+		smp_rmb();
+		worker = cq->dd->worker;
+		if (likely(worker)) {
+			cq->notify = IB_CQ_NONE;
+			cq->triggered++;
+			queue_kthread_work(worker, &cq->comptask);
+		}
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -163,7 +171,7 @@ bail:
 	return npolled;
 }
 
-static void send_complete(struct work_struct *work)
+static void send_complete(struct kthread_work *work)
 {
 	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
 
@@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
 	 * The number of entries should be >= the number requested or return
 	 * an error.
 	 */
+	cq->dd = dd_from_dev(dev);
 	cq->ibcq.cqe = entries;
 	cq->notify = IB_CQ_NONE;
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
-	INIT_WORK(&cq->comptask, send_complete);
+	init_kthread_work(&cq->comptask, send_complete);
 	wc->head = 0;
 	wc->tail = 0;
 	cq->queue = wc;
@@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq)
 	struct qib_ibdev *dev = to_idev(ibcq->device);
 	struct qib_cq *cq = to_icq(ibcq);
 
-	flush_work(&cq->comptask);
+	flush_kthread_work(&cq->comptask);
 	spin_lock(&dev->n_cqs_lock);
 	dev->n_cqs_allocated--;
 	spin_unlock(&dev->n_cqs_lock);
@@ -483,3 +492,49 @@ bail_free:
 bail:
 	return ret;
 }
+
+int qib_cq_init(struct qib_devdata *dd)
+{
+	int ret = 0;
+	int cpu;
+	struct task_struct *task;
+
+	if (dd->worker)
+		return 0;
+	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
+	if (!dd->worker)
+		return -ENOMEM;
+	init_kthread_worker(dd->worker);
+	task = kthread_create_on_node(
+		kthread_worker_fn,
+		dd->worker,
+		dd->assigned_node_id,
+		"qib_cq%d", dd->unit);
+	if (IS_ERR(task))
+		goto task_fail;
+	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+	kthread_bind(task, cpu);
+	wake_up_process(task);
+out:
+	return ret;
+task_fail:
+	ret = PTR_ERR(task);
+	kfree(dd->worker);
+	dd->worker = NULL;
+	goto out;
+}
+
+void qib_cq_exit(struct qib_devdata *dd)
+{
+	struct kthread_worker *worker;
+
+	worker = dd->worker;
+	if (!worker)
+		return;
+	/* blocks future queuing from send_complete() */
+	dd->worker = NULL;
+	smp_wmb();
+	flush_kthread_worker(worker);
+	kthread_stop(worker->task);
+	kfree(worker);
+}
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 000000000000..799a0c3bffc4
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_verbs.h"
+#include "qib_debugfs.h"
+
+static struct dentry *qib_dbg_root;
+
+#define DEBUGFS_FILE(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+	.start = _##name##_seq_start, \
+	.next  = _##name##_seq_next, \
+	.stop  = _##name##_seq_stop, \
+	.show  = _##name##_seq_show \
+}; \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+	struct seq_file *seq; \
+	int ret; \
+	ret =  seq_open(s, &_##name##_seq_ops); \
+	if (ret) \
+		return ret; \
+	seq = s->private_data; \
+	seq->private = inode->i_private; \
+	return 0; \
+} \
+static const struct file_operations _##name##_file_ops = { \
+	.owner   = THIS_MODULE, \
+	.open    = _##name##_open, \
+	.read    = seq_read, \
+	.llseek  = seq_lseek, \
+	.release = seq_release \
+};
+
+#define DEBUGFS_FILE_CREATE(name) \
+do { \
+	struct dentry *ent; \
+	ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
+		ibd, &_##name##_file_ops); \
+	if (!ent) \
+		pr_warn("create of " #name " failed\n"); \
+} while (0)
+
+static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct qib_opcode_stats_perctx *opstats;
+
+	if (*pos >= ARRAY_SIZE(opstats->stats))
+		return NULL;
+	return pos;
+}
+
+static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct qib_opcode_stats_perctx *opstats;
+
+	++*pos;
+	if (*pos >= ARRAY_SIZE(opstats->stats))
+		return NULL;
+	return pos;
+}
+
+
+static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
+{
+	/* nothing allocated */
+}
+
+static int _opcode_stats_seq_show(struct seq_file *s, void *v)
+{
+	loff_t *spos = v;
+	loff_t i = *spos, j;
+	u64 n_packets = 0, n_bytes = 0;
+	struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+	struct qib_devdata *dd = dd_from_dev(ibd);
+
+	for (j = 0; j < dd->first_user_ctxt; j++) {
+		if (!dd->rcd[j])
+			continue;
+		n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+		n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+	}
+	if (!n_packets && !n_bytes)
+		return SEQ_SKIP;
+	seq_printf(s, "%02llx %llu/%llu\n", i,
+		(unsigned long long) n_packets,
+		(unsigned long long) n_bytes);
+
+	return 0;
+}
+
+DEBUGFS_FILE(opcode_stats)
+
+static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+	struct qib_devdata *dd = dd_from_dev(ibd);
+
+	if (!*pos)
+		return SEQ_START_TOKEN;
+	if (*pos >= dd->first_user_ctxt)
+		return NULL;
+	return pos;
+}
+
+static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+	struct qib_devdata *dd = dd_from_dev(ibd);
+
+	if (v == SEQ_START_TOKEN)
+		return pos;
+
+	++*pos;
+	if (*pos >= dd->first_user_ctxt)
+		return NULL;
+	return pos;
+}
+
+static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
+{
+	/* nothing allocated */
+}
+
+static int _ctx_stats_seq_show(struct seq_file *s, void *v)
+{
+	loff_t *spos;
+	loff_t i, j;
+	u64 n_packets = 0;
+	struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+	struct qib_devdata *dd = dd_from_dev(ibd);
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(s, "Ctx:npkts\n");
+		return 0;
+	}
+
+	spos = v;
+	i = *spos;
+
+	if (!dd->rcd[i])
+		return SEQ_SKIP;
+
+	for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
+		n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
+
+	if (!n_packets)
+		return SEQ_SKIP;
+
+	seq_printf(s, "  %llu:%llu\n", i, n_packets);
+	return 0;
+}
+
+DEBUGFS_FILE(ctx_stats)
+
+static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct qib_qp_iter *iter;
+	loff_t n = *pos;
+
+	iter = qib_qp_iter_init(s->private);
+	if (!iter)
+		return NULL;
+
+	while (n--) {
+		if (qib_qp_iter_next(iter)) {
+			kfree(iter);
+			return NULL;
+		}
+	}
+
+	return iter;
+}
+
+static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+				   loff_t *pos)
+{
+	struct qib_qp_iter *iter = iter_ptr;
+
+	(*pos)++;
+
+	if (qib_qp_iter_next(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+
+	return iter;
+}
+
+static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+{
+	/* nothing for now */
+}
+
+static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
+{
+	struct qib_qp_iter *iter = iter_ptr;
+
+	if (!iter)
+		return 0;
+
+	qib_qp_iter_print(s, iter);
+
+	return 0;
+}
+
+DEBUGFS_FILE(qp_stats)
+
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
+{
+	char name[10];
+
+	snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
+	ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
+	if (!ibd->qib_ibdev_dbg) {
+		pr_warn("create of %s failed\n", name);
+		return;
+	}
+	DEBUGFS_FILE_CREATE(opcode_stats);
+	DEBUGFS_FILE_CREATE(ctx_stats);
+	DEBUGFS_FILE_CREATE(qp_stats);
+	return;
+}
+
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
+{
+	if (!qib_dbg_root)
+		goto out;
+	debugfs_remove_recursive(ibd->qib_ibdev_dbg);
+out:
+	ibd->qib_ibdev_dbg = NULL;
+}
+
+void qib_dbg_init(void)
+{
+	qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
+	if (!qib_dbg_root)
+		pr_warn("init of debugfs failed\n");
+}
+
+void qib_dbg_exit(void)
+{
+	debugfs_remove_recursive(qib_dbg_root);
+	qib_dbg_root = NULL;
+}
+
+#endif
+
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 000000000000..7ae983a91b8b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
+#ifndef _QIB_DEBUGFS_H
+#define _QIB_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+struct qib_ibdev;
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
+void qib_dbg_init(void);
+void qib_dbg_exit(void);
+
+#endif
+
+#endif                          /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 216092477dfc..5bee08f16d74 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -558,7 +558,6 @@ move_along:
 	}
 
 	rcd->head = l;
-	rcd->pkt_count += i;
 
 	/*
 	 * Iterate over all QPs waiting to respond.
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 9dd0bc89c3aa..b51a51486cb8 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
 	return pollflag;
 }
 
+static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
+{
+	struct qib_filedata *fd = fp->private_data;
+	const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+	const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+	int local_cpu;
+
+	/*
+	 * If process has NOT already set it's affinity, select and
+	 * reserve a processor for it on the local NUMA node.
+	 */
+	if ((weight >= qib_cpulist_count) &&
+		(cpumask_weight(local_mask) <= qib_cpulist_count)) {
+		for_each_cpu(local_cpu, local_mask)
+			if (!test_and_set_bit(local_cpu, qib_cpulist)) {
+				fd->rec_cpu_num = local_cpu;
+				return;
+			}
+	}
+
+	/*
+	 * If process has NOT already set it's affinity, select and
+	 * reserve a processor for it, as a rendevous for all
+	 * users of the driver.  If they don't actually later
+	 * set affinity to this cpu, or set it to some other cpu,
+	 * it just means that sooner or later we don't recommend
+	 * a cpu, and let the scheduler do it's best.
+	 */
+	if (weight >= qib_cpulist_count) {
+		int cpu;
+		cpu = find_first_zero_bit(qib_cpulist,
+					  qib_cpulist_count);
+		if (cpu == qib_cpulist_count)
+			qib_dev_err(dd,
+			"no cpus avail for affinity PID %u\n",
+			current->pid);
+		else {
+			__set_bit(cpu, qib_cpulist);
+			fd->rec_cpu_num = cpu;
+		}
+	}
+}
+
 /*
  * Check that userland and driver are compatible for subcontexts.
  */
@@ -1259,12 +1302,20 @@ bail:
 static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
 		      struct file *fp, const struct qib_user_info *uinfo)
 {
+	struct qib_filedata *fd = fp->private_data;
 	struct qib_devdata *dd = ppd->dd;
 	struct qib_ctxtdata *rcd;
 	void *ptmp = NULL;
 	int ret;
+	int numa_id;
+
+	assign_ctxt_affinity(fp, dd);
 
-	rcd = qib_create_ctxtdata(ppd, ctxt);
+	numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
+		cpu_to_node(fd->rec_cpu_num) :
+		numa_node_id()) : dd->assigned_node_id;
+
+	rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
 
 	/*
 	 * Allocate memory for use in qib_tid_update() at open to
@@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
 	goto bail;
 
 bailerr:
+	if (fd->rec_cpu_num != -1)
+		__clear_bit(fd->rec_cpu_num, qib_cpulist);
+
 	dd->rcd[ctxt] = NULL;
 	kfree(rcd);
 	kfree(ptmp);
@@ -1485,6 +1539,57 @@ static int qib_open(struct inode *in, struct file *fp)
 	return fp->private_data ? 0 : -ENOMEM;
 }
 
+static int find_hca(unsigned int cpu, int *unit)
+{
+	int ret = 0, devmax, npresent, nup, ndev;
+
+	*unit = -1;
+
+	devmax = qib_count_units(&npresent, &nup);
+	if (!npresent) {
+		ret = -ENXIO;
+		goto done;
+	}
+	if (!nup) {
+		ret = -ENETDOWN;
+		goto done;
+	}
+	for (ndev = 0; ndev < devmax; ndev++) {
+		struct qib_devdata *dd = qib_lookup(ndev);
+		if (dd) {
+			if (pcibus_to_node(dd->pcidev->bus) < 0) {
+				ret = -EINVAL;
+				goto done;
+			}
+			if (cpu_to_node(cpu) ==
+				pcibus_to_node(dd->pcidev->bus)) {
+				*unit = ndev;
+				goto done;
+			}
+		}
+	}
+done:
+	return ret;
+}
+
+static int do_qib_user_sdma_queue_create(struct file *fp)
+{
+	struct qib_filedata *fd = fp->private_data;
+	struct qib_ctxtdata *rcd = fd->rcd;
+	struct qib_devdata *dd = rcd->dd;
+
+	if (dd->flags & QIB_HAS_SEND_DMA)
+
+		fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+						    dd->unit,
+						    rcd->ctxt,
+						    fd->subctxt);
+		if (!fd->pq)
+			return -ENOMEM;
+
+	return 0;
+}
+
 /*
  * Get ctxt early, so can set affinity prior to memory allocation.
  */
@@ -1517,61 +1622,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
 	if (qib_compatible_subctxts(swmajor, swminor) &&
 	    uinfo->spu_subctxt_cnt) {
 		ret = find_shared_ctxt(fp, uinfo);
-		if (ret) {
-			if (ret > 0)
-				ret = 0;
-			goto done_chk_sdma;
+		if (ret > 0) {
+			ret = do_qib_user_sdma_queue_create(fp);
+			if (!ret)
+				assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+			goto done_ok;
 		}
 	}
 
 	i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
 	if (i_minor)
 		ret = find_free_ctxt(i_minor - 1, fp, uinfo);
-	else
+	else {
+		int unit;
+		const unsigned int cpu = cpumask_first(&current->cpus_allowed);
+		const unsigned int weight =
+			cpumask_weight(&current->cpus_allowed);
+
+		if (weight == 1 && !test_bit(cpu, qib_cpulist))
+			if (!find_hca(cpu, &unit) && unit >= 0)
+				if (!find_free_ctxt(unit, fp, uinfo)) {
+					ret = 0;
+					goto done_chk_sdma;
+				}
 		ret = get_a_ctxt(fp, uinfo, alg);
-
-done_chk_sdma:
-	if (!ret) {
-		struct qib_filedata *fd = fp->private_data;
-		const struct qib_ctxtdata *rcd = fd->rcd;
-		const struct qib_devdata *dd = rcd->dd;
-		unsigned int weight;
-
-		if (dd->flags & QIB_HAS_SEND_DMA) {
-			fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
-							    dd->unit,
-							    rcd->ctxt,
-							    fd->subctxt);
-			if (!fd->pq)
-				ret = -ENOMEM;
-		}
-
-		/*
-		 * If process has NOT already set it's affinity, select and
-		 * reserve a processor for it, as a rendezvous for all
-		 * users of the driver.  If they don't actually later
-		 * set affinity to this cpu, or set it to some other cpu,
-		 * it just means that sooner or later we don't recommend
-		 * a cpu, and let the scheduler do it's best.
-		 */
-		weight = cpumask_weight(tsk_cpus_allowed(current));
-		if (!ret && weight >= qib_cpulist_count) {
-			int cpu;
-			cpu = find_first_zero_bit(qib_cpulist,
-						  qib_cpulist_count);
-			if (cpu != qib_cpulist_count) {
-				__set_bit(cpu, qib_cpulist);
-				fd->rec_cpu_num = cpu;
-			}
-		} else if (weight == 1 &&
-			test_bit(cpumask_first(tsk_cpus_allowed(current)),
-				 qib_cpulist))
-			qib_devinfo(dd->pcidev,
-				"%s PID %u affinity set to cpu %d; already allocated\n",
-				current->comm, current->pid,
-				cpumask_first(tsk_cpus_allowed(current)));
 	}
 
+done_chk_sdma:
+	if (!ret)
+		ret = do_qib_user_sdma_queue_create(fp);
+done_ok:
 	mutex_unlock(&qib_mutex);
 
 done:
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 0232ae56b1fa..84e593d6007b 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3464,6 +3464,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
 	return -ENXIO;
 }
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+	return 0;
+}
+#endif
+
 /* Dummy function, as 6120 boards never disable EEPROM Write */
 static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
 {
@@ -3539,6 +3546,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
 	dd->f_xgxs_reset        = qib_6120_xgxs_reset;
 	dd->f_writescratch      = writescratch;
 	dd->f_tempsense_rd	= qib_6120_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dd->f_notify_dca = qib_6120_notify_dca;
+#endif
 	/*
 	 * Do remaining pcie setup and save pcie values in dd.
 	 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 64d0ecb90cdc..454c2e7668fe 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4513,6 +4513,13 @@ bail:
 	return ret;
 }
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+	return 0;
+}
+#endif
+
 /* Dummy function, as 7220 boards never disable EEPROM Write */
 static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
 {
@@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
 	dd->f_xgxs_reset        = qib_7220_xgxs_reset;
 	dd->f_writescratch      = writescratch;
 	dd->f_tempsense_rd	= qib_7220_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dd->f_notify_dca = qib_7220_notify_dca;
+#endif
 	/*
 	 * Do remaining pcie setup and save pcie values in dd.
 	 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 3f6b21e9dc11..21e8b09d4bf8 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -44,6 +44,9 @@
 #include <linux/module.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_smi.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
 
 #include "qib.h"
 #include "qib_7322_regs.h"
@@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
 static void serdes_7322_los_enable(struct qib_pportdata *, int);
 static int serdes_7322_init_old(struct qib_pportdata *);
 static int serdes_7322_init_new(struct qib_pportdata *);
+static void dump_sdma_7322_state(struct qib_pportdata *);
 
 #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
 
@@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = {
 	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
 };
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify {
+	int rcv;
+	void *arg;
+	struct irq_affinity_notify notify;
+};
+#endif
+
 struct qib_chip_specific {
 	u64 __iomem *cregbase;
 	u64 *cntrs;
@@ -546,6 +558,12 @@ struct qib_chip_specific {
 	u32 lastbuf_for_pio;
 	u32 stay_in_freeze;
 	u32 recovery_ports_initted;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	u32 dca_ctrl;
+	int rhdr_cpu[18];
+	int sdma_cpu[2];
+	u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
+#endif
 	struct qib_msix_entry *msix_entries;
 	unsigned long *sendchkenable;
 	unsigned long *sendgrhchk;
@@ -573,7 +591,7 @@ struct vendor_txdds_ent {
 static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
 
 #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
-#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */
+#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
 #define TXDDS_MFG_SZ 2    /* number of mfg tx settings entries */
 #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
 
@@ -635,6 +653,7 @@ struct qib_chippport_specific {
 	u8 ibmalfusesnap;
 	struct qib_qsfp_data qsfp_data;
 	char epmsgbuf[192]; /* for port error interrupt msg buffer */
+	char sdmamsgbuf[192]; /* for per-port sdma error messages */
 };
 
 static struct {
@@ -642,28 +661,76 @@ static struct {
 	irq_handler_t handler;
 	int lsb;
 	int port; /* 0 if not port-specific, else port # */
+	int dca;
 } irq_table[] = {
-	{ "", qib_7322intr, -1, 0 },
+	{ "", qib_7322intr, -1, 0, 0 },
 	{ " (buf avail)", qib_7322bufavail,
-		SYM_LSB(IntStatus, SendBufAvail), 0 },
+		SYM_LSB(IntStatus, SendBufAvail), 0, 0},
 	{ " (sdma 0)", sdma_intr,
-		SYM_LSB(IntStatus, SDmaInt_0), 1 },
+		SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
 	{ " (sdma 1)", sdma_intr,
-		SYM_LSB(IntStatus, SDmaInt_1), 2 },
+		SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
 	{ " (sdmaI 0)", sdma_idle_intr,
-		SYM_LSB(IntStatus, SDmaIdleInt_0), 1 },
+		SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
 	{ " (sdmaI 1)", sdma_idle_intr,
-		SYM_LSB(IntStatus, SDmaIdleInt_1), 2 },
+		SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
 	{ " (sdmaP 0)", sdma_progress_intr,
-		SYM_LSB(IntStatus, SDmaProgressInt_0), 1 },
+		SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
 	{ " (sdmaP 1)", sdma_progress_intr,
-		SYM_LSB(IntStatus, SDmaProgressInt_1), 2 },
+		SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
 	{ " (sdmaC 0)", sdma_cleanup_intr,
-		SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 },
+		SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
 	{ " (sdmaC 1)", sdma_cleanup_intr,
-		SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 },
+		SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
 };
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static const struct dca_reg_map {
+	int     shadow_inx;
+	int     lsb;
+	u64     mask;
+	u16     regno;
+} dca_rcvhdr_reg_map[] = {
+	{ 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
+	   ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
+	{ 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
+	   ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
+	{ 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
+	   ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
+	{ 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
+	   ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
+	{ 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
+	   ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
+	{ 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
+	   ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
+	{ 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
+	   ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
+	{ 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
+	   ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
+	{ 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
+	   ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
+	{ 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
+	   ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
+	{ 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
+	   ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
+	{ 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
+	   ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
+	{ 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
+	   ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
+	{ 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
+	   ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
+	{ 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
+	   ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
+	{ 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
+	   ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
+	{ 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
+	   ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
+	{ 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
+	   ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
+};
+#endif
+
 /* ibcctrl bits */
 #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
 /* cycle through TS1/TS2 till OK */
@@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *);
 static void setup_7322_link_recovery(struct qib_pportdata *, u32);
 static void check_7322_rxe_status(struct qib_pportdata *);
 static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static void qib_setup_dca(struct qib_devdata *dd);
+static void setup_dca_notifier(struct qib_devdata *dd,
+			       struct qib_msix_entry *m);
+static void reset_dca_notifier(struct qib_devdata *dd,
+			       struct qib_msix_entry *m);
+#endif
 
 /**
  * qib_read_ureg32 - read 32-bit virtualized per-context register
@@ -1529,6 +1603,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
 
 	spin_lock_irqsave(&ppd->sdma_lock, flags);
 
+	if (errs != QIB_E_P_SDMAHALT) {
+		/* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
+		qib_dev_porterr(dd, ppd->port,
+			"SDMA %s 0x%016llx %s\n",
+			qib_sdma_state_names[ppd->sdma_state.current_state],
+			errs, ppd->cpspec->sdmamsgbuf);
+		dump_sdma_7322_state(ppd);
+	}
+
 	switch (ppd->sdma_state.current_state) {
 	case qib_sdma_state_s00_hw_down:
 		break;
@@ -2084,6 +2167,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
 
 	qib_dev_err(dd, "%s hardware error\n", msg);
 
+	if (hwerrs &
+		   (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
+		    SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
+		int pidx = 0;
+		int err;
+		unsigned long flags;
+		struct qib_pportdata *ppd = dd->pport;
+		for (; pidx < dd->num_pports; ++pidx, ppd++) {
+			err = 0;
+			if (pidx == 0 && (hwerrs &
+				SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
+				err++;
+			if (pidx == 1 && (hwerrs &
+				SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
+				err++;
+			if (err) {
+				spin_lock_irqsave(&ppd->sdma_lock, flags);
+				dump_sdma_7322_state(ppd);
+				spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+			}
+		}
+	}
+
 	if (isfatal && !dd->diag_client) {
 		qib_dev_err(dd,
 			"Fatal Hardware Error, no longer usable, SN %.16s\n",
@@ -2558,6 +2664,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
 		qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
 }
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+	switch (event) {
+	case DCA_PROVIDER_ADD:
+		if (dd->flags & QIB_DCA_ENABLED)
+			break;
+		if (!dca_add_requester(&dd->pcidev->dev)) {
+			qib_devinfo(dd->pcidev, "DCA enabled\n");
+			dd->flags |= QIB_DCA_ENABLED;
+			qib_setup_dca(dd);
+		}
+		break;
+	case DCA_PROVIDER_REMOVE:
+		if (dd->flags & QIB_DCA_ENABLED) {
+			dca_remove_requester(&dd->pcidev->dev);
+			dd->flags &= ~QIB_DCA_ENABLED;
+			dd->cspec->dca_ctrl = 0;
+			qib_write_kreg(dd, KREG_IDX(DCACtrlA),
+				dd->cspec->dca_ctrl);
+		}
+		break;
+	}
+	return 0;
+}
+
+static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
+{
+	struct qib_devdata *dd = rcd->dd;
+	struct qib_chip_specific *cspec = dd->cspec;
+
+	if (!(dd->flags & QIB_DCA_ENABLED))
+		return;
+	if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
+		const struct dca_reg_map *rmp;
+
+		cspec->rhdr_cpu[rcd->ctxt] = cpu;
+		rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
+		cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
+		cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
+			(u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
+		qib_devinfo(dd->pcidev,
+			"Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
+			(long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+		qib_write_kreg(dd, rmp->regno,
+			       cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+		cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
+		qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+	}
+}
+
+static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
+{
+	struct qib_devdata *dd = ppd->dd;
+	struct qib_chip_specific *cspec = dd->cspec;
+	unsigned pidx = ppd->port - 1;
+
+	if (!(dd->flags & QIB_DCA_ENABLED))
+		return;
+	if (cspec->sdma_cpu[pidx] != cpu) {
+		cspec->sdma_cpu[pidx] = cpu;
+		cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
+			SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
+			SYM_MASK(DCACtrlF, SendDma0DCAOPH));
+		cspec->dca_rcvhdr_ctrl[4] |=
+			(u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
+				(ppd->hw_pidx ?
+					SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
+					SYM_LSB(DCACtrlF, SendDma0DCAOPH));
+		qib_devinfo(dd->pcidev,
+			"sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
+			(long long) cspec->dca_rcvhdr_ctrl[4]);
+		qib_write_kreg(dd, KREG_IDX(DCACtrlF),
+			       cspec->dca_rcvhdr_ctrl[4]);
+		cspec->dca_ctrl |= ppd->hw_pidx ?
+			SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
+			SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
+		qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+	}
+}
+
+static void qib_setup_dca(struct qib_devdata *dd)
+{
+	struct qib_chip_specific *cspec = dd->cspec;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
+		cspec->rhdr_cpu[i] = -1;
+	for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+		cspec->sdma_cpu[i] = -1;
+	cspec->dca_rcvhdr_ctrl[0] =
+		(1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
+	cspec->dca_rcvhdr_ctrl[1] =
+		(1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
+	cspec->dca_rcvhdr_ctrl[2] =
+		(1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
+	cspec->dca_rcvhdr_ctrl[3] =
+		(1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
+	cspec->dca_rcvhdr_ctrl[4] =
+		(1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
+		(1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
+	for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+		qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
+			       cspec->dca_rcvhdr_ctrl[i]);
+	for (i = 0; i < cspec->num_msix_entries; i++)
+		setup_dca_notifier(dd, &cspec->msix_entries[i]);
+}
+
+static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
+			     const cpumask_t *mask)
+{
+	struct qib_irq_notify *n =
+		container_of(notify, struct qib_irq_notify, notify);
+	int cpu = cpumask_first(mask);
+
+	if (n->rcv) {
+		struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+		qib_update_rhdrq_dca(rcd, cpu);
+	} else {
+		struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+		qib_update_sdma_dca(ppd, cpu);
+	}
+}
+
+static void qib_irq_notifier_release(struct kref *ref)
+{
+	struct qib_irq_notify *n =
+		container_of(ref, struct qib_irq_notify, notify.kref);
+	struct qib_devdata *dd;
+
+	if (n->rcv) {
+		struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+		dd = rcd->dd;
+	} else {
+		struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+		dd = ppd->dd;
+	}
+	qib_devinfo(dd->pcidev,
+		"release on HCA notify 0x%p n 0x%p\n", ref, n);
+	kfree(n);
+}
+#endif
+
 /*
  * Disable MSIx interrupt if enabled, call generic MSIx code
  * to cleanup, and clear pending MSIx interrupts.
@@ -2575,6 +2837,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
 
 		dd->cspec->num_msix_entries = 0;
 		for (i = 0; i < n; i++) {
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+			reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
+#endif
 			irq_set_affinity_hint(
 			  dd->cspec->msix_entries[i].msix.vector, NULL);
 			free_cpumask_var(dd->cspec->msix_entries[i].mask);
@@ -2602,6 +2867,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
 {
 	int i;
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	if (dd->flags & QIB_DCA_ENABLED) {
+		dca_remove_requester(&dd->pcidev->dev);
+		dd->flags &= ~QIB_DCA_ENABLED;
+		dd->cspec->dca_ctrl = 0;
+		qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
+	}
+#endif
+
 	qib_7322_free_irq(dd);
 	kfree(dd->cspec->cntrs);
 	kfree(dd->cspec->sendchkenable);
@@ -3068,6 +3342,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+	if (!m->dca)
+		return;
+	qib_devinfo(dd->pcidev,
+		"Disabling notifier on HCA %d irq %d\n",
+		dd->unit,
+		m->msix.vector);
+	irq_set_affinity_notifier(
+		m->msix.vector,
+		NULL);
+	m->notifier = NULL;
+}
+
+static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+	struct qib_irq_notify *n;
+
+	if (!m->dca)
+		return;
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
+	if (n) {
+		int ret;
+
+		m->notifier = n;
+		n->notify.irq = m->msix.vector;
+		n->notify.notify = qib_irq_notifier_notify;
+		n->notify.release = qib_irq_notifier_release;
+		n->arg = m->arg;
+		n->rcv = m->rcv;
+		qib_devinfo(dd->pcidev,
+			"set notifier irq %d rcv %d notify %p\n",
+			n->notify.irq, n->rcv, &n->notify);
+		ret = irq_set_affinity_notifier(
+				n->notify.irq,
+				&n->notify);
+		if (ret) {
+			m->notifier = NULL;
+			kfree(n);
+		}
+	}
+}
+
+#endif
+
 /*
  * Set up our chip-specific interrupt handler.
  * The interrupt type has already been setup, so
@@ -3149,6 +3470,9 @@ try_intx:
 		void *arg;
 		u64 val;
 		int lsb, reg, sh;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+		int dca = 0;
+#endif
 
 		dd->cspec->msix_entries[msixnum].
 			name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
@@ -3161,6 +3485,9 @@ try_intx:
 				arg = dd->pport + irq_table[i].port - 1;
 			} else
 				arg = dd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+			dca = irq_table[i].dca;
+#endif
 			lsb = irq_table[i].lsb;
 			handler = irq_table[i].handler;
 			snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3178,6 +3505,9 @@ try_intx:
 				continue;
 			if (qib_krcvq01_no_msi && ctxt < 2)
 				continue;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+			dca = 1;
+#endif
 			lsb = QIB_I_RCVAVAIL_LSB + ctxt;
 			handler = qib_7322pintr;
 			snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3203,6 +3533,11 @@ try_intx:
 			goto try_intx;
 		}
 		dd->cspec->msix_entries[msixnum].arg = arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+		dd->cspec->msix_entries[msixnum].dca = dca;
+		dd->cspec->msix_entries[msixnum].rcv =
+			handler == qib_7322pintr;
+#endif
 		if (lsb >= 0) {
 			reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
 			sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -6452,6 +6787,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
 	qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
 }
 
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+	u64 reg, reg1, reg2;
+
+	reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmastatus: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_sendctrl);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA sendctrl: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmabase);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmabase: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmabufmask 0:%llx  1:%llx  2:%llx\n",
+		 reg, reg1, reg2);
+
+	/* get bufuse bits, clear them, and print them again if non-zero */
+	reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2);
+	/* 0 and 1 should always be zero, so print as short form */
+	qib_dev_porterr(ppd->dd, ppd->port,
+		 "SDMA current senddmabuf_use 0:%llx  1:%llx  2:%llx\n",
+		 reg, reg1, reg2);
+	reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+	/* 0 and 1 should always be zero, so print as short form */
+	qib_dev_porterr(ppd->dd, ppd->port,
+		 "SDMA cleared senddmabuf_use 0:%llx  1:%llx  2:%llx\n",
+		 reg, reg1, reg2);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmatail);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmatail: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmahead);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmahead: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmalengen: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmapriorityhld: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+	dump_sdma_state(ppd);
+}
+
 static struct sdma_set_state_action sdma_7322_action_table[] = {
 	[qib_sdma_state_s00_hw_down] = {
 		.go_s99_running_tofalse = 1,
@@ -6885,6 +7300,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
 	dd->f_sdma_init_early   = qib_7322_sdma_init_early;
 	dd->f_writescratch      = writescratch;
 	dd->f_tempsense_rd	= qib_7322_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dd->f_notify_dca	= qib_7322_notify_dca;
+#endif
 	/*
 	 * Do remaining PCIe setup and save PCIe values in dd.
 	 * Any error printing is already done by the init code.
@@ -6921,7 +7339,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
 		actual_cnt -= dd->num_pports;
 
 	tabsize = actual_cnt;
-	dd->cspec->msix_entries = kmalloc(tabsize *
+	dd->cspec->msix_entries = kzalloc(tabsize *
 			sizeof(struct qib_msix_entry), GFP_KERNEL);
 	if (!dd->cspec->msix_entries) {
 		qib_dev_err(dd, "No memory for MSIx table\n");
@@ -6941,7 +7359,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
 
 	/* clear diagctrl register, in case diags were running and crashed */
 	qib_write_kreg(dd, kr_hwdiagctrl, 0);
-
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	if (!dca_add_requester(&pdev->dev)) {
+		qib_devinfo(dd->pcidev, "DCA enabled\n");
+		dd->flags |= QIB_DCA_ENABLED;
+		qib_setup_dca(dd);
+	}
+#endif
 	goto bail;
 
 bail_cleanup:
@@ -7156,15 +7580,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
 	{  0, 0, 0,  1 },	/* QMH7342 backplane settings */
 	{  0, 0, 0,  2 },	/* QMH7342 backplane settings */
 	{  0, 0, 0,  2 },	/* QMH7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 11 },	/* QME7342 backplane settings */
 	{  0, 0, 0,  3 },	/* QMH7342 backplane settings */
 	{  0, 0, 0,  4 },	/* QMH7342 backplane settings */
+	{  0, 1, 4, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 3, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 12 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 11 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0,  9 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 14 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 2, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 11 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  7 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  9 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  6 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  8 },       /* QME7342 backplane settings 1.1 */
 };
 
 static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
@@ -7173,15 +7602,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
 	{  0, 0, 0,  7 },	/* QMH7342 backplane settings */
 	{  0, 0, 0,  8 },	/* QMH7342 backplane settings */
 	{  0, 0, 0,  8 },	/* QMH7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
-	{  0, 0, 0, 13 },	/* QME7342 backplane settings */
 	{  0, 0, 0,  9 },	/* QMH7342 backplane settings */
 	{  0, 0, 0, 10 },	/* QMH7342 backplane settings */
+	{  0, 1, 4, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 3, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 12 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 11 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0,  9 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 14 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 2, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1, 0, 11 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  7 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  9 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  6 },       /* QME7342 backplane settings 1.1 */
+	{  0, 1, 0,  8 },       /* QME7342 backplane settings 1.1 */
 };
 
 static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
@@ -7190,15 +7624,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
 	{  0, 1,  0,  5 },	/* QMH7342 backplane settings */
 	{  0, 1,  0,  6 },	/* QMH7342 backplane settings */
 	{  0, 1,  0,  8 },	/* QMH7342 backplane settings */
-	{  0, 1, 12, 10 },	/* QME7342 backplane setting */
-	{  0, 1, 12, 11 },	/* QME7342 backplane setting */
-	{  0, 1, 12, 12 },	/* QME7342 backplane setting */
-	{  0, 1, 12, 14 },	/* QME7342 backplane setting */
-	{  0, 1, 12,  6 },	/* QME7342 backplane setting */
-	{  0, 1, 12,  7 },	/* QME7342 backplane setting */
-	{  0, 1, 12,  8 },	/* QME7342 backplane setting */
 	{  0, 1,  0, 10 },	/* QMH7342 backplane settings */
 	{  0, 1,  0, 12 },	/* QMH7342 backplane settings */
+	{  0, 1,  4, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  3, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  0, 12 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  0, 11 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  0,  9 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  0, 14 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  2, 15 },	/* QME7342 backplane settings 1.0 */
+	{  0, 1,  0, 11 },      /* QME7342 backplane settings 1.1 */
+	{  0, 1,  0,  7 },      /* QME7342 backplane settings 1.1 */
+	{  0, 1,  0,  9 },      /* QME7342 backplane settings 1.1 */
+	{  0, 1,  0,  6 },      /* QME7342 backplane settings 1.1 */
+	{  0, 1,  0,  8 },      /* QME7342 backplane settings 1.1 */
 };
 
 static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 173f805790da..36e048e0e1d9 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -39,10 +39,17 @@
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/printk.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
 
 #include "qib.h"
 #include "qib_common.h"
 #include "qib_mad.h"
+#ifdef CONFIG_DEBUG_FS
+#include "qib_debugfs.h"
+#include "qib_verbs.h"
+#endif
 
 #undef pr_fmt
 #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
@@ -64,6 +71,11 @@ ushort qib_cfgctxts;
 module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
 
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+	"0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
+
 /*
  * If set, do not write to any regs if avoidable, hack to allow
  * check for deranged default register values.
@@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
 module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
 MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
 
-struct workqueue_struct *qib_cq_wq;
-
 static void verify_interrupt(unsigned long);
 
 static struct idr qib_unit_table;
@@ -121,6 +131,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
 {
 	unsigned i;
 	int ret;
+	int local_node_id = pcibus_to_node(dd->pcidev->bus);
+
+	if (local_node_id < 0)
+		local_node_id = numa_node_id();
+	dd->assigned_node_id = local_node_id;
 
 	/*
 	 * Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -143,7 +158,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
 			continue;
 
 		ppd = dd->pport + (i % dd->num_pports);
-		rcd = qib_create_ctxtdata(ppd, i);
+
+		rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
 		if (!rcd) {
 			qib_dev_err(dd,
 				"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
@@ -161,20 +177,33 @@ done:
 /*
  * Common code for user and kernel context setup.
  */
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+	int node_id)
 {
 	struct qib_devdata *dd = ppd->dd;
 	struct qib_ctxtdata *rcd;
 
-	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
 	if (rcd) {
 		INIT_LIST_HEAD(&rcd->qp_wait_list);
+		rcd->node_id = node_id;
 		rcd->ppd = ppd;
 		rcd->dd = dd;
 		rcd->cnt = 1;
 		rcd->ctxt = ctxt;
 		dd->rcd[ctxt] = rcd;
-
+#ifdef CONFIG_DEBUG_FS
+		if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+			rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+				GFP_KERNEL, node_id);
+			if (!rcd->opstats) {
+				kfree(rcd);
+				qib_dev_err(dd,
+					"Unable to allocate per ctxt stats buffer\n");
+				return NULL;
+			}
+		}
+#endif
 		dd->f_init_ctxt(rcd);
 
 		/*
@@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd)
 	dd->intrchk_timer.function = verify_interrupt;
 	dd->intrchk_timer.data = (unsigned long) dd;
 
+	ret = qib_cq_init(dd);
 done:
 	return ret;
 }
@@ -944,6 +974,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 	vfree(rcd->subctxt_uregbase);
 	vfree(rcd->subctxt_rcvegrbuf);
 	vfree(rcd->subctxt_rcvhdr_base);
+#ifdef CONFIG_DEBUG_FS
+	kfree(rcd->opstats);
+	rcd->opstats = NULL;
+#endif
 	kfree(rcd);
 }
 
@@ -1033,7 +1067,6 @@ done:
 	dd->f_set_armlaunch(dd, 1);
 }
 
-
 void qib_free_devdata(struct qib_devdata *dd)
 {
 	unsigned long flags;
@@ -1043,6 +1076,9 @@ void qib_free_devdata(struct qib_devdata *dd)
 	list_del(&dd->list);
 	spin_unlock_irqrestore(&qib_devs_lock, flags);
 
+#ifdef CONFIG_DEBUG_FS
+	qib_dbg_ibdev_exit(&dd->verbs_dev);
+#endif
 	ib_dealloc_device(&dd->verbs_dev.ibdev);
 }
 
@@ -1066,6 +1102,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 		goto bail;
 	}
 
+#ifdef CONFIG_DEBUG_FS
+	qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
+
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&qib_devs_lock, flags);
 
@@ -1081,6 +1121,9 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	if (ret < 0) {
 		qib_early_err(&pdev->dev,
 			      "Could not allocate unit ID: error %d\n", -ret);
+#ifdef CONFIG_DEBUG_FS
+		qib_dbg_ibdev_exit(&dd->verbs_dev);
+#endif
 		ib_dealloc_device(&dd->verbs_dev.ibdev);
 		dd = ERR_PTR(ret);
 		goto bail;
@@ -1158,6 +1201,35 @@ struct pci_driver qib_driver = {
 	.err_handler = &qib_pci_err_handler,
 };
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+	.notifier_call  = qib_notify_dca,
+	.next           = NULL,
+	.priority       = 0
+};
+
+static int qib_notify_dca_device(struct device *device, void *data)
+{
+	struct qib_devdata *dd = dev_get_drvdata(device);
+	unsigned long event = *(unsigned long *)data;
+
+	return dd->f_notify_dca(dd, event);
+}
+
+static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
+					  void *p)
+{
+	int rval;
+
+	rval = driver_for_each_device(&qib_driver.driver, NULL,
+				      &event, qib_notify_dca_device);
+	return rval ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+#endif
+
 /*
  * Do all the generic driver unit- and chip-independent memory
  * allocation and initialization.
@@ -1170,22 +1242,22 @@ static int __init qlogic_ib_init(void)
 	if (ret)
 		goto bail;
 
-	qib_cq_wq = create_singlethread_workqueue("qib_cq");
-	if (!qib_cq_wq) {
-		ret = -ENOMEM;
-		goto bail_dev;
-	}
-
 	/*
 	 * These must be called before the driver is registered with
 	 * the PCI subsystem.
 	 */
 	idr_init(&qib_unit_table);
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dca_register_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+	qib_dbg_init();
+#endif
 	ret = pci_register_driver(&qib_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
-		goto bail_unit;
+		goto bail_dev;
 	}
 
 	/* not fatal if it doesn't work */
@@ -1193,10 +1265,14 @@ static int __init qlogic_ib_init(void)
 		pr_err("Unable to register ipathfs\n");
 	goto bail; /* all OK */
 
-bail_unit:
-	idr_destroy(&qib_unit_table);
-	destroy_workqueue(qib_cq_wq);
 bail_dev:
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+	qib_dbg_exit();
+#endif
+	idr_destroy(&qib_unit_table);
 	qib_dev_cleanup();
 bail:
 	return ret;
@@ -1217,9 +1293,13 @@ static void __exit qlogic_ib_cleanup(void)
 			"Unable to cleanup counter filesystem: error %d\n",
 			-ret);
 
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	dca_unregister_notify(&dca_notifier);
+#endif
 	pci_unregister_driver(&qib_driver);
-
-	destroy_workqueue(qib_cq_wq);
+#ifdef CONFIG_DEBUG_FS
+	qib_dbg_exit();
+#endif
 
 	qib_cpulist_count = 0;
 	kfree(qib_cpulist);
@@ -1270,7 +1350,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
 	if (dd->pageshadow) {
 		struct page **tmpp = dd->pageshadow;
 		dma_addr_t *tmpd = dd->physshadow;
-		int i, cnt = 0;
+		int i;
 
 		for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
 			int ctxt_tidbase = ctxt * dd->rcvtidcnt;
@@ -1283,13 +1363,13 @@ static void cleanup_device_data(struct qib_devdata *dd)
 					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
 				qib_release_user_pages(&tmpp[i], 1);
 				tmpp[i] = NULL;
-				cnt++;
 			}
 		}
 
-		tmpp = dd->pageshadow;
 		dd->pageshadow = NULL;
 		vfree(tmpp);
+		dd->physshadow = NULL;
+		vfree(tmpd);
 	}
 
 	/*
@@ -1311,6 +1391,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
 	}
 	kfree(tmp);
 	kfree(dd->boardname);
+	qib_cq_exit(dd);
 }
 
 /*
@@ -1483,6 +1564,7 @@ static void qib_remove_one(struct pci_dev *pdev)
 int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 {
 	unsigned amt;
+	int old_node_id;
 
 	if (!rcd->rcvhdrq) {
 		dma_addr_t phys_hdrqtail;
@@ -1492,9 +1574,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 			    sizeof(u32), PAGE_SIZE);
 		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
 			GFP_USER : GFP_KERNEL;
+
+		old_node_id = dev_to_node(&dd->pcidev->dev);
+		set_dev_node(&dd->pcidev->dev, rcd->node_id);
 		rcd->rcvhdrq = dma_alloc_coherent(
 			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
 			gfp_flags | __GFP_COMP);
+		set_dev_node(&dd->pcidev->dev, old_node_id);
 
 		if (!rcd->rcvhdrq) {
 			qib_dev_err(dd,
@@ -1510,9 +1596,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 		}
 
 		if (!(dd->flags & QIB_NODMA_RTAIL)) {
+			set_dev_node(&dd->pcidev->dev, rcd->node_id);
 			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
 				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
 				gfp_flags);
+			set_dev_node(&dd->pcidev->dev, old_node_id);
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
 			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1556,6 +1644,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
 	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
 	size_t size;
 	gfp_t gfp_flags;
+	int old_node_id;
 
 	/*
 	 * GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1574,25 +1663,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
 	size = rcd->rcvegrbuf_size;
 	if (!rcd->rcvegrbuf) {
 		rcd->rcvegrbuf =
-			kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
-				GFP_KERNEL);
+			kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
+				GFP_KERNEL, rcd->node_id);
 		if (!rcd->rcvegrbuf)
 			goto bail;
 	}
 	if (!rcd->rcvegrbuf_phys) {
 		rcd->rcvegrbuf_phys =
-			kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
-				GFP_KERNEL);
+			kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
+				GFP_KERNEL, rcd->node_id);
 		if (!rcd->rcvegrbuf_phys)
 			goto bail_rcvegrbuf;
 	}
 	for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
 		if (rcd->rcvegrbuf[e])
 			continue;
+
+		old_node_id = dev_to_node(&dd->pcidev->dev);
+		set_dev_node(&dd->pcidev->dev, rcd->node_id);
 		rcd->rcvegrbuf[e] =
 			dma_alloc_coherent(&dd->pcidev->dev, size,
 					   &rcd->rcvegrbuf_phys[e],
 					   gfp_flags);
+		set_dev_node(&dd->pcidev->dev, old_node_id);
 		if (!rcd->rcvegrbuf[e])
 			goto bail_rcvegrbuf_phys;
 	}
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index a6a2cc2ba260..3cca55b51e54 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation.  * All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -35,6 +35,9 @@
 #include <linux/err.h>
 #include <linux/vmalloc.h>
 #include <linux/jhash.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/seq_file.h>
+#endif
 
 #include "qib.h"
 
@@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 	unsigned long flags;
 	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
 
-	spin_lock_irqsave(&dev->qpt_lock, flags);
 	atomic_inc(&qp->refcount);
+	spin_lock_irqsave(&dev->qpt_lock, flags);
 
 	if (qp->ibqp.qp_num == 0)
 		rcu_assign_pointer(ibp->qp0, qp);
@@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 	}
 
 	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	synchronize_rcu();
 }
 
 /*
@@ -247,36 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
 	unsigned long flags;
+	int removed = 1;
 
 	spin_lock_irqsave(&dev->qpt_lock, flags);
 
 	if (rcu_dereference_protected(ibp->qp0,
 			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		atomic_dec(&qp->refcount);
 		rcu_assign_pointer(ibp->qp0, NULL);
 	} else if (rcu_dereference_protected(ibp->qp1,
 			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		atomic_dec(&qp->refcount);
 		rcu_assign_pointer(ibp->qp1, NULL);
 	} else {
 		struct qib_qp *q;
 		struct qib_qp __rcu **qpp;
 
+		removed = 0;
 		qpp = &dev->qp_table[n];
 		for (; (q = rcu_dereference_protected(*qpp,
 				lockdep_is_held(&dev->qpt_lock))) != NULL;
 				qpp = &q->next)
 			if (q == qp) {
-				atomic_dec(&qp->refcount);
 				rcu_assign_pointer(*qpp,
 					rcu_dereference_protected(qp->next,
 					 lockdep_is_held(&dev->qpt_lock)));
+				removed = 1;
 				break;
 			}
 	}
 
 	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	synchronize_rcu();
+	if (removed) {
+		synchronize_rcu();
+		atomic_dec(&qp->refcount);
+	}
 }
 
 /**
@@ -334,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
 {
 	struct qib_qp *qp = NULL;
 
+	rcu_read_lock();
 	if (unlikely(qpn <= 1)) {
-		rcu_read_lock();
 		if (qpn == 0)
 			qp = rcu_dereference(ibp->qp0);
 		else
 			qp = rcu_dereference(ibp->qp1);
+		if (qp)
+			atomic_inc(&qp->refcount);
 	} else {
 		struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
 		unsigned n = qpn_hash(dev, qpn);
 
-		rcu_read_lock();
 		for (qp = rcu_dereference(dev->qp_table[n]); qp;
 			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn)
+			if (qp->ibqp.qp_num == qpn) {
+				atomic_inc(&qp->refcount);
 				break;
+			}
 	}
-	if (qp)
-		if (unlikely(!atomic_inc_not_zero(&qp->refcount)))
-			qp = NULL;
-
 	rcu_read_unlock();
 	return qp;
 }
@@ -1286,3 +1290,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
 		}
 	}
 }
+
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter {
+	struct qib_ibdev *dev;
+	struct qib_qp *qp;
+	int n;
+};
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
+{
+	struct qib_qp_iter *iter;
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	iter->dev = dev;
+	if (qib_qp_iter_next(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+
+	return iter;
+}
+
+int qib_qp_iter_next(struct qib_qp_iter *iter)
+{
+	struct qib_ibdev *dev = iter->dev;
+	int n = iter->n;
+	int ret = 1;
+	struct qib_qp *pqp = iter->qp;
+	struct qib_qp *qp;
+
+	rcu_read_lock();
+	for (; n < dev->qp_table_size; n++) {
+		if (pqp)
+			qp = rcu_dereference(pqp->next);
+		else
+			qp = rcu_dereference(dev->qp_table[n]);
+		pqp = qp;
+		if (qp) {
+			if (iter->qp)
+				atomic_dec(&iter->qp->refcount);
+			atomic_inc(&qp->refcount);
+			rcu_read_unlock();
+			iter->qp = qp;
+			iter->n = n;
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+	if (iter->qp)
+		atomic_dec(&iter->qp->refcount);
+	return ret;
+}
+
+static const char * const qp_type_str[] = {
+	"SMI", "GSI", "RC", "UC", "UD",
+};
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
+{
+	struct qib_swqe *wqe;
+	struct qib_qp *qp = iter->qp;
+
+	wqe = get_swqe_ptr(qp, qp->s_last);
+	seq_printf(s,
+		   "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
+		   iter->n,
+		   qp->ibqp.qp_num,
+		   qp_type_str[qp->ibqp.qp_type],
+		   qp->state,
+		   wqe->wr.opcode,
+		   qp->s_hdrwords,
+		   qp->s_flags,
+		   atomic_read(&qp->s_dma_busy),
+		   !list_empty(&qp->iowait),
+		   qp->timeout,
+		   wqe->ssn,
+		   qp->s_lsn,
+		   qp->s_last_psn,
+		   qp->s_psn, qp->s_next_psn,
+		   qp->s_sending_psn, qp->s_sending_hpsn,
+		   qp->s_last, qp->s_acked, qp->s_cur,
+		   qp->s_tail, qp->s_head, qp->s_size,
+		   qp->remote_qpn,
+		   qp->remote_ah_attr.dlid);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc514431212..32162d355370 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -708,6 +708,62 @@ unlock:
 	return ret;
 }
 
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+void dump_sdma_state(struct qib_pportdata *ppd)
+{
+	struct qib_sdma_desc *descq;
+	struct qib_sdma_txreq *txp, *txpnext;
+	__le64 *descqp;
+	u64 desc[2];
+	dma_addr_t addr;
+	u16 gen, dwlen, dwoffset;
+	u16 head, tail, cnt;
+
+	head = ppd->sdma_descq_head;
+	tail = ppd->sdma_descq_tail;
+	cnt = qib_sdma_descq_freecnt(ppd);
+	descq = ppd->sdma_descq;
+
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA ppd->sdma_descq_head: %u\n", head);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA ppd->sdma_descq_tail: %u\n", tail);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA sdma_descq_freecnt: %u\n", cnt);
+
+	/* print info for each entry in the descriptor queue */
+	while (head != tail) {
+		char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
+
+		descqp = &descq[head].qw[0];
+		desc[0] = le64_to_cpu(descqp[0]);
+		desc[1] = le64_to_cpu(descqp[1]);
+		flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
+		flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
+		flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
+		flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
+		flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
+		addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+		gen = (desc[0] >> 30) & 3ULL;
+		dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+		dwoffset = (desc[0] & 0x7ffULL) << 2;
+		qib_dev_porterr(ppd->dd, ppd->port,
+			"SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+			 head, flags, addr, gen, dwlen, dwoffset);
+		if (++head == ppd->sdma_descq_cnt)
+			head = 0;
+	}
+
+	/* print dma descriptor indices from the TX requests */
+	list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+				 list)
+		qib_dev_porterr(ppd->dd, ppd->port,
+			"SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+			txp->start_idx, txp->next_descq_idx);
+}
+
 void qib_sdma_process_event(struct qib_pportdata *ppd,
 	enum qib_sdma_events event)
 {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 904c384aa361..092b0bb1bb78 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 	} else
 		goto drop;
 
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-	ibp->opstats[opcode & 0x7f].n_bytes += tlen;
-	ibp->opstats[opcode & 0x7f].n_packets++;
+	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
+#ifdef CONFIG_DEBUG_FS
+	rcd->opstats->stats[opcode].n_bytes += tlen;
+	rcd->opstats->stats[opcode].n_packets++;
+#endif
 
 	/* Get the destination QP number. */
 	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index aff8b2c17886..012e2c7575ad 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -41,6 +41,7 @@
 #include <linux/interrupt.h>
 #include <linux/kref.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/completion.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
@@ -267,7 +268,8 @@ struct qib_cq_wc {
  */
 struct qib_cq {
 	struct ib_cq ibcq;
-	struct work_struct comptask;
+	struct kthread_work comptask;
+	struct qib_devdata *dd;
 	spinlock_t lock; /* protect changes in this struct */
 	u8 notify;
 	u8 triggered;
@@ -658,6 +660,10 @@ struct qib_opcode_stats {
 	u64 n_bytes;            /* total number of bytes */
 };
 
+struct qib_opcode_stats_perctx {
+	struct qib_opcode_stats stats[128];
+};
+
 struct qib_ibport {
 	struct qib_qp __rcu *qp0;
 	struct qib_qp __rcu *qp1;
@@ -724,7 +730,6 @@ struct qib_ibport {
 	u8 vl_high_limit;
 	u8 sl_to_vl[16];
 
-	struct qib_opcode_stats opstats[128];
 };
 
 
@@ -768,6 +773,10 @@ struct qib_ibdev {
 	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
 	spinlock_t n_mcast_grps_lock;
+#ifdef CONFIG_DEBUG_FS
+	/* per HCA debugfs */
+	struct dentry *qib_ibdev_dbg;
+#endif
 };
 
 struct qib_verbs_counters {
@@ -832,8 +841,6 @@ static inline int qib_send_ok(struct qib_qp *qp)
 		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
 }
 
-extern struct workqueue_struct *qib_cq_wq;
-
 /*
  * This must be called with s_lock held.
  */
@@ -910,6 +917,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
 
 void qib_free_qpn_table(struct qib_qpn_table *qpt);
 
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter;
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
+
+int qib_qp_iter_next(struct qib_qp_iter *iter);
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
+
+#endif
+
 void qib_get_credit(struct qib_qp *qp, u32 aeth);
 
 unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -972,6 +991,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int qib_destroy_srq(struct ib_srq *ibsrq);
 
+int qib_cq_init(struct qib_devdata *dd);
+
+void qib_cq_exit(struct qib_devdata *dd);
+
 void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
 
 int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7ccf3284dda3..f93baf8254c4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -53,8 +53,8 @@
 
 #define DRV_NAME	"ib_srp"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"0.2"
-#define DRV_RELDATE	"November 1, 2005"
+#define DRV_VERSION	"1.0"
+#define DRV_RELDATE	"July 1, 2013"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
@@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
 		return -ENOMEM;
 
 	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-			       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
+			       srp_recv_completion, NULL, target, SRP_RQ_SIZE,
+			       target->comp_vector);
 	if (IS_ERR(recv_cq)) {
 		ret = PTR_ERR(recv_cq);
 		goto err;
 	}
 
 	send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-			       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
+			       srp_send_completion, NULL, target, SRP_SQ_SIZE,
+			       target->comp_vector);
 	if (IS_ERR(send_cq)) {
 		ret = PTR_ERR(send_cq);
 		goto err_recv_cq;
@@ -542,11 +544,11 @@ static void srp_remove_work(struct work_struct *work)
 
 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
 
+	srp_remove_target(target);
+
 	spin_lock(&target->srp_host->target_lock);
 	list_del(&target->list);
 	spin_unlock(&target->srp_host->target_lock);
-
-	srp_remove_target(target);
 }
 
 static void srp_rport_delete(struct srp_rport *rport)
@@ -1744,18 +1746,24 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 {
 	struct srp_target_port *target = host_to_target(scmnd->device->host);
 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
+	int ret;
 
 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
 
 	if (!req || !srp_claim_req(target, req, scmnd))
 		return FAILED;
-	srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
-			  SRP_TSK_ABORT_TASK);
+	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
+			      SRP_TSK_ABORT_TASK) == 0)
+		ret = SUCCESS;
+	else if (target->transport_offline)
+		ret = FAST_IO_FAIL;
+	else
+		ret = FAILED;
 	srp_free_req(target, req, scmnd, 0);
 	scmnd->result = DID_ABORT << 16;
 	scmnd->scsi_done(scmnd);
 
-	return SUCCESS;
+	return ret;
 }
 
 static int srp_reset_device(struct scsi_cmnd *scmnd)
@@ -1891,6 +1899,14 @@ static ssize_t show_local_ib_device(struct device *dev,
 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
 }
 
+static ssize_t show_comp_vector(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+	return sprintf(buf, "%d\n", target->comp_vector);
+}
+
 static ssize_t show_cmd_sg_entries(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -1917,6 +1933,7 @@ static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
+static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
 
@@ -1931,6 +1948,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_zero_req_lim,
 	&dev_attr_local_ib_port,
 	&dev_attr_local_ib_device,
+	&dev_attr_comp_vector,
 	&dev_attr_cmd_sg_entries,
 	&dev_attr_allow_ext_sg,
 	NULL
@@ -1946,6 +1964,7 @@ static struct scsi_host_template srp_template = {
 	.eh_abort_handler		= srp_abort,
 	.eh_device_reset_handler	= srp_reset_device,
 	.eh_host_reset_handler		= srp_reset_host,
+	.skip_settle_delay		= true,
 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
 	.can_queue			= SRP_CMD_SQ_SIZE,
 	.this_id			= -1,
@@ -2001,6 +2020,36 @@ static struct class srp_class = {
 	.dev_release = srp_release_dev
 };
 
+/**
+ * srp_conn_unique() - check whether the connection to a target is unique
+ */
+static bool srp_conn_unique(struct srp_host *host,
+			    struct srp_target_port *target)
+{
+	struct srp_target_port *t;
+	bool ret = false;
+
+	if (target->state == SRP_TARGET_REMOVED)
+		goto out;
+
+	ret = true;
+
+	spin_lock(&host->target_lock);
+	list_for_each_entry(t, &host->target_list, list) {
+		if (t != target &&
+		    target->id_ext == t->id_ext &&
+		    target->ioc_guid == t->ioc_guid &&
+		    target->initiator_ext == t->initiator_ext) {
+			ret = false;
+			break;
+		}
+	}
+	spin_unlock(&host->target_lock);
+
+out:
+	return ret;
+}
+
 /*
  * Target ports are added by writing
  *
@@ -2023,6 +2072,7 @@ enum {
 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
+	SRP_OPT_COMP_VECTOR	= 1 << 12,
 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
 				   SRP_OPT_IOC_GUID	|
 				   SRP_OPT_DGID		|
@@ -2043,6 +2093,7 @@ static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
+	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
 	{ SRP_OPT_ERR,			NULL 			}
 };
 
@@ -2198,6 +2249,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 			target->sg_tablesize = token;
 			break;
 
+		case SRP_OPT_COMP_VECTOR:
+			if (match_int(args, &token) || token < 0) {
+				pr_warn("bad comp_vector parameter '%s'\n", p);
+				goto out;
+			}
+			target->comp_vector = token;
+			break;
+
 		default:
 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
 				p);
@@ -2257,6 +2316,16 @@ static ssize_t srp_create_target(struct device *dev,
 	if (ret)
 		goto err;
 
+	if (!srp_conn_unique(target->srp_host, target)) {
+		shost_printk(KERN_INFO, target->scsi_host,
+			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
+			     be64_to_cpu(target->id_ext),
+			     be64_to_cpu(target->ioc_guid),
+			     be64_to_cpu(target->initiator_ext));
+		ret = -EEXIST;
+		goto err;
+	}
+
 	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
 				target->cmd_sg_cnt < target->sg_tablesize) {
 		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
@@ -2507,6 +2576,8 @@ static void srp_remove_one(struct ib_device *device)
 	struct srp_target_port *target;
 
 	srp_dev = ib_get_client_data(device, &srp_client);
+	if (!srp_dev)
+		return;
 
 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
 		device_unregister(&host->dev);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 66fbedda4571..e641088c14dc 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -156,6 +156,7 @@ struct srp_target_port {
 	char			target_name[32];
 	unsigned int		scsi_id;
 	unsigned int		sg_tablesize;
+	int			comp_vector;
 
 	struct ib_sa_path_rec	path;
 	__be16			orig_dgid[8];
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
index bcdbc14aeff0..8cf7563a8d92 100644
--- a/drivers/net/ethernet/mellanox/Kconfig
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -19,5 +19,6 @@ config NET_VENDOR_MELLANOX
 if NET_VENDOR_MELLANOX
 
 source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
+source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
 
 endif # NET_VENDOR_MELLANOX
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
index 37afb9683372..38fe32ef5e5f 100644
--- a/drivers/net/ethernet/mellanox/Makefile
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_MLX4_CORE) += mlx4/
+obj-$(CONFIG_MLX5_CORE) += mlx5/core/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644
index 000000000000..21962828925a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -0,0 +1,18 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX5_CORE
+	tristate
+	depends on PCI && X86
+	default n
+
+config MLX5_DEBUG
+	bool "Verbose debugging output" if (MLX5_CORE && EXPERT)
+	depends on MLX5_CORE
+	default y
+	---help---
+	  This option causes debugging code to be compiled into the
+	  mlx5_core driver.  The output can be turned on via the
+	  debug_mask module parameter (which can also be set after
+	  the driver is loaded through sysfs).
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644
index 000000000000..105780bb980b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_MLX5_CORE)		+= mlx5_core.o
+
+mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
+		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
+		mad.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644
index 000000000000..b215742b842f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.  If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
+		   struct mlx5_buf *buf)
+{
+	dma_addr_t t;
+
+	buf->size = size;
+	if (size <= max_direct) {
+		buf->nbufs        = 1;
+		buf->npages       = 1;
+		buf->page_shift   = get_order(size) + PAGE_SHIFT;
+		buf->direct.buf   = dma_zalloc_coherent(&dev->pdev->dev,
+							size, &t, GFP_KERNEL);
+		if (!buf->direct.buf)
+			return -ENOMEM;
+
+		buf->direct.map = t;
+
+		while (t & ((1 << buf->page_shift) - 1)) {
+			--buf->page_shift;
+			buf->npages *= 2;
+		}
+	} else {
+		int i;
+
+		buf->direct.buf  = NULL;
+		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+		buf->npages      = buf->nbufs;
+		buf->page_shift  = PAGE_SHIFT;
+		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
+					   GFP_KERNEL);
+		if (!buf->page_list)
+			return -ENOMEM;
+
+		for (i = 0; i < buf->nbufs; i++) {
+			buf->page_list[i].buf =
+				dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+						    &t, GFP_KERNEL);
+			if (!buf->page_list[i].buf)
+				goto err_free;
+
+			buf->page_list[i].map = t;
+		}
+
+		if (BITS_PER_LONG == 64) {
+			struct page **pages;
+			pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL);
+			if (!pages)
+				goto err_free;
+			for (i = 0; i < buf->nbufs; i++)
+				pages[i] = virt_to_page(buf->page_list[i].buf);
+			buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+			kfree(pages);
+			if (!buf->direct.buf)
+				goto err_free;
+		}
+	}
+
+	return 0;
+
+err_free:
+	mlx5_buf_free(dev, buf);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+{
+	int i;
+
+	if (buf->nbufs == 1)
+		dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
+				  buf->direct.map);
+	else {
+		if (BITS_PER_LONG == 64 && buf->direct.buf)
+			vunmap(buf->direct.buf);
+
+		for (i = 0; i < buf->nbufs; i++)
+			if (buf->page_list[i].buf)
+				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+						  buf->page_list[i].buf,
+						  buf->page_list[i].map);
+		kfree(buf->page_list);
+	}
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_free);
+
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device)
+{
+	struct mlx5_db_pgdir *pgdir;
+
+	pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+	if (!pgdir)
+		return NULL;
+
+	bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE);
+	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+					    &pgdir->db_dma, GFP_KERNEL);
+	if (!pgdir->db_page) {
+		kfree(pgdir);
+		return NULL;
+	}
+
+	return pgdir;
+}
+
+static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
+				    struct mlx5_db *db)
+{
+	int offset;
+	int i;
+
+	i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE);
+	if (i >= MLX5_DB_PER_PAGE)
+		return -ENOMEM;
+
+	__clear_bit(i, pgdir->bitmap);
+
+	db->u.pgdir = pgdir;
+	db->index   = i;
+	offset = db->index * L1_CACHE_BYTES;
+	db->db      = pgdir->db_page + offset / sizeof(*pgdir->db_page);
+	db->dma     = pgdir->db_dma  + offset;
+
+	return 0;
+}
+
+int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+	struct mlx5_db_pgdir *pgdir;
+	int ret = 0;
+
+	mutex_lock(&dev->priv.pgdir_mutex);
+
+	list_for_each_entry(pgdir, &dev->priv.pgdir_list, list)
+		if (!mlx5_alloc_db_from_pgdir(pgdir, db))
+			goto out;
+
+	pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev));
+	if (!pgdir) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	list_add(&pgdir->list, &dev->priv.pgdir_list);
+
+	/* This should never fail -- we just allocated an empty page: */
+	WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
+
+out:
+	mutex_unlock(&dev->priv.pgdir_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc);
+
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+	mutex_lock(&dev->priv.pgdir_mutex);
+
+	__set_bit(db->index, db->u.pgdir->bitmap);
+
+	if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) {
+		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+				  db->u.pgdir->db_page, db->u.pgdir->db_dma);
+		list_del(&db->u.pgdir->list);
+		kfree(db->u.pgdir);
+	}
+
+	mutex_unlock(&dev->priv.pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_free);
+
+
+void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+{
+	u64 addr;
+	int i;
+
+	for (i = 0; i < buf->npages; i++) {
+		if (buf->nbufs == 1)
+			addr = buf->direct.map + (i << buf->page_shift);
+		else
+			addr = buf->page_list[i].map;
+
+		pas[i] = cpu_to_be64(addr);
+	}
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644
index 000000000000..205753a04cfc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -0,0 +1,1515 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/debugfs.h>
+
+#include "mlx5_core.h"
+
+enum {
+	CMD_IF_REV = 3,
+};
+
+enum {
+	CMD_MODE_POLLING,
+	CMD_MODE_EVENTS
+};
+
+enum {
+	NUM_LONG_LISTS	  = 2,
+	NUM_MED_LISTS	  = 64,
+	LONG_LIST_SIZE	  = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
+				MLX5_CMD_DATA_BLOCK_SIZE,
+	MED_LIST_SIZE	  = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
+};
+
+enum {
+	MLX5_CMD_DELIVERY_STAT_OK			= 0x0,
+	MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR		= 0x1,
+	MLX5_CMD_DELIVERY_STAT_TOK_ERR			= 0x2,
+	MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR		= 0x3,
+	MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR	= 0x4,
+	MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR		= 0x5,
+	MLX5_CMD_DELIVERY_STAT_FW_ERR			= 0x6,
+	MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR		= 0x7,
+	MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR		= 0x8,
+	MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR	= 0x9,
+	MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR		= 0x10,
+};
+
+enum {
+	MLX5_CMD_STAT_OK			= 0x0,
+	MLX5_CMD_STAT_INT_ERR			= 0x1,
+	MLX5_CMD_STAT_BAD_OP_ERR		= 0x2,
+	MLX5_CMD_STAT_BAD_PARAM_ERR		= 0x3,
+	MLX5_CMD_STAT_BAD_SYS_STATE_ERR		= 0x4,
+	MLX5_CMD_STAT_BAD_RES_ERR		= 0x5,
+	MLX5_CMD_STAT_RES_BUSY			= 0x6,
+	MLX5_CMD_STAT_LIM_ERR			= 0x8,
+	MLX5_CMD_STAT_BAD_RES_STATE_ERR		= 0x9,
+	MLX5_CMD_STAT_IX_ERR			= 0xa,
+	MLX5_CMD_STAT_NO_RES_ERR		= 0xf,
+	MLX5_CMD_STAT_BAD_INP_LEN_ERR		= 0x50,
+	MLX5_CMD_STAT_BAD_OUTP_LEN_ERR		= 0x51,
+	MLX5_CMD_STAT_BAD_QP_STATE_ERR		= 0x10,
+	MLX5_CMD_STAT_BAD_PKT_ERR		= 0x30,
+	MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR	= 0x40,
+};
+
+static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+					   struct mlx5_cmd_msg *in,
+					   struct mlx5_cmd_msg *out,
+					   mlx5_cmd_cbk_t cbk,
+					   void *context, int page_queue)
+{
+	gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
+	struct mlx5_cmd_work_ent *ent;
+
+	ent = kzalloc(sizeof(*ent), alloc_flags);
+	if (!ent)
+		return ERR_PTR(-ENOMEM);
+
+	ent->in		= in;
+	ent->out	= out;
+	ent->callback	= cbk;
+	ent->context	= context;
+	ent->cmd	= cmd;
+	ent->page_queue = page_queue;
+
+	return ent;
+}
+
+static u8 alloc_token(struct mlx5_cmd *cmd)
+{
+	u8 token;
+
+	spin_lock(&cmd->token_lock);
+	token = cmd->token++ % 255 + 1;
+	spin_unlock(&cmd->token_lock);
+
+	return token;
+}
+
+static int alloc_ent(struct mlx5_cmd *cmd)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cmd->alloc_lock, flags);
+	ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
+	if (ret < cmd->max_reg_cmds)
+		clear_bit(ret, &cmd->bitmask);
+	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+
+	return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
+}
+
+static void free_ent(struct mlx5_cmd *cmd, int idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->alloc_lock, flags);
+	set_bit(idx, &cmd->bitmask);
+	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+}
+
+static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+{
+	return cmd->cmd_buf + (idx << cmd->log_stride);
+}
+
+static u8 xor8_buf(void *buf, int len)
+{
+	u8 *ptr = buf;
+	u8 sum = 0;
+	int i;
+
+	for (i = 0; i < len; i++)
+		sum ^= ptr[i];
+
+	return sum;
+}
+
+static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+{
+	if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
+		return -EINVAL;
+
+	if (xor8_buf(block, sizeof(*block)) != 0xff)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token)
+{
+	block->token = token;
+	block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2);
+	block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+}
+
+static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token)
+{
+	struct mlx5_cmd_mailbox *next = msg->next;
+
+	while (next) {
+		calc_block_sig(next->buf, token);
+		next = next->next;
+	}
+}
+
+static void set_signature(struct mlx5_cmd_work_ent *ent)
+{
+	ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
+	calc_chain_sig(ent->in, ent->token);
+	calc_chain_sig(ent->out, ent->token);
+}
+
+static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+{
+	unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+	u8 own;
+
+	do {
+		own = ent->lay->status_own;
+		if (!(own & CMD_OWNER_HW)) {
+			ent->ret = 0;
+			return;
+		}
+		usleep_range(5000, 10000);
+	} while (time_before(jiffies, poll_end));
+
+	ent->ret = -ETIMEDOUT;
+}
+
+static void free_cmd(struct mlx5_cmd_work_ent *ent)
+{
+	kfree(ent);
+}
+
+
+static int verify_signature(struct mlx5_cmd_work_ent *ent)
+{
+	struct mlx5_cmd_mailbox *next = ent->out->next;
+	int err;
+	u8 sig;
+
+	sig = xor8_buf(ent->lay, sizeof(*ent->lay));
+	if (sig != 0xff)
+		return -EINVAL;
+
+	while (next) {
+		err = verify_block_sig(next->buf);
+		if (err)
+			return err;
+
+		next = next->next;
+	}
+
+	return 0;
+}
+
+static void dump_buf(void *buf, int size, int data_only, int offset)
+{
+	__be32 *p = buf;
+	int i;
+
+	for (i = 0; i < size; i += 16) {
+		pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
+			 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
+			 be32_to_cpu(p[3]));
+		p += 4;
+		offset += 16;
+	}
+	if (!data_only)
+		pr_debug("\n");
+}
+
+const char *mlx5_command_str(int command)
+{
+	switch (command) {
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+		return "QUERY_HCA_CAP";
+
+	case MLX5_CMD_OP_SET_HCA_CAP:
+		return "SET_HCA_CAP";
+
+	case MLX5_CMD_OP_QUERY_ADAPTER:
+		return "QUERY_ADAPTER";
+
+	case MLX5_CMD_OP_INIT_HCA:
+		return "INIT_HCA";
+
+	case MLX5_CMD_OP_TEARDOWN_HCA:
+		return "TEARDOWN_HCA";
+
+	case MLX5_CMD_OP_QUERY_PAGES:
+		return "QUERY_PAGES";
+
+	case MLX5_CMD_OP_MANAGE_PAGES:
+		return "MANAGE_PAGES";
+
+	case MLX5_CMD_OP_CREATE_MKEY:
+		return "CREATE_MKEY";
+
+	case MLX5_CMD_OP_QUERY_MKEY:
+		return "QUERY_MKEY";
+
+	case MLX5_CMD_OP_DESTROY_MKEY:
+		return "DESTROY_MKEY";
+
+	case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+		return "QUERY_SPECIAL_CONTEXTS";
+
+	case MLX5_CMD_OP_CREATE_EQ:
+		return "CREATE_EQ";
+
+	case MLX5_CMD_OP_DESTROY_EQ:
+		return "DESTROY_EQ";
+
+	case MLX5_CMD_OP_QUERY_EQ:
+		return "QUERY_EQ";
+
+	case MLX5_CMD_OP_CREATE_CQ:
+		return "CREATE_CQ";
+
+	case MLX5_CMD_OP_DESTROY_CQ:
+		return "DESTROY_CQ";
+
+	case MLX5_CMD_OP_QUERY_CQ:
+		return "QUERY_CQ";
+
+	case MLX5_CMD_OP_MODIFY_CQ:
+		return "MODIFY_CQ";
+
+	case MLX5_CMD_OP_CREATE_QP:
+		return "CREATE_QP";
+
+	case MLX5_CMD_OP_DESTROY_QP:
+		return "DESTROY_QP";
+
+	case MLX5_CMD_OP_RST2INIT_QP:
+		return "RST2INIT_QP";
+
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		return "INIT2RTR_QP";
+
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		return "RTR2RTS_QP";
+
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		return "RTS2RTS_QP";
+
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		return "SQERR2RTS_QP";
+
+	case MLX5_CMD_OP_2ERR_QP:
+		return "2ERR_QP";
+
+	case MLX5_CMD_OP_RTS2SQD_QP:
+		return "RTS2SQD_QP";
+
+	case MLX5_CMD_OP_SQD2RTS_QP:
+		return "SQD2RTS_QP";
+
+	case MLX5_CMD_OP_2RST_QP:
+		return "2RST_QP";
+
+	case MLX5_CMD_OP_QUERY_QP:
+		return "QUERY_QP";
+
+	case MLX5_CMD_OP_CONF_SQP:
+		return "CONF_SQP";
+
+	case MLX5_CMD_OP_MAD_IFC:
+		return "MAD_IFC";
+
+	case MLX5_CMD_OP_INIT2INIT_QP:
+		return "INIT2INIT_QP";
+
+	case MLX5_CMD_OP_SUSPEND_QP:
+		return "SUSPEND_QP";
+
+	case MLX5_CMD_OP_UNSUSPEND_QP:
+		return "UNSUSPEND_QP";
+
+	case MLX5_CMD_OP_SQD2SQD_QP:
+		return "SQD2SQD_QP";
+
+	case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET:
+		return "ALLOC_QP_COUNTER_SET";
+
+	case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET:
+		return "DEALLOC_QP_COUNTER_SET";
+
+	case MLX5_CMD_OP_QUERY_QP_COUNTER_SET:
+		return "QUERY_QP_COUNTER_SET";
+
+	case MLX5_CMD_OP_CREATE_PSV:
+		return "CREATE_PSV";
+
+	case MLX5_CMD_OP_DESTROY_PSV:
+		return "DESTROY_PSV";
+
+	case MLX5_CMD_OP_QUERY_PSV:
+		return "QUERY_PSV";
+
+	case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE:
+		return "QUERY_SIG_RULE_TABLE";
+
+	case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE:
+		return "QUERY_BLOCK_SIZE_TABLE";
+
+	case MLX5_CMD_OP_CREATE_SRQ:
+		return "CREATE_SRQ";
+
+	case MLX5_CMD_OP_DESTROY_SRQ:
+		return "DESTROY_SRQ";
+
+	case MLX5_CMD_OP_QUERY_SRQ:
+		return "QUERY_SRQ";
+
+	case MLX5_CMD_OP_ARM_RQ:
+		return "ARM_RQ";
+
+	case MLX5_CMD_OP_RESIZE_SRQ:
+		return "RESIZE_SRQ";
+
+	case MLX5_CMD_OP_ALLOC_PD:
+		return "ALLOC_PD";
+
+	case MLX5_CMD_OP_DEALLOC_PD:
+		return "DEALLOC_PD";
+
+	case MLX5_CMD_OP_ALLOC_UAR:
+		return "ALLOC_UAR";
+
+	case MLX5_CMD_OP_DEALLOC_UAR:
+		return "DEALLOC_UAR";
+
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+		return "ATTACH_TO_MCG";
+
+	case MLX5_CMD_OP_DETACH_FROM_MCG:
+		return "DETACH_FROM_MCG";
+
+	case MLX5_CMD_OP_ALLOC_XRCD:
+		return "ALLOC_XRCD";
+
+	case MLX5_CMD_OP_DEALLOC_XRCD:
+		return "DEALLOC_XRCD";
+
+	case MLX5_CMD_OP_ACCESS_REG:
+		return "MLX5_CMD_OP_ACCESS_REG";
+
+	default: return "unknown command opcode";
+	}
+}
+
+static void dump_command(struct mlx5_core_dev *dev,
+			 struct mlx5_cmd_work_ent *ent, int input)
+{
+	u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode);
+	struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+	struct mlx5_cmd_mailbox *next = msg->next;
+	int data_only;
+	int offset = 0;
+	int dump_len;
+
+	data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
+
+	if (data_only)
+		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
+				   "dump command data %s(0x%x) %s\n",
+				   mlx5_command_str(op), op,
+				   input ? "INPUT" : "OUTPUT");
+	else
+		mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
+			      mlx5_command_str(op), op,
+			      input ? "INPUT" : "OUTPUT");
+
+	if (data_only) {
+		if (input) {
+			dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+			offset += sizeof(ent->lay->in);
+		} else {
+			dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+			offset += sizeof(ent->lay->out);
+		}
+	} else {
+		dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+		offset += sizeof(*ent->lay);
+	}
+
+	while (next && offset < msg->len) {
+		if (data_only) {
+			dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
+			dump_buf(next->buf, dump_len, 1, offset);
+			offset += MLX5_CMD_DATA_BLOCK_SIZE;
+		} else {
+			mlx5_core_dbg(dev, "command block:\n");
+			dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+			offset += sizeof(struct mlx5_cmd_prot_block);
+		}
+		next = next->next;
+	}
+
+	if (data_only)
+		pr_debug("\n");
+}
+
+static void cmd_work_handler(struct work_struct *work)
+{
+	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+	struct mlx5_cmd *cmd = ent->cmd;
+	struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+	struct mlx5_cmd_layout *lay;
+	struct semaphore *sem;
+
+	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+	down(sem);
+	if (!ent->page_queue) {
+		ent->idx = alloc_ent(cmd);
+		if (ent->idx < 0) {
+			mlx5_core_err(dev, "failed to allocate command entry\n");
+			up(sem);
+			return;
+		}
+	} else {
+		ent->idx = cmd->max_reg_cmds;
+	}
+
+	ent->token = alloc_token(cmd);
+	cmd->ent_arr[ent->idx] = ent;
+	lay = get_inst(cmd, ent->idx);
+	ent->lay = lay;
+	memset(lay, 0, sizeof(*lay));
+	memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+	if (ent->in->next)
+		lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+	lay->inlen = cpu_to_be32(ent->in->len);
+	if (ent->out->next)
+		lay->out_ptr = cpu_to_be64(ent->out->next->dma);
+	lay->outlen = cpu_to_be32(ent->out->len);
+	lay->type = MLX5_PCI_CMD_XPORT;
+	lay->token = ent->token;
+	lay->status_own = CMD_OWNER_HW;
+	if (!cmd->checksum_disabled)
+		set_signature(ent);
+	dump_command(dev, ent, 1);
+	ktime_get_ts(&ent->ts1);
+
+	/* ring doorbell after the descriptor is valid */
+	wmb();
+	iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
+	mlx5_core_dbg(dev, "write 0x%x to command doorbell\n", 1 << ent->idx);
+	mmiowb();
+	if (cmd->mode == CMD_MODE_POLLING) {
+		poll_timeout(ent);
+		/* make sure we read the descriptor after ownership is SW */
+		rmb();
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+	}
+}
+
+static const char *deliv_status_to_str(u8 status)
+{
+	switch (status) {
+	case MLX5_CMD_DELIVERY_STAT_OK:
+		return "no errors";
+	case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+		return "signature error";
+	case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+		return "token error";
+	case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+		return "bad block number";
+	case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+		return "output pointer not aligned to block size";
+	case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+		return "input pointer not aligned to block size";
+	case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+		return "firmware internal error";
+	case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+		return "command input length error";
+	case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+		return "command ouput length error";
+	case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+		return "reserved fields not cleared";
+	case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+		return "bad command descriptor type";
+	default:
+		return "unknown status code";
+	}
+}
+
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+	return be16_to_cpu(hdr->opcode);
+}
+
+static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+{
+	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int err;
+
+	if (cmd->mode == CMD_MODE_POLLING) {
+		wait_for_completion(&ent->done);
+		err = ent->ret;
+	} else {
+		if (!wait_for_completion_timeout(&ent->done, timeout))
+			err = -ETIMEDOUT;
+		else
+			err = 0;
+	}
+	if (err == -ETIMEDOUT) {
+		mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+			       mlx5_command_str(msg_to_opcode(ent->in)),
+			       msg_to_opcode(ent->in));
+	}
+	mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err,
+		      deliv_status_to_str(ent->status), ent->status);
+
+	return err;
+}
+
+/*  Notes:
+ *    1. Callback functions may not sleep
+ *    2. page queue commands do not support asynchrous completion
+ */
+static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+			   struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
+			   void *context, int page_queue, u8 *status)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	struct mlx5_cmd_work_ent *ent;
+	ktime_t t1, t2, delta;
+	struct mlx5_cmd_stats *stats;
+	int err = 0;
+	s64 ds;
+	u16 op;
+
+	if (callback && page_queue)
+		return -EINVAL;
+
+	ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	if (!callback)
+		init_completion(&ent->done);
+
+	INIT_WORK(&ent->work, cmd_work_handler);
+	if (page_queue) {
+		cmd_work_handler(&ent->work);
+	} else if (!queue_work(cmd->wq, &ent->work)) {
+		mlx5_core_warn(dev, "failed to queue work\n");
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	if (!callback) {
+		err = wait_func(dev, ent);
+		if (err == -ETIMEDOUT)
+			goto out;
+
+		t1 = timespec_to_ktime(ent->ts1);
+		t2 = timespec_to_ktime(ent->ts2);
+		delta = ktime_sub(t2, t1);
+		ds = ktime_to_ns(delta);
+		op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+		if (op < ARRAY_SIZE(cmd->stats)) {
+			stats = &cmd->stats[op];
+			spin_lock(&stats->lock);
+			stats->sum += ds;
+			++stats->n;
+			spin_unlock(&stats->lock);
+		}
+		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+				   "fw exec time for %s is %lld nsec\n",
+				   mlx5_command_str(op), ds);
+		*status = ent->status;
+		free_cmd(ent);
+	}
+
+	return err;
+
+out_free:
+	free_cmd(ent);
+out:
+	return err;
+}
+
+static ssize_t dbg_write(struct file *filp, const char __user *buf,
+			 size_t count, loff_t *pos)
+{
+	struct mlx5_core_dev *dev = filp->private_data;
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	char lbuf[3];
+	int err;
+
+	if (!dbg->in_msg || !dbg->out_msg)
+		return -ENOMEM;
+
+	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+		return -EFAULT;
+
+	lbuf[sizeof(lbuf) - 1] = 0;
+
+	if (strcmp(lbuf, "go"))
+		return -EINVAL;
+
+	err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
+
+	return err ? err : count;
+}
+
+
+static const struct file_operations fops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.write	= dbg_write,
+};
+
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+{
+	struct mlx5_cmd_prot_block *block;
+	struct mlx5_cmd_mailbox *next;
+	int copy;
+
+	if (!to || !from)
+		return -ENOMEM;
+
+	copy = min_t(int, size, sizeof(to->first.data));
+	memcpy(to->first.data, from, copy);
+	size -= copy;
+	from += copy;
+
+	next = to->next;
+	while (size) {
+		if (!next) {
+			/* this is a BUG */
+			return -ENOMEM;
+		}
+
+		copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+		block = next->buf;
+		memcpy(block->data, from, copy);
+		from += copy;
+		size -= copy;
+		next = next->next;
+	}
+
+	return 0;
+}
+
+static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
+{
+	struct mlx5_cmd_prot_block *block;
+	struct mlx5_cmd_mailbox *next;
+	int copy;
+
+	if (!to || !from)
+		return -ENOMEM;
+
+	copy = min_t(int, size, sizeof(from->first.data));
+	memcpy(to, from->first.data, copy);
+	size -= copy;
+	to += copy;
+
+	next = from->next;
+	while (size) {
+		if (!next) {
+			/* this is a BUG */
+			return -ENOMEM;
+		}
+
+		copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+		block = next->buf;
+		if (xor8_buf(block, sizeof(*block)) != 0xff)
+			return -EINVAL;
+
+		memcpy(to, block->data, copy);
+		to += copy;
+		size -= copy;
+		next = next->next;
+	}
+
+	return 0;
+}
+
+static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
+					      gfp_t flags)
+{
+	struct mlx5_cmd_mailbox *mailbox;
+
+	mailbox = kmalloc(sizeof(*mailbox), flags);
+	if (!mailbox)
+		return ERR_PTR(-ENOMEM);
+
+	mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags,
+				      &mailbox->dma);
+	if (!mailbox->buf) {
+		mlx5_core_dbg(dev, "failed allocation\n");
+		kfree(mailbox);
+		return ERR_PTR(-ENOMEM);
+	}
+	memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block));
+	mailbox->next = NULL;
+
+	return mailbox;
+}
+
+static void free_cmd_box(struct mlx5_core_dev *dev,
+			 struct mlx5_cmd_mailbox *mailbox)
+{
+	pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+	kfree(mailbox);
+}
+
+static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+					       gfp_t flags, int size)
+{
+	struct mlx5_cmd_mailbox *tmp, *head = NULL;
+	struct mlx5_cmd_prot_block *block;
+	struct mlx5_cmd_msg *msg;
+	int blen;
+	int err;
+	int n;
+	int i;
+
+	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return ERR_PTR(-ENOMEM);
+
+	blen = size - min_t(int, sizeof(msg->first.data), size);
+	n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE;
+
+	for (i = 0; i < n; i++) {
+		tmp = alloc_cmd_box(dev, flags);
+		if (IS_ERR(tmp)) {
+			mlx5_core_warn(dev, "failed allocating block\n");
+			err = PTR_ERR(tmp);
+			goto err_alloc;
+		}
+
+		block = tmp->buf;
+		tmp->next = head;
+		block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+		block->block_num = cpu_to_be32(n - i - 1);
+		head = tmp;
+	}
+	msg->next = head;
+	msg->len = size;
+	return msg;
+
+err_alloc:
+	while (head) {
+		tmp = head->next;
+		free_cmd_box(dev, head);
+		head = tmp;
+	}
+	kfree(msg);
+
+	return ERR_PTR(err);
+}
+
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+				  struct mlx5_cmd_msg *msg)
+{
+	struct mlx5_cmd_mailbox *head = msg->next;
+	struct mlx5_cmd_mailbox *next;
+
+	while (head) {
+		next = head->next;
+		free_cmd_box(dev, head);
+		head = next;
+	}
+	kfree(msg);
+}
+
+static ssize_t data_write(struct file *filp, const char __user *buf,
+			  size_t count, loff_t *pos)
+{
+	struct mlx5_core_dev *dev = filp->private_data;
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	void *ptr;
+	int err;
+
+	if (*pos != 0)
+		return -EINVAL;
+
+	kfree(dbg->in_msg);
+	dbg->in_msg = NULL;
+	dbg->inlen = 0;
+
+	ptr = kzalloc(count, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	if (copy_from_user(ptr, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
+	dbg->in_msg = ptr;
+	dbg->inlen = count;
+
+	*pos = count;
+
+	return count;
+
+out:
+	kfree(ptr);
+	return err;
+}
+
+static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
+			 loff_t *pos)
+{
+	struct mlx5_core_dev *dev = filp->private_data;
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	int copy;
+
+	if (*pos)
+		return 0;
+
+	if (!dbg->out_msg)
+		return -ENOMEM;
+
+	copy = min_t(int, count, dbg->outlen);
+	if (copy_to_user(buf, dbg->out_msg, copy))
+		return -EFAULT;
+
+	*pos += copy;
+
+	return copy;
+}
+
+static const struct file_operations dfops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.write	= data_write,
+	.read	= data_read,
+};
+
+static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
+			   loff_t *pos)
+{
+	struct mlx5_core_dev *dev = filp->private_data;
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	char outlen[8];
+	int err;
+
+	if (*pos)
+		return 0;
+
+	err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(buf, &outlen, err))
+		return -EFAULT;
+
+	*pos += err;
+
+	return err;
+}
+
+static ssize_t outlen_write(struct file *filp, const char __user *buf,
+			    size_t count, loff_t *pos)
+{
+	struct mlx5_core_dev *dev = filp->private_data;
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	char outlen_str[8];
+	int outlen;
+	void *ptr;
+	int err;
+
+	if (*pos != 0 || count > 6)
+		return -EINVAL;
+
+	kfree(dbg->out_msg);
+	dbg->out_msg = NULL;
+	dbg->outlen = 0;
+
+	if (copy_from_user(outlen_str, buf, count))
+		return -EFAULT;
+
+	outlen_str[7] = 0;
+
+	err = sscanf(outlen_str, "%d", &outlen);
+	if (err < 0)
+		return err;
+
+	ptr = kzalloc(outlen, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	dbg->out_msg = ptr;
+	dbg->outlen = outlen;
+
+	*pos = count;
+
+	return count;
+}
+
+static const struct file_operations olfops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.write	= outlen_write,
+	.read	= outlen_read,
+};
+
+static void set_wqname(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+
+	snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
+		 dev_name(&dev->pdev->dev));
+}
+
+static void clean_debug_files(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+	if (!mlx5_debugfs_root)
+		return;
+
+	mlx5_cmdif_debugfs_cleanup(dev);
+	debugfs_remove_recursive(dbg->dbg_root);
+}
+
+static int create_debugfs_files(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+	int err = -ENOMEM;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+	if (!dbg->dbg_root)
+		return err;
+
+	dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
+					  dev, &dfops);
+	if (!dbg->dbg_in)
+		goto err_dbg;
+
+	dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
+					   dev, &dfops);
+	if (!dbg->dbg_out)
+		goto err_dbg;
+
+	dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
+					      dev, &olfops);
+	if (!dbg->dbg_outlen)
+		goto err_dbg;
+
+	dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
+					    &dbg->status);
+	if (!dbg->dbg_status)
+		goto err_dbg;
+
+	dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
+	if (!dbg->dbg_run)
+		goto err_dbg;
+
+	mlx5_cmdif_debugfs_init(dev);
+
+	return 0;
+
+err_dbg:
+	clean_debug_files(dev);
+	return err;
+}
+
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int i;
+
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		down(&cmd->sem);
+
+	down(&cmd->pages_sem);
+
+	flush_workqueue(cmd->wq);
+
+	cmd->mode = CMD_MODE_EVENTS;
+
+	up(&cmd->pages_sem);
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		up(&cmd->sem);
+}
+
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int i;
+
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		down(&cmd->sem);
+
+	down(&cmd->pages_sem);
+
+	flush_workqueue(cmd->wq);
+	cmd->mode = CMD_MODE_POLLING;
+
+	up(&cmd->pages_sem);
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		up(&cmd->sem);
+}
+
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	struct mlx5_cmd_work_ent *ent;
+	mlx5_cmd_cbk_t callback;
+	void *context;
+	int err;
+	int i;
+
+	for (i = 0; i < (1 << cmd->log_sz); i++) {
+		if (test_bit(i, &vector)) {
+			ent = cmd->ent_arr[i];
+			ktime_get_ts(&ent->ts2);
+			memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+			dump_command(dev, ent, 0);
+			if (!ent->ret) {
+				if (!cmd->checksum_disabled)
+					ent->ret = verify_signature(ent);
+				else
+					ent->ret = 0;
+				ent->status = ent->lay->status_own >> 1;
+				mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+					      ent->ret, deliv_status_to_str(ent->status), ent->status);
+			}
+			free_ent(cmd, ent->idx);
+			if (ent->callback) {
+				callback = ent->callback;
+				context = ent->context;
+				err = ent->ret;
+				free_cmd(ent);
+				callback(err, context);
+			} else {
+				complete(&ent->done);
+			}
+			if (ent->page_queue)
+				up(&cmd->pages_sem);
+			else
+				up(&cmd->sem);
+		}
+	}
+}
+EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+static int status_to_err(u8 status)
+{
+	return status ? -1 : 0; /* TBD more meaningful codes */
+}
+
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
+{
+	struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+	struct mlx5_cmd *cmd = &dev->cmd;
+	struct cache_ent *ent = NULL;
+
+	if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
+		ent = &cmd->cache.large;
+	else if (in_size > 16 && in_size <= MED_LIST_SIZE)
+		ent = &cmd->cache.med;
+
+	if (ent) {
+		spin_lock(&ent->lock);
+		if (!list_empty(&ent->head)) {
+			msg = list_entry(ent->head.next, typeof(*msg), list);
+			/* For cached lists, we must explicitly state what is
+			 * the real size
+			 */
+			msg->len = in_size;
+			list_del(&msg->list);
+		}
+		spin_unlock(&ent->lock);
+	}
+
+	if (IS_ERR(msg))
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
+
+	return msg;
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+	if (msg->cache) {
+		spin_lock(&msg->cache->lock);
+		list_add_tail(&msg->list, &msg->cache->head);
+		spin_unlock(&msg->cache->lock);
+	} else {
+		mlx5_free_cmd_msg(dev, msg);
+	}
+}
+
+static int is_manage_pages(struct mlx5_inbox_hdr *in)
+{
+	return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+}
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+		  int out_size)
+{
+	struct mlx5_cmd_msg *inb;
+	struct mlx5_cmd_msg *outb;
+	int pages_queue;
+	int err;
+	u8 status = 0;
+
+	pages_queue = is_manage_pages(in);
+
+	inb = alloc_msg(dev, in_size);
+	if (IS_ERR(inb)) {
+		err = PTR_ERR(inb);
+		return err;
+	}
+
+	err = mlx5_copy_to_msg(inb, in, in_size);
+	if (err) {
+		mlx5_core_warn(dev, "err %d\n", err);
+		goto out_in;
+	}
+
+	outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
+	if (IS_ERR(outb)) {
+		err = PTR_ERR(outb);
+		goto out_in;
+	}
+
+	err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
+	if (err)
+		goto out_out;
+
+	mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
+	if (status) {
+		err = status_to_err(status);
+		goto out_out;
+	}
+
+	err = mlx5_copy_from_msg(out, outb, out_size);
+
+out_out:
+	mlx5_free_cmd_msg(dev, outb);
+
+out_in:
+	free_msg(dev, inb);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_exec);
+
+static void destroy_msg_cache(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	struct mlx5_cmd_msg *msg;
+	struct mlx5_cmd_msg *n;
+
+	list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) {
+		list_del(&msg->list);
+		mlx5_free_cmd_msg(dev, msg);
+	}
+
+	list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) {
+		list_del(&msg->list);
+		mlx5_free_cmd_msg(dev, msg);
+	}
+}
+
+static int create_msg_cache(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	struct mlx5_cmd_msg *msg;
+	int err;
+	int i;
+
+	spin_lock_init(&cmd->cache.large.lock);
+	INIT_LIST_HEAD(&cmd->cache.large.head);
+	spin_lock_init(&cmd->cache.med.lock);
+	INIT_LIST_HEAD(&cmd->cache.med.head);
+
+	for (i = 0; i < NUM_LONG_LISTS; i++) {
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+		if (IS_ERR(msg)) {
+			err = PTR_ERR(msg);
+			goto ex_err;
+		}
+		msg->cache = &cmd->cache.large;
+		list_add_tail(&msg->list, &cmd->cache.large.head);
+	}
+
+	for (i = 0; i < NUM_MED_LISTS; i++) {
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+		if (IS_ERR(msg)) {
+			err = PTR_ERR(msg);
+			goto ex_err;
+		}
+		msg->cache = &cmd->cache.med;
+		list_add_tail(&msg->list, &cmd->cache.med.head);
+	}
+
+	return 0;
+
+ex_err:
+	destroy_msg_cache(dev);
+	return err;
+}
+
+int mlx5_cmd_init(struct mlx5_core_dev *dev)
+{
+	int size = sizeof(struct mlx5_cmd_prot_block);
+	int align = roundup_pow_of_two(size);
+	struct mlx5_cmd *cmd = &dev->cmd;
+	u32 cmd_h, cmd_l;
+	u16 cmd_if_rev;
+	int err;
+	int i;
+
+	cmd_if_rev = cmdif_rev(dev);
+	if (cmd_if_rev != CMD_IF_REV) {
+		dev_err(&dev->pdev->dev,
+			"Driver cmdif rev(%d) differs from firmware's(%d)\n",
+			CMD_IF_REV, cmd_if_rev);
+		return -EINVAL;
+	}
+
+	cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0);
+	if (!cmd->pool)
+		return -ENOMEM;
+
+	cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0);
+	if (!cmd->cmd_buf) {
+		err = -ENOMEM;
+		goto err_free_pool;
+	}
+	cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE,
+				  DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+	cmd->log_sz = cmd_l >> 4 & 0xf;
+	cmd->log_stride = cmd_l & 0xf;
+	if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
+		dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
+			1 << cmd->log_sz);
+		err = -EINVAL;
+		goto err_map;
+	}
+
+	if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) {
+		dev_err(&dev->pdev->dev, "command queue size overflow\n");
+		err = -EINVAL;
+		goto err_map;
+	}
+
+	cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
+	cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
+
+	cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+	if (cmd->cmdif_rev > CMD_IF_REV) {
+		dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
+			CMD_IF_REV, cmd->cmdif_rev);
+		err = -ENOTSUPP;
+		goto err_map;
+	}
+
+	spin_lock_init(&cmd->alloc_lock);
+	spin_lock_init(&cmd->token_lock);
+	for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+		spin_lock_init(&cmd->stats[i].lock);
+
+	sema_init(&cmd->sem, cmd->max_reg_cmds);
+	sema_init(&cmd->pages_sem, 1);
+
+	cmd_h = (u32)((u64)(cmd->dma) >> 32);
+	cmd_l = (u32)(cmd->dma);
+	if (cmd_l & 0xfff) {
+		dev_err(&dev->pdev->dev, "invalid command queue address\n");
+		err = -ENOMEM;
+		goto err_map;
+	}
+
+	iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+	iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+	/* Make sure firmware sees the complete address before we proceed */
+	wmb();
+
+	mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+	cmd->mode = CMD_MODE_POLLING;
+
+	err = create_msg_cache(dev);
+	if (err) {
+		dev_err(&dev->pdev->dev, "failed to create command cache\n");
+		goto err_map;
+	}
+
+	set_wqname(dev);
+	cmd->wq = create_singlethread_workqueue(cmd->wq_name);
+	if (!cmd->wq) {
+		dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+		err = -ENOMEM;
+		goto err_cache;
+	}
+
+	err = create_debugfs_files(dev);
+	if (err) {
+		err = -ENOMEM;
+		goto err_wq;
+	}
+
+	return 0;
+
+err_wq:
+	destroy_workqueue(cmd->wq);
+
+err_cache:
+	destroy_msg_cache(dev);
+
+err_map:
+	dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
+			 DMA_BIDIRECTIONAL);
+err_free:
+	free_pages((unsigned long)cmd->cmd_buf, 0);
+
+err_free_pool:
+	pci_pool_destroy(cmd->pool);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_init);
+
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+
+	clean_debug_files(dev);
+	destroy_workqueue(cmd->wq);
+	destroy_msg_cache(dev);
+	dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
+			 DMA_BIDIRECTIONAL);
+	free_pages((unsigned long)cmd->cmd_buf, 0);
+	pci_pool_destroy(cmd->pool);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup);
+
+static const char *cmd_status_str(u8 status)
+{
+	switch (status) {
+	case MLX5_CMD_STAT_OK:
+		return "OK";
+	case MLX5_CMD_STAT_INT_ERR:
+		return "internal error";
+	case MLX5_CMD_STAT_BAD_OP_ERR:
+		return "bad operation";
+	case MLX5_CMD_STAT_BAD_PARAM_ERR:
+		return "bad parameter";
+	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+		return "bad system state";
+	case MLX5_CMD_STAT_BAD_RES_ERR:
+		return "bad resource";
+	case MLX5_CMD_STAT_RES_BUSY:
+		return "resource busy";
+	case MLX5_CMD_STAT_LIM_ERR:
+		return "limits exceeded";
+	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+		return "bad resource state";
+	case MLX5_CMD_STAT_IX_ERR:
+		return "bad index";
+	case MLX5_CMD_STAT_NO_RES_ERR:
+		return "no resources";
+	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+		return "bad input length";
+	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+		return "bad output length";
+	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+		return "bad QP state";
+	case MLX5_CMD_STAT_BAD_PKT_ERR:
+		return "bad packet (discarded)";
+	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+		return "bad size too many outstanding CQEs";
+	default:
+		return "unknown status";
+	}
+}
+
+int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
+{
+	if (!hdr->status)
+		return 0;
+
+	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
+		cmd_status_str(hdr->status), hdr->status,
+		be32_to_cpu(hdr->syndrome));
+
+	switch (hdr->status) {
+	case MLX5_CMD_STAT_OK:				return 0;
+	case MLX5_CMD_STAT_INT_ERR:			return -EIO;
+	case MLX5_CMD_STAT_BAD_OP_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_BAD_PARAM_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_RES_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_RES_BUSY:			return -EBUSY;
+	case MLX5_CMD_STAT_LIM_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_IX_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_NO_RES_ERR:			return -EAGAIN;
+	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_BAD_PKT_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:	return -EINVAL;
+	default:					return -EIO;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644
index 000000000000..c2d660be6f76
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/cq.h>
+#include "mlx5_core.h"
+
+void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
+{
+	struct mlx5_core_cq *cq;
+	struct mlx5_cq_table *table = &dev->priv.cq_table;
+
+	spin_lock(&table->lock);
+	cq = radix_tree_lookup(&table->tree, cqn);
+	if (likely(cq))
+		atomic_inc(&cq->refcount);
+	spin_unlock(&table->lock);
+
+	if (!cq) {
+		mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
+		return;
+	}
+
+	++cq->arm_sn;
+
+	cq->comp(cq);
+
+	if (atomic_dec_and_test(&cq->refcount))
+		complete(&cq->free);
+}
+
+void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
+{
+	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_core_cq *cq;
+
+	spin_lock(&table->lock);
+
+	cq = radix_tree_lookup(&table->tree, cqn);
+	if (cq)
+		atomic_inc(&cq->refcount);
+
+	spin_unlock(&table->lock);
+
+	if (!cq) {
+		mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
+		return;
+	}
+
+	cq->event(cq, event_type);
+
+	if (atomic_dec_and_test(&cq->refcount))
+		complete(&cq->free);
+}
+
+
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+			struct mlx5_create_cq_mbox_in *in, int inlen)
+{
+	int err;
+	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_create_cq_mbox_out out;
+	struct mlx5_destroy_cq_mbox_in din;
+	struct mlx5_destroy_cq_mbox_out dout;
+
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
+	memset(&out, 0, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	cq->cqn = be32_to_cpu(out.cqn) & 0xffffff;
+	cq->cons_index = 0;
+	cq->arm_sn     = 0;
+	atomic_set(&cq->refcount, 1);
+	init_completion(&cq->free);
+
+	spin_lock_irq(&table->lock);
+	err = radix_tree_insert(&table->tree, cq->cqn, cq);
+	spin_unlock_irq(&table->lock);
+	if (err)
+		goto err_cmd;
+
+	cq->pid = current->pid;
+	err = mlx5_debug_cq_add(dev, cq);
+	if (err)
+		mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
+			      cq->cqn);
+
+	return 0;
+
+err_cmd:
+	memset(&din, 0, sizeof(din));
+	memset(&dout, 0, sizeof(dout));
+	din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
+	mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_cq);
+
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_destroy_cq_mbox_in in;
+	struct mlx5_destroy_cq_mbox_out out;
+	struct mlx5_core_cq *tmp;
+	int err;
+
+	spin_lock_irq(&table->lock);
+	tmp = radix_tree_delete(&table->tree, cq->cqn);
+	spin_unlock_irq(&table->lock);
+	if (!tmp) {
+		mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
+		return -EINVAL;
+	}
+	if (tmp != cq) {
+		mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
+		return -EINVAL;
+	}
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
+	in.cqn = cpu_to_be32(cq->cqn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	synchronize_irq(cq->irqn);
+
+	mlx5_debug_cq_remove(dev, cq);
+	if (atomic_dec_and_test(&cq->refcount))
+		complete(&cq->free);
+	wait_for_completion(&cq->free);
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+		       struct mlx5_query_cq_mbox_out *out)
+{
+	struct mlx5_query_cq_mbox_in in;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(out, 0, sizeof(*out));
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ);
+	in.cqn = cpu_to_be32(cq->cqn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+	if (err)
+		return err;
+
+	if (out->hdr.status)
+		return mlx5_cmd_status_to_err(&out->hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_cq);
+
+
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+			int type, struct mlx5_cq_modify_params *params)
+{
+	return -ENOSYS;
+}
+
+int mlx5_init_cq_table(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	int err;
+
+	spin_lock_init(&table->lock);
+	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+	err = mlx5_cq_debugfs_init(dev);
+
+	return err;
+}
+
+void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
+{
+	mlx5_cq_debugfs_cleanup(dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644
index 000000000000..4273c06e2e96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+enum {
+	QP_PID,
+	QP_STATE,
+	QP_XPORT,
+	QP_MTU,
+	QP_N_RECV,
+	QP_RECV_SZ,
+	QP_N_SEND,
+	QP_LOG_PG_SZ,
+	QP_RQPN,
+};
+
+static char *qp_fields[] = {
+	[QP_PID]	= "pid",
+	[QP_STATE]	= "state",
+	[QP_XPORT]	= "transport",
+	[QP_MTU]	= "mtu",
+	[QP_N_RECV]	= "num_recv",
+	[QP_RECV_SZ]	= "rcv_wqe_sz",
+	[QP_N_SEND]	= "num_send",
+	[QP_LOG_PG_SZ]	= "log2_page_sz",
+	[QP_RQPN]	= "remote_qpn",
+};
+
+enum {
+	EQ_NUM_EQES,
+	EQ_INTR,
+	EQ_LOG_PG_SZ,
+};
+
+static char *eq_fields[] = {
+	[EQ_NUM_EQES]	= "num_eqes",
+	[EQ_INTR]	= "intr",
+	[EQ_LOG_PG_SZ]	= "log_page_size",
+};
+
+enum {
+	CQ_PID,
+	CQ_NUM_CQES,
+	CQ_LOG_PG_SZ,
+};
+
+static char *cq_fields[] = {
+	[CQ_PID]	= "pid",
+	[CQ_NUM_CQES]	= "num_cqes",
+	[CQ_LOG_PG_SZ]	= "log_page_size",
+};
+
+struct dentry *mlx5_debugfs_root;
+EXPORT_SYMBOL(mlx5_debugfs_root);
+
+void mlx5_register_debugfs(void)
+{
+	mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
+	if (IS_ERR_OR_NULL(mlx5_debugfs_root))
+		mlx5_debugfs_root = NULL;
+}
+
+void mlx5_unregister_debugfs(void)
+{
+	debugfs_remove(mlx5_debugfs_root);
+}
+
+int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	atomic_set(&dev->num_qps, 0);
+
+	dev->priv.qp_debugfs = debugfs_create_dir("QPs",  dev->priv.dbg_root);
+	if (!dev->priv.qp_debugfs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->priv.qp_debugfs);
+}
+
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	dev->priv.eq_debugfs = debugfs_create_dir("EQs",  dev->priv.dbg_root);
+	if (!dev->priv.eq_debugfs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->priv.eq_debugfs);
+}
+
+static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+			    loff_t *pos)
+{
+	struct mlx5_cmd_stats *stats;
+	u64 field = 0;
+	int ret;
+	char tbuf[22];
+
+	if (*pos)
+		return 0;
+
+	stats = filp->private_data;
+	spin_lock(&stats->lock);
+	if (stats->n)
+		field = stats->sum / stats->n;
+	spin_unlock(&stats->lock);
+	ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+	if (ret > 0) {
+		if (copy_to_user(buf, tbuf, ret))
+			return -EFAULT;
+	}
+
+	*pos += ret;
+	return ret;
+}
+
+
+static ssize_t average_write(struct file *filp, const char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct mlx5_cmd_stats *stats;
+
+	stats = filp->private_data;
+	spin_lock(&stats->lock);
+	stats->sum = 0;
+	stats->n = 0;
+	spin_unlock(&stats->lock);
+
+	*pos += count;
+
+	return count;
+}
+
+static const struct file_operations stats_fops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.read	= average_read,
+	.write	= average_write,
+};
+
+int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_stats *stats;
+	struct dentry **cmd;
+	const char *namep;
+	int err;
+	int i;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	cmd = &dev->priv.cmdif_debugfs;
+	*cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+	if (!*cmd)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+		stats = &dev->cmd.stats[i];
+		namep = mlx5_command_str(i);
+		if (strcmp(namep, "unknown command opcode")) {
+			stats->root = debugfs_create_dir(namep, *cmd);
+			if (!stats->root) {
+				mlx5_core_warn(dev, "failed adding command %d\n",
+					       i);
+				err = -ENOMEM;
+				goto out;
+			}
+
+			stats->avg = debugfs_create_file("average", 0400,
+							 stats->root, stats,
+							 &stats_fops);
+			if (!stats->avg) {
+				mlx5_core_warn(dev, "failed creating debugfs file\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			stats->count = debugfs_create_u64("n", 0400,
+							  stats->root,
+							  &stats->n);
+			if (!stats->count) {
+				mlx5_core_warn(dev, "failed creating debugfs file\n");
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+	}
+
+	return 0;
+out:
+	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+	return err;
+}
+
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+}
+
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	dev->priv.cq_debugfs = debugfs_create_dir("CQs",  dev->priv.dbg_root);
+	if (!dev->priv.cq_debugfs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->priv.cq_debugfs);
+}
+
+static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+			 int index)
+{
+	struct mlx5_query_qp_mbox_out *out;
+	struct mlx5_qp_context *ctx;
+	u64 param = 0;
+	int err;
+	int no_sq;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return param;
+
+	err = mlx5_core_qp_query(dev, qp, out, sizeof(*out));
+	if (err) {
+		mlx5_core_warn(dev, "failed to query qp\n");
+		goto out;
+	}
+
+	ctx = &out->ctx;
+	switch (index) {
+	case QP_PID:
+		param = qp->pid;
+		break;
+	case QP_STATE:
+		param = be32_to_cpu(ctx->flags) >> 28;
+		break;
+	case QP_XPORT:
+		param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
+		break;
+	case QP_MTU:
+		param = ctx->mtu_msgmax >> 5;
+		break;
+	case QP_N_RECV:
+		param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+		break;
+	case QP_RECV_SZ:
+		param = 1 << ((ctx->rq_size_stride & 7) + 4);
+		break;
+	case QP_N_SEND:
+		no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
+		if (!no_sq)
+			param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
+		else
+			param = 0;
+		break;
+	case QP_LOG_PG_SZ:
+		param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
+		param += 12;
+		break;
+	case QP_RQPN:
+		param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+		break;
+	}
+
+out:
+	kfree(out);
+	return param;
+}
+
+static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+			 int index)
+{
+	struct mlx5_query_eq_mbox_out *out;
+	struct mlx5_eq_context *ctx;
+	u64 param = 0;
+	int err;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return param;
+
+	ctx = &out->ctx;
+
+	err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+	if (err) {
+		mlx5_core_warn(dev, "failed to query eq\n");
+		goto out;
+	}
+
+	switch (index) {
+	case EQ_NUM_EQES:
+		param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+		break;
+	case EQ_INTR:
+		param = ctx->intr;
+		break;
+	case EQ_LOG_PG_SZ:
+		param = (ctx->log_page_size & 0x1f) + 12;
+		break;
+	}
+
+out:
+	kfree(out);
+	return param;
+}
+
+static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+			 int index)
+{
+	struct mlx5_query_cq_mbox_out *out;
+	struct mlx5_cq_context *ctx;
+	u64 param = 0;
+	int err;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return param;
+
+	ctx = &out->ctx;
+
+	err = mlx5_core_query_cq(dev, cq, out);
+	if (err) {
+		mlx5_core_warn(dev, "failed to query cq\n");
+		goto out;
+	}
+
+	switch (index) {
+	case CQ_PID:
+		param = cq->pid;
+		break;
+	case CQ_NUM_CQES:
+		param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+		break;
+	case CQ_LOG_PG_SZ:
+		param = (ctx->log_pg_sz & 0x1f) + 12;
+		break;
+	}
+
+out:
+	kfree(out);
+	return param;
+}
+
+static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+			loff_t *pos)
+{
+	struct mlx5_field_desc *desc;
+	struct mlx5_rsc_debug *d;
+	char tbuf[18];
+	u64 field;
+	int ret;
+
+	if (*pos)
+		return 0;
+
+	desc = filp->private_data;
+	d = (void *)(desc - desc->i) - sizeof(*d);
+	switch (d->type) {
+	case MLX5_DBG_RSC_QP:
+		field = qp_read_field(d->dev, d->object, desc->i);
+		break;
+
+	case MLX5_DBG_RSC_EQ:
+		field = eq_read_field(d->dev, d->object, desc->i);
+		break;
+
+	case MLX5_DBG_RSC_CQ:
+		field = cq_read_field(d->dev, d->object, desc->i);
+		break;
+
+	default:
+		mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
+		return -EINVAL;
+	}
+
+	ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+	if (ret > 0) {
+		if (copy_to_user(buf, tbuf, ret))
+			return -EFAULT;
+	}
+
+	*pos += ret;
+	return ret;
+}
+
+static const struct file_operations fops = {
+	.owner	= THIS_MODULE,
+	.open	= simple_open,
+	.read	= dbg_read,
+};
+
+static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
+			struct dentry *root, struct mlx5_rsc_debug **dbg,
+			int rsn, char **field, int nfile, void *data)
+{
+	struct mlx5_rsc_debug *d;
+	char resn[32];
+	int err;
+	int i;
+
+	d = kzalloc(sizeof(*d) + nfile * sizeof(d->fields[0]), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->dev = dev;
+	d->object = data;
+	d->type = type;
+	sprintf(resn, "0x%x", rsn);
+	d->root = debugfs_create_dir(resn,  root);
+	if (!d->root) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	for (i = 0; i < nfile; i++) {
+		d->fields[i].i = i;
+		d->fields[i].dent = debugfs_create_file(field[i], 0400,
+							d->root, &d->fields[i],
+							&fops);
+		if (!d->fields[i].dent) {
+			err = -ENOMEM;
+			goto out_rem;
+		}
+	}
+	*dbg = d;
+
+	return 0;
+out_rem:
+	debugfs_remove_recursive(d->root);
+
+out_free:
+	kfree(d);
+	return err;
+}
+
+static void rem_res_tree(struct mlx5_rsc_debug *d)
+{
+	debugfs_remove_recursive(d->root);
+	kfree(d);
+}
+
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+	int err;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+			   &qp->dbg, qp->qpn, qp_fields,
+			   ARRAY_SIZE(qp_fields), qp);
+	if (err)
+		qp->dbg = NULL;
+
+	return err;
+}
+
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	if (qp->dbg)
+		rem_res_tree(qp->dbg);
+}
+
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	int err;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+			   &eq->dbg, eq->eqn, eq_fields,
+			   ARRAY_SIZE(eq_fields), eq);
+	if (err)
+		eq->dbg = NULL;
+
+	return err;
+}
+
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	if (eq->dbg)
+		rem_res_tree(eq->dbg);
+}
+
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+	int err;
+
+	if (!mlx5_debugfs_root)
+		return 0;
+
+	err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+			   &cq->dbg, cq->cqn, cq_fields,
+			   ARRAY_SIZE(cq_fields), cq);
+	if (err)
+		cq->dbg = NULL;
+
+	return err;
+}
+
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	if (cq->dbg)
+		rem_res_tree(cq->dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644
index 000000000000..c02cbcfd0fb8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
+	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
+};
+
+enum {
+	MLX5_EQ_STATE_ARMED		= 0x9,
+	MLX5_EQ_STATE_FIRED		= 0xa,
+	MLX5_EQ_STATE_ALWAYS_ARMED	= 0xb,
+};
+
+enum {
+	MLX5_NUM_SPARE_EQE	= 0x80,
+	MLX5_NUM_ASYNC_EQE	= 0x100,
+	MLX5_NUM_CMD_EQE	= 32,
+};
+
+enum {
+	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
+};
+
+#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
+			       (1ull << MLX5_EVENT_TYPE_COMM_EST)	    | \
+			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)	    | \
+			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR)	    | \
+			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
+			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
+			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
+			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)	    | \
+			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
+			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)	    | \
+			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
+
+struct map_eq_in {
+	u64	mask;
+	u32	reserved;
+	u32	unmap_eqn;
+};
+
+struct cre_des_eq {
+	u8	reserved[15];
+	u8	eqn;
+};
+
+static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
+{
+	struct mlx5_destroy_eq_mbox_in in;
+	struct mlx5_destroy_eq_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
+	in.eqn = eqn;
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (!err)
+		goto ex;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+ex:
+	return err;
+}
+
+static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static const char *eqe_type_str(u8 type)
+{
+	switch (type) {
+	case MLX5_EVENT_TYPE_COMP:
+		return "MLX5_EVENT_TYPE_COMP";
+	case MLX5_EVENT_TYPE_PATH_MIG:
+		return "MLX5_EVENT_TYPE_PATH_MIG";
+	case MLX5_EVENT_TYPE_COMM_EST:
+		return "MLX5_EVENT_TYPE_COMM_EST";
+	case MLX5_EVENT_TYPE_SQ_DRAINED:
+		return "MLX5_EVENT_TYPE_SQ_DRAINED";
+	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+	case MLX5_EVENT_TYPE_CQ_ERROR:
+		return "MLX5_EVENT_TYPE_CQ_ERROR";
+	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+	case MLX5_EVENT_TYPE_PORT_CHANGE:
+		return "MLX5_EVENT_TYPE_PORT_CHANGE";
+	case MLX5_EVENT_TYPE_GPIO_EVENT:
+		return "MLX5_EVENT_TYPE_GPIO_EVENT";
+	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+	case MLX5_EVENT_TYPE_STALL_EVENT:
+		return "MLX5_EVENT_TYPE_STALL_EVENT";
+	case MLX5_EVENT_TYPE_CMD:
+		return "MLX5_EVENT_TYPE_CMD";
+	case MLX5_EVENT_TYPE_PAGE_REQUEST:
+		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+	default:
+		return "Unrecognized event";
+	}
+}
+
+static enum mlx5_dev_event port_subtype_event(u8 subtype)
+{
+	switch (subtype) {
+	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+		return MLX5_DEV_EVENT_PORT_DOWN;
+	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+		return MLX5_DEV_EVENT_PORT_UP;
+	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+		return MLX5_DEV_EVENT_PORT_INITIALIZED;
+	case MLX5_PORT_CHANGE_SUBTYPE_LID:
+		return MLX5_DEV_EVENT_LID_CHANGE;
+	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+		return MLX5_DEV_EVENT_PKEY_CHANGE;
+	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+		return MLX5_DEV_EVENT_GUID_CHANGE;
+	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+		return MLX5_DEV_EVENT_CLIENT_REREG;
+	}
+	return -1;
+}
+
+static void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+	__raw_writel((__force u32) cpu_to_be32(val), addr);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	mb();
+}
+
+static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	struct mlx5_eqe *eqe;
+	int eqes_found = 0;
+	int set_ci = 0;
+	u32 cqn;
+	u32 srqn;
+	u8 port;
+
+	while ((eqe = next_eqe_sw(eq))) {
+		/*
+		 * Make sure we read EQ entry contents after we've
+		 * checked the ownership bit.
+		 */
+		rmb();
+
+		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type));
+		switch (eqe->type) {
+		case MLX5_EVENT_TYPE_COMP:
+			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+			mlx5_cq_completion(dev, cqn);
+			break;
+
+		case MLX5_EVENT_TYPE_PATH_MIG:
+		case MLX5_EVENT_TYPE_COMM_EST:
+		case MLX5_EVENT_TYPE_SQ_DRAINED:
+		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+			mlx5_core_dbg(dev, "event %s(%d) arrived\n",
+				      eqe_type_str(eqe->type), eqe->type);
+			mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff,
+				      eqe->type);
+			break;
+
+		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+			srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
+				      eqe_type_str(eqe->type), eqe->type, srqn);
+			mlx5_srq_event(dev, srqn, eqe->type);
+			break;
+
+		case MLX5_EVENT_TYPE_CMD:
+			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+			break;
+
+		case MLX5_EVENT_TYPE_PORT_CHANGE:
+			port = (eqe->data.port.port >> 4) & 0xf;
+			switch (eqe->sub_type) {
+			case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+			case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+			case MLX5_PORT_CHANGE_SUBTYPE_LID:
+			case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+			case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+			case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+			case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+				dev->event(dev, port_subtype_event(eqe->sub_type), &port);
+				break;
+			default:
+				mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
+					       port, eqe->sub_type);
+			}
+			break;
+		case MLX5_EVENT_TYPE_CQ_ERROR:
+			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+				       cqn, eqe->data.cq_err.syndrome);
+			mlx5_cq_event(dev, cqn, eqe->type);
+			break;
+
+		case MLX5_EVENT_TYPE_PAGE_REQUEST:
+			{
+				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+				s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
+
+				mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
+				mlx5_core_req_pages_handler(dev, func_id, npages);
+			}
+			break;
+
+
+		default:
+			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn);
+			break;
+		}
+
+		++eq->cons_index;
+		eqes_found = 1;
+		++set_ci;
+
+		/* The HCA will think the queue has overflowed if we
+		 * don't tell it we've been processing events.  We
+		 * create our EQs with MLX5_NUM_SPARE_EQE extra
+		 * entries, so we must update our consumer index at
+		 * least that often.
+		 */
+		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+			eq_update_ci(eq, 0);
+			set_ci = 0;
+		}
+	}
+
+	eq_update_ci(eq, 1);
+
+	return eqes_found;
+}
+
+static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
+{
+	struct mlx5_eq *eq = eq_ptr;
+	struct mlx5_core_dev *dev = eq->dev;
+
+	mlx5_eq_int(dev, eq);
+
+	/* MSI-X vectors always belong to us */
+	return IRQ_HANDLED;
+}
+
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+	struct mlx5_eqe *eqe;
+	int i;
+
+	for (i = 0; i < eq->nent; i++) {
+		eqe = get_eqe(eq, i);
+		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
+	}
+}
+
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+		       int nent, u64 mask, const char *name, struct mlx5_uar *uar)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_create_eq_mbox_in *in;
+	struct mlx5_create_eq_mbox_out out;
+	int err;
+	int inlen;
+
+	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
+			     &eq->buf);
+	if (err)
+		return err;
+
+	init_eq_buf(eq);
+
+	inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_buf;
+	}
+	memset(&out, 0, sizeof(out));
+
+	mlx5_fill_page_array(&eq->buf, in->pas);
+
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
+	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
+	in->ctx.intr = vecidx;
+	in->ctx.log_page_size = PAGE_SHIFT - 12;
+	in->events_mask = cpu_to_be64(mask);
+
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err)
+		goto err_in;
+
+	if (out.hdr.status) {
+		err = mlx5_cmd_status_to_err(&out.hdr);
+		goto err_in;
+	}
+
+	eq->eqn = out.eq_number;
+	err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
+			  name, eq);
+	if (err)
+		goto err_eq;
+
+	eq->irqn = vecidx;
+	eq->dev = dev;
+	eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
+
+	err = mlx5_debug_eq_add(dev, eq);
+	if (err)
+		goto err_irq;
+
+	/* EQs are created in ARMED state
+	 */
+	eq_update_ci(eq, 1);
+
+	mlx5_vfree(in);
+	return 0;
+
+err_irq:
+	free_irq(table->msix_arr[vecidx].vector, eq);
+
+err_eq:
+	mlx5_cmd_destroy_eq(dev, eq->eqn);
+
+err_in:
+	mlx5_vfree(in);
+
+err_buf:
+	mlx5_buf_free(dev, &eq->buf);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
+
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	int err;
+
+	mlx5_debug_eq_remove(dev, eq);
+	free_irq(table->msix_arr[eq->irqn].vector, eq);
+	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
+	if (err)
+		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
+			       eq->eqn);
+	mlx5_buf_free(dev, &eq->buf);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	spin_lock_init(&dev->priv.eq_table.lock);
+
+	err = mlx5_eq_debugfs_init(dev);
+
+	return err;
+}
+
+
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+{
+	mlx5_eq_debugfs_cleanup(dev);
+}
+
+int mlx5_start_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	int err;
+
+	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
+				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
+	if (err) {
+		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+		return err;
+	}
+
+	mlx5_cmd_use_events(dev);
+
+	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
+				 MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+				 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
+	if (err) {
+		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+		goto err1;
+	}
+
+	err = mlx5_create_map_eq(dev, &table->pages_eq,
+				 MLX5_EQ_VEC_PAGES,
+				 dev->caps.max_vf + 1,
+				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
+				 &dev->priv.uuari.uars[0]);
+	if (err) {
+		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+		goto err2;
+	}
+
+	return err;
+
+err2:
+	mlx5_destroy_unmap_eq(dev, &table->async_eq);
+
+err1:
+	mlx5_cmd_use_polling(dev);
+	mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+	return err;
+}
+
+int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	int err;
+
+	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+	if (err)
+		return err;
+
+	mlx5_destroy_unmap_eq(dev, &table->async_eq);
+	mlx5_cmd_use_polling(dev);
+
+	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+	if (err)
+		mlx5_cmd_use_events(dev);
+
+	return err;
+}
+
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		       struct mlx5_query_eq_mbox_out *out, int outlen)
+{
+	struct mlx5_query_eq_mbox_in in;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(out, 0, outlen);
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
+	in.eqn = eq->eqn;
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	if (err)
+		return err;
+
+	if (out->hdr.status)
+		err = mlx5_cmd_status_to_err(&out->hdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644
index 000000000000..72a5222447f5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/module.h>
+#include "mlx5_core.h"
+
+int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_query_adapter_mbox_out *out;
+	struct mlx5_cmd_query_adapter_mbox_in in;
+	int err;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	memset(&in, 0, sizeof(in));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+	if (err)
+		goto out_out;
+
+	if (out->hdr.status) {
+		err = mlx5_cmd_status_to_err(&out->hdr);
+		goto out_out;
+	}
+
+	memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid));
+
+out_out:
+	kfree(out);
+
+	return err;
+}
+
+int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
+			   struct mlx5_caps *caps)
+{
+	struct mlx5_cmd_query_hca_cap_mbox_out *out;
+	struct mlx5_cmd_query_hca_cap_mbox_in in;
+	struct mlx5_query_special_ctxs_mbox_out ctx_out;
+	struct mlx5_query_special_ctxs_mbox_in ctx_in;
+	int err;
+	u16 t16;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	memset(&in, 0, sizeof(in));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+	in.hdr.opmod  = cpu_to_be16(0x1);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+	if (err)
+		goto out_out;
+
+	if (out->hdr.status) {
+		err = mlx5_cmd_status_to_err(&out->hdr);
+		goto out_out;
+	}
+
+
+	caps->log_max_eq = out->hca_cap.log_max_eq & 0xf;
+	caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
+	caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
+	caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
+	caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
+	caps->flags = be64_to_cpu(out->hca_cap.flags);
+	caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
+	caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
+	caps->num_ports = out->hca_cap.num_ports & 0xf;
+	caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
+	if (caps->num_ports > MLX5_MAX_PORTS) {
+		mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n",
+			      caps->num_ports, MLX5_MAX_PORTS);
+		err = -EINVAL;
+		goto out_out;
+	}
+	caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
+	caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
+	caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
+	caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
+	caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
+	caps->log_max_mcg = out->hca_cap.log_max_mcg;
+	caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
+	caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
+	caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
+	caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
+	t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
+	if (t16 & 0x8000) {
+		caps->bf_reg_size = 1 << (t16 & 0x1f);
+		caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
+	} else {
+		caps->bf_reg_size = 0;
+		caps->bf_regs_per_page = 0;
+	}
+	caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1);
+
+	memset(&ctx_in, 0, sizeof(ctx_in));
+	memset(&ctx_out, 0, sizeof(ctx_out));
+	ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in),
+				 &ctx_out, sizeof(ctx_out));
+	if (err)
+		goto out_out;
+
+	if (ctx_out.hdr.status)
+		err = mlx5_cmd_status_to_err(&ctx_out.hdr);
+
+	caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey);
+
+out_out:
+	kfree(out);
+
+	return err;
+}
+
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_init_hca_mbox_in in;
+	struct mlx5_cmd_init_hca_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_teardown_hca_mbox_in in;
+	struct mlx5_cmd_teardown_hca_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644
index 000000000000..748f10a155c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+	MLX5_HEALTH_POLL_INTERVAL	= 2 * HZ,
+	MAX_MISSES			= 3,
+};
+
+enum {
+	MLX5_HEALTH_SYNDR_FW_ERR		= 0x1,
+	MLX5_HEALTH_SYNDR_IRISC_ERR		= 0x7,
+	MLX5_HEALTH_SYNDR_CRC_ERR		= 0x9,
+	MLX5_HEALTH_SYNDR_FETCH_PCI_ERR		= 0xa,
+	MLX5_HEALTH_SYNDR_HW_FTL_ERR		= 0xb,
+	MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR	= 0xc,
+	MLX5_HEALTH_SYNDR_EQ_ERR		= 0xd,
+	MLX5_HEALTH_SYNDR_FFSER_ERR		= 0xf,
+};
+
+static DEFINE_SPINLOCK(health_lock);
+
+static LIST_HEAD(health_list);
+static struct work_struct health_work;
+
+static health_handler_t reg_handler;
+int mlx5_register_health_report_handler(health_handler_t handler)
+{
+	spin_lock_irq(&health_lock);
+	if (reg_handler) {
+		spin_unlock_irq(&health_lock);
+		return -EEXIST;
+	}
+	reg_handler = handler;
+	spin_unlock_irq(&health_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_register_health_report_handler);
+
+void mlx5_unregister_health_report_handler(void)
+{
+	spin_lock_irq(&health_lock);
+	reg_handler = NULL;
+	spin_unlock_irq(&health_lock);
+}
+EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
+
+static void health_care(struct work_struct *work)
+{
+	struct mlx5_core_health *health, *n;
+	struct mlx5_core_dev *dev;
+	struct mlx5_priv *priv;
+	LIST_HEAD(tlist);
+
+	spin_lock_irq(&health_lock);
+	list_splice_init(&health_list, &tlist);
+
+	spin_unlock_irq(&health_lock);
+
+	list_for_each_entry_safe(health, n, &tlist, list) {
+		priv = container_of(health, struct mlx5_priv, health);
+		dev = container_of(priv, struct mlx5_core_dev, priv);
+		mlx5_core_warn(dev, "handling bad device here\n");
+		spin_lock_irq(&health_lock);
+		if (reg_handler)
+			reg_handler(dev->pdev, health->health,
+				    sizeof(health->health));
+
+		list_del_init(&health->list);
+		spin_unlock_irq(&health_lock);
+	}
+}
+
+static const char *hsynd_str(u8 synd)
+{
+	switch (synd) {
+	case MLX5_HEALTH_SYNDR_FW_ERR:
+		return "firmware internal error";
+	case MLX5_HEALTH_SYNDR_IRISC_ERR:
+		return "irisc not responding";
+	case MLX5_HEALTH_SYNDR_CRC_ERR:
+		return "firmware CRC error";
+	case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
+		return "ICM fetch PCI error";
+	case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
+		return "HW fatal error\n";
+	case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
+		return "async EQ buffer overrun";
+	case MLX5_HEALTH_SYNDR_EQ_ERR:
+		return "EQ error";
+	case MLX5_HEALTH_SYNDR_FFSER_ERR:
+		return "FFSER error";
+	default:
+		return "unrecognized error";
+	}
+}
+
+static u16 read_be16(__be16 __iomem *p)
+{
+	return swab16(readl((__force u16 __iomem *) p));
+}
+
+static u32 read_be32(__be32 __iomem *p)
+{
+	return swab32(readl((__force u32 __iomem *) p));
+}
+
+static void print_health_info(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct health_buffer __iomem *h = health->health;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
+		pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i));
+
+	pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr));
+	pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra));
+	pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver));
+	pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id));
+	pr_info("irisc_index %d\n", readb(&h->irisc_index));
+	pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd)));
+	pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync));
+}
+
+static void poll_health(unsigned long data)
+{
+	struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
+	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long next;
+	u32 count;
+
+	count = ioread32be(health->health_counter);
+	if (count == health->prev)
+		++health->miss_counter;
+	else
+		health->miss_counter = 0;
+
+	health->prev = count;
+	if (health->miss_counter == MAX_MISSES) {
+		mlx5_core_err(dev, "device's health compromised\n");
+		print_health_info(dev);
+		spin_lock_irq(&health_lock);
+		list_add_tail(&health->list, &health_list);
+		spin_unlock_irq(&health_lock);
+
+		queue_work(mlx5_core_wq, &health_work);
+	} else {
+		get_random_bytes(&next, sizeof(next));
+		next %= HZ;
+		next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+		mod_timer(&health->timer, next);
+	}
+}
+
+void mlx5_start_health_poll(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	INIT_LIST_HEAD(&health->list);
+	init_timer(&health->timer);
+	health->health = &dev->iseg->health;
+	health->health_counter = &dev->iseg->health_counter;
+
+	health->timer.data = (unsigned long)dev;
+	health->timer.function = poll_health;
+	health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+	add_timer(&health->timer);
+}
+
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	del_timer_sync(&health->timer);
+
+	spin_lock_irq(&health_lock);
+	if (!list_empty(&health->list))
+		list_del_init(&health->list);
+	spin_unlock_irq(&health_lock);
+}
+
+void mlx5_health_cleanup(void)
+{
+}
+
+void  __init mlx5_health_init(void)
+{
+	INIT_WORK(&health_work, health_care);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
new file mode 100644
index 000000000000..18d6fd5dd90b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
+		      u16 opmod, int port)
+{
+	struct mlx5_mad_ifc_mbox_in *in = NULL;
+	struct mlx5_mad_ifc_mbox_out *out = NULL;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	if (!out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
+	in->hdr.opmod = cpu_to_be16(opmod);
+	in->port = port;
+
+	memcpy(in->data, inb, sizeof(in->data));
+
+	err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
+	if (err)
+		goto out;
+
+	if (out->hdr.status) {
+		err = mlx5_cmd_status_to_err(&out->hdr);
+		goto out;
+	}
+
+	memcpy(outb, out->data, sizeof(out->data));
+
+out:
+	kfree(out);
+	kfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644
index 000000000000..12242de2b0e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/debugfs.h>
+#include "mlx5_core.h"
+
+#define DRIVER_NAME "mlx5_core"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_RELDATE	"June 2013"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
+MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
+
+struct workqueue_struct *mlx5_core_wq;
+
+static int set_dma_caps(struct pci_dev *pdev)
+{
+	int err;
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+			return err;
+		}
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_warn(&pdev->dev,
+			 "Warning: couldn't set 64-bit consistent PCI DMA mask.\n");
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev,
+				"Can't set consistent PCI DMA mask, aborting.\n");
+			return err;
+		}
+	}
+
+	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
+	return err;
+}
+
+static int request_bar(struct pci_dev *pdev)
+{
+	int err = 0;
+
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Missing registers BAR, aborting.\n");
+		return -ENODEV;
+	}
+
+	err = pci_request_regions(pdev, DRIVER_NAME);
+	if (err)
+		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+
+	return err;
+}
+
+static void release_bar(struct pci_dev *pdev)
+{
+	pci_release_regions(pdev);
+}
+
+static int mlx5_enable_msix(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	int num_eqs = 1 << dev->caps.log_max_eq;
+	int nvec;
+	int err;
+	int i;
+
+	nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
+	nvec = min_t(int, nvec, num_eqs);
+	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+		return -ENOMEM;
+
+	table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL);
+	if (!table->msix_arr)
+		return -ENOMEM;
+
+	for (i = 0; i < nvec; i++)
+		table->msix_arr[i].entry = i;
+
+retry:
+	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+	err = pci_enable_msix(dev->pdev, table->msix_arr, nvec);
+	if (err <= 0) {
+		return err;
+	} else if (err > 2) {
+		nvec = err;
+		goto retry;
+	}
+
+	mlx5_core_dbg(dev, "received %d MSI vectors out of %d requested\n", err, nvec);
+
+	return 0;
+}
+
+static void mlx5_disable_msix(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+
+	pci_disable_msix(dev->pdev);
+	kfree(table->msix_arr);
+}
+
+struct mlx5_reg_host_endianess {
+	u8	he;
+	u8      rsvd[15];
+};
+
+static int handle_hca_cap(struct mlx5_core_dev *dev)
+{
+	struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
+	struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
+	struct mlx5_cmd_query_hca_cap_mbox_in query_ctx;
+	struct mlx5_cmd_set_hca_cap_mbox_out set_out;
+	struct mlx5_profile *prof = dev->profile;
+	u64 flags;
+	int csum = 1;
+	int err;
+
+	memset(&query_ctx, 0, sizeof(query_ctx));
+	query_out = kzalloc(sizeof(*query_out), GFP_KERNEL);
+	if (!query_out)
+		return -ENOMEM;
+
+	set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL);
+	if (!set_ctx) {
+		err = -ENOMEM;
+		goto query_ex;
+	}
+
+	query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
+	query_ctx.hdr.opmod  = cpu_to_be16(0x1);
+	err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
+				 query_out, sizeof(*query_out));
+	if (err)
+		goto query_ex;
+
+	err = mlx5_cmd_status_to_err(&query_out->hdr);
+	if (err) {
+		mlx5_core_warn(dev, "query hca cap failed, %d\n", err);
+		goto query_ex;
+	}
+
+	memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
+	       sizeof(set_ctx->hca_cap));
+
+	if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) {
+		csum = !!prof->cmdif_csum;
+		flags = be64_to_cpu(set_ctx->hca_cap.flags);
+		if (csum)
+			flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+		else
+			flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+
+		set_ctx->hca_cap.flags = cpu_to_be64(flags);
+	}
+
+	if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
+		set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
+
+	memset(&set_out, 0, sizeof(set_out));
+	set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
+	set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
+	err = mlx5_cmd_exec(dev, set_ctx, sizeof(*set_ctx),
+				 &set_out, sizeof(set_out));
+	if (err) {
+		mlx5_core_warn(dev, "set hca cap failed, %d\n", err);
+		goto query_ex;
+	}
+
+	err = mlx5_cmd_status_to_err(&set_out.hdr);
+	if (err)
+		goto query_ex;
+
+	if (!csum)
+		dev->cmd.checksum_disabled = 1;
+
+query_ex:
+	kfree(query_out);
+	kfree(set_ctx);
+
+	return err;
+}
+
+static int set_hca_ctrl(struct mlx5_core_dev *dev)
+{
+	struct mlx5_reg_host_endianess he_in;
+	struct mlx5_reg_host_endianess he_out;
+	int err;
+
+	memset(&he_in, 0, sizeof(he_in));
+	he_in.he = MLX5_SET_HOST_ENDIANNESS;
+	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
+					&he_out, sizeof(he_out),
+					MLX5_REG_HOST_ENDIANNESS, 0, 1);
+	return err;
+}
+
+int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	int err;
+
+	dev->pdev = pdev;
+	pci_set_drvdata(dev->pdev, dev);
+	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
+	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
+
+	mutex_init(&priv->pgdir_mutex);
+	INIT_LIST_HEAD(&priv->pgdir_list);
+	spin_lock_init(&priv->mkey_lock);
+
+	priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
+	if (!priv->dbg_root)
+		return -ENOMEM;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n");
+		goto err_dbg;
+	}
+
+	err = request_bar(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "error requesting BARs, aborting.\n");
+		goto err_disable;
+	}
+
+	pci_set_master(pdev);
+
+	err = set_dma_caps(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+		goto err_clr_master;
+	}
+
+	dev->iseg_base = pci_resource_start(dev->pdev, 0);
+	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+	if (!dev->iseg) {
+		err = -ENOMEM;
+		dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+		goto err_clr_master;
+	}
+	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+		 fw_rev_min(dev), fw_rev_sub(dev));
+
+	err = mlx5_cmd_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
+		goto err_unmap;
+	}
+
+	mlx5_pagealloc_init(dev);
+	err = set_hca_ctrl(dev);
+	if (err) {
+		dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+		goto err_pagealloc_cleanup;
+	}
+
+	err = handle_hca_cap(dev);
+	if (err) {
+		dev_err(&pdev->dev, "handle_hca_cap failed\n");
+		goto err_pagealloc_cleanup;
+	}
+
+	err = mlx5_satisfy_startup_pages(dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate startup pages\n");
+		goto err_pagealloc_cleanup;
+	}
+
+	err = mlx5_pagealloc_start(dev);
+	if (err) {
+		dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
+		goto err_reclaim_pages;
+	}
+
+	err = mlx5_cmd_init_hca(dev);
+	if (err) {
+		dev_err(&pdev->dev, "init hca failed\n");
+		goto err_pagealloc_stop;
+	}
+
+	mlx5_start_health_poll(dev);
+
+	err = mlx5_cmd_query_hca_cap(dev, &dev->caps);
+	if (err) {
+		dev_err(&pdev->dev, "query hca failed\n");
+		goto err_stop_poll;
+	}
+
+	err = mlx5_cmd_query_adapter(dev);
+	if (err) {
+		dev_err(&pdev->dev, "query adapter failed\n");
+		goto err_stop_poll;
+	}
+
+	err = mlx5_enable_msix(dev);
+	if (err) {
+		dev_err(&pdev->dev, "enable msix failed\n");
+		goto err_stop_poll;
+	}
+
+	err = mlx5_eq_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize eq\n");
+		goto disable_msix;
+	}
+
+	err = mlx5_alloc_uuars(dev, &priv->uuari);
+	if (err) {
+		dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+		goto err_eq_cleanup;
+	}
+
+	err = mlx5_start_eqs(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
+		goto err_free_uar;
+	}
+
+	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
+
+	mlx5_init_cq_table(dev);
+	mlx5_init_qp_table(dev);
+	mlx5_init_srq_table(dev);
+
+	return 0;
+
+err_free_uar:
+	mlx5_free_uuars(dev, &priv->uuari);
+
+err_eq_cleanup:
+	mlx5_eq_cleanup(dev);
+
+disable_msix:
+	mlx5_disable_msix(dev);
+
+err_stop_poll:
+	mlx5_stop_health_poll(dev);
+	mlx5_cmd_teardown_hca(dev);
+
+err_pagealloc_stop:
+	mlx5_pagealloc_stop(dev);
+
+err_reclaim_pages:
+	mlx5_reclaim_startup_pages(dev);
+
+err_pagealloc_cleanup:
+	mlx5_pagealloc_cleanup(dev);
+	mlx5_cmd_cleanup(dev);
+
+err_unmap:
+	iounmap(dev->iseg);
+
+err_clr_master:
+	pci_clear_master(dev->pdev);
+	release_bar(dev->pdev);
+
+err_disable:
+	pci_disable_device(dev->pdev);
+
+err_dbg:
+	debugfs_remove(priv->dbg_root);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_dev_init);
+
+void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+
+	mlx5_cleanup_srq_table(dev);
+	mlx5_cleanup_qp_table(dev);
+	mlx5_cleanup_cq_table(dev);
+	mlx5_stop_eqs(dev);
+	mlx5_free_uuars(dev, &priv->uuari);
+	mlx5_eq_cleanup(dev);
+	mlx5_disable_msix(dev);
+	mlx5_stop_health_poll(dev);
+	mlx5_cmd_teardown_hca(dev);
+	mlx5_pagealloc_stop(dev);
+	mlx5_reclaim_startup_pages(dev);
+	mlx5_pagealloc_cleanup(dev);
+	mlx5_cmd_cleanup(dev);
+	iounmap(dev->iseg);
+	pci_clear_master(dev->pdev);
+	release_bar(dev->pdev);
+	pci_disable_device(dev->pdev);
+	debugfs_remove(priv->dbg_root);
+}
+EXPORT_SYMBOL(mlx5_dev_cleanup);
+
+static int __init init(void)
+{
+	int err;
+
+	mlx5_register_debugfs();
+	mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq");
+	if (!mlx5_core_wq) {
+		err = -ENOMEM;
+		goto err_debug;
+	}
+	mlx5_health_init();
+
+	return 0;
+
+	mlx5_health_cleanup();
+err_debug:
+	mlx5_unregister_debugfs();
+	return err;
+}
+
+static void __exit cleanup(void)
+{
+	mlx5_health_cleanup();
+	destroy_workqueue(mlx5_core_wq);
+	mlx5_unregister_debugfs();
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644
index 000000000000..44837640bd7c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+struct mlx5_attach_mcg_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			qpn;
+	__be32			rsvd;
+	u8			gid[16];
+};
+
+struct mlx5_attach_mcg_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvf[8];
+};
+
+struct mlx5_detach_mcg_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			qpn;
+	__be32			rsvd;
+	u8			gid[16];
+};
+
+struct mlx5_detach_mcg_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvf[8];
+};
+
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+	struct mlx5_attach_mcg_mbox_in in;
+	struct mlx5_attach_mcg_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
+	memcpy(in.gid, mgid, sizeof(*mgid));
+	in.qpn = cpu_to_be32(qpn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_attach_mcg);
+
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+	struct mlx5_detach_mcg_mbox_in in;
+	struct mlx5_detach_mcg_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG);
+	memcpy(in.gid, mgid, sizeof(*mgid));
+	in.qpn = cpu_to_be32(qpn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644
index 000000000000..68b74e1ae1b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_CORE_H__
+#define __MLX5_CORE_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+extern int mlx5_core_debug_mask;
+
+#define mlx5_core_dbg(dev, format, arg...)				       \
+pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,   \
+	 current->pid, ##arg)
+
+#define mlx5_core_dbg_mask(dev, mask, format, arg...)			       \
+do {									       \
+	if ((mask) & mlx5_core_debug_mask)				       \
+		pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name,       \
+			 __func__, __LINE__, current->pid, ##arg);	       \
+} while (0)
+
+#define mlx5_core_err(dev, format, arg...) \
+pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,     \
+	current->pid, ##arg)
+
+#define mlx5_core_warn(dev, format, arg...) \
+pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__,    \
+	current->pid, ##arg)
+
+enum {
+	MLX5_CMD_DATA, /* print command payload only */
+	MLX5_CMD_TIME, /* print command execution time */
+};
+
+
+int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
+			   struct mlx5_caps *caps);
+int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644
index 000000000000..5b44e2e46daf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+			  struct mlx5_create_mkey_mbox_in *in, int inlen)
+{
+	struct mlx5_create_mkey_mbox_out out;
+	int err;
+	u8 key;
+
+	memset(&out, 0, sizeof(out));
+	spin_lock(&dev->priv.mkey_lock);
+	key = dev->priv.mkey_key++;
+	spin_unlock(&dev->priv.mkey_lock);
+	in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err) {
+		mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
+		return err;
+	}
+
+	if (out.hdr.status) {
+		mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
+		return mlx5_cmd_status_to_err(&out.hdr);
+	}
+
+	mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
+	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey);
+
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
+{
+	struct mlx5_destroy_mkey_mbox_in in;
+	struct mlx5_destroy_mkey_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
+	in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_mkey);
+
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+			 struct mlx5_query_mkey_mbox_out *out, int outlen)
+{
+	struct mlx5_destroy_mkey_mbox_in in;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(out, 0, outlen);
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
+	in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	if (err)
+		return err;
+
+	if (out->hdr.status)
+		return mlx5_cmd_status_to_err(&out->hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_mkey);
+
+int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
+			     u32 *mkey)
+{
+	struct mlx5_query_special_ctxs_mbox_in in;
+	struct mlx5_query_special_ctxs_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	*mkey = be32_to_cpu(out.dump_fill_mkey);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644
index 000000000000..f0bf46339b28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+	MLX5_PAGES_CANT_GIVE	= 0,
+	MLX5_PAGES_GIVE		= 1,
+	MLX5_PAGES_TAKE		= 2
+};
+
+struct mlx5_pages_req {
+	struct mlx5_core_dev *dev;
+	u32	func_id;
+	s16	npages;
+	struct work_struct work;
+};
+
+struct fw_page {
+	struct rb_node	rb_node;
+	u64		addr;
+	struct page	*page;
+	u16		func_id;
+};
+
+struct mlx5_query_pages_inbox {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_query_pages_outbox {
+	struct mlx5_outbox_hdr	hdr;
+	u8			reserved[2];
+	__be16			func_id;
+	__be16			init_pages;
+	__be16			num_pages;
+};
+
+struct mlx5_manage_pages_inbox {
+	struct mlx5_inbox_hdr	hdr;
+	__be16			rsvd0;
+	__be16			func_id;
+	__be16			rsvd1;
+	__be16			num_entries;
+	u8			rsvd2[16];
+	__be64			pas[0];
+};
+
+struct mlx5_manage_pages_outbox {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd0[2];
+	__be16			num_entries;
+	u8			rsvd1[20];
+	__be64			pas[0];
+};
+
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+{
+	struct rb_root *root = &dev->priv.page_root;
+	struct rb_node **new = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct fw_page *nfp;
+	struct fw_page *tfp;
+
+	while (*new) {
+		parent = *new;
+		tfp = rb_entry(parent, struct fw_page, rb_node);
+		if (tfp->addr < addr)
+			new = &parent->rb_left;
+		else if (tfp->addr > addr)
+			new = &parent->rb_right;
+		else
+			return -EEXIST;
+	}
+
+	nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+	if (!nfp)
+		return -ENOMEM;
+
+	nfp->addr = addr;
+	nfp->page = page;
+	nfp->func_id = func_id;
+
+	rb_link_node(&nfp->rb_node, parent, new);
+	rb_insert_color(&nfp->rb_node, root);
+
+	return 0;
+}
+
+static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+{
+	struct rb_root *root = &dev->priv.page_root;
+	struct rb_node *tmp = root->rb_node;
+	struct page *result = NULL;
+	struct fw_page *tfp;
+
+	while (tmp) {
+		tfp = rb_entry(tmp, struct fw_page, rb_node);
+		if (tfp->addr < addr) {
+			tmp = tmp->rb_left;
+		} else if (tfp->addr > addr) {
+			tmp = tmp->rb_right;
+		} else {
+			rb_erase(&tfp->rb_node, root);
+			result = tfp->page;
+			kfree(tfp);
+			break;
+		}
+	}
+
+	return result;
+}
+
+static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+				s16 *pages, s16 *init_pages)
+{
+	struct mlx5_query_pages_inbox	in;
+	struct mlx5_query_pages_outbox	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	if (pages)
+		*pages = be16_to_cpu(out.num_pages);
+	if (init_pages)
+		*init_pages = be16_to_cpu(out.init_pages);
+	*func_id = be16_to_cpu(out.func_id);
+
+	return err;
+}
+
+static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+		      int notify_fail)
+{
+	struct mlx5_manage_pages_inbox *in;
+	struct mlx5_manage_pages_outbox out;
+	struct page *page;
+	int inlen;
+	u64 addr;
+	int err;
+	int i;
+
+	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
+		return -ENOMEM;
+	}
+	memset(&out, 0, sizeof(out));
+
+	for (i = 0; i < npages; i++) {
+		page = alloc_page(GFP_HIGHUSER);
+		if (!page) {
+			err = -ENOMEM;
+			mlx5_core_warn(dev, "failed to allocate page\n");
+			goto out_alloc;
+		}
+		addr = dma_map_page(&dev->pdev->dev, page, 0,
+				    PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&dev->pdev->dev, addr)) {
+			mlx5_core_warn(dev, "failed dma mapping page\n");
+			__free_page(page);
+			err = -ENOMEM;
+			goto out_alloc;
+		}
+		err = insert_page(dev, addr, page, func_id);
+		if (err) {
+			mlx5_core_err(dev, "failed to track allocated page\n");
+			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(page);
+			err = -ENOMEM;
+			goto out_alloc;
+		}
+		in->pas[i] = cpu_to_be64(addr);
+	}
+
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
+	in->func_id = cpu_to_be16(func_id);
+	in->num_entries = cpu_to_be16(npages);
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	mlx5_core_dbg(dev, "err %d\n", err);
+	if (err) {
+		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
+		goto out_alloc;
+	}
+	dev->priv.fw_pages += npages;
+
+	if (out.hdr.status) {
+		err = mlx5_cmd_status_to_err(&out.hdr);
+		if (err) {
+			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status);
+			goto out_alloc;
+		}
+	}
+
+	mlx5_core_dbg(dev, "err %d\n", err);
+
+	goto out_free;
+
+out_alloc:
+	if (notify_fail) {
+		memset(in, 0, inlen);
+		memset(&out, 0, sizeof(out));
+		in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+		in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
+		if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
+			mlx5_core_warn(dev, "\n");
+	}
+	for (i--; i >= 0; i--) {
+		addr = be64_to_cpu(in->pas[i]);
+		page = remove_page(dev, addr);
+		if (!page) {
+			mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
+				      addr);
+			continue;
+		}
+		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(page);
+	}
+
+out_free:
+	mlx5_vfree(in);
+	return err;
+}
+
+static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+			 int *nclaimed)
+{
+	struct mlx5_manage_pages_inbox   in;
+	struct mlx5_manage_pages_outbox *out;
+	struct page *page;
+	int num_claimed;
+	int outlen;
+	u64 addr;
+	int err;
+	int i;
+
+	memset(&in, 0, sizeof(in));
+	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
+	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
+	in.func_id = cpu_to_be16(func_id);
+	in.num_entries = cpu_to_be16(npages);
+	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	if (err) {
+		mlx5_core_err(dev, "failed recliaming pages\n");
+		goto out_free;
+	}
+	dev->priv.fw_pages -= npages;
+
+	if (out->hdr.status) {
+		err = mlx5_cmd_status_to_err(&out->hdr);
+		goto out_free;
+	}
+
+	num_claimed = be16_to_cpu(out->num_entries);
+	if (nclaimed)
+		*nclaimed = num_claimed;
+
+	for (i = 0; i < num_claimed; i++) {
+		addr = be64_to_cpu(out->pas[i]);
+		page = remove_page(dev, addr);
+		if (!page) {
+			mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
+		} else {
+			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(page);
+		}
+	}
+
+out_free:
+	mlx5_vfree(out);
+	return err;
+}
+
+static void pages_work_handler(struct work_struct *work)
+{
+	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
+	struct mlx5_core_dev *dev = req->dev;
+	int err = 0;
+
+	if (req->npages < 0)
+		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+	else if (req->npages > 0)
+		err = give_pages(dev, req->func_id, req->npages, 1);
+
+	if (err)
+		mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ?
+			       "reclaim" : "give", err);
+
+	kfree(req);
+}
+
+void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
+				 s16 npages)
+{
+	struct mlx5_pages_req *req;
+
+	req = kzalloc(sizeof(*req), GFP_ATOMIC);
+	if (!req) {
+		mlx5_core_warn(dev, "failed to allocate pages request\n");
+		return;
+	}
+
+	req->dev = dev;
+	req->func_id = func_id;
+	req->npages = npages;
+	INIT_WORK(&req->work, pages_work_handler);
+	queue_work(dev->priv.pg_wq, &req->work);
+}
+
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev)
+{
+	s16 uninitialized_var(init_pages);
+	u16 uninitialized_var(func_id);
+	int err;
+
+	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages);
+	if (err)
+		return err;
+
+	mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id);
+
+	return give_pages(dev, func_id, init_pages, 0);
+}
+
+static int optimal_reclaimed_pages(void)
+{
+	struct mlx5_cmd_prot_block *block;
+	struct mlx5_cmd_layout *lay;
+	int ret;
+
+	ret = (sizeof(lay->in) + sizeof(block->data) -
+	       sizeof(struct mlx5_manage_pages_outbox)) / 8;
+
+	return ret;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+	unsigned long end = jiffies + msecs_to_jiffies(5000);
+	struct fw_page *fwp;
+	struct rb_node *p;
+	int err;
+
+	do {
+		p = rb_first(&dev->priv.page_root);
+		if (p) {
+			fwp = rb_entry(p, struct fw_page, rb_node);
+			err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL);
+			if (err) {
+				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err);
+				return err;
+			}
+		}
+		if (time_after(jiffies, end)) {
+			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
+			break;
+		}
+	} while (p);
+
+	return 0;
+}
+
+void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+{
+	dev->priv.page_root = RB_ROOT;
+}
+
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+{
+	/* nothing */
+}
+
+int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+	if (!dev->priv.pg_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+	destroy_workqueue(dev->priv.pg_wq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644
index 000000000000..790da5c4ca4f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+struct mlx5_alloc_pd_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_alloc_pd_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	__be32			pdn;
+	u8			rsvd[4];
+};
+
+struct mlx5_dealloc_pd_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			pdn;
+	u8			rsvd[4];
+};
+
+struct mlx5_dealloc_pd_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+{
+	struct mlx5_alloc_pd_mbox_in	in;
+	struct mlx5_alloc_pd_mbox_out	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	*pdn = be32_to_cpu(out.pdn) & 0xffffff;
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_pd);
+
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+{
+	struct mlx5_dealloc_pd_mbox_in	in;
+	struct mlx5_dealloc_pd_mbox_out	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
+	in.pdn = cpu_to_be32(pdn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644
index 000000000000..f6afe7b5a675
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+			 int size_in, void *data_out, int size_out,
+			 u16 reg_num, int arg, int write)
+{
+	struct mlx5_access_reg_mbox_in *in = NULL;
+	struct mlx5_access_reg_mbox_out *out = NULL;
+	int err = -ENOMEM;
+
+	in = mlx5_vzalloc(sizeof(*in) + size_in);
+	if (!in)
+		return -ENOMEM;
+
+	out = mlx5_vzalloc(sizeof(*out) + size_out);
+	if (!out)
+		goto ex1;
+
+	memcpy(in->data, data_in, size_in);
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
+	in->hdr.opmod = cpu_to_be16(!write);
+	in->arg = cpu_to_be32(arg);
+	in->register_id = cpu_to_be16(reg_num);
+	err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
+			    sizeof(out) + size_out);
+	if (err)
+		goto ex2;
+
+	if (out->hdr.status)
+		err = mlx5_cmd_status_to_err(&out->hdr);
+
+	if (!err)
+		memcpy(data_out, out->data, size_out);
+
+ex2:
+	mlx5_vfree(out);
+ex1:
+	mlx5_vfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
+
+
+struct mlx5_reg_pcap {
+	u8			rsvd0;
+	u8			port_num;
+	u8			rsvd1[2];
+	__be32			caps_127_96;
+	__be32			caps_95_64;
+	__be32			caps_63_32;
+	__be32			caps_31_0;
+};
+
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps)
+{
+	struct mlx5_reg_pcap in;
+	struct mlx5_reg_pcap out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	in.caps_127_96 = cpu_to_be32(caps);
+	in.port_num = port_num;
+
+	err = mlx5_core_access_reg(dev, &in, sizeof(in), &out,
+				   sizeof(out), MLX5_REG_PCAP, 0, 1);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
new file mode 100644
index 000000000000..54faf8bfcaf4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type)
+{
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+	struct mlx5_core_qp *qp;
+
+	spin_lock(&table->lock);
+
+	qp = radix_tree_lookup(&table->tree, qpn);
+	if (qp)
+		atomic_inc(&qp->refcount);
+
+	spin_unlock(&table->lock);
+
+	if (!qp) {
+		mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn);
+		return;
+	}
+
+	qp->event(qp, event_type);
+
+	if (atomic_dec_and_test(&qp->refcount))
+		complete(&qp->free);
+}
+
+int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+			struct mlx5_core_qp *qp,
+			struct mlx5_create_qp_mbox_in *in,
+			int inlen)
+{
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+	struct mlx5_create_qp_mbox_out out;
+	struct mlx5_destroy_qp_mbox_in din;
+	struct mlx5_destroy_qp_mbox_out dout;
+	int err;
+
+	memset(&dout, 0, sizeof(dout));
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err) {
+		mlx5_core_warn(dev, "ret %d", err);
+		return err;
+	}
+
+	if (out.hdr.status) {
+		pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
+		return mlx5_cmd_status_to_err(&out.hdr);
+	}
+
+	qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
+	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
+
+	spin_lock_irq(&table->lock);
+	err = radix_tree_insert(&table->tree, qp->qpn, qp);
+	spin_unlock_irq(&table->lock);
+	if (err) {
+		mlx5_core_warn(dev, "err %d", err);
+		goto err_cmd;
+	}
+
+	err = mlx5_debug_qp_add(dev, qp);
+	if (err)
+		mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
+			      qp->qpn);
+
+	qp->pid = current->pid;
+	atomic_set(&qp->refcount, 1);
+	atomic_inc(&dev->num_qps);
+	init_completion(&qp->free);
+
+	return 0;
+
+err_cmd:
+	memset(&din, 0, sizeof(din));
+	memset(&dout, 0, sizeof(dout));
+	din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
+	din.qpn = cpu_to_be32(qp->qpn);
+	mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout));
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
+
+int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
+			 struct mlx5_core_qp *qp)
+{
+	struct mlx5_destroy_qp_mbox_in in;
+	struct mlx5_destroy_qp_mbox_out out;
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+	unsigned long flags;
+	int err;
+
+	mlx5_debug_qp_remove(dev, qp);
+
+	spin_lock_irqsave(&table->lock, flags);
+	radix_tree_delete(&table->tree, qp->qpn);
+	spin_unlock_irqrestore(&table->lock, flags);
+
+	if (atomic_dec_and_test(&qp->refcount))
+		complete(&qp->free);
+	wait_for_completion(&qp->free);
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
+	in.qpn = cpu_to_be32(qp->qpn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	atomic_dec(&dev->num_qps);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
+
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
+			enum mlx5_qp_state new_state,
+			struct mlx5_modify_qp_mbox_in *in, int sqd_event,
+			struct mlx5_core_qp *qp)
+{
+	static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+		[MLX5_QP_STATE_RST] = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_RST2INIT_QP,
+		},
+		[MLX5_QP_STATE_INIT]  = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_INIT2INIT_QP,
+			[MLX5_QP_STATE_RTR]	= MLX5_CMD_OP_INIT2RTR_QP,
+		},
+		[MLX5_QP_STATE_RTR]   = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTR2RTS_QP,
+		},
+		[MLX5_QP_STATE_RTS]   = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTS2RTS_QP,
+			[MLX5_QP_STATE_SQD]	= MLX5_CMD_OP_RTS2SQD_QP,
+		},
+		[MLX5_QP_STATE_SQD] = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQD2RTS_QP,
+			[MLX5_QP_STATE_SQD]	= MLX5_CMD_OP_SQD2SQD_QP,
+		},
+		[MLX5_QP_STATE_SQER] = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQERR2RTS_QP,
+		},
+		[MLX5_QP_STATE_ERR] = {
+			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
+			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
+		}
+	};
+
+	struct mlx5_modify_qp_mbox_out out;
+	int err = 0;
+	u16 op;
+
+	if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
+	    !optab[cur_state][new_state])
+		return -EINVAL;
+
+	memset(&out, 0, sizeof(out));
+	op = optab[cur_state][new_state];
+	in->hdr.opcode = cpu_to_be16(op);
+	in->qpn = cpu_to_be32(qp->qpn);
+	err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	return mlx5_cmd_status_to_err(&out.hdr);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
+
+void mlx5_init_qp_table(struct mlx5_core_dev *dev)
+{
+	struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+	spin_lock_init(&table->lock);
+	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+	mlx5_qp_debugfs_init(dev);
+}
+
+void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
+{
+	mlx5_qp_debugfs_cleanup(dev);
+}
+
+int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+		       struct mlx5_query_qp_mbox_out *out, int outlen)
+{
+	struct mlx5_query_qp_mbox_in in;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(out, 0, outlen);
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP);
+	in.qpn = cpu_to_be32(qp->qpn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	if (err)
+		return err;
+
+	if (out->hdr.status)
+		return mlx5_cmd_status_to_err(&out->hdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
+
+int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
+{
+	struct mlx5_alloc_xrcd_mbox_in in;
+	struct mlx5_alloc_xrcd_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+	else
+		*xrcdn = be32_to_cpu(out.xrcdn);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
+
+int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
+{
+	struct mlx5_dealloc_xrcd_mbox_in in;
+	struct mlx5_dealloc_xrcd_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD);
+	in.xrcdn = cpu_to_be32(xrcdn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
new file mode 100644
index 000000000000..38bce93f8314
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/srq.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
+{
+	struct mlx5_srq_table *table = &dev->priv.srq_table;
+	struct mlx5_core_srq *srq;
+
+	spin_lock(&table->lock);
+
+	srq = radix_tree_lookup(&table->tree, srqn);
+	if (srq)
+		atomic_inc(&srq->refcount);
+
+	spin_unlock(&table->lock);
+
+	if (!srq) {
+		mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
+		return;
+	}
+
+	srq->event(srq, event_type);
+
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+}
+
+struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+{
+	struct mlx5_srq_table *table = &dev->priv.srq_table;
+	struct mlx5_core_srq *srq;
+
+	spin_lock(&table->lock);
+
+	srq = radix_tree_lookup(&table->tree, srqn);
+	if (srq)
+		atomic_inc(&srq->refcount);
+
+	spin_unlock(&table->lock);
+
+	return srq;
+}
+EXPORT_SYMBOL(mlx5_core_get_srq);
+
+int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+			 struct mlx5_create_srq_mbox_in *in, int inlen)
+{
+	struct mlx5_create_srq_mbox_out out;
+	struct mlx5_srq_table *table = &dev->priv.srq_table;
+	struct mlx5_destroy_srq_mbox_in din;
+	struct mlx5_destroy_srq_mbox_out dout;
+	int err;
+
+	memset(&out, 0, sizeof(out));
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ);
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	srq->srqn = be32_to_cpu(out.srqn) & 0xffffff;
+
+	atomic_set(&srq->refcount, 1);
+	init_completion(&srq->free);
+
+	spin_lock_irq(&table->lock);
+	err = radix_tree_insert(&table->tree, srq->srqn, srq);
+	spin_unlock_irq(&table->lock);
+	if (err) {
+		mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
+		goto err_cmd;
+	}
+
+	return 0;
+
+err_cmd:
+	memset(&din, 0, sizeof(din));
+	memset(&dout, 0, sizeof(dout));
+	din.srqn = cpu_to_be32(srq->srqn);
+	din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
+	mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_srq);
+
+int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+	struct mlx5_destroy_srq_mbox_in in;
+	struct mlx5_destroy_srq_mbox_out out;
+	struct mlx5_srq_table *table = &dev->priv.srq_table;
+	struct mlx5_core_srq *tmp;
+	int err;
+
+	spin_lock_irq(&table->lock);
+	tmp = radix_tree_delete(&table->tree, srq->srqn);
+	spin_unlock_irq(&table->lock);
+	if (!tmp) {
+		mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
+		return -EINVAL;
+	}
+	if (tmp != srq) {
+		mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+		return -EINVAL;
+	}
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
+	in.srqn = cpu_to_be32(srq->srqn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+	wait_for_completion(&srq->free);
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_srq);
+
+int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+			struct mlx5_query_srq_mbox_out *out)
+{
+	struct mlx5_query_srq_mbox_in in;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(out, 0, sizeof(*out));
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ);
+	in.srqn = cpu_to_be32(srq->srqn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
+	if (err)
+		return err;
+
+	if (out->hdr.status)
+		return mlx5_cmd_status_to_err(&out->hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_srq);
+
+int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+		      u16 lwm, int is_srq)
+{
+	struct mlx5_arm_srq_mbox_in	in;
+	struct mlx5_arm_srq_mbox_out	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ);
+	in.hdr.opmod = cpu_to_be16(!!is_srq);
+	in.srqn = cpu_to_be32(srq->srqn);
+	in.lwm = cpu_to_be16(lwm);
+
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_core_arm_srq);
+
+void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+{
+	struct mlx5_srq_table *table = &dev->priv.srq_table;
+
+	spin_lock_init(&table->lock);
+	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+{
+	/* nothing */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644
index 000000000000..71d4a3937200
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+	NUM_DRIVER_UARS		= 4,
+	NUM_LOW_LAT_UUARS	= 4,
+};
+
+
+struct mlx5_alloc_uar_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_alloc_uar_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	__be32			uarn;
+	u8			rsvd[4];
+};
+
+struct mlx5_free_uar_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			uarn;
+	u8			rsvd[4];
+};
+
+struct mlx5_free_uar_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+{
+	struct mlx5_alloc_uar_mbox_in	in;
+	struct mlx5_alloc_uar_mbox_out	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		goto ex;
+
+	if (out.hdr.status) {
+		err = mlx5_cmd_status_to_err(&out.hdr);
+		goto ex;
+	}
+
+	*uarn = be32_to_cpu(out.uarn) & 0xffffff;
+
+ex:
+	return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
+
+int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+{
+	struct mlx5_free_uar_mbox_in	in;
+	struct mlx5_free_uar_mbox_out	out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR);
+	in.uarn = cpu_to_be32(uarn);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		goto ex;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+ex:
+	return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_free_uar);
+
+static int need_uuar_lock(int uuarn)
+{
+	int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
+
+	if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS)
+		return 0;
+
+	return 1;
+}
+
+int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+{
+	int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
+	struct mlx5_bf *bf;
+	phys_addr_t addr;
+	int err;
+	int i;
+
+	uuari->num_uars = NUM_DRIVER_UARS;
+	uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS;
+
+	mutex_init(&uuari->lock);
+	uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL);
+	if (!uuari->uars)
+		return -ENOMEM;
+
+	uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL);
+	if (!uuari->bfs) {
+		err = -ENOMEM;
+		goto out_uars;
+	}
+
+	uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap),
+				GFP_KERNEL);
+	if (!uuari->bitmap) {
+		err = -ENOMEM;
+		goto out_bfs;
+	}
+
+	uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL);
+	if (!uuari->count) {
+		err = -ENOMEM;
+		goto out_bitmap;
+	}
+
+	for (i = 0; i < uuari->num_uars; i++) {
+		err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index);
+		if (err)
+			goto out_count;
+
+		addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT);
+		uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
+		if (!uuari->uars[i].map) {
+			mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+			goto out_count;
+		}
+		mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
+			      uuari->uars[i].index, uuari->uars[i].map);
+	}
+
+	for (i = 0; i < tot_uuars; i++) {
+		bf = &uuari->bfs[i];
+
+		bf->buf_size = dev->caps.bf_reg_size / 2;
+		bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
+		bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
+		bf->reg = NULL; /* Add WC support */
+		bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size +
+			MLX5_BF_OFFSET;
+		bf->need_lock = need_uuar_lock(i);
+		spin_lock_init(&bf->lock);
+		spin_lock_init(&bf->lock32);
+		bf->uuarn = i;
+	}
+
+	return 0;
+
+out_count:
+	for (i--; i >= 0; i--) {
+		iounmap(uuari->uars[i].map);
+		mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+	}
+	kfree(uuari->count);
+
+out_bitmap:
+	kfree(uuari->bitmap);
+
+out_bfs:
+	kfree(uuari->bfs);
+
+out_uars:
+	kfree(uuari->uars);
+	return err;
+}
+
+int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+{
+	int i = uuari->num_uars;
+
+	for (i--; i >= 0; i--) {
+		iounmap(uuari->uars[i].map);
+		mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+	}
+
+	kfree(uuari->count);
+	kfree(uuari->bitmap);
+	kfree(uuari->bfs);
+	kfree(uuari->uars);
+
+	return 0;
+}