Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/umem.c                   |   3
-rw-r--r--  drivers/infiniband/core/umem_odp.c               |  92
-rw-r--r--  drivers/infiniband/core/umem_rbtree.c            |  21
-rw-r--r--  drivers/infiniband/core/uverbs.h                 |   1
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c             |  53
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c      |   7
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c           |   8
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c               |   1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c        |   7
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c        |   8
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c          |   1
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c                |  27
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c                  |  56
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c               |   1
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile              |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c                 |  48
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h                 |  40
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c                 |  14
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c                | 337
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h             |  46
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c                  | 128
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c                 | 505
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c                  |  91
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c                 |  11
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c     |   9
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c            |   5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c       |   9
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c      |   1
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c               |   9
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c            |   1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c      |   4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c     |   2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c   |   5
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c  |   4
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c                |   7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c            |   6
36 files changed, 1312 insertions(+), 258 deletions(-)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..446b56a5260b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (dmasync)
 		dma_attrs |= DMA_ATTR_WRITE_BARRIER;
 
-	if (!size)
-		return ERR_PTR(-EINVAL);
-
 	/*
 	 * If the combination of the addr and size requested for this memory
 	 * region causes an integer overflow, return error.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..f2fc0431512d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
 	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
 };
 
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+				  unsigned long addr,
+				  size_t size)
+{
+	struct ib_umem *umem;
+	struct ib_umem_odp *odp_data;
+	int pages = size >> PAGE_SHIFT;
+	int ret;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->context   = context;
+	umem->length    = size;
+	umem->address   = addr;
+	umem->page_size = PAGE_SIZE;
+	umem->writable  = 1;
+
+	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+	if (!odp_data) {
+		ret = -ENOMEM;
+		goto out_umem;
+	}
+	odp_data->umem = umem;
+
+	mutex_init(&odp_data->umem_mutex);
+	init_completion(&odp_data->notifier_completion);
+
+	odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+	if (!odp_data->page_list) {
+		ret = -ENOMEM;
+		goto out_odp_data;
+	}
+
+	odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+	if (!odp_data->dma_list) {
+		ret = -ENOMEM;
+		goto out_page_list;
+	}
+
+	down_write(&context->umem_rwsem);
+	context->odp_mrs_count++;
+	rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+	if (likely(!atomic_read(&context->notifier_count)))
+		odp_data->mn_counters_active = true;
+	else
+		list_add(&odp_data->no_private_counters,
+			 &context->no_private_counters);
+	up_write(&context->umem_rwsem);
+
+	umem->odp_data = odp_data;
+
+	return umem;
+
+out_page_list:
+	vfree(odp_data->page_list);
+out_odp_data:
+	kfree(odp_data);
+out_umem:
+	kfree(umem);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
 int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
 {
 	int ret_val;
@@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
 
 	init_completion(&umem->odp_data->notifier_completion);
 
-	umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+	if (ib_umem_num_pages(umem)) {
+		umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
 					    sizeof(*umem->odp_data->page_list));
-	if (!umem->odp_data->page_list) {
-		ret_val = -ENOMEM;
-		goto out_odp_data;
-	}
+		if (!umem->odp_data->page_list) {
+			ret_val = -ENOMEM;
+			goto out_odp_data;
+		}
 
-	umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+		umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
 					  sizeof(*umem->odp_data->dma_list));
-	if (!umem->odp_data->dma_list) {
-		ret_val = -ENOMEM;
-		goto out_page_list;
+		if (!umem->odp_data->dma_list) {
+			ret_val = -ENOMEM;
+			goto out_page_list;
+		}
 	}
 
 	/*
@@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
 		}
 		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
 		umem->odp_data->page_list[page_index] = page;
+		umem->npages++;
 		stored_page = 1;
 	} else if (umem->odp_data->page_list[page_index] == page) {
 		umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +573,8 @@ out:
  * for failure.
  * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
  * the function from completing its task.
- *
+ * An -ENOENT error code indicates that the userspace process is being
+ * terminated and its mm was already destroyed.
  * @umem: the umem to map and pin
  * @user_virt: the address from which we need to map.
  * @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 
 	owning_mm = get_task_mm(owning_process);
 	if (owning_mm == NULL) {
-		ret = -EINVAL;
+		ret = -ENOENT;
 		goto out_put_task;
 	}
 
@@ -665,6 +734,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 				put_page(page);
 			umem->odp_data->page_list[idx] = NULL;
 			umem->odp_data->dma_list[idx] = 0;
+			umem->npages--;
 		}
 	}
 	mutex_unlock(&umem->odp_data->umem_mutex);
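Note on the new -ENOENT return: it lets a page-fault handler tell a dying
process apart from a transient notifier race. A minimal sketch of a caller
reacting to the documented codes (illustrative only; the function name and
retry policy are assumptions, not part of this patch):

	/* Illustrative caller of ib_umem_odp_map_dma_pages(). */
	static int odp_fault_range(struct ib_umem *umem, u64 va, u64 len,
				   u64 access_mask, unsigned long seq)
	{
		int npages = ib_umem_odp_map_dma_pages(umem, va, len,
						       access_mask, seq);

		if (npages == -EAGAIN)	/* raced with an mmu notifier */
			return -EAGAIN;	/* the fault can be retried */
		if (npages == -ENOENT)	/* process exiting, mm destroyed */
			return -ENOENT;	/* drop the fault, nothing to map */
		return npages < 0 ? npages : 0;
	}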
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
 				  void *cookie)
 {
 	int ret_val = 0;
-	struct umem_odp_node *node;
+	struct umem_odp_node *node, *next;
 	struct ib_umem_odp *umem;
 
 	if (unlikely(start == last))
 		return ret_val;
 
-	for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
-			node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+	for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+			node; node = next) {
+		next = rbt_ib_umem_iter_next(node, start, last - 1);
 		umem = container_of(node, struct ib_umem_odp, interval_tree);
 		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
 	}
 
 	return ret_val;
 }
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+				       u64 addr, u64 length)
+{
+	struct umem_odp_node *node;
+
+	node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+	if (node)
+		return container_of(node, struct ib_umem_odp, interval_tree);
+	return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
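Why the successor is resolved eagerly in the hunk above: the callback may
remove the current node from the interval tree (for instance, an invalidation
that releases the umem), after which rbt_ib_umem_iter_next(node, ...) would
walk freed memory. This is the interval-tree analogue of
list_for_each_entry_safe(); the rationale is inferred from the code shape,
not stated in the patch itself:

	/* Removal-safe walk: 'next' is computed before cb() runs, so
	 * cb() may erase 'node' without breaking the traversal. */
	for (node = rbt_ib_umem_iter_first(root, start, last - 1);
	     node; node = next) {
		next = rbt_ib_umem_iter_next(node, start, last - 1);
		/* ... invoke cb(), which may free 'node' ... */
	}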
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
 		struct ib_uverbs_flow_spec_ipv4    ipv4;
 		struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
 		struct ib_uverbs_flow_spec_ipv6    ipv6;
+		struct ib_uverbs_flow_spec_action_tag	flow_tag;
 	};
 };
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..b4b395a054ac 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file,
 				IB_QP_CREATE_CROSS_CHANNEL |
 				IB_QP_CREATE_MANAGED_SEND |
 				IB_QP_CREATE_MANAGED_RECV |
-				IB_QP_CREATE_SCATTER_FCS)) {
+				IB_QP_CREATE_SCATTER_FCS |
+				IB_QP_CREATE_CVLAN_STRIPPING)) {
 		ret = -EINVAL;
 		goto err_put;
 	}
@@ -3143,6 +3144,25 @@ out_put:
 	return ret ? ret : in_len;
 }
 
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+				       union ib_flow_spec *ib_spec)
+{
+	ib_spec->type = kern_spec->type;
+	switch (ib_spec->type) {
+	case IB_FLOW_SPEC_ACTION_TAG:
+		if (kern_spec->flow_tag.size !=
+		    sizeof(struct ib_uverbs_flow_spec_action_tag))
+			return -EINVAL;
+
+		ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+		ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
 {
 	/* Returns user space filter size, includes padding */
@@ -3167,8 +3187,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
 	return kern_filter_size;
 }
 
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
-				union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+				       union ib_flow_spec *ib_spec)
 {
 	ssize_t actual_filter_sz;
 	ssize_t kern_filter_sz;
@@ -3263,6 +3283,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 	return 0;
 }
 
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+				union ib_flow_spec *ib_spec)
+{
+	if (kern_spec->reserved)
+		return -EINVAL;
+
+	if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+		return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+	else
+		return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
@@ -3325,6 +3357,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	wq_init_attr.wq_context = file;
 	wq_init_attr.wq_type = cmd.wq_type;
 	wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+	if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+			     sizeof(cmd.create_flags)))
+		wq_init_attr.create_flags = cmd.create_flags;
 	obj->uevent.events_reported = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3515,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 	if (!cmd.attr_mask)
 		return -EINVAL;
 
-	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
 		return -EINVAL;
 
 	wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3524,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 
 	wq_attr.curr_wq_state = cmd.curr_wq_state;
 	wq_attr.wq_state = cmd.wq_state;
+	if (cmd.attr_mask & IB_WQ_FLAGS) {
+		wq_attr.flags = cmd.flags;
+		wq_attr.flags_mask = cmd.flags_mask;
+	}
 	ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
 	put_wq_read(wq);
 	return ret;
@@ -4323,6 +4362,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 	resp.max_wq_type_rq = attr.max_wq_type_rq;
 	resp.response_length += sizeof(resp.max_wq_type_rq);
+
+	if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+		goto end;
+
+	resp.raw_packet_caps = attr.raw_packet_caps;
+	resp.response_length += sizeof(resp.raw_packet_caps);
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
 	return err;
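The raw_packet_caps hunk above follows the standard pattern for growing an
extended uverbs response: copy an optional field only when the user buffer
can hold it, then bump response_length so older userspace receives exactly
the bytes it asked for. The generic shape of the pattern ('new_field' is a
placeholder, not a real uverbs field):

	if (ucore->outlen < resp.response_length + sizeof(resp.new_field))
		goto end;

	resp.new_field = attr.new_field;
	resp.response_length += sizeof(resp.new_field);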
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6262dc035f3c..48649f93258a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
 	dev = to_iwch_dev(ibdev);
 	netdev = dev->rdev.port_info.lldevs[port-1];
 
-	memset(props, 0, sizeof(struct ib_port_attr));
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = iwch_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3345e1c312f7..bdf7de571d83 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 
 	dev = to_c4iw_dev(ibdev);
 	netdev = dev->rdev.lldi.ports[port-1];
-
-	memset(props, 0, sizeof(struct ib_port_attr));
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = c4iw_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
 	return 0;
 }
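The iwch/c4iw hunks above (and the matching hunks in the drivers that follow)
rely on a calling-convention change: the core now zeroes struct ib_port_attr
before dispatching to the driver, making the per-driver memsets redundant.
Roughly, the core entry point behaves like this (paraphrased sketch, not
quoted from this patch):

	/* Sketch: the core zeroes props once, then calls the driver. */
	int ib_query_port(struct ib_device *device, u8 port_num,
			  struct ib_port_attr *port_attr)
	{
		memset(port_attr, 0, sizeof(*port_attr));
		return device->query_port(device, port_num, port_attr);
	}

Moving the core_cap_flags assignment ahead of the ib_query_port() call is
deliberate: the ordering suggests the core query path may consult the port's
protocol flags while the immutable data is still being built.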
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 5ba4c0dec348..33f00f0719c5 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1302,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
 	u16 lid = ppd->lid;
 
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->lid = lid ? lid : 0;
 	props->lmc = ppd->lmc;
 	/* OPA logical states match IB logical states */
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf14679664ca..6843409fba29 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -250,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
 	assert(port_num > 0);
 	port = port_num - 1;
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->max_mtu = hr_dev->caps.max_mtu;
 	props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -401,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
 	struct ib_port_attr attr;
 	int ret;
 
-	ret = hns_roce_query_port(ib_dev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+	ret = ib_query_port(ib_dev, port_num, &attr);
 	if (ret)
 		return ret;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
 
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4c000d60d5c6..5f695bf232a8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
 	struct i40iw_device *iwdev = to_iwdev(ibdev);
 	struct net_device *netdev = iwdev->netdev;
 
-	memset(props, 0, sizeof(*props));
-
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = i40iw_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..ea24230ea0d4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
 	struct list_head *head =
 		&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
 
+	memset(&attr, 0, sizeof(attr));
 	err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
 	if (err) {
 		pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..211cbbe9ccd1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
 }
 
 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
-			       struct ib_port_attr *props, int netw_view)
+			       struct ib_port_attr *props)
 {
 
 	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +741,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 {
 	int err;
 
-	memset(props, 0, sizeof *props);
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
 		ib_link_query_port(ibdev, port, props, netw_view) :
-				eth_link_query_port(ibdev, port, props, netw_view);
+				eth_link_query_port(ibdev, port, props);
 
 	return err;
 }
@@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 
 	mutex_lock(&mdev->cap_mask_mutex);
 
-	err = mlx4_ib_query_port(ibdev, port, &attr);
+	err = ib_query_port(ibdev, port, &attr);
 	if (err)
 		goto out;
 
@@ -2537,24 +2537,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
 	int err;
 
-	err = mlx4_ib_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-
 	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
 		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 	} else {
 		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
 			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
 			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
 				RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+		immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+		if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+		    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+			immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 	}
 
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+	err = ib_query_port(ibdev, port_num, &attr);
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 7d76f769233c..c34eebc7db65 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2420,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
 	return vl;
 }
 
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+				int index, union ib_gid *gid,
+				enum ib_gid_type *gid_type)
+{
+	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+	struct mlx4_port_gid_table *port_gid_table;
+	unsigned long flags;
+
+	port_gid_table = &iboe->gids[port_num - 1];
+	spin_lock_irqsave(&iboe->lock, flags);
+	memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+	*gid_type = port_gid_table->gids[index].gid_type;
+	spin_unlock_irqrestore(&iboe->lock, flags);
+	if (!memcmp(gid, &zgid, sizeof(*gid)))
+		return -ENOENT;
+
+	return 0;
+}
+
 #define MLX4_ROCEV2_QP1_SPORT 0xC000
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 			    void *wqe, unsigned *mlx_seg_len)
 {
 	struct ib_device *ib_dev = sqp->qp.ibqp.device;
+	struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
 	struct mlx4_wqe_mlx_seg *mlx = wqe;
 	struct mlx4_wqe_ctrl_seg *ctrl = wqe;
 	struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2450,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 	is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
 	is_grh = mlx4_ib_ah_grh_present(ah);
 	if (is_eth) {
-		struct ib_gid_attr gid_attr;
-
+		enum ib_gid_type gid_type;
 		if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 			/* When multi-function is enabled, the ib_core gid
 			 * indexes don't necessarily match the hw ones, so
@@ -2462,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 			if (err)
 				return err;
 		} else  {
-			err = ib_get_cached_gid(ib_dev,
-						be32_to_cpu(ah->av.ib.port_pd) >> 24,
-						ah->av.ib.gid_index, &sgid,
-						&gid_attr);
-			if (!err) {
-				if (gid_attr.ndev)
-					dev_put(gid_attr.ndev);
-				if (!memcmp(&sgid, &zgid, sizeof(sgid)))
-					err = -ENOENT;
-			}
+			err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
+					    ah->av.ib.gid_index,
+					    &sgid, &gid_type);
 			if (!err) {
-				is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+				is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
 				if (is_udp) {
 					if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
 						ip_version = 4;
@@ -2951,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 		if (sqp->roce_v2_gsi) {
 			struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
-			struct ib_gid_attr gid_attr;
+			enum ib_gid_type gid_type;
 			union ib_gid gid;
 
-			if (!ib_get_cached_gid(ibqp->device,
-					       be32_to_cpu(ah->av.ib.port_pd) >> 24,
-					       ah->av.ib.gid_index, &gid,
-					       &gid_attr)) {
-				if (gid_attr.ndev)
-					dev_put(gid_attr.ndev);
-				qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
-					to_mqp(sqp->roce_v2_gsi) : qp;
-			} else {
+			if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+					   ah->av.ib.gid_index,
+					   &gid, &gid_type))
+				qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+						to_mqp(sqp->roce_v2_gsi) : qp;
+			else
 				pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
 				       ah->av.ib.gid_index);
-			}
 		}
 	}
 
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..0ba5ba7540c8 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
 	int ret = 0 ;
 	struct ib_port_attr attr;
 
+	memset(&attr, 0, sizeof(attr));
 	/* get the physical gid and pkey table sizes.*/
 	ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
 	if (ret)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..90ad2adc752f 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
-mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
+mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..cdc2d3017da7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cmd.h"
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+{
+	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {};
+	int err;
+
+	MLX5_SET(query_special_contexts_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*null_mkey = MLX5_GET(query_special_contexts_out, out,
+				      null_mkey);
+	return err;
+}
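The new wrapper is consumed by the ODP code: struct mlx5_ib_dev grows a
null_mkey field later in this series, cached once at init so that, judging by
the mlx5_odp_populate_klm() declaration added below, unpopulated regions of
an on-demand MR can be pointed at the device's null mkey. A hedged sketch of
the intended call site (the function name is an assumption):

	/* Illustrative consumer: cache the device-wide null mkey once. */
	static int odp_cache_null_mkey(struct mlx5_ib_dev *dev)
	{
		return mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
	}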
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644
index 000000000000..7ca8a7b6434d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..8dacb49eabd9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,12 +42,24 @@ enum {
 	MLX5_IB_VENDOR_CLASS2 = 0xa
 };
 
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+			   struct ib_mad *in_mad)
+{
+	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+	    in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		return true;
+	return dev->mdev->port_caps[port_num - 1].has_smi;
+}
+
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 		 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
 		 const void *in_mad, void *response_mad)
 {
 	u8 op_modifier = 0;
 
+	if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+		return -EPERM;
+
 	/* Key check traps can't be generated unless we have in_wc to
 	 * tell us where to send the trap.
 	 */
@@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
 	if (!in_mad || !out_mad)
 		goto out;
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	init_query_mad(in_mad);
 	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9d8535385bb8..6a8498c052a5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -65,10 +65,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRIVER_VERSION);
 
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
 static char mlx5_version[] =
 	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
 	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@ -175,7 +171,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	enum ib_mtu ndev_ib_mtu;
 	u16 qkey_viol_cntr;
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->port_cap_flags  |= IB_PORT_CM_SUP;
 	props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
@@ -326,6 +322,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 	return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
 }
 
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+			   int index, enum ib_gid_type *gid_type)
+{
+	struct ib_gid_attr attr;
+	union ib_gid gid;
+	int ret;
+
+	ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+	if (ret)
+		return ret;
+
+	if (!attr.ndev)
+		return -ENODEV;
+
+	dev_put(attr.ndev);
+
+	*gid_type = attr.gid_type;
+
+	return 0;
+}
+
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -565,8 +582,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 
 	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
-		if (MLX5_CAP_ETH(mdev, csum_cap))
+		if (MLX5_CAP_ETH(mdev, csum_cap)) {
+			/* Legacy bit to support old userspace libraries */
 			props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+			props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+		}
+
+		if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+			props->raw_packet_caps |=
+				IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
 
 		if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
 			max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -605,8 +629,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
-	    MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+	    MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+		/* Legacy bit to support old userspace libraries */
 		props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+		props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+	}
 
 	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
 		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -831,7 +858,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 		goto out;
 	}
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
 	if (err)
@@ -969,6 +996,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
 	return err;
 }
 
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+				u32 value)
+{
+	struct mlx5_hca_vport_context ctx = {};
+	int err;
+
+	err = mlx5_query_hca_vport_context(dev->mdev, 0,
+					   port_num, 0, &ctx);
+	if (err)
+		return err;
+
+	if (~ctx.cap_mask1_perm & mask) {
+		mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+			     mask, ctx.cap_mask1_perm);
+		return -EINVAL;
+	}
+
+	ctx.cap_mask1 = value;
+	ctx.cap_mask1_perm = mask;
+	err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+						 port_num, 0, &ctx);
+
+	return err;
+}
+
 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 			       struct ib_port_modify *props)
 {
@@ -976,10 +1028,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 	struct ib_port_attr attr;
 	u32 tmp;
 	int err;
+	u32 change_mask;
+	u32 value;
+	bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+		      IB_LINK_LAYER_INFINIBAND);
+
+	if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+		change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+		value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+		return set_port_caps_atomic(dev, port, change_mask, value);
+	}
 
 	mutex_lock(&dev->cap_mask_mutex);
 
-	err = mlx5_ib_query_port(ibdev, port, &attr);
+	err = ib_query_port(ibdev, port, &attr);
 	if (err)
 		goto out;
 
@@ -1661,6 +1723,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
 #define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
 
 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1671,7 +1734,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 		   sizeof(filter.field))
 
 static int parse_flow_attr(u32 *match_c, u32 *match_v,
-			   const union ib_flow_spec *ib_spec)
+			   const union ib_flow_spec *ib_spec, u32 *tag_id)
 {
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
@@ -1695,7 +1758,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 	case IB_FLOW_SPEC_ETH:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     dmac_47_16),
@@ -1743,7 +1806,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 		break;
 	case IB_FLOW_SPEC_IPV4:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
@@ -1775,7 +1838,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 		break;
 	case IB_FLOW_SPEC_IPV6:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
@@ -1816,7 +1879,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 	case IB_FLOW_SPEC_TCP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 					 LAST_TCP_UDP_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
@@ -1836,7 +1899,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 	case IB_FLOW_SPEC_UDP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 					 LAST_TCP_UDP_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
@@ -1856,13 +1919,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
 					 LAST_TUNNEL_FIELD))
-			return -ENOTSUPP;
+			return -EOPNOTSUPP;
 
 		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
 			 ntohl(ib_spec->tunnel.mask.tunnel_id));
 		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
 			 ntohl(ib_spec->tunnel.val.tunnel_id));
 		break;
+	case IB_FLOW_SPEC_ACTION_TAG:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+					 LAST_FLOW_TAG_FIELD))
+			return -EOPNOTSUPP;
+		if (ib_spec->flow_tag.tag_id >= BIT(24))
+			return -EINVAL;
+
+		*tag_id = ib_spec->flow_tag.tag_id;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2046,6 +2118,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	struct mlx5_flow_spec *spec;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
+	u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 	int err = 0;
 
 	if (!is_valid_attr(flow_attr))
@@ -2062,7 +2135,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 
 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 		err = parse_flow_attr(spec->match_criteria,
-				      spec->match_value, ib_flow);
+				      spec->match_value, ib_flow, &flow_tag);
 		if (err < 0)
 			goto free;
 
@@ -2072,7 +2145,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
 	flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 		MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+	if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+			     flow_tag, flow_attr->type);
+		err = -EINVAL;
+		goto free;
+	}
+	flow_act.flow_tag = flow_tag;
 	handler->rule = mlx5_add_flow_rules(ft, spec,
 					    &flow_act,
 					    dst, 1);
@@ -2542,6 +2624,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 		ibdev->ib_active = false;
 }
 
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_hca_vport_context vport_ctx;
+	int err;
+	int port;
+
+	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+		dev->mdev->port_caps[port - 1].has_smi = false;
+		if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+		    MLX5_CAP_PORT_TYPE_IB) {
+			if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+				err = mlx5_query_hca_vport_context(dev->mdev, 0,
+								   port, 0,
+								   &vport_ctx);
+				if (err) {
+					mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+						    port, err);
+					return err;
+				}
+				dev->mdev->port_caps[port - 1].has_smi =
+					vport_ctx.has_smi;
+			} else {
+				dev->mdev->port_caps[port - 1].has_smi = true;
+			}
+		}
+	}
+	return 0;
+}
+
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
 {
 	int port;
@@ -2566,6 +2677,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
 	if (!dprops)
 		goto out;
 
+	err = set_has_smi_cap(dev);
+	if (err)
+		goto out;
+
 	err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
 	if (err) {
 		mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2573,6 +2688,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
 	}
 
 	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+		memset(pprops, 0, sizeof(*pprops));
 		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
 		if (err) {
 			mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2867,11 +2983,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
 	if (ll == IB_LINK_LAYER_INFINIBAND)
 		return RDMA_CORE_PORT_IBA_IB;
 
+	ret = RDMA_CORE_PORT_RAW_PACKET;
+
 	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
-		return 0;
+		return ret;
 
 	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
-		return 0;
+		return ret;
 
 	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
 		ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2890,7 +3008,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
 	int err;
 
-	err = mlx5_ib_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
@@ -3011,13 +3131,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 		mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
+struct mlx5_ib_q_counter {
+	const char *name;
+	size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name)		\
+	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+	INIT_Q_COUNTER(rx_write_requests),
+	INIT_Q_COUNTER(rx_read_requests),
+	INIT_Q_COUNTER(rx_atomic_requests),
+	INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+	INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+	INIT_Q_COUNTER(duplicate_request),
+	INIT_Q_COUNTER(rnr_nak_retry_err),
+	INIT_Q_COUNTER(packet_seq_err),
+	INIT_Q_COUNTER(implied_nak_seq_err),
+	INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
 {
 	unsigned int i;
 
-	for (i = 0; i < dev->num_ports; i++)
+	for (i = 0; i < dev->num_ports; i++) {
 		mlx5_core_dealloc_q_counter(dev->mdev,
-					    dev->port[i].q_cnt_id);
+					    dev->port[i].q_cnts.set_id);
+		kfree(dev->port[i].q_cnts.names);
+		kfree(dev->port[i].q_cnts.offsets);
+	}
+}
+
+static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
+				      const char ***names,
+				      size_t **offsets,
+				      u32 *num)
+{
+	u32 num_counters;
+
+	num_counters = ARRAY_SIZE(basic_q_cnts);
+
+	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+		num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+	*names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
+	if (!*names)
+		return -ENOMEM;
+
+	*offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
+	if (!*offsets)
+		goto err_names;
+
+	*num = num_counters;
+
+	return 0;
+
+err_names:
+	kfree(*names);
+	return -ENOMEM;
+}
+
+static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
+				    const char **names,
+				    size_t *offsets)
+{
+	int i;
+	int j = 0;
+
+	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+		names[j] = basic_q_cnts[i].name;
+		offsets[j] = basic_q_cnts[i].offset;
+	}
+
+	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+			names[j] = out_of_seq_q_cnts[i].name;
+			offsets[j] = out_of_seq_q_cnts[i].offset;
+		}
+	}
+
+	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+			names[j] = retrans_q_cnts[i].name;
+			offsets[j] = retrans_q_cnts[i].offset;
+		}
+	}
 }
 
 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
@@ -3026,14 +3235,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 	int ret;
 
 	for (i = 0; i < dev->num_ports; i++) {
+		struct mlx5_ib_port *port = &dev->port[i];
+
 		ret = mlx5_core_alloc_q_counter(dev->mdev,
-						&dev->port[i].q_cnt_id);
+						&port->q_cnts.set_id);
 		if (ret) {
 			mlx5_ib_warn(dev,
 				     "couldn't allocate queue counter for port %d, err %d\n",
 				     i + 1, ret);
 			goto dealloc_counters;
 		}
+
+		ret = __mlx5_ib_alloc_q_counters(dev,
+						 &port->q_cnts.names,
+						 &port->q_cnts.offsets,
+						 &port->q_cnts.num_counters);
+		if (ret)
+			goto dealloc_counters;
+
+		mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
+					port->q_cnts.offsets);
 	}
 
 	return 0;
@@ -3041,62 +3262,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 dealloc_counters:
 	while (--i >= 0)
 		mlx5_core_dealloc_q_counter(dev->mdev,
-					    dev->port[i].q_cnt_id);
+					    dev->port[i].q_cnts.set_id);
 
 	return ret;
 }
 
-static const char * const names[] = {
-	"rx_write_requests",
-	"rx_read_requests",
-	"rx_atomic_requests",
-	"out_of_buffer",
-	"out_of_sequence",
-	"duplicate_request",
-	"rnr_nak_retry_err",
-	"packet_seq_err",
-	"implied_nak_seq_err",
-	"local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
-	MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
-	MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
-	MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
-	MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
-	MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
-	MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
-	MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
-	MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
-	MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
-	MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
 						    u8 port_num)
 {
-	BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_port *port = &dev->port[port_num - 1];
 
 	/* We support only per port stats */
 	if (port_num == 0)
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+	return rdma_alloc_hw_stats_struct(port->q_cnts.names,
+					  port->q_cnts.num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 				struct rdma_hw_stats *stats,
-				u8 port, int index)
+				u8 port_num, int index)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_port *port = &dev->port[port_num - 1];
 	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
 	void *out;
 	__be32 val;
 	int ret;
 	int i;
 
-	if (!port || !stats)
+	if (!stats)
 		return -ENOSYS;
 
 	out = mlx5_vzalloc(outlen);
@@ -3104,18 +3302,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 		return -ENOMEM;
 
 	ret = mlx5_core_query_q_counter(dev->mdev,
-					dev->port[port - 1].q_cnt_id, 0,
+					port->q_cnts.set_id, 0,
 					out, outlen);
 	if (ret)
 		goto free;
 
-	for (i = 0; i < ARRAY_SIZE(names); i++) {
-		val = *(__be32 *)(out + stats_offsets[i]);
+	for (i = 0; i < port->q_cnts.num_counters; i++) {
+		val = *(__be32 *)(out + port->q_cnts.offsets[i]);
 		stats->value[i] = (u64)be32_to_cpu(val);
 	}
+
 free:
 	kvfree(out);
-	return ARRAY_SIZE(names);
+	return port->q_cnts.num_counters;
 }
 
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3267,8 +3466,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
 	}
 
-	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
-	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
 		dev->ib_dev.get_hw_stats	= mlx5_ib_get_hw_stats;
 		dev->ib_dev.alloc_hw_stats	= mlx5_ib_alloc_hw_stats;
 	}
@@ -3322,9 +3520,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	if (err)
 		goto err_rsrc;
 
-	err = mlx5_ib_alloc_q_counters(dev);
-	if (err)
-		goto err_odp;
+	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+		err = mlx5_ib_alloc_q_counters(dev);
+		if (err)
+			goto err_odp;
+	}
 
 	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
 	if (!dev->mdev->priv.uar)
@@ -3373,7 +3573,8 @@ err_uar_page:
 	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 
 err_q_cnt:
-	mlx5_ib_dealloc_q_counters(dev);
+	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+		mlx5_ib_dealloc_q_counters(dev);
 
 err_odp:
 	mlx5_ib_odp_remove_one(dev);
@@ -3406,7 +3607,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 	mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 	mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 	mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
-	mlx5_ib_dealloc_q_counters(dev);
+	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+		mlx5_ib_dealloc_q_counters(dev);
 	destroy_umrc_res(dev);
 	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
@@ -3430,8 +3632,7 @@ static int __init mlx5_ib_init(void)
 {
 	int err;
 
-	if (deprecated_prof_sel != 2)
-		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
+	mlx5_ib_odp_init();
 
 	err = mlx5_register_interface(&mlx5_ib_interface);
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e1a4b93dce6b..3cd064b5f0bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_UPD_XLT_ADDR	      BIT(3)
 #define MLX5_IB_UPD_XLT_PD	      BIT(4)
 #define MLX5_IB_UPD_XLT_ACCESS	      BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT      BIT(6)
 
 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
  *
@@ -220,6 +221,10 @@ struct wr_list {
 	u16	next;
 };
 
+enum mlx5_ib_rq_flags {
+	MLX5_IB_RQ_CVLAN_STRIPPING	= 1 << 0,
+};
+
 struct mlx5_ib_wq {
 	u64		       *wrid;
 	u32		       *wr_data;
@@ -308,6 +313,7 @@ struct mlx5_ib_rq {
 	struct mlx5_db		*doorbell;
 	u32			tirn;
 	u8			state;
+	u32			flags;
 };
 
 struct mlx5_ib_sq {
@@ -392,6 +398,7 @@ enum mlx5_ib_qp_flags {
 	MLX5_IB_QP_SQPN_QP1			= 1 << 6,
 	MLX5_IB_QP_CAP_SCATTER_FCS		= 1 << 7,
 	MLX5_IB_QP_RSS				= 1 << 8,
+	MLX5_IB_QP_CVLAN_STRIPPING		= 1 << 9,
 };
 
 struct mlx5_umr_wr {
@@ -497,6 +504,10 @@ struct mlx5_ib_mr {
 	int			live;
 	void			*descs_alloc;
 	int			access_flags; /* Needed for rereg MR */
+
+	struct mlx5_ib_mr      *parent;
+	atomic_t		num_leaf_free;
+	wait_queue_head_t       q_leaf_free;
 };
 
 struct mlx5_ib_mw {
@@ -535,6 +546,10 @@ struct mlx5_cache_ent {
 	struct dentry	       *dir;
 	char                    name[4];
 	u32                     order;
+	u32			xlt;
+	u32			access_mode;
+	u32			page;
+
 	u32			size;
 	u32                     cur;
 	u32                     miss;
@@ -549,6 +564,7 @@ struct mlx5_cache_ent {
 	struct work_struct	work;
 	struct delayed_work	dwork;
 	int			pending;
+	struct completion	compl;
 };
 
 struct mlx5_mr_cache {
@@ -579,8 +595,15 @@ struct mlx5_ib_resources {
 	struct mutex	mutex;
 };
 
+struct mlx5_ib_q_counters {
+	const char **names;
+	size_t *offsets;
+	u32 num_counters;
+	u16 set_id;
+};
+
 struct mlx5_ib_port {
-	u16 q_cnt_id;
+	struct mlx5_ib_q_counters q_cnts;
 };
 
 struct mlx5_roce {
@@ -619,6 +642,7 @@ struct mlx5_ib_dev {
 	 * being used by a page fault handler.
 	 */
 	struct srcu_struct      mr_srcu;
+	u32			null_mkey;
 #endif
 	struct mlx5_ib_flow_db	flow_db;
 	/* protect resources needed as part of reset flow */
@@ -771,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			  u64 length, u64 virt_addr, int access_flags,
 			  struct ib_pd *pd, struct ib_udata *udata);
@@ -824,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -848,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+			   size_t nentries, struct mlx5_ib_mr *mr, int flags);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -855,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 }
 
 static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)	{}
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)	    {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void)				{}
+static inline void mlx5_ib_odp_cleanup(void)				    {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+					 size_t nentries, struct mlx5_ib_mr *mr,
+					 int flags) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
@@ -872,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 			       int index);
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+			   int index, enum ib_gid_type *gid_type);
 
 /* GSI QP helper functions */
 struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
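Together with mlx5_mr_cache_alloc()/mlx5_mr_cache_free(), declared above and
defined in mr.c below, these additions let the ODP implicit-MR code draw
pre-created mkeys straight from a named cache bucket. A hedged usage sketch
(the entry index, error handling, and surrounding function are illustrative
assumptions):

	/* IMPLICIT_MR_CACHE_ENTRY stands in for whichever cache bucket
	 * the ODP code actually uses. */
	struct mlx5_ib_mr *mr;

	mr = mlx5_mr_cache_alloc(dev, IMPLICIT_MR_CACHE_ENTRY);
	if (IS_ERR_OR_NULL(mr))
		return mr ? PTR_ERR(mr) : -EAGAIN;
	/* ... use mr ... */
	mlx5_mr_cache_free(dev, mr);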
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2a67f9fb0..3c1f483d003f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -49,6 +49,7 @@ enum {
 
 static int clean_mr(struct mlx5_ib_mr *mr);
 static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
@@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
 	if (err)
 		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
 	write_unlock_irqrestore(&table->lock, flags);
+
+	if (!completion_done(&ent->compl))
+		complete(&ent->compl);
 }
 
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
-	int npages = 1 << ent->order;
 	void *mkc;
 	u32 *in;
 	int err = 0;
@@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 
 		MLX5_SET(mkc, mkc, free, 1);
 		MLX5_SET(mkc, mkc, umr_en, 1);
-		MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+		MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
 
 		MLX5_SET(mkc, mkc, qpn, 0xffffff);
-		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
-		MLX5_SET(mkc, mkc, log_page_size, 12);
+		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+		MLX5_SET(mkc, mkc, log_page_size, ent->page);
 
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
@@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent;
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+		return NULL;
+	}
+
+	ent = &cache->ent[entry];
+	while (1) {
+		spin_lock_irq(&ent->lock);
+		if (list_empty(&ent->head)) {
+			spin_unlock_irq(&ent->lock);
+
+			err = add_keys(dev, entry, 1);
+			if (err && err != -EAGAIN)
+				return ERR_PTR(err);
+
+			wait_for_completion(&ent->compl);
+		} else {
+			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+					      list);
+			list_del(&mr->list);
+			ent->cur--;
+			spin_unlock_irq(&ent->lock);
+			if (ent->cur < ent->limit)
+				queue_work(cache->wq, &ent->work);
+			return mr;
+		}
+	}
+}
+
 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 	int i;
 
 	c = order2idx(dev, order);
-	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+	if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
 		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 		return NULL;
 	}
 
-	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+	for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
 		ent = &cache->ent[i];
 
 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 	return mr;
 }
 
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
@@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 		return;
 	}
+
+	if (unreg_umr(dev, mr))
+		return;
+
 	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
@@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
-	int limit;
 	int err;
 	int i;
 
@@ -615,26 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 
 	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		INIT_LIST_HEAD(&cache->ent[i].head);
-		spin_lock_init(&cache->ent[i].lock);
-
 		ent = &cache->ent[i];
 		INIT_LIST_HEAD(&ent->head);
 		spin_lock_init(&ent->lock);
 		ent->order = i + 2;
 		ent->dev = dev;
+		ent->limit = 0;
 
-		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
-		    mlx5_core_is_pf(dev->mdev) &&
-		    use_umr(dev, ent->order))
-			limit = dev->mdev->profile->mr_cache[i].limit;
-		else
-			limit = 0;
-
+		init_completion(&ent->compl);
 		INIT_WORK(&ent->work, cache_work_func);
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-		ent->limit = limit;
 		queue_work(cache->wq, &ent->work);
+
+		if (i > MAX_UMR_CACHE_ENTRY) {
+			mlx5_odp_init_mr_cache_entry(ent);
+			continue;
+		}
+
+		if (!use_umr(dev, ent->order))
+			continue;
+
+		ent->page = PAGE_SHIFT;
+		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+			   MLX5_IB_UMR_OCTOWORD;
+		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+		    mlx5_core_is_pf(dev->mdev))
+			ent->limit = dev->mdev->profile->mr_cache[i].limit;
+		else
+			ent->limit = 0;
 	}
 
 	err = mlx5_mr_cache_debugfs_init(dev);
@@ -758,7 +809,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
 static int use_umr(struct mlx5_ib_dev *dev, int order)
 {
 	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
-		return order < MAX_MR_CACHE_ENTRIES + 2;
+		return order <= MAX_UMR_CACHE_ENTRY + 2;
 	return order <= MLX5_MAX_UMR_SHIFT;
 }
 
@@ -871,7 +922,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 				 MLX5_IB_UPD_XLT_ENABLE);
 
 	if (err) {
-		free_cached_mr(dev, mr);
+		mlx5_mr_cache_free(dev, mr);
 		return ERR_PTR(err);
 	}
 
@@ -886,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
 {
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct ib_umem *umem = mr->umem;
+	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+		return npages;
+	}
 
 	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
 
@@ -919,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	struct mlx5_umr_wr wr;
 	struct ib_sge sg;
 	int err = 0;
-	int desc_size = sizeof(struct mlx5_mtt);
+	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+			       ? sizeof(struct mlx5_klm)
+			       : sizeof(struct mlx5_mtt);
 	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
 	const int page_mask = page_align - 1;
 	size_t pages_mapped = 0;
@@ -1091,6 +1148,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
 		goto err_2;
 	}
 	mr->mmkey.type = MLX5_MKEY_MR;
+	mr->desc_size = sizeof(struct mlx5_mtt);
 	mr->umem = umem;
 	mr->dev = dev;
 	mr->live = 1;
@@ -1136,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (!start && length == U64_MAX) {
+		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+			return ERR_PTR(-EINVAL);
+
+		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+		return &mr->ibmr;
+	}
+#endif
+
 	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
 			   &page_shift, &ncont, &order);
 
@@ -1398,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
 			return err;
 		}
 	} else {
-		err = unreg_umr(dev, mr);
-		if (err) {
-			mlx5_ib_warn(dev, "failed unregister\n");
-			return err;
-		}
-		free_cached_mr(dev, mr);
+		mlx5_mr_cache_free(dev, mr);
 	}
 
 	if (!umred)
@@ -1426,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
-					 ib_umem_end(umem));
+		if (umem->odp_data->page_list)
+			mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+						 ib_umem_end(umem));
+		else
+			mlx5_ib_free_implicit_mr(mr);
 		/*
 		 * We kill the umem before the MR for ODP,
 		 * so that there will not be any invalidations in
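
The start == 0 && length == U64_MAX branch added to mlx5_ib_reg_user_mr() above is the kernel end of implicit ODP registration: the whole address space with no backing pages up front. A hedged userspace sketch of what maps to it (libibverbs API; the NULL/SIZE_MAX convention for implicit ODP is assumed, not taken from this patch):

	#include <stdint.h>
	#include <stdio.h>
	#include <infiniband/verbs.h>

	/* Request an implicit ODP MR covering the whole process VA space. */
	struct ibv_mr *mr = ibv_reg_mr(pd, NULL, SIZE_MAX,
				       IBV_ACCESS_ON_DEMAND |
				       IBV_ACCESS_LOCAL_WRITE);
	if (!mr)
		perror("implicit ODP registration");
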
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e5bc267aca73..d7b12f0750e2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
 #include <rdma/ib_umem_odp.h>
 
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 #define MAX_PREFETCH_LEN (4*1024*1024U)
 
@@ -41,6 +42,140 @@
  * a pagefault. */
 #define MMU_NOTIFIER_TIMEOUT 1000
 
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
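+
+/*
+ * With 4 KiB pages: MLX5_IMR_MTT_BITS = 18, so each leaf MTT MR covers
+ * 2^30 bytes (1 GiB) through 2^18 page-sized entries, and the top-level
+ * KSM table indexes user VA in 1 GiB strides.
+ */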
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+			       struct mlx5_ib_mr *parent)
+{
+	struct mlx5_ib_mr *mr = odp->private;
+
+	return mr && mr->parent == parent;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+	struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+	struct ib_ucontext *ctx = odp->umem->context;
+	struct rb_node *rb;
+
+	down_read(&ctx->umem_rwsem);
+	while (1) {
+		rb = rb_next(&odp->interval_tree.rb);
+		if (!rb)
+			goto not_found;
+		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+		if (check_parent(odp, parent))
+			goto end;
+	}
+not_found:
+	odp = NULL;
+end:
+	up_read(&ctx->umem_rwsem);
+	return odp;
+}
+
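+/*
+ * Find the first leaf umem under @parent intersecting [start, start+length).
+ * The per-context interval tree also holds umems of unrelated MRs, so every
+ * hit is filtered through check_parent().
+ */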
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+				      u64 start, u64 length,
+				      struct mlx5_ib_mr *parent)
+{
+	struct ib_umem_odp *odp;
+	struct rb_node *rb;
+
+	down_read(&ctx->umem_rwsem);
+	odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+	if (!odp)
+		goto end;
+
+	while (1) {
+		if (check_parent(odp, parent))
+			goto end;
+		rb = rb_next(&odp->interval_tree.rb);
+		if (!rb)
+			goto not_found;
+		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+		if (ib_umem_start(odp->umem) > start + length)
+			goto not_found;
+	}
+not_found:
+	odp = NULL;
+end:
+	up_read(&ctx->umem_rwsem);
+	return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+			   size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+	struct ib_pd *pd = mr->ibmr.pd;
+	struct ib_ucontext *ctx = pd->uobject->context;
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_odp *odp;
+	unsigned long va;
+	int i;
+
+	if (flags & MLX5_IB_UPD_XLT_ZAP) {
+		for (i = 0; i < nentries; i++, pklm++) {
+			pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+			pklm->key = cpu_to_be32(dev->null_mkey);
+			pklm->va = 0;
+		}
+		return;
+	}
+
+	odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+			     nentries * MLX5_IMR_MTT_SIZE, mr);
+
+	for (i = 0; i < nentries; i++, pklm++) {
+		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+		va = (offset + i) * MLX5_IMR_MTT_SIZE;
+		if (odp && odp->umem->address == va) {
+			struct mlx5_ib_mr *mtt = odp->private;
+
+			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+			odp = odp_next(odp);
+		} else {
+			pklm->key = cpu_to_be32(dev->null_mkey);
+		}
+		mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+			    i, va, be32_to_cpu(pklm->key));
+	}
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+	struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+	int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+	struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+	mr->parent = NULL;
+	synchronize_srcu(&mr->dev->mr_srcu);
+
+	if (!READ_ONCE(odp->dying)) {
+		mr->parent = imr;
+		if (atomic_dec_and_test(&imr->num_leaf_free))
+			wake_up(&imr->q_leaf_free);
+		return;
+	}
+
+	ib_umem_release(odp->umem);
+	if (imr->live)
+		mlx5_ib_update_xlt(imr, idx, 1, 0,
+				   MLX5_IB_UPD_XLT_INDIRECT |
+				   MLX5_IB_UPD_XLT_ATOMIC);
+	mlx5_mr_cache_free(mr->dev, mr);
+
+	if (atomic_dec_and_test(&imr->num_leaf_free))
+		wake_up(&imr->q_leaf_free);
+}
+
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end)
 {
@@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 	 */
 
 	ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+	if (unlikely(!umem->npages && mr->parent &&
+		     !umem->odp_data->dying)) {
+		WRITE_ONCE(umem->odp_data->dying, 1);
+		atomic_inc(&mr->parent->num_leaf_free);
+		schedule_work(&umem->odp_data->work);
+	}
 }
 
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
 		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
 
+	if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+	    MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+		caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
+
 	return;
 }
 
@@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 			    wq_num);
 }
 
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+					    struct ib_umem *umem,
+					    bool ksm, int access_flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+					    MLX5_IMR_MTT_CACHE_ENTRY);
+
+	if (IS_ERR(mr))
+		return mr;
+
+	mr->ibmr.pd = pd;
+
+	mr->dev = dev;
+	mr->access_flags = access_flags;
+	mr->mmkey.iova = 0;
+	mr->umem = umem;
+
+	if (ksm) {
+		err = mlx5_ib_update_xlt(mr, 0,
+					 mlx5_imr_ksm_entries,
+					 MLX5_KSM_PAGE_SHIFT,
+					 MLX5_IB_UPD_XLT_INDIRECT |
+					 MLX5_IB_UPD_XLT_ZAP |
+					 MLX5_IB_UPD_XLT_ENABLE);
+
+	} else {
+		err = mlx5_ib_update_xlt(mr, 0,
+					 MLX5_IMR_MTT_ENTRIES,
+					 PAGE_SHIFT,
+					 MLX5_IB_UPD_XLT_ZAP |
+					 MLX5_IB_UPD_XLT_ENABLE |
+					 MLX5_IB_UPD_XLT_ATOMIC);
+	}
+
+	if (err)
+		goto fail;
+
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
+
+	mr->live = 1;
+
+	mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+		    mr->mmkey.key, dev->mdev, mr);
+
+	return mr;
+
+fail:
+	mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+	mlx5_mr_cache_free(dev, mr);
+
+	return ERR_PTR(err);
+}
+
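+/*
+ * Ensure a leaf MR exists for every MLX5_IMR_MTT_SIZE chunk touched by
+ * [io_virt, io_virt + bcnt): reuse leaves already in the interval tree,
+ * allocate the missing ones, and publish any new KLM entries to the
+ * parent with one mlx5_ib_update_xlt() call.  Returns the first leaf.
+ */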
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+						u64 io_virt, size_t bcnt)
+{
+	struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+	struct ib_umem_odp *odp, *result = NULL;
+	u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+	int nentries = 0, start_idx = 0, ret;
+	struct mlx5_ib_mr *mtt;
+	struct ib_umem *umem;
+
+	mutex_lock(&mr->umem->odp_data->umem_mutex);
+	odp = odp_lookup(ctx, addr, 1, mr);
+
+	mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+		    io_virt, bcnt, addr, odp);
+
+next_mr:
+	if (likely(odp)) {
+		if (nentries)
+			nentries++;
+	} else {
+		umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+		if (IS_ERR(umem)) {
+			mutex_unlock(&mr->umem->odp_data->umem_mutex);
+			return ERR_CAST(umem);
+		}
+
+		mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+		if (IS_ERR(mtt)) {
+			mutex_unlock(&mr->umem->odp_data->umem_mutex);
+			ib_umem_release(umem);
+			return ERR_CAST(mtt);
+		}
+
+		odp = umem->odp_data;
+		odp->private = mtt;
+		mtt->umem = umem;
+		mtt->mmkey.iova = addr;
+		mtt->parent = mr;
+		INIT_WORK(&odp->work, mr_leaf_free_action);
+
+		if (!nentries)
+			start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+		nentries++;
+	}
+
+	odp->dying = 0;
+
+	/* Return the first odp if the region is not covered by a single one */
+	if (likely(!result))
+		result = odp;
+
+	addr += MLX5_IMR_MTT_SIZE;
+	if (unlikely(addr < io_virt + bcnt)) {
+		odp = odp_next(odp);
+		if (odp && odp->umem->address != addr)
+			odp = NULL;
+		goto next_mr;
+	}
+
+	if (unlikely(nentries)) {
+		ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+					 MLX5_IB_UPD_XLT_INDIRECT |
+					 MLX5_IB_UPD_XLT_ATOMIC);
+		if (ret) {
+			mlx5_ib_err(dev, "Failed to update PAS\n");
+			result = ERR_PTR(ret);
+		}
+	}
+
+	mutex_unlock(&mr->umem->odp_data->umem_mutex);
+	return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     int access_flags)
+{
+	struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+	struct mlx5_ib_mr *imr;
+	struct ib_umem *umem;
+
+	umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+	if (IS_ERR(umem))
+		return ERR_CAST(umem);
+
+	imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+	if (IS_ERR(imr)) {
+		ib_umem_release(umem);
+		return ERR_CAST(imr);
+	}
+
+	imr->umem = umem;
+	init_waitqueue_head(&imr->q_leaf_free);
+	atomic_set(&imr->num_leaf_free, 0);
+
+	return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+			u64 end, void *cookie)
+{
+	struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+	if (mr->parent != imr)
+		return 0;
+
+	ib_umem_odp_unmap_dma_pages(umem,
+				    ib_umem_start(umem),
+				    ib_umem_end(umem));
+
+	if (umem->odp_data->dying)
+		return 0;
+
+	WRITE_ONCE(umem->odp_data->dying, 1);
+	atomic_inc(&imr->num_leaf_free);
+	schedule_work(&umem->odp_data->work);
+
+	return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+	struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+	down_read(&ctx->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+				      mr_leaf_free, imr);
+	up_read(&ctx->umem_rwsem);
+
+	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+}
+
 /*
  * Handle a single data segment in a page-fault WQE or RDMA region.
  *
@@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
  * -EFAULT when there's an error mapping the requested pages. The caller will
  *  abort the page fault handling.
  */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 					 u32 key, u64 io_virt, size_t bcnt,
 					 u32 *bytes_committed,
 					 u32 *bytes_mapped)
 {
 	int srcu_key;
-	unsigned int current_seq;
+	unsigned int current_seq = 0;
 	u64 start_idx;
 	int npages = 0, ret = 0;
 	struct mlx5_ib_mr *mr;
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
+	struct ib_umem_odp *odp;
+	int implicit = 0;
+	size_t size;
 
-	srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
-	mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+	srcu_key = srcu_read_lock(&dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
 	/*
 	 * If we didn't find the MR, it means the MR was closed while we were
 	 * handling the ODP event. In this case we return -EFAULT so that the
 	 * QP will be closed.
 	 */
 	if (!mr || !mr->ibmr.pd) {
-		pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-		       key);
+		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+			    key);
 		ret = -EFAULT;
 		goto srcu_unlock;
 	}
 	if (!mr->umem->odp_data) {
-		pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-			 key);
+		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			    key);
 		if (bytes_mapped)
 			*bytes_mapped +=
 				(bcnt - *bytes_committed);
 		goto srcu_unlock;
 	}
 
-	current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
-	/*
-	 * Ensure the sequence number is valid for some time before we call
-	 * gup.
-	 */
-	smp_rmb();
-
 	/*
 	 * Avoid branches - this code will perform correctly
 	 * in all iterations (in iteration 2 and above,
@@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
 	io_virt += *bytes_committed;
 	bcnt -= *bytes_committed;
 
+	if (!mr->umem->odp_data->page_list) {
+		odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+		if (IS_ERR(odp)) {
+			ret = PTR_ERR(odp);
+			goto srcu_unlock;
+		}
+		mr = odp->private;
+		implicit = 1;
+
+	} else {
+		odp = mr->umem->odp_data;
+	}
+
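+	/*
+	 * For implicit MRs each pass below faults in at most one leaf; any
+	 * bcnt remaining past the leaf boundary loops back to next_mr with
+	 * the following leaf.
+	 */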
+next_mr:
+	current_seq = READ_ONCE(odp->notifiers_seq);
+	/*
+	 * Ensure the sequence number is valid for some time before we call
+	 * gup.
+	 */
+	smp_rmb();
+
+	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
 	start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
 
 	if (mr->umem->writable)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
-	npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
-					   access_mask, current_seq);
-	if (npages < 0) {
-		ret = npages;
+
+	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+					access_mask, current_seq);
+
+	if (ret < 0)
 		goto srcu_unlock;
-	}
 
-	if (npages > 0) {
-		mutex_lock(&mr->umem->odp_data->umem_mutex);
+	if (ret > 0) {
+		int np = ret;
+
+		mutex_lock(&odp->umem_mutex);
 		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
 			/*
 			 * No need to check whether the MTTs really belong to
 			 * this MR, since ib_umem_odp_map_dma_pages already
 			 * checks this.
 			 */
-			ret = mlx5_ib_update_xlt(mr, start_idx, npages,
+			ret = mlx5_ib_update_xlt(mr, start_idx, np,
 						 PAGE_SHIFT,
 						 MLX5_IB_UPD_XLT_ATOMIC);
 		} else {
 			ret = -EAGAIN;
 		}
-		mutex_unlock(&mr->umem->odp_data->umem_mutex);
+		mutex_unlock(&odp->umem_mutex);
 		if (ret < 0) {
 			if (ret != -EAGAIN)
-				pr_err("Failed to update mkey page tables\n");
+				mlx5_ib_err(dev, "Failed to update mkey page tables\n");
 			goto srcu_unlock;
 		}
 
 		if (bytes_mapped) {
-			u32 new_mappings = npages * PAGE_SIZE -
+			u32 new_mappings = np * PAGE_SIZE -
 				(io_virt - round_down(io_virt, PAGE_SIZE));
-			*bytes_mapped += min_t(u32, new_mappings, bcnt);
+			*bytes_mapped += min_t(u32, new_mappings, size);
 		}
+
+		npages += np;
+	}
+
+	bcnt -= size;
+	if (unlikely(bcnt)) {
+		struct ib_umem_odp *next;
+
+		io_virt += size;
+		next = odp_next(odp);
+		if (unlikely(!next || next->umem->address != io_virt)) {
+			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+				    io_virt, next);
+			ret = -EAGAIN;
+			goto srcu_unlock_no_wait;
+		}
+		odp = next;
+		mr = odp->private;
+		goto next_mr;
 	}
 
 srcu_unlock:
 	if (ret == -EAGAIN) {
-		if (!mr->umem->odp_data->dying) {
-			struct ib_umem_odp *odp_data = mr->umem->odp_data;
+		if (implicit || !odp->dying) {
 			unsigned long timeout =
 				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
 			if (!wait_for_completion_timeout(
-					&odp_data->notifier_completion,
+					&odp->notifier_completion,
 					timeout)) {
-				pr_warn("timeout waiting for mmu notifier completion\n");
+				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+					     current_seq, odp->notifiers_seq);
 			}
 		} else {
 			/* The MR is being killed, kill the QP as well. */
 			ret = -EFAULT;
 		}
 	}
-	srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+
+srcu_unlock_no_wait:
+	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
 }
@@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 		goto resolve_page_fault;
 	} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
 		if (ret != -ENOENT)
-			mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
-				    ret);
+			mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
+				    ret, pfault->wqe.wq_num, pfault->type);
 		goto resolve_page_fault;
 	}
 
@@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 resolve_page_fault:
 	mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
 	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
-		    pfault->token, resume_with_error,
+		    pfault->wqe.wq_num, resume_with_error,
 		    pfault->type);
 	free_page((unsigned long)buffer);
 }
@@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
 		ret = pagefault_single_data_segment(dev, rkey, address,
 						    prefetch_len,
 						    &bytes_committed, NULL);
-		if (ret < 0) {
+		if (ret < 0 && ret != -EAGAIN) {
 			mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
-				     ret, pfault->token, address,
-				     prefetch_len);
+				     ret, pfault->token, address, prefetch_len);
 		}
 	}
 }
@@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
 	}
 }
 
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+{
+	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+		return;
+
+	switch (ent->order - 2) {
+	case MLX5_IMR_MTT_CACHE_ENTRY:
+		ent->page = PAGE_SHIFT;
+		ent->xlt = MLX5_IMR_MTT_ENTRIES *
+			   sizeof(struct mlx5_mtt) /
+			   MLX5_IB_UMR_OCTOWORD;
+		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+		ent->limit = 0;
+		break;
+
+	case MLX5_IMR_KSM_CACHE_ENTRY:
+		ent->page = MLX5_KSM_PAGE_SHIFT;
+		ent->xlt = mlx5_imr_ksm_entries *
+			   sizeof(struct mlx5_klm) /
+			   MLX5_IB_UMR_OCTOWORD;
+		ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+		ent->limit = 0;
+		break;
+	}
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
 	int ret;
 
-	ret = init_srcu_struct(&ibdev->mr_srcu);
+	ret = init_srcu_struct(&dev->mr_srcu);
 	if (ret)
 		return ret;
 
+	if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+		ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+		if (ret) {
+			mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
+{
+	cleanup_srcu_struct(&dev->mr_srcu);
+}
+
+int mlx5_ib_odp_init(void)
 {
-	cleanup_srcu_struct(&ibdev->mr_srcu);
+	mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+				       MLX5_IMR_MTT_BITS);
+
+	return 0;
 }
 
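For scale, under the usual x86-64 assumptions (PAGE_SHIFT = 12, TASK_SIZE just below 1 << 47) the BIT_ULL() sizing above works out to:

	/*
	 * get_order(TASK_SIZE)                     = 35  (order in pages)
	 * mlx5_imr_ksm_entries = BIT_ULL(35 - 18)  = 131072 KSM slots
	 * covered VA           = 131072 * 1 GiB    = 1 << 47 bytes
	 */
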
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e31bf11ae64f..ad8a2638e339 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	else
 		qp->bf.bfreg = &dev->bfreg;
 
-	qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
+	/* We need to divide by two since each register consists of
+	 * two buffers of identical size, namely odd and even
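+	 * (e.g. log_bf_reg_size == 10: a 1 KiB register gives a 512 B buf_size)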
+	 */
+	qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
 	uar_index = qp->bf.bfreg->index;
 
 	err = calc_sq_size(dev, init_attr, qp);
@@ -1141,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 		return -ENOMEM;
 
 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
-	MLX5_SET(rqc, rqc, vsd, 1);
+	if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+		MLX5_SET(rqc, rqc, vsd, 1);
 	MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1238,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (qp->rq.wqe_cnt) {
 		rq->base.container_mibqp = qp;
 
+		if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+			rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
 		err = create_raw_packet_qp_rq(dev, rq, in);
 		if (err)
 			goto err_destroy_sq;
@@ -1559,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
+	if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
+		if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+		      MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
+		    (init_attr->qp_type != IB_QPT_RAW_PACKET))
+			return -EOPNOTSUPP;
+		qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+	}
+
 	if (pd && pd->uobject) {
 		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
 			mlx5_ib_dbg(dev, "copy failed\n");
@@ -2198,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 {
 	enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
 	int err;
+	enum ib_gid_type gid_type;
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
 		path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@@ -2216,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ll == IB_LINK_LAYER_ETHERNET) {
 		if (!(ah->ah_flags & IB_AH_GRH))
 			return -EINVAL;
+		err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+					     &gid_type);
+		if (err)
+			return err;
 		memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
 		path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
 							  ah->grh.sgid_index);
 		path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
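+		/* RoCE v2 (UDP encap): DSCP is the upper 6 bits of the GRH traffic class */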
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+			path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
 	} else {
 		path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
 		path->fl_free_ar |=
@@ -2422,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
 		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
 			MLX5_SET64(modify_rq_in, in, modify_bitmask,
-				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
 			MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
 		} else
 			pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2777,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 			       qp->port) - 1;
 		mibport = &dev->port[port_num];
 		context->qp_counter_set_usr_page |=
-			cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+			cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
 	}
 
 	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2805,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 
 		raw_qp_param.operation = op;
 		if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
-			raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+			raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
 			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
 		}
 
@@ -3637,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
 		break;
 	default:
-		pr_err("Bad signature type given.\n");
-		return 1;
+		pr_err("Bad signature type (%d) is given.\n",
+		       domain->sig_type);
+		return -EINVAL;
 	}
 
 	*seg += sizeof(*psv_seg);
@@ -3978,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 
 		case IB_QPT_SMI:
+			if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+				mlx5_ib_warn(dev, "Sending SMP MADs is not allowed\n");
+				err = -EPERM;
+				*bad_wr = wr;
+				goto out;
+			}
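+			/* fall through */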
 		case MLX5_IB_QPT_HW_GSI:
 			set_datagram_seg(seg, wr);
 			seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4579,6 +4607,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 		      struct ib_wq_init_attr *init_attr)
 {
 	struct mlx5_ib_dev *dev;
+	int has_net_offloads;
 	__be64 *rq_pas0;
 	void *in;
 	void *rqc;
@@ -4610,9 +4639,28 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
 	MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
 	MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+	has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+	if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+			mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+	} else {
+		MLX5_SET(rqc, rqc, vsd, 1);
+	}
+	if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+			mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		MLX5_SET(rqc, rqc, scatter_fcs, 1);
+	}
 	rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
 	mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
 	err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
 	kvfree(in);
 	return err;
 }
@@ -4896,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
 	MLX5_SET(rqc, rqc, state, wq_state);
 
+	if (wq_attr_mask & IB_WQ_FLAGS) {
+		if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+			if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+			      MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+				mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+				err = -EOPNOTSUPP;
+				goto out;
+			}
+			MLX5_SET64(modify_rq_in, in, modify_bitmask,
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+			MLX5_SET(rqc, rqc, vsd,
+				 (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+		}
+	}
+
+	if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+			MLX5_SET64(modify_rq_in, in, modify_bitmask,
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+			MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+		} else {
+			pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+				     dev->ib_dev.name);
+		}
+	}
+
 	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
-	kvfree(in);
 	if (!err)
 		rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
 
+out:
+	kvfree(in);
 	return err;
 }
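
Taken together, the create_rq() and mlx5_ib_modify_wq() hunks above expose CVLAN stripping both at WQ creation and at runtime. A hedged consumer sketch against the in-kernel verbs API (struct ib_wq_init_attr / ib_wq_attr fields as referenced by this patch; error handling trimmed):

	struct ib_wq_init_attr init_attr = {
		.wq_type      = IB_WQT_RQ,
		.max_wr       = 256,
		.max_sge      = 1,
		.cq           = cq,
		.create_flags = IB_WQ_FLAGS_CVLAN_STRIPPING,	/* strip from creation */
	};
	struct ib_wq *wq = ib_create_wq(pd, &init_attr);

	/* Later, toggle stripping off without recreating the WQ. */
	struct ib_wq_attr wq_attr = {
		.flags_mask = IB_WQ_FLAGS_CVLAN_STRIPPING,
		.flags      = 0,
	};
	if (!IS_ERR(wq))
		ib_modify_wq(wq, &wq_attr, IB_WQ_FLAGS);
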
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..7cb145f9a6db 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	int err;
 	int i;
 	struct mlx5_wqe_srq_next_seg *next;
-	int page_shift;
-	int npages;
 
 	err = mlx5_db_alloc(dev->mdev, &srq->db);
 	if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 		err = -ENOMEM;
 		goto err_db;
 	}
-	page_shift = srq->buf.page_shift;
 
 	srq->head    = 0;
 	srq->tail    = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 			cpu_to_be16((i + 1) & (srq->msrq.max - 1));
 	}
 
-	npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
-	mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
-		    buf_size, page_shift, srq->buf.npages, npages);
-	in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+	mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+	in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
 	if (!in->pas) {
 		err = -ENOMEM;
 		goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 	srq->wq_sig = !!srq_signature;
 
-	in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+	in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
 	    in->type == IB_SRQT_XRC)
 		in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..ce163184e742 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
 	if (!in_mad || !out_mad)
 		goto out;
 
-	memset(props, 0, sizeof *props);
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	init_query_mad(in_mad);
 	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
 	if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
 		return -ERESTARTSYS;
 
-	err = mthca_query_port(ibdev, port, &attr);
+	err = ib_query_port(ibdev, port, &attr);
 	if (err)
 		goto out;
 
@@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = mthca_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5a31f3c6a421..d3eae2f3e9f5 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
 	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
 	struct net_device *netdev = nesvnic->netdev;
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
 	err = nes_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..3e43bdc81e7a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 	int err;
 
 	dev = get_ocrdma_dev(ibdev);
-	err = ocrdma_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+	if (ocrdma_is_udp_encap_supported(dev))
+		immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
-	if (ocrdma_is_udp_encap_supported(dev))
-		immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e06ad7250963..bc9fb144e57b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	struct ocrdma_dev *dev;
 	struct net_device *netdev;
 
+	/* props being zeroed by the caller, avoid zeroing it here */
 	dev = get_ocrdma_dev(ibdev);
 	if (port > 1) {
 		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0c51657af151..6b3bb32803bd 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	}
 
 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
-	memset(attr, 0, sizeof(*attr));
 
+	/* *attr being zeroed by the caller, avoid zeroing it here */
 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
 		attr->state = IB_PORT_ACTIVE;
 		attr->phys_state = 5;
@@ -3494,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = qedr_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
-				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index b0b78e1cec92..6b56f1c01a07 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1220,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
 	enum ib_mtu mtu;
 	u16 lid = ppd->lid;
 
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
 	props->lmc = ppd->lmc;
 	props->state = dd->f_iblink_state(ppd->lastibcstat);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0a89a955550b..4f5a45db08e1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = usnic_ib_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 69df8e353123..3284730d3c09 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
 	mutex_lock(&us_ibdev->usdev_lock);
 	__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->lid = 0;
 	props->lmc = 1;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 60cdb7719565..e03d2f6c1f90 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = pvrdma_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 	return 0;
 }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index c2aa52638dcb..fec17c49103b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
 		return err;
 	}
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->state = pvrdma_port_state_to_ib(resp->attrs.state);
 	props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
 	}
 
 	mutex_lock(&vdev->port_mutex);
-	ret = pvrdma_query_port(ibdev, port, &attr);
+	ret = ib_query_port(ibdev, port, &attr);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d430c2f7cec4..1165639a914b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
 		return -EINVAL;
 
 	rvp = rdi->ports[port_index];
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->sm_lid = rvp->sm_lid;
 	props->sm_sl = rvp->sm_sl;
 	props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
 	if (port_index < 0)
 		return -EINVAL;
 
-	err = rvt_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
 	immutable->max_mad_size = rdi->dparms.max_mad_size;
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e4de37fb9aab..d2e2eff7a515 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev,
 
 	port = &rxe->port;
 
+	/* *attr being zeroed by the caller, avoid zeroing it here */
 	*attr = port->attr;
 
 	mutex_lock(&rxe->usdev_lock);
@@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 	int err;
 	struct ib_port_attr attr;
 
-	err = rxe_query_port(dev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(dev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;