From b4f34597a5ce148b88a47da621037537c384d565 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:12 +0300 Subject: IB/mlx5: Expose multi-packet RQ capabilities This patch reports the device's striding RQ capabilities to the user-space: - min/max_single_stride_log_num_of_bytes: Log of min/max number of bytes in a single stride. - min/max_single_wqe_log_num_of_strides: Log of min/max number of strides in a single WQE. - supported_qpts: A bit mask to know which QP types support multi- packet RQ, for now only Raw Packet QPs. Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 1791bf123ba9..0832d9502200 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -190,6 +190,19 @@ struct mlx5_ib_sw_parsing_caps { __u32 supported_qpts; }; +struct mlx5_ib_striding_rq_caps { + __u32 min_single_stride_log_num_of_bytes; + __u32 max_single_stride_log_num_of_bytes; + __u32 min_single_wqe_log_num_of_strides; + __u32 max_single_wqe_log_num_of_strides; + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -200,6 +213,7 @@ struct mlx5_ib_query_device_resp { __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; struct mlx5_ib_sw_parsing_caps sw_parsing_caps; + struct mlx5_ib_striding_rq_caps striding_rq_caps; }; struct mlx5_ib_create_cq { -- cgit 1.4.1 From ccc8708790273811db24676223b040710793cba7 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:13 +0300 Subject: IB/mlx5: Allow creation of a multi-packet RQ Allow creation of a multi-packet receive queue. In order to create a multi-packet RQ, the following fields in the mlx5_ib_rwq should be set: - log_num_strides: Log of number of strides per WQE - single_stride_log_num_of_bytes: Log of a single stride size - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeros before writing the message to memory (e.g. for the IP alignment). 
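For illustration only, a minimal user-space sketch of how a provider library might validate its requested striding parameters against the capabilities exposed above before building the create-WQ command. The striding_rq_params_ok() helper and the EXAMPLE_QPT_RAW_PACKET constant are assumptions made for the example; the struct and field names are the ones added in this series.

#include <stdbool.h>
#include <rdma/mlx5-abi.h>

#define EXAMPLE_QPT_RAW_PACKET 8	/* assumed to mirror IB_QPT_RAW_PACKET in enum ib_qp_type */

static bool striding_rq_params_ok(const struct mlx5_ib_striding_rq_caps *caps,
				  __u32 log_stride_bytes,
				  __u32 log_num_strides)
{
	/* For now only Raw Packet QPs advertise multi-packet RQ support. */
	if (!(caps->supported_qpts & (1u << EXAMPLE_QPT_RAW_PACKET)))
		return false;

	return log_stride_bytes >= caps->min_single_stride_log_num_of_bytes &&
	       log_stride_bytes <= caps->max_single_stride_log_num_of_bytes &&
	       log_num_strides >= caps->min_single_wqe_log_num_of_strides &&
	       log_num_strides <= caps->max_single_wqe_log_num_of_strides;
}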
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +++ drivers/infiniband/hw/mlx5/qp.c | 52 ++++++++++++++++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 1 + include/uapi/rdma/mlx5-abi.h | 8 +++++- 4 files changed, 56 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8de40852818b..e7deaa08535b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -254,6 +254,7 @@ struct mlx5_ib_wq { enum mlx5_ib_wq_flags { MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, + MLX5_IB_WQ_FLAGS_STRIDING_RQ = 0x2, }; #define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 @@ -269,6 +270,9 @@ struct mlx5_ib_rwq { u32 log_rq_size; u32 rq_page_offset; u32 log_page_size; + u32 log_num_strides; + u32 two_byte_shift_en; + u32 single_stride_log_num_of_bytes; struct ib_umem *umem; size_t buf_size; unsigned int page_shift; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 37a0976240fd..d209c684d729 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4706,9 +4706,19 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, wq_type, + rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ? + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); + MLX5_SET(wq, wq, log_wqe_stride_size, + rwq->single_stride_log_num_of_bytes - + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); @@ -4790,7 +4800,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) + + sizeof(ucmd.single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -4808,14 +4819,39 @@ static int prepare_user_rq(struct ib_pd *pd, return -EFAULT; } - if (ucmd.comp_mask) { + if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; - } - - if (ucmd.reserved) { - mlx5_ib_dbg(dev, "invalid reserved\n"); - return -EOPNOTSUPP; + } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) { + if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) { + mlx5_ib_dbg(dev, "Striding RQ is not supported\n"); + return -EOPNOTSUPP; + } + if ((ucmd.single_stride_log_num_of_bytes < + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) || + (ucmd.single_stride_log_num_of_bytes > + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) { + mlx5_ib_dbg(dev, "Invalid log stride size (%u. 
Range is %u - %u)\n", + ucmd.single_stride_log_num_of_bytes, + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES, + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); + return -EINVAL; + } + if ((ucmd.single_wqe_log_num_of_strides > + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (ucmd.single_wqe_log_num_of_strides < + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { + mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + return -EINVAL; + } + rwq->single_stride_log_num_of_bytes = + ucmd.single_stride_log_num_of_bytes; + rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides; + rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en; + rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ; } err = set_user_rq_size(dev, init_attr, &ucmd, rwq); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..db655db45b77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -744,6 +744,7 @@ enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ = 0x3, }; enum { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0832d9502200..b1d5b87ba3fd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -308,6 +308,10 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +enum mlx5_ib_create_wq_mask { + MLX5_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), +}; + struct mlx5_ib_create_wq { __u64 buf_addr; __u64 db_addr; @@ -316,7 +320,9 @@ struct mlx5_ib_create_wq { __u32 user_index; __u32 flags; __u32 comp_mask; - __u32 reserved; + __u32 single_stride_log_num_of_bytes; + __u32 single_wqe_log_num_of_strides; + __u32 two_byte_shift_en; }; struct mlx5_ib_create_ah_resp { -- cgit 1.4.1 From de57f2ad06d5bf01015b955600cbfc77059b2b6e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:52 +0300 Subject: IB/mlx5: Support 128B CQE compression feature In commit 1cbe6fc86ccf ("IB/mlx5: Add support for CQE compressing") the concept of CQE compression was introduced and added a support for 64B CQE size. This change update the code to support 128B CQE size as well. 
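As an illustration, a user-space sketch of requesting compression on a 128B CQE. The request_128b_cqe_compression() helper is hypothetical; the command fields and the response flag are the ones this patch adds or touches.

#include <rdma/mlx5-abi.h>

/*
 * Hypothetical helper: enable CQE compression on a 128B CQE only when the
 * device reported MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP in
 * mlx5_ib_query_device_resp.flags (both added by this patch).
 */
static void request_128b_cqe_compression(struct mlx5_ib_create_cq *ucmd,
					 __u32 query_resp_flags)
{
	if (!(query_resp_flags & MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP))
		return;			/* fall back: 64B compression or none */

	ucmd->cqe_size = 128;
	ucmd->cqe_comp_en = 1;
	/* cqe_comp_res_format selection is left to the caller. */
}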
Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 6 ++++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b8a116d0e063..51871f049c57 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -804,8 +804,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { - if (unlikely((*cqe_size != 64) || - !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + if (!((*cqe_size == 128 && + MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || + (*cqe_size == 64 && + MLX5_CAP_GEN(dev->mdev, cqe_compression)))) { err = -EOPNOTSUPP; mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", *cqe_size); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 02da3f58f296..b9337562aa90 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -824,8 +824,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), reserved, uhw->outlen)) - resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), flags, uhw->outlen)) { + resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + } if (field_avail(typeof(resp), sw_parsing_caps, uhw->outlen)) { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index b1d5b87ba3fd..a8fc1f0956d0 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -203,6 +203,11 @@ struct mlx5_ib_striding_rq_caps { __u32 supported_qpts; }; +enum mlx5_ib_query_dev_resp_flags { + /* Support 128B CQE compression */ + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -211,7 +216,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_cqe_comp_caps cqe_comp_caps; struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; - __u32 reserved; + __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; }; -- cgit 1.4.1 From 7a0c8f4244e9ec7a630563d294b211342b46223d Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:53 +0300 Subject: IB/mlx5: Support padded 128B CQE feature In some benchmarks and some CPU architectures, writing the CQE on a full cache line size improves performance by saving memory access operations (read-modify-write) relative to partial cache line change. This patch lets the user to configure the device to pad the CQE up to 128B in case its content is less than 128B. Currently the driver supports only padding for a CQE size of 128B. 
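For illustration, a user-space sketch of how the padding could be requested. The request_cqe_padding() helper is hypothetical; it relies on the flags field and the enum bits introduced below in this patch.

#include <rdma/mlx5-abi.h>

/*
 * Hypothetical helper: ask for padded 128B CQEs. The kernel accepts the
 * flag only when cqe_size is 128 and the device set
 * MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD in its query response.
 */
static int request_cqe_padding(struct mlx5_ib_create_cq *ucmd,
			       __u32 query_resp_flags)
{
	if (ucmd->cqe_size != 128 ||
	    !(query_resp_flags & MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD))
		return -1;		/* padding unavailable; create the CQ unpadded */

	ucmd->flags |= MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD;
	return 0;
}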
Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++++++++++---- drivers/infiniband/hw/mlx5/main.c | 4 ++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ include/linux/mlx5/cq.h | 6 ++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 5 files changed, 42 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 51871f049c57..01b218a3c277 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -754,13 +754,13 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int err; ucmdlen = udata->inlen < sizeof(ucmd) ? - (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); + (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; if (ucmdlen == sizeof(ucmd) && - ucmd.reserved != 0) + (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD))) return -EINVAL; if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) @@ -830,6 +830,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, ilog2(ucmd.cqe_comp_res_format)); } + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { + if (*cqe_size != 128 || + !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, + "CQE padding is not supported for CQE size of %dB!\n", + *cqe_size); + goto err_cqb; + } + + cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; + } + return 0; err_cqb: @@ -989,7 +1002,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->cqe_size = cqe_size; cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); MLX5_SET(cqc, cqc, c_eqn, eqn); @@ -1339,7 +1355,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b9337562aa90..1edd41e3be1b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -826,9 +826,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), flags, uhw->outlen)) { resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + + if (MLX5_CAP_GEN(mdev, cqe_128_always)) + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e7deaa08535b..137f2116911f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -444,6 +444,10 @@ struct mlx5_shared_mr_info { struct ib_umem *umem; }; +enum mlx5_ib_cq_pr_flags { + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, +}; + struct mlx5_ib_cq { struct ib_cq ibcq; struct 
mlx5_core_cq mcq; @@ -466,6 +470,7 @@ struct mlx5_ib_cq { struct list_head wc_list; enum ib_cq_notify_flags notify_flags; struct work_struct notify_work; + u16 private_flags; /* Use mlx5_ib_cq_pr_flags */ }; struct mlx5_ib_wc { diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..cc718e245b1e 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -125,11 +125,13 @@ struct mlx5_cq_modify_params { enum { CQE_SIZE_64 = 0, CQE_SIZE_128 = 1, + CQE_SIZE_128_PAD = 2, }; -static inline int cqe_sz_to_mlx_sz(u8 size) +static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { - return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; + return padding_128_en ? CQE_SIZE_128_PAD : + size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; } static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a8fc1f0956d0..201a60f032dd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -206,6 +206,7 @@ struct mlx5_ib_striding_rq_caps { enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; struct mlx5_ib_query_device_resp { @@ -221,13 +222,17 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_striding_rq_caps striding_rq_caps; }; +enum mlx5_ib_create_cq_flags { + MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0, +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; __u8 cqe_comp_en; __u8 cqe_comp_res_format; - __u16 reserved; /* explicit padding (optional on i386) */ + __u16 flags; }; struct mlx5_ib_create_cq_resp { -- cgit 1.4.1 From f95ef6cbae61fa1dd563f5c0f6a0e5b512fda5ba Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:55 +0300 Subject: IB/mlx5: Add tunneling offloads support The device can support receive Stateless Offloads for the inner packet's fields only when the packet is processed by TIR which is enabled to support tunneling. Otherwise, the device treats the packet as an ordinary non-tunneling packet and receive offloads can be done only for the outer packet's field. In order to enable receive Stateless Offloading support for incoming tunneling traffic the TIR should be created with tunneled_offload_en. Tunneling offloads is supported only be raw ethernet QP. This patch includes: * New QP creation flag for tunneling offloads. * Reports device capabilities. 
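For illustration, a user-space sketch that checks the reported capabilities before setting the new QP creation flag. The enable_vxlan_offload() helper is hypothetical and the VXLAN choice is only an example; the capability word, enum bits and flags field are the ones added by this patch.

#include <stdbool.h>
#include <rdma/mlx5-abi.h>

/*
 * Hypothetical helper: enable tunnel offloads on an RSS Raw Packet QP
 * only if the device reported VXLAN support in tunnel_offloads_caps.
 */
static bool enable_vxlan_offload(struct mlx5_ib_create_qp_rss *ucmd,
				 const struct mlx5_ib_query_device_resp *resp)
{
	if (!(resp->tunnel_offloads_caps & MLX5_IB_TUNNELED_OFFLOADS_VXLAN))
		return false;	/* device cannot offload VXLAN inner fields */

	ucmd->flags |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
	return true;
}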
Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 39 +++++++++++++++++++++++++++++++----- include/uapi/rdma/mlx5-abi.h | 11 +++++++++- 4 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1edd41e3be1b..260f8be1d0ed 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -872,6 +872,20 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } + if (field_avail(typeof(resp), tunnel_offloads_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.tunnel_offloads_caps); + if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_VXLAN; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GENEVE; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GRE; + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 137f2116911f..0a328d6c6494 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -398,6 +398,7 @@ struct mlx5_ib_qp { struct list_head cq_send_list; u32 rate_limit; u32 underlay_qpn; + bool tunnel_offload_en; }; struct mlx5_ib_cq_buf { @@ -420,6 +421,8 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, MLX5_IB_QP_UNDERLAY = 1 << 10, + /* Reserved for PCI_WRITE_PAD = 1 << 11, */ + MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d209c684d729..53bb0d5cad3d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1204,8 +1204,16 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp); } +static bool tunnel_offload_supported(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) || + MLX5_CAP_ETH(dev, tunnel_stateless_gre) || + MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); +} + static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, u32 tdn) + struct mlx5_ib_rq *rq, u32 tdn, + bool tunnel_offload_en) { u32 *in; void *tirc; @@ -1221,6 +1229,8 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); MLX5_SET(tirc, tirc, transport_domain, tdn); + if (tunnel_offload_en) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); @@ -1271,7 +1281,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn); + err = create_raw_packet_qp_tir(dev, rq, tdn, + qp->tunnel_offload_en); if (err) goto err_destroy_rq; } @@ -1358,7 +1369,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (udata->outlen < min_resp_len) return -EINVAL; - required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); + required_cmd_sz = offsetof(typeof(ucmd), flags) + 
sizeof(ucmd.flags); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -1381,8 +1392,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { - mlx5_ib_dbg(dev, "invalid reserved\n"); + if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + mlx5_ib_dbg(dev, "invalid flags\n"); + return -EOPNOTSUPP; + } + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS && + !tunnel_offload_supported(dev->mdev)) { + mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n"); return -EOPNOTSUPP; } @@ -1405,6 +1422,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(tirc, tirc, transport_domain, tdn); hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { @@ -1604,6 +1625,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET || + !tunnel_offload_supported(mdev)) { + mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); + return -EOPNOTSUPP; + } + qp->tunnel_offload_en = true; + } if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 201a60f032dd..791655ec4aff 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -39,6 +39,7 @@ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, + MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2, }; enum { @@ -209,6 +210,12 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; +enum mlx5_ib_tunnel_offloads { + MLX5_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, + MLX5_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, + MLX5_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2 +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -220,6 +227,8 @@ struct mlx5_ib_query_device_resp { __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; + __u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */ + __u32 reserved; }; enum mlx5_ib_create_cq_flags { @@ -304,7 +313,7 @@ struct mlx5_ib_create_qp_rss { __u8 reserved[6]; __u8 rx_hash_key[128]; /* valid only for Toeplitz */ __u32 comp_mask; - __u32 reserved1; + __u32 flags; }; struct mlx5_ib_create_qp_resp { -- cgit 1.4.1 From 309fa3470fcaf96b295d2106ab17c00dbf7f3920 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:56 +0300 Subject: IB/mlx5: Add support for RSS on the inner packet Some user space application would like to do RSS on the inner packet fields instead on the outer. When MLX5_RX_HASH_INNER is set with one or more of the other hash fields, then the RSS will be done using the inner packet. 
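For illustration, a sketch of the command fields a user-space provider would set to hash on the inner headers. The request_inner_udp_rss() helper is hypothetical and the chosen hash fields are just an example; the flag and mask bits are the ones added by this patch.

#include <rdma/mlx5-abi.h>

/*
 * Hypothetical helper: hash on the inner IPv4/UDP headers. Per the check
 * added in create_rss_raw_qp_tir(), MLX5_RX_HASH_INNER is only valid
 * together with MLX5_QP_FLAG_TUNNEL_OFFLOADS.
 */
static void request_inner_udp_rss(struct mlx5_ib_create_qp_rss *ucmd)
{
	ucmd->flags |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
	ucmd->rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ;
	/* The (__u32) cast keeps the 1 << 31 enum value from sign-extending
	 * into the upper half of the __u64 mask.
	 */
	ucmd->rx_hash_fields_mask = (__u32)(MLX5_RX_HASH_INNER |
					    MLX5_RX_HASH_SRC_IPV4 |
					    MLX5_RX_HASH_DST_IPV4 |
					    MLX5_RX_HASH_SRC_PORT_UDP |
					    MLX5_RX_HASH_DST_PORT_UDP);
}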
Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 11 +++++++++++ include/uapi/rdma/mlx5-abi.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 53bb0d5cad3d..9e22dead259a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1403,6 +1403,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && + !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { + mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); + return -EOPNOTSUPP; + } + err = ib_copy_to_udata(udata, &resp, min_resp_len); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); @@ -1426,6 +1432,11 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 791655ec4aff..442b46b74a3a 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -303,7 +303,9 @@ enum mlx5_rx_hash_fields { MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, - MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7, + /* Save bits for future fields */ + MLX5_RX_HASH_INNER = 1 << 31 }; struct mlx5_ib_create_qp_rss { -- cgit 1.4.1 From f17966f19575eac9d5dea68b08f6292dd3d4d3db Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 2 Nov 2017 15:22:28 +0200 Subject: IB/mlx5: Fix ABI alignment to 64 bit Struct mlx5_ib_striding_rq_caps was not aligned to 64 bit as it should have been. Add a 32 bit reserved field. Fixes: b4f34597a5ce ('IB/mlx5: Expose multi-packet RQ capabilities') Signed-off-by: Noa Osherovich Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 442b46b74a3a..21722e3c2c70 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -202,6 +202,7 @@ struct mlx5_ib_striding_rq_caps { * supported_qpts |= 1 << IB_QPT_RAW_PACKET */ __u32 supported_qpts; + __u32 reserved; }; enum mlx5_ib_query_dev_resp_flags { -- cgit 1.4.1 From 8b10ba783c9d0c69d53e7d78ff7f2cd921f80729 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 6 Nov 2017 11:48:53 -0800 Subject: RDMA/vmw_pvrdma: Add shared receive queue support Add the required functions needed to support SRQs. Currently, kernel clients are not supported. SRQs will only be available in userspace. 
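For illustration, a sketch of the user-space side of this ABI. The fill_create_srq_cmd() helper and the exact meaning given to ring_size are assumptions for the example; the struct fields are the ones extended in this patch.

#include <rdma/vmw_pvrdma-abi.h>

/*
 * Hypothetical helper: describe the user-space SRQ ring to the kernel.
 * The driver pins the buffer with ib_umem_get() and copies the SRQ
 * handle back to user space as a __u32.
 */
static void fill_create_srq_cmd(struct pvrdma_create_srq *cmd,
				__u64 ring_addr, __u32 ring_size)
{
	cmd->buf_addr = ring_addr;	/* start of the SRQ ring in user memory */
	cmd->buf_size = ring_size;	/* new field added by this patch */
	cmd->reserved = 0;
}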
Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Reviewed-by: Nitish Bhat Signed-off-by: Bryan Tan Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/Makefile | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 25 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 54 ++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 59 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 55 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 319 ++++++++++++++++++++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 3 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 18 ++ include/uapi/rdma/vmw_pvrdma-abi.h | 2 + 9 files changed, 523 insertions(+), 14 deletions(-) create mode 100644 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile index 0194ed19f542..2f52e0a044a0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/Makefile +++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o -vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o +vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_srq.o pvrdma_verbs.o diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 984aa3484928..63bc2efc34eb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -162,6 +162,22 @@ struct pvrdma_ah { struct pvrdma_av av; }; +struct pvrdma_srq { + struct ib_srq ibsrq; + int offset; + spinlock_t lock; /* SRQ lock. 
*/ + int wqe_cnt; + int wqe_size; + int max_gs; + struct ib_umem *umem; + struct pvrdma_ring_state *ring; + struct pvrdma_page_dir pdir; + u32 srq_handle; + int npages; + refcount_t refcnt; + wait_queue_head_t wait; +}; + struct pvrdma_qp { struct ib_qp ibqp; u32 qp_handle; @@ -171,6 +187,7 @@ struct pvrdma_qp { struct ib_umem *rumem; struct ib_umem *sumem; struct pvrdma_page_dir pdir; + struct pvrdma_srq *srq; int npages; int npages_send; int npages_recv; @@ -210,6 +227,8 @@ struct pvrdma_dev { struct pvrdma_page_dir cq_pdir; struct pvrdma_cq **cq_tbl; spinlock_t cq_tbl_lock; + struct pvrdma_srq **srq_tbl; + spinlock_t srq_tbl_lock; struct pvrdma_qp **qp_tbl; spinlock_t qp_tbl_lock; struct pvrdma_uar_table uar_table; @@ -221,6 +240,7 @@ struct pvrdma_dev { bool ib_active; atomic_t num_qps; atomic_t num_cqs; + atomic_t num_srqs; atomic_t num_pds; atomic_t num_ahs; @@ -256,6 +276,11 @@ static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq) return container_of(ibcq, struct pvrdma_cq, ibcq); } +static inline struct pvrdma_srq *to_vsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct pvrdma_srq, ibsrq); +} + static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr) { return container_of(ibmr, struct pvrdma_user_mr, ibmr); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index df0a6b525021..6fd5a8f4e2f6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -339,6 +339,10 @@ enum { PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_CREATE_SRQ, + PVRDMA_CMD_MODIFY_SRQ, + PVRDMA_CMD_QUERY_SRQ, + PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_MAX, }; @@ -361,6 +365,10 @@ enum { PVRDMA_CMD_DESTROY_UC_RESP_NOOP, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_CREATE_SRQ_RESP, + PVRDMA_CMD_MODIFY_SRQ_RESP, + PVRDMA_CMD_QUERY_SRQ_RESP, + PVRDMA_CMD_DESTROY_SRQ_RESP, PVRDMA_CMD_MAX_RESP, }; @@ -495,6 +503,46 @@ struct pvrdma_cmd_destroy_cq { u8 reserved[4]; }; +struct pvrdma_cmd_create_srq { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 pd_handle; + u32 nchunks; + struct pvrdma_srq_attr attrs; + u8 srq_type; + u8 reserved[7]; +}; + +struct pvrdma_cmd_create_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 srqn; + u8 reserved[4]; +}; + +struct pvrdma_cmd_modify_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u32 attr_mask; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_query_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_query_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_destroy_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + struct pvrdma_cmd_create_qp { struct pvrdma_cmd_hdr hdr; u64 pdir_dma; @@ -594,6 +642,10 @@ union pvrdma_cmd_req { struct pvrdma_cmd_destroy_qp destroy_qp; struct pvrdma_cmd_create_bind create_bind; struct pvrdma_cmd_destroy_bind destroy_bind; + struct pvrdma_cmd_create_srq create_srq; + struct pvrdma_cmd_modify_srq modify_srq; + struct pvrdma_cmd_query_srq query_srq; + struct pvrdma_cmd_destroy_srq destroy_srq; }; union pvrdma_cmd_resp { @@ -608,6 +660,8 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_qp_resp create_qp_resp; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; + struct pvrdma_cmd_create_srq_resp create_srq_resp; + struct pvrdma_cmd_query_srq_resp 
query_srq_resp; }; #endif /* __PVRDMA_DEV_API_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6ce709a67959..1f4e18717a00 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -118,6 +118,7 @@ static int pvrdma_init_device(struct pvrdma_dev *dev) spin_lock_init(&dev->cmd_lock); sema_init(&dev->cmd_sema, 1); atomic_set(&dev->num_qps, 0); + atomic_set(&dev->num_srqs, 0); atomic_set(&dev->num_cqs, 0); atomic_set(&dev->num_pds, 0); atomic_set(&dev->num_ahs, 0); @@ -254,9 +255,32 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) goto err_cq_free; spin_lock_init(&dev->qp_tbl_lock); + /* Check if SRQ is supported by backend */ + if (dev->dsr->caps.max_srq) { + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + + dev->ib_dev.create_srq = pvrdma_create_srq; + dev->ib_dev.modify_srq = pvrdma_modify_srq; + dev->ib_dev.query_srq = pvrdma_query_srq; + dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; + + dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, + sizeof(struct pvrdma_srq *), + GFP_KERNEL); + if (!dev->srq_tbl) + goto err_qp_free; + } + spin_lock_init(&dev->srq_tbl_lock); + ret = ib_register_device(&dev->ib_dev, NULL); if (ret) - goto err_qp_free; + goto err_srq_free; for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { ret = device_create_file(&dev->ib_dev.dev, @@ -271,6 +295,8 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) err_class: ib_unregister_device(&dev->ib_dev); +err_srq_free: + kfree(dev->srq_tbl); err_qp_free: kfree(dev->qp_tbl); err_cq_free: @@ -353,6 +379,35 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) } } +static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) +{ + struct pvrdma_srq *srq; + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + if (dev->srq_tbl) + srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; + else + srq = NULL; + if (srq) + refcount_inc(&srq->refcnt); + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + if (srq && srq->ibsrq.event_handler) { + struct ib_srq *ibsrq = &srq->ibsrq; + struct ib_event e; + + e.device = ibsrq->device; + e.element.srq = ibsrq; + e.event = type; /* 1:1 mapping for now. 
*/ + ibsrq->event_handler(&e, ibsrq->srq_context); + } + if (srq) { + if (refcount_dec_and_test(&srq->refcnt)) + wake_up(&srq->wait); + } +} + static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, enum ib_event_type event) { @@ -423,6 +478,7 @@ static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) case PVRDMA_EVENT_SRQ_ERR: case PVRDMA_EVENT_SRQ_LIMIT_REACHED: + pvrdma_srq_event(dev, eqe->info, eqe->type); break; case PVRDMA_EVENT_PORT_ACTIVE: @@ -1059,6 +1115,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) iounmap(dev->regs); kfree(dev->sgid_tbl); kfree(dev->cq_tbl); + kfree(dev->srq_tbl); kfree(dev->qp_tbl); pvrdma_uar_table_cleanup(dev); iounmap(dev->driver_uar.map); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index ed34d5a581fa..10420a18d02f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -198,6 +198,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct pvrdma_create_qp ucmd; unsigned long flags; int ret; + bool is_srq = !!init_attr->srq; if (init_attr->create_flags) { dev_warn(&dev->pdev->dev, @@ -214,6 +215,12 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } + if (is_srq && !dev->dsr->caps.max_srq) { + dev_warn(&dev->pdev->dev, + "SRQs not supported by device\n"); + return ERR_PTR(-EINVAL); + } + if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp)) return ERR_PTR(-ENOMEM); @@ -252,26 +259,36 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - /* set qp->sq.wqe_cnt, shift, buf_size.. */ - qp->rumem = ib_umem_get(pd->uobject->context, - ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); - if (IS_ERR(qp->rumem)) { - ret = PTR_ERR(qp->rumem); - goto err_qp; + if (!is_srq) { + /* set qp->sq.wqe_cnt, shift, buf_size.. */ + qp->rumem = ib_umem_get(pd->uobject->context, + ucmd.rbuf_addr, + ucmd.rbuf_size, 0, 0); + if (IS_ERR(qp->rumem)) { + ret = PTR_ERR(qp->rumem); + goto err_qp; + } + qp->srq = NULL; + } else { + qp->rumem = NULL; + qp->srq = to_vsrq(init_attr->srq); } qp->sumem = ib_umem_get(pd->uobject->context, ucmd.sbuf_addr, ucmd.sbuf_size, 0, 0); if (IS_ERR(qp->sumem)) { - ib_umem_release(qp->rumem); + if (!is_srq) + ib_umem_release(qp->rumem); ret = PTR_ERR(qp->sumem); goto err_qp; } qp->npages_send = ib_umem_page_count(qp->sumem); - qp->npages_recv = ib_umem_page_count(qp->rumem); + if (!is_srq) + qp->npages_recv = ib_umem_page_count(qp->rumem); + else + qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; } else { qp->is_kernel = true; @@ -312,12 +329,14 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!qp->is_kernel) { pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0); - pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem, - qp->npages_send); + if (!is_srq) + pvrdma_page_dir_insert_umem(&qp->pdir, + qp->rumem, + qp->npages_send); } else { /* Ring state is always the first page. */ qp->sq.ring = qp->pdir.pages[0]; - qp->rq.ring = &qp->sq.ring[1]; + qp->rq.ring = is_srq ? 
NULL : &qp->sq.ring[1]; } break; default: @@ -333,6 +352,10 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->pd_handle = to_vpd(pd)->pd_handle; cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle; cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle; + if (is_srq) + cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle; + else + cmd->srq_handle = 0; cmd->max_send_wr = init_attr->cap.max_send_wr; cmd->max_recv_wr = init_attr->cap.max_recv_wr; cmd->max_send_sge = init_attr->cap.max_send_sge; @@ -340,6 +363,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->max_inline_data = init_attr->cap.max_inline_data; cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); + cmd->is_srq = is_srq; + cmd->lkey = 0; cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; @@ -815,6 +840,12 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } + if (qp->srq) { + dev_warn(&dev->pdev->dev, "QP associated with SRQ\n"); + *bad_wr = wr; + return -EINVAL; + } + spin_lock_irqsave(&qp->rq.lock, flags); while (wr) { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c new file mode 100644 index 000000000000..826ccb864596 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2016-2017 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "pvrdma.h" + +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + /* No support for kernel clients. */ + return -EOPNOTSUPP; +} + +/** + * pvrdma_query_srq - query shared receive queue + * @ibsrq: the shared receive queue to query + * @srq_attr: attributes to query and return to client + * + * @return: 0 for success, otherwise returns an errno. + */ +int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + struct pvrdma_srq *srq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_srq *cmd = &req.query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp.query_srq_resp; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_SRQ; + cmd->srq_handle = srq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not query shared receive queue, error: %d\n", + ret); + return -EINVAL; + } + + srq_attr->srq_limit = resp->attrs.srq_limit; + srq_attr->max_wr = resp->attrs.max_wr; + srq_attr->max_sge = resp->attrs.max_sge; + + return 0; +} + +/** + * pvrdma_create_srq - create shared receive queue + * @pd: protection domain + * @init_attr: shared receive queue attributes + * @udata: user data + * + * @return: the ib_srq pointer on success, otherwise returns an errno. + */ +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct pvrdma_srq *srq = NULL; + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_srq *cmd = &req.create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; + struct pvrdma_create_srq ucmd; + unsigned long flags; + int ret; + + if (!(pd->uobject && udata)) { + /* No support for kernel clients. 
*/ + dev_warn(&dev->pdev->dev, + "no shared receive queue support for kernel client\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->srq_type != IB_SRQT_BASIC) { + dev_warn(&dev->pdev->dev, + "shared receive queue type %d not supported\n", + init_attr->srq_type); + return ERR_PTR(-EINVAL); + } + + if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || + init_attr->attr.max_sge > dev->dsr->caps.max_srq_sge) { + dev_warn(&dev->pdev->dev, + "shared receive queue size invalid\n"); + return ERR_PTR(-EINVAL); + } + + if (!atomic_add_unless(&dev->num_srqs, 1, dev->dsr->caps.max_srq)) + return ERR_PTR(-ENOMEM); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = -ENOMEM; + goto err_srq; + } + + spin_lock_init(&srq->lock); + refcount_set(&srq->refcnt, 1); + init_waitqueue_head(&srq->wait); + + dev_dbg(&dev->pdev->dev, + "create shared receive queue from user space\n"); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, + ucmd.buf_addr, + ucmd.buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + srq->npages = ib_umem_page_count(srq->umem); + + if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in shared receive queue\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &srq->pdir, srq->npages, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + pvrdma_page_dir_insert_umem(&srq->pdir, srq->umem, 0); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_SRQ; + cmd->srq_type = init_attr->srq_type; + cmd->nchunks = srq->npages; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->attrs.max_wr = init_attr->attr.max_wr; + cmd->attrs.max_sge = init_attr->attr.max_sge; + cmd->attrs.srq_limit = init_attr->attr.srq_limit; + cmd->pdir_dma = srq->pdir.dir_dma; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create shared receive queue, error: %d\n", + ret); + goto err_page_dir; + } + + srq->srq_handle = resp->srqn; + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + /* Copy udata back. */ + if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); + pvrdma_destroy_srq(&srq->ibsrq); + return ERR_PTR(-EINVAL); + } + + return &srq->ibsrq; + +err_page_dir: + pvrdma_page_dir_cleanup(dev, &srq->pdir); +err_umem: + ib_umem_release(srq->umem); +err_srq: + kfree(srq); + atomic_dec(&dev->num_srqs); + + return ERR_PTR(ret); +} + +static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle] = NULL; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + refcount_dec(&srq->refcnt); + wait_event(srq->wait, !refcount_read(&srq->refcnt)); + + /* There is no support for kernel clients, so this is safe. */ + ib_umem_release(srq->umem); + + pvrdma_page_dir_cleanup(dev, &srq->pdir); + + kfree(srq); + + atomic_dec(&dev->num_srqs); +} + +/** + * pvrdma_destroy_srq - destroy shared receive queue + * @srq: the shared receive queue to destroy + * + * @return: 0 for success. 
+ */ +int pvrdma_destroy_srq(struct ib_srq *srq) +{ + struct pvrdma_srq *vsrq = to_vsrq(srq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_srq *cmd = &req.destroy_srq; + struct pvrdma_dev *dev = to_vdev(srq->device); + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "destroy shared receive queue failed, error: %d\n", + ret); + + pvrdma_free_srq(dev, vsrq); + + return 0; +} + +/** + * pvrdma_modify_srq - modify shared receive queue attributes + * @ibsrq: the shared receive queue to modify + * @attr: the shared receive queue's new attributes + * @attr_mask: attributes mask + * @udata: user data + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct pvrdma_srq *vsrq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_modify_srq *cmd = &req.modify_srq; + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + int ret; + + /* Only support SRQ limit. */ + if (!(attr_mask & IB_SRQ_LIMIT)) + return -EINVAL; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_MODIFY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + cmd->attrs.srq_limit = attr->srq_limit; + cmd->attr_mask = attr_mask; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not modify shared receive queue, error: %d\n", + ret); + + return -EINVAL; + } + + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 48776f5ffb0e..16b96616ef7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -85,6 +85,9 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); + props->max_srq = dev->dsr->caps.max_srq; + props->max_srq_wr = dev->dsr->caps.max_srq_wr; + props->max_srq_sge = dev->dsr->caps.max_srq_sge; props->max_cq = dev->dsr->caps.max_cq; props->max_cqe = dev->dsr->caps.max_cqe; props->max_mr = dev->dsr->caps.max_mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 002a9b066e70..b7b25728a7e5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -324,6 +324,13 @@ enum pvrdma_mw_type { PVRDMA_MW_TYPE_2 = 2, }; +struct pvrdma_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; + u32 reserved; +}; + struct pvrdma_qp_attr { enum pvrdma_qp_state qp_state; enum pvrdma_qp_state cur_qp_state; @@ -420,6 +427,17 @@ int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int pvrdma_destroy_ah(struct ib_ah *ah); + +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int pvrdma_destroy_srq(struct ib_srq *srq); +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct 
ib_recv_wr **bad_wr); + struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c6569b0032ec..846c6f4859db 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -158,6 +158,8 @@ struct pvrdma_resize_cq { struct pvrdma_create_srq { __u64 buf_addr; + __u32 buf_size; + __u32 reserved; }; struct pvrdma_create_srq_resp { -- cgit 1.4.1 From 869ddcf8b351ace5bf8860f3cd6265dccb382426 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:13 +0200 Subject: IB/uverbs: Allow CQ moderation with modify CQ Uverbs support in modify_cq for CQ moderation only. Gives ability to change cq_max_count and cq_period. CQ moderation enhance performance by moderating the number of CQEs needed to create an event instead of application having to suffer from event per-CQE. To achieve CQ moderation the application needs to set cq_max_count and cq_period. cq_max_count - defines the number of CQEs needed to create an event. cq_period - defines the timeout (micro seconds) between last event and a new one that will occur even if cq_max_count was not satisfied Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 42 +++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 15 ++++++++++++- 5 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ee2739ae4305..deccefb71a6b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -306,5 +306,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(modify_qp); +IB_UVERBS_DECLARE_EX_CMD(modify_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ca36843ef38..3c2673cd4090 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3856,3 +3856,45 @@ end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; } + +int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_cq *cq; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), reserved) + + sizeof(cmd.reserved); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + /* sanity checks */ + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask || cmd.reserved) + return -EINVAL; + + if (cmd.attr_mask > IB_CQ_MODERATE) + return -EOPNOTSUPP; + + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; + + ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + + uobj_put_obj_read(cq); + + return ret; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b5febfd84ee5..381fd9c096ae 100644 --- 
a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -128,6 +128,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, + [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b671982bbb3..8e0d3780ce4e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -311,6 +311,10 @@ struct ib_cq_init_attr { u32 flags; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATE = 1 << 0, +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d4e0b53bfc75..cfa09d6095d6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -100,7 +100,8 @@ enum { IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_MODIFY_CQ }; /* @@ -1150,6 +1151,18 @@ struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 ind_tbl_handle; }; +struct ib_uverbs_cq_moderation { + __u16 cq_count; + __u16 cq_period; +}; + +struct ib_uverbs_ex_modify_cq { + __u32 cq_handle; + __u32 attr_mask; + struct ib_uverbs_cq_moderation attr; + __u32 reserved; +}; + #define IB_DEVICE_NAME_MAX 64 #endif /* IB_USER_VERBS_H */ -- cgit 1.4.1 From 18bd90729237dc6ddbad01bc9618148224f03590 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:16 +0200 Subject: IB/uverbs: Add CQ moderation capability to query_device The query_device function can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation. cq_max_count is a 16 bits number that determines the number of CQEs to accumulate before generating an event. cq_period is a 16 bits number that determines the timeout in micro seconds from the last event generated, upon which a new event will be generated even if cq_max_count was not reached. 
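For illustration, a sketch of how user space could clamp a requested moderation setting to these limits once it has parsed the extended query_device response. The clamp_cq_moderation() helper is hypothetical; the struct and fields are the ones added below.

#include <rdma/ib_user_verbs.h>

/*
 * Hypothetical helper: keep a requested moderation setting within the
 * maxima reported in ib_uverbs_ex_query_device_resp.cq_moderation_caps.
 */
static void clamp_cq_moderation(const struct ib_uverbs_ex_query_device_resp *resp,
				__u16 *cq_count, __u16 *cq_period)
{
	if (*cq_count > resp->cq_moderation_caps.max_cq_moderation_count)
		*cq_count = resp->cq_moderation_caps.max_cq_moderation_count;
	if (*cq_period > resp->cq_moderation_caps.max_cq_moderation_period)
		*cq_period = resp->cq_moderation_caps.max_cq_moderation_period;
}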
Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++++++ include/rdma/ib_verbs.h | 6 ++++++ include/uapi/rdma/ib_user_verbs.h | 7 +++++++ 3 files changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3c2673cd4090..53143e4b1c50 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3852,6 +3852,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; resp.response_length += sizeof(resp.tm_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) + goto end; + + resp.cq_moderation_caps.max_cq_moderation_count = + attr.cq_caps.max_cq_moderation_count; + resp.cq_moderation_caps.max_cq_moderation_period = + attr.cq_caps.max_cq_moderation_period; + resp.response_length += sizeof(resp.cq_moderation_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e0d3780ce4e..0b484c023fa9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -315,6 +315,11 @@ enum ib_cq_attr_mask { IB_CQ_MODERATE = 1 << 0, }; +struct ib_cq_caps { + u16 max_cq_moderation_count; + u16 max_cq_moderation_period; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -365,6 +370,7 @@ struct ib_device_attr { u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; + struct ib_cq_caps cq_caps; }; enum ib_mtu { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index cfa09d6095d6..5186fb12629b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -125,6 +125,12 @@ struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; +struct ib_uverbs_cq_moderation_caps { + __u16 max_cq_moderation_count; + __u16 max_cq_moderation_period; + __u32 reserved; +}; + /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the @@ -263,6 +269,7 @@ struct ib_uverbs_ex_query_device_resp { __u32 max_wq_type_rq; __u32 raw_packet_caps; struct ib_uverbs_tm_caps tm_caps; + struct ib_uverbs_cq_moderation_caps cq_moderation_caps; }; struct ib_uverbs_query_port { -- cgit 1.4.1
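To close the series, an illustrative sketch of the MODIFY_CQ payload that pairs with the capabilities above. The fill_modify_cq_cmd() helper is hypothetical, and the attr_mask value of 1 is assumed to correspond to the kernel-side IB_CQ_MODERATE bit added earlier in this series; the uapi struct and its fields are taken from these patches.

#include <rdma/ib_user_verbs.h>

/*
 * Hypothetical helper: request CQ moderation through the new MODIFY_CQ
 * extended command.
 */
static void fill_modify_cq_cmd(struct ib_uverbs_ex_modify_cq *cmd,
			       __u32 cq_handle, __u16 count, __u16 period_usec)
{
	cmd->cq_handle = cq_handle;
	cmd->attr_mask = 1;			/* assumed to match IB_CQ_MODERATE (1 << 0) */
	cmd->attr.cq_count = count;		/* CQEs to accumulate before an event */
	cmd->attr.cq_period = period_usec;	/* or fire after this many microseconds */
	cmd->reserved = 0;
}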