From e228a5d05e9ee25878e9a40de96e7ceb579d4893 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 8 Apr 2020 03:21:01 -0700 Subject: net/rds: Replace struct rds_mr's r_refcount with struct kref And removed rds_mr_put(). Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/message.c | 6 +++--- net/rds/rdma.c | 28 +++++++++++++++------------- net/rds/rds.h | 9 ++------- 3 files changed, 20 insertions(+), 23 deletions(-) (limited to 'net/rds') diff --git a/net/rds/message.c b/net/rds/message.c index 50f13f1d4ae0..bbecb8cb873e 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -162,12 +162,12 @@ static void rds_message_purge(struct rds_message *rm) if (rm->rdma.op_active) rds_rdma_free_op(&rm->rdma); if (rm->rdma.op_rdma_mr) - rds_mr_put(rm->rdma.op_rdma_mr); + kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final); if (rm->atomic.op_active) rds_atomic_free_op(&rm->atomic); if (rm->atomic.op_rdma_mr) - rds_mr_put(rm->atomic.op_rdma_mr); + kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final); } void rds_message_put(struct rds_message *rm) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 585e6b3b69ce..f828b66978e4 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, if (insert) { rb_link_node(&insert->r_rb_node, parent, p); rb_insert_color(&insert->r_rb_node, root); - refcount_inc(&insert->r_refcount); + kref_get(&insert->r_kref); } return NULL; } @@ -99,7 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr) unsigned long flags; rdsdebug("RDS: destroy mr key is %x refcnt %u\n", - mr->r_key, refcount_read(&mr->r_refcount)); + mr->r_key, kref_read(&mr->r_kref)); if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) return; @@ -115,8 +115,10 @@ static void rds_destroy_mr(struct rds_mr *mr) mr->r_trans->free_mr(trans_private, mr->r_invalidate); } -void __rds_put_mr_final(struct rds_mr *mr) +void __rds_put_mr_final(struct kref *kref) { + struct rds_mr *mr = container_of(kref, struct rds_mr, r_kref); + rds_destroy_mr(mr); kfree(mr); } @@ -141,7 +143,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs) RB_CLEAR_NODE(&mr->r_rb_node); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); spin_lock_irqsave(&rs->rs_rdma_lock, flags); } spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); @@ -242,7 +244,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, goto out; } - refcount_set(&mr->r_refcount, 1); + kref_init(&mr->r_kref); RB_CLEAR_NODE(&mr->r_rb_node); mr->r_trans = rs->rs_transport; mr->r_sock = rs; @@ -343,7 +345,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, rdsdebug("RDS: get_mr key is %x\n", mr->r_key); if (mr_ret) { - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); *mr_ret = mr; } @@ -351,7 +353,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, out: kfree(pages); if (mr) - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return ret; } @@ -440,7 +442,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) * someone else drops their ref. */ rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return 0; } @@ -481,7 +483,7 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) * trigger an async flush. */ if (zot_me) { rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); } } @@ -490,7 +492,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) unsigned int i; if (ro->op_odp_mr) { - rds_mr_put(ro->op_odp_mr); + kref_put(&ro->op_odp_mr->r_kref, __rds_put_mr_final); } else { for (i = 0; i < ro->op_nents; i++) { struct page *page = sg_page(&ro->op_sg[i]); @@ -730,7 +732,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, goto out_pages; } RB_CLEAR_NODE(&local_odp_mr->r_rb_node); - refcount_set(&local_odp_mr->r_refcount, 1); + kref_init(&local_odp_mr->r_kref); local_odp_mr->r_trans = rs->rs_transport; local_odp_mr->r_sock = rs; local_odp_mr->r_trans_private = @@ -827,7 +829,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, if (!mr) err = -EINVAL; /* invalid r_key */ else - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); if (mr) { diff --git a/net/rds/rds.h b/net/rds/rds.h index e4a603523083..3cda01cfaa56 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -291,7 +291,7 @@ struct rds_incoming { struct rds_mr { struct rb_node r_rb_node; - refcount_t r_refcount; + struct kref r_kref; u32 r_key; /* A copy of the creation flags */ @@ -946,12 +946,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -void __rds_put_mr_final(struct rds_mr *mr); -static inline void rds_mr_put(struct rds_mr *mr) -{ - if (refcount_dec_and_test(&mr->r_refcount)) - __rds_put_mr_final(mr); -} +void __rds_put_mr_final(struct kref *kref); static inline bool rds_destroy_pending(struct rds_connection *conn) { -- cgit 1.4.1 From 2fabef4f65b46b261434a27ecdce291b63de8522 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 8 Apr 2020 03:21:02 -0700 Subject: net/rds: Fix MR reference counting problem In rds_free_mr(), it calls rds_destroy_mr(mr) directly. But this defeats the purpose of reference counting and makes MR free handling impossible. It means that holding a reference does not guarantee that it is safe to access some fields. For example, In rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then calls mr->r_trans->sync_mr(). But if rds_free_mr() (and rds_destroy_mr()) is called in between (there is no lock preventing this to happen), r_trans_private is set to NULL, causing a panic. Similar issue is in rds_rdma_unuse(). Reported-by: zerons Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rdma.c | 25 ++++++++++++------------- net/rds/rds.h | 8 -------- 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'net/rds') diff --git a/net/rds/rdma.c b/net/rds/rdma.c index f828b66978e4..113e442101ce 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -101,9 +101,6 @@ static void rds_destroy_mr(struct rds_mr *mr) rdsdebug("RDS: destroy mr key is %x refcnt %u\n", mr->r_key, kref_read(&mr->r_kref)); - if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) - return; - spin_lock_irqsave(&rs->rs_rdma_lock, flags); if (!RB_EMPTY_NODE(&mr->r_rb_node)) rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); @@ -142,7 +139,6 @@ void rds_rdma_drop_keys(struct rds_sock *rs) rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); - rds_destroy_mr(mr); kref_put(&mr->r_kref, __rds_put_mr_final); spin_lock_irqsave(&rs->rs_rdma_lock, flags); } @@ -436,12 +432,6 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) if (!mr) return -EINVAL; - /* - * call rds_destroy_mr() ourselves so that we're sure it's done by the time - * we return. If we let rds_mr_put() do it it might not happen until - * someone else drops their ref. - */ - rds_destroy_mr(mr); kref_put(&mr->r_kref, __rds_put_mr_final); return 0; } @@ -466,6 +456,14 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) return; } + /* Get a reference so that the MR won't go away before calling + * sync_mr() below. + */ + kref_get(&mr->r_kref); + + /* If it is going to be freed, remove it from the tree now so + * that no other thread can find it and free it. + */ if (mr->r_use_once || force) { rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); @@ -479,12 +477,13 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) if (mr->r_trans->sync_mr) mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); + /* Release the reference held above. */ + kref_put(&mr->r_kref, __rds_put_mr_final); + /* If the MR was marked as invalidate, this will * trigger an async flush. */ - if (zot_me) { - rds_destroy_mr(mr); + if (zot_me) kref_put(&mr->r_kref, __rds_put_mr_final); - } } void rds_rdma_free_op(struct rm_rdma_op *ro) diff --git a/net/rds/rds.h b/net/rds/rds.h index 3cda01cfaa56..8e18cd2aec51 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -299,19 +299,11 @@ struct rds_mr { unsigned int r_invalidate:1; unsigned int r_write:1; - /* This is for RDS_MR_DEAD. - * It would be nice & consistent to make this part of the above - * bit field here, but we need to use test_and_set_bit. - */ - unsigned long r_state; struct rds_sock *r_sock; /* back pointer to the socket that owns us */ struct rds_transport *r_trans; void *r_trans_private; }; -/* Flags for mr->r_state */ -#define RDS_MR_DEAD 0 - static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) { return r_key | (((u64) offset) << 32); -- cgit 1.4.1 From 7dba92037baf3fa00b4880a31fd532542264994c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 20:02:07 -0300 Subject: net/rds: Use ERR_PTR for rds_message_alloc_sgs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning the error code via a 'int *ret' when the function returns a pointer is very un-kernely and causes gcc 10's static analysis to choke: net/rds/message.c: In function ‘rds_message_map_pages’: net/rds/message.c:358:10: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] 358 | return ERR_PTR(ret); Use a typical ERR_PTR return instead. Signed-off-by: Jason Gunthorpe Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/message.c | 19 ++++++------------- net/rds/rdma.c | 12 ++++++++---- net/rds/rds.h | 3 +-- net/rds/send.c | 6 ++++-- 4 files changed, 19 insertions(+), 21 deletions(-) (limited to 'net/rds') diff --git a/net/rds/message.c b/net/rds/message.c index bbecb8cb873e..071a261fdaab 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -308,26 +308,20 @@ out: /* * RDS ops use this to grab SG entries from the rm's sg pool. */ -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret) +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) { struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; struct scatterlist *sg_ret; - if (WARN_ON(!ret)) - return NULL; - if (nents <= 0) { pr_warn("rds: alloc sgs failed! nents <= 0\n"); - *ret = -EINVAL; - return NULL; + return ERR_PTR(-EINVAL); } if (rm->m_used_sgs + nents > rm->m_total_sgs) { pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n", rm->m_total_sgs, rm->m_used_sgs, nents); - *ret = -ENOMEM; - return NULL; + return ERR_PTR(-ENOMEM); } sg_ret = &sg_first[rm->m_used_sgs]; @@ -343,7 +337,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in unsigned int i; int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); - int ret; rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); if (!rm) @@ -352,10 +345,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) { + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { rds_message_put(rm); - return ERR_PTR(ret); + return ERR_CAST(rm->data.op_sg); } for (i = 0; i < rm->data.op_nents; ++i) { diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 113e442101ce..a7ae11846cd7 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -665,9 +665,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_odp_mr = NULL; WARN_ON(!nr_pages); - op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret); - if (!op->op_sg) + op->op_sg = rds_message_alloc_sgs(rm, nr_pages); + if (IS_ERR(op->op_sg)) { + ret = PTR_ERR(op->op_sg); goto out_pages; + } if (op->op_notify || op->op_recverr) { /* We allocate an uninitialized notifier here, because @@ -906,9 +908,11 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; - rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret); - if (!rm->atomic.op_sg) + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + if (IS_ERR(rm->atomic.op_sg)) { + ret = PTR_ERR(rm->atomic.op_sg); goto err; + } /* verify 8 byte-aligned */ if (args->local_addr & 0x7) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 8e18cd2aec51..6019b0c004a9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -844,8 +844,7 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret); +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); diff --git a/net/rds/send.c b/net/rds/send.c index 82dcd8b84fe7..68e2bdb08fd0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1274,9 +1274,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { + ret = PTR_ERR(rm->data.op_sg); goto out; + } ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; -- cgit 1.4.1