From 7d267278a9ece963d77eefec61630223fce08c6c Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 20 Nov 2015 22:07:23 +0000 Subject: unix: avoid use-after-free in ep_remove_wait_queue Rainer Weikusat writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed despite "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via wake function. The connection to the peer wait queue is again dissolved if either a wake up is about to be relayed or the client socket reconnects or a dead peer is detected or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron Signed-off-by: David S. Miller --- net/unix/af_unix.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 164 insertions(+), 19 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..4e95bdf973d9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,6 +326,118 @@ found: return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && @@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1033,6 +1148,8 @@ restart: if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1472,6 +1589,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1550,12 +1668,14 @@ restart: goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1563,10 +1683,14 @@ restart: unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1592,21 +1716,38 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1620,6 +1761,8 @@ restart: return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -2476,14 +2619,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) -- cgit 1.4.1 From 9490f886b192964796285907d777ff00fba1fa0f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 26 Nov 2015 12:08:18 +0100 Subject: af-unix: passcred support for sendpage sendpage did not care about credentials at all. This could lead to situations in which because of fd passing between processes we could append data to skbs with different scm data. It is illegal to splice those skbs together. Instead we have to allocate a new skb and if requested fill out the scm details. Fixes: 869e7c62486ec ("net: af_unix: implement stream sendpage support") Reported-by: Al Viro Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/unix/af_unix.c | 79 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 15 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4e95bdf973d9..6ced74690eee 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1551,6 +1551,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } +static bool unix_passcred_enabled(const struct socket *sock, + const struct sock *other) +{ + return test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags); +} + /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1561,14 +1569,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } +static int maybe_init_creds(struct scm_cookie *scm, + struct socket *socket, + const struct sock *other) +{ + int err; + struct msghdr msg = { .msg_controllen = 0 }; + + err = scm_send(socket, &msg, scm, false); + if (err) + return err; + + if (unix_passcred_enabled(socket, other)) { + scm->pid = get_pid(task_tgid(current)); + current_uid_gid(&scm->creds.uid, &scm->creds.gid); + } + return err; +} + +static bool unix_skb_scm_eq(struct sk_buff *skb, + struct scm_cookie *scm) +{ + const struct unix_skb_parms *u = &UNIXCB(skb); + + return u->pid == scm->pid && + uid_eq(u->uid, scm->creds.uid) && + gid_eq(u->gid, scm->creds.gid) && + unix_secdata_eq(scm, skb); +} + /* * Send AF_UNIX data. */ @@ -1884,8 +1919,10 @@ out_err: static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, int offset, size_t size, int flags) { - int err = 0; - bool send_sigpipe = true; + int err; + bool send_sigpipe = false; + bool init_scm = true; + struct scm_cookie scm; struct sock *other, *sk = socket->sk; struct sk_buff *skb, *newskb = NULL, *tail = NULL; @@ -1903,7 +1940,7 @@ alloc_skb: newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) - return err; + goto err; } /* we must acquire readlock as we modify already present @@ -1912,12 +1949,12 @@ alloc_skb: err = mutex_lock_interruptible(&unix_sk(other)->readlock); if (err) { err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; - send_sigpipe = false; goto err; } if (sk->sk_shutdown & SEND_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_unlock; } @@ -1926,17 +1963,27 @@ alloc_skb: if (sock_flag(other, SOCK_DEAD) || other->sk_shutdown & RCV_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_state_unlock; } + if (init_scm) { + err = maybe_init_creds(&scm, socket, other); + if (err) + goto err_state_unlock; + init_scm = false; + } + skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; - } else if (!skb) { - if (newskb) + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { + if (newskb) { skb = newskb; - else + } else { + tail = skb; goto alloc_skb; + } } else if (newskb) { /* this is fast path, we don't necessarily need to * call to kfree_skb even though with newskb == NULL @@ -1957,6 +2004,9 @@ alloc_skb: atomic_add(size, &sk->sk_wmem_alloc); if (newskb) { + err = unix_scm_to_skb(&scm, skb, false); + if (err) + goto err_state_unlock; spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); spin_unlock(&other->sk_receive_queue.lock); @@ -1966,7 +2016,7 @@ alloc_skb: mutex_unlock(&unix_sk(other)->readlock); other->sk_data_ready(other); - + scm_destroy(&scm); return size; err_state_unlock: @@ -1977,6 +2027,8 @@ err: kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); + if (!init_scm) + scm_destroy(&scm); return err; } @@ -2280,10 +2332,7 @@ unlock: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != scm.pid) || - !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || - !unix_secdata_eq(&scm, skb)) + if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ -- cgit 1.4.1 From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:10 -0800 Subject: net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA This patch is a cleanup to make following patch easier to review. Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA from (struct socket)->flags to a (struct socket_wq)->flags to benefit from RCU protection in sock_wake_async() To ease backports, we rename both constants. Two new helpers, sk_set_bit(int nr, struct sock *sk) and sk_clear_bit(int net, struct sock *sk) are added so that following patch can change their implementation. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- crypto/algif_aead.c | 4 ++-- crypto/algif_skcipher.c | 6 +++--- drivers/net/macvtap.c | 4 ++-- drivers/net/tun.c | 4 ++-- fs/dlm/lowcomms.c | 4 ++-- include/linux/net.h | 6 +++--- include/net/sock.h | 10 ++++++++++ net/bluetooth/af_bluetooth.c | 6 +++--- net/caif/caif_socket.c | 4 ++-- net/core/datagram.c | 2 +- net/core/sock.c | 8 ++++---- net/core/stream.c | 4 ++-- net/dccp/proto.c | 3 +-- net/decnet/af_decnet.c | 8 ++++---- net/ipv4/tcp.c | 7 +++---- net/iucv/af_iucv.c | 2 +- net/nfc/llcp_sock.c | 2 +- net/rxrpc/ar-output.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 4 ++-- net/sunrpc/xprtsock.c | 14 +++++++------- net/unix/af_unix.c | 6 +++--- 22 files changed, 60 insertions(+), 52 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..6d4d4569447e 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -125,7 +125,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -139,7 +139,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..ca9efe17db1a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -212,7 +212,7 @@ static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (signal_pending(current)) @@ -258,7 +258,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) return -EAGAIN; } - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -272,7 +272,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 54036ae0a388..0fc521941c71 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -498,7 +498,7 @@ static void macvtap_sock_write_space(struct sock *sk) wait_queue_head_t *wqueue; if (!sock_writeable(sk) || - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); @@ -585,7 +585,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(&q->sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b1878faea397..f0db770e8b2f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1040,7 +1040,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && sock_writeable(sk))) mask |= POLLOUT | POLLWRNORM; @@ -1488,7 +1488,7 @@ static void tun_sock_write_space(struct sock *sk) if (!sock_writeable(sk)) return; - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 87e9d796cf7d..3a37bd3f9637 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk) if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { con->sock->sk->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); } if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) @@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con) msg_flags); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && - test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { /* Notify TCP that we're limited by the * application window size. diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..f514e4dd5521 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,8 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -96,7 +96,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..c155d09d8af4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2005,6 +2005,16 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } +static inline void sk_set_bit(int nr, struct sock *sk) +{ + set_bit(nr, &sk->sk_socket->flags); +} + +static inline void sk_clear_bit(int nr, struct sock *sk) +{ + clear_bit(nr, &sk->sk_socket->flags); +} + static inline void sk_wake_async(struct sock *sk, int how, int band) { if (sock_flag(sk, SOCK_FASYNC)) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a3bffd1ec2b4..70306cc9d814 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) if (signal_pending(current) || !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); @@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cc858919108e..aa209b1066c9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); @@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); diff --git a/net/core/datagram.c b/net/core/datagram.c index 617088aee21d..d62af69ad844 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..9d79569935a3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1815,7 +1815,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (!timeo) break; @@ -1861,7 +1861,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) break; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; if (!timeo) @@ -2048,9 +2048,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..43309428644d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) } if (signal_pending(current)) goto do_interrupted; - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk_stream_memory_free(sk) && !vm_wait) break; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b5cf13a28009..41e65804ddf5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 675cf94e04f8..eebf5ac8ce18 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1747,9 +1747,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); } @@ -2004,10 +2004,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1728771cf89..c82cca18c90f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after @@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, goto out_err; } - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); copied = 0; @@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } /* This should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..435608c4306d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && iucv_below_msglim(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b7de0da46acd..ecf0a0196f18 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); pr_debug("mask 0x%x\n", mask); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index a40d3afe93b7..14c4e12c47b0 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); /* this should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..2353985d689c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6458,7 +6458,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sctp_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* * Since the socket is not locked, the buffer * might be made available after the writeable check and diff --git a/net/socket.c b/net/socket.c index dd2c247c99e3..16be908205fc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1072,11 +1072,11 @@ int sock_wake_async(struct socket *sock, int how, int band) } switch (how) { case SOCK_WAKE_WAITD: - if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) + if (test_bit(SOCKWQ_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; case SOCK_WAKE_SPACE: - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags)) break; /* fall through */ case SOCK_WAKE_IO: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1d1a70498910..2ffaf6a79499 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (unlikely(!sock)) return -ENOTSOCK; - clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags); if (base != 0) { addr = NULL; addrlen = 0; @@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task) struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); transport->inet->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } /** @@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { /* * Notify TCP that we're limited by the application * window size @@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task) xprt_wait_for_buffer_space(task, xs_nospace_callback); } } else { - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); ret = -ENOTCONN; } @@ -626,7 +626,7 @@ process_status: case -EPERM: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EADDRINUSE: case -ENOBUFS: case -EPIPE: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; - if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) + if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) return; xprt_write_space(xprt); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6ced74690eee..45aebd966978 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2191,7 +2191,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); @@ -2199,7 +2199,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); @@ -2683,7 +2683,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } -- cgit 1.4.1