summary refs log tree commit diff
path: root/net/tipc
diff options
context:
space:
mode:
Diffstat (limited to 'net/tipc')
-rw-r--r--net/tipc/bcast.c62
-rw-r--r--net/tipc/bcast.h17
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/node.h4
-rw-r--r--net/tipc/socket.c36
5 files changed, 108 insertions, 15 deletions
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 672e6ef93cab..7d99029df342 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -54,6 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link";
  * @dest: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @rcast_support: indicates if all peer nodes support replicast
+ * @rc_ratio: dest count as percentage of cluster size where send method changes
+ * @bc_threshold: calculated drom rc_ratio; if dests > threshold use broadcast
  */
 struct tipc_bc_base {
 	struct tipc_link *link;
@@ -61,6 +64,9 @@ struct tipc_bc_base {
 	int dests[MAX_BEARERS];
 	int primary_bearer;
 	bool bcast_support;
+	bool rcast_support;
+	int rc_ratio;
+	int bc_threshold;
 };
 
 static struct tipc_bc_base *tipc_bc_base(struct net *net)
@@ -73,6 +79,19 @@ int tipc_bcast_get_mtu(struct net *net)
 	return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
 }
 
+void tipc_bcast_disable_rcast(struct net *net)
+{
+	tipc_bc_base(net)->rcast_support = false;
+}
+
+static void tipc_bcbase_calc_bc_threshold(struct net *net)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+	int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net));
+
+	bb->bc_threshold = 1 + (cluster_size * bb->rc_ratio / 100);
+}
+
 /* tipc_bcbase_select_primary(): find a bearer with links to all destinations,
  *                               if any, and make it primary bearer
  */
@@ -175,6 +194,31 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
 	__skb_queue_purge(&_xmitq);
 }
 
+static void tipc_bcast_select_xmit_method(struct net *net, int dests,
+					  struct tipc_mc_method *method)
+{
+	struct tipc_bc_base *bb = tipc_bc_base(net);
+	unsigned long exp = method->expires;
+
+	/* Broadcast supported by used bearer/bearers? */
+	if (!bb->bcast_support) {
+		method->rcast = true;
+		return;
+	}
+	/* Any destinations which don't support replicast ? */
+	if (!bb->rcast_support) {
+		method->rcast = false;
+		return;
+	}
+	/* Can current method be changed ? */
+	method->expires = jiffies + TIPC_METHOD_EXPIRE;
+	if (method->mandatory || time_before(jiffies, exp))
+		return;
+
+	/* Determine method to use now */
+	method->rcast = dests <= bb->bc_threshold;
+}
+
 /* tipc_bcast_xmit - broadcast the buffer chain to all external nodes
  * @net: the applicable net namespace
  * @pkts: chain of buffers containing message
@@ -237,16 +281,16 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
  *                   and to identified node local sockets
  * @net: the applicable net namespace
  * @pkts: chain of buffers containing message
- * @dests: destination nodes for message. Not consumed.
+ * @method: send method to be used
+ * @dests: destination nodes for message.
  * @cong_link_cnt: returns number of encountered congested destination links
- * @cong_links: returns identities of congested links
  * Consumes buffer chain.
  * Returns 0 if success, otherwise errno
  */
 int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
-		    struct tipc_nlist *dests, u16 *cong_link_cnt)
+		    struct tipc_mc_method *method, struct tipc_nlist *dests,
+		    u16 *cong_link_cnt)
 {
-	struct tipc_bc_base *bb = tipc_bc_base(net);
 	struct sk_buff_head inputq, localq;
 	int rc = 0;
 
@@ -258,9 +302,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
 		rc = -ENOMEM;
 		goto exit;
 	}
-
+	/* Send according to determined transmit method */
 	if (dests->remote) {
-		if (!bb->bcast_support)
+		tipc_bcast_select_xmit_method(net, dests->remote, method);
+		if (method->rcast)
 			rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt);
 		else
 			rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
@@ -269,6 +314,7 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
 	if (dests->local)
 		tipc_sk_mcast_rcv(net, &localq, &inputq);
 exit:
+	/* This queue should normally be empty by now */
 	__skb_queue_purge(pkts);
 	return rc;
 }
@@ -377,6 +423,7 @@ void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l,
 	tipc_bcast_lock(net);
 	tipc_link_add_bc_peer(snd_l, uc_l, xmitq);
 	tipc_bcbase_select_primary(net);
+	tipc_bcbase_calc_bc_threshold(net);
 	tipc_bcast_unlock(net);
 }
 
@@ -395,6 +442,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
 	tipc_bcast_lock(net);
 	tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq);
 	tipc_bcbase_select_primary(net);
+	tipc_bcbase_calc_bc_threshold(net);
 	tipc_bcast_unlock(net);
 
 	tipc_bcbase_xmit(net, &xmitq);
@@ -477,6 +525,8 @@ int tipc_bcast_init(struct net *net)
 		goto enomem;
 	bb->link = l;
 	tn->bcl = l;
+	bb->rc_ratio = 25;
+	bb->rcast_support = true;
 	return 0;
 enomem:
 	kfree(bb);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dd772e6f6fa4..751530ab0c49 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -46,6 +46,8 @@ struct tipc_nlist;
 struct tipc_nitem;
 extern const char tipc_bclink_name[];
 
+#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
+
 struct tipc_nlist {
 	struct list_head list;
 	u32 self;
@@ -58,6 +60,17 @@ void tipc_nlist_purge(struct tipc_nlist *nl);
 void tipc_nlist_add(struct tipc_nlist *nl, u32 node);
 void tipc_nlist_del(struct tipc_nlist *nl, u32 node);
 
+/* Cookie to be used between socket and broadcast layer
+ * @rcast: replicast (instead of broadcast) was used at previous xmit
+ * @mandatory: broadcast/replicast indication was set by user
+ * @expires: re-evaluate non-mandatory transmit method if we are past this
+ */
+struct tipc_mc_method {
+	bool rcast;
+	bool mandatory;
+	unsigned long expires;
+};
+
 int tipc_bcast_init(struct net *net);
 void tipc_bcast_stop(struct net *net);
 void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
@@ -66,8 +79,10 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl);
 void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
 void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
 int  tipc_bcast_get_mtu(struct net *net);
+void tipc_bcast_disable_rcast(struct net *net);
 int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
-		    struct tipc_nlist *dests, u16 *cong_link_cnt);
+		    struct tipc_mc_method *method, struct tipc_nlist *dests,
+		    u16 *cong_link_cnt);
 int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
 void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
 			struct tipc_msg *hdr);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b17b9e155469..ddd2dd6f77aa 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -515,6 +515,10 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 	if (link_is_bc_sndlink(l))
 		l->state = LINK_ESTABLISHED;
 
+	/* Disable replicast if even a single peer doesn't support it */
+	if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
+		tipc_bcast_disable_rcast(net);
+
 	return true;
 }
 
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 39ef54c1f2ad..898c22916984 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -47,11 +47,13 @@
 enum {
 	TIPC_BCAST_SYNCH      = (1 << 1),
 	TIPC_BCAST_STATE_NACK = (1 << 2),
-	TIPC_BLOCK_FLOWCTL    = (1 << 3)
+	TIPC_BLOCK_FLOWCTL    = (1 << 3),
+	TIPC_BCAST_RCAST      = (1 << 4)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
 				TIPC_BCAST_STATE_NACK | \
+				TIPC_BCAST_RCAST | \
 				TIPC_BLOCK_FLOWCTL)
 #define INVALID_BEARER_ID -1
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 93b6ae3154c9..5bec8aac5008 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -79,6 +79,7 @@ enum {
  * @rcv_unacked: # messages read by user, but not yet acked back to peer
  * @peer: 'connected' peer for dgram/rdm
  * @node: hash table node
+ * @mc_method: cookie for use between socket and broadcast layer
  * @rcu: rcu struct for tipc_sock
  */
 struct tipc_sock {
@@ -103,6 +104,7 @@ struct tipc_sock {
 	u16 rcv_win;
 	struct sockaddr_tipc peer;
 	struct rhash_head node;
+	struct tipc_mc_method mc_method;
 	struct rcu_head rcu;
 };
 
@@ -740,6 +742,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
 	int mtu = tipc_bcast_get_mtu(net);
+	struct tipc_mc_method *method = &tsk->mc_method;
 	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
 	struct sk_buff_head pkts;
 	struct tipc_nlist dsts;
@@ -773,7 +776,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 
 	/* Send message if build was successful */
 	if (unlikely(rc == dlen))
-		rc = tipc_mcast_xmit(net, &pkts, &dsts,
+		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
 				     &tsk->cong_link_cnt);
 
 	tipc_nlist_purge(&dsts);
@@ -2344,18 +2347,29 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	u32 value;
+	u32 value = 0;
 	int res;
 
 	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
 		return 0;
 	if (lvl != SOL_TIPC)
 		return -ENOPROTOOPT;
-	if (ol < sizeof(value))
-		return -EINVAL;
-	res = get_user(value, (u32 __user *)ov);
-	if (res)
-		return res;
+
+	switch (opt) {
+	case TIPC_IMPORTANCE:
+	case TIPC_SRC_DROPPABLE:
+	case TIPC_DEST_DROPPABLE:
+	case TIPC_CONN_TIMEOUT:
+		if (ol < sizeof(value))
+			return -EINVAL;
+		res = get_user(value, (u32 __user *)ov);
+		if (res)
+			return res;
+		break;
+	default:
+		if (ov || ol)
+			return -EINVAL;
+	}
 
 	lock_sock(sk);
 
@@ -2376,6 +2390,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 		tipc_sk(sk)->conn_timeout = value;
 		/* no need to set "res", since already 0 at this point */
 		break;
+	case TIPC_MCAST_BROADCAST:
+		tsk->mc_method.rcast = false;
+		tsk->mc_method.mandatory = true;
+		break;
+	case TIPC_MCAST_REPLICAST:
+		tsk->mc_method.rcast = true;
+		tsk->mc_method.mandatory = true;
+		break;
 	default:
 		res = -EINVAL;
 	}