summary refs log tree commit diff
path: root/net/ipv4
diff options
context:
space:
mode:
authorAndrey Vagin <avagin@openvz.org>2016-06-27 15:33:56 -0700
committerDavid S. Miller <davem@davemloft.net>2016-06-30 08:15:31 -0400
commitb1ed4c4fa9a5ccf325184fd90edc50978ef6e33a (patch)
treeeb298a85a5f016c52ad375f351bf171eec99152a /net/ipv4
parent641f7e405ed208cfeb6b52145252675b51c43180 (diff)
downloadlinux-b1ed4c4fa9a5ccf325184fd90edc50978ef6e33a.tar.gz
tcp: add an ability to dump and restore window parameters
We found that sometimes a restored tcp socket doesn't work.

A reason of this bug is incorrect window parameters and in this case
tcp_acceptable_seq() returns tcp_wnd_end(tp) instead of tp->snd_nxt. The
other side drops packets with this seq, because seq is less than
tp->rcv_nxt ( tcp_sequence() ).

Data from a send queue is sent only if there is enough space in a
window, so when we restore unacked data, we need to expand a window to
fit this data.

This was in a first version of this patch:
"tcp: extend window to fit all restored unacked data in a send queue"

Then Alexey recommended me to restore window parameters instead of
adjusted them according with data in a sent queue. This sounds resonable.

rcv_wnd has to be restored, because it was reported to another side
and the offered window is never shrunk.
One of reasons why we need to restore snd_wnd was described above.

Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5c7ed147449c..108ef2a6665c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
 		((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
 }
 
+static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+{
+	struct tcp_repair_window opt;
+
+	if (!tp->repair)
+		return -EPERM;
+
+	if (len != sizeof(opt))
+		return -EINVAL;
+
+	if (copy_from_user(&opt, optbuf, sizeof(opt)))
+		return -EFAULT;
+
+	if (opt.max_window < opt.snd_wnd)
+		return -EINVAL;
+
+	if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
+		return -EINVAL;
+
+	if (after(opt.rcv_wup, tp->rcv_nxt))
+		return -EINVAL;
+
+	tp->snd_wl1	= opt.snd_wl1;
+	tp->snd_wnd	= opt.snd_wnd;
+	tp->max_window	= opt.max_window;
+
+	tp->rcv_wnd	= opt.rcv_wnd;
+	tp->rcv_wup	= opt.rcv_wup;
+
+	return 0;
+}
+
 static int tcp_repair_options_est(struct tcp_sock *tp,
 		struct tcp_repair_opt __user *optbuf, unsigned int len)
 {
@@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			tp->tsoffset = val - tcp_time_stamp;
 		break;
+	case TCP_REPAIR_WINDOW:
+		err = tcp_repair_set_window(tp, optval, optlen);
+		break;
 	case TCP_NOTSENT_LOWAT:
 		tp->notsent_lowat = val;
 		sk->sk_write_space(sk);
@@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EINVAL;
 		break;
 
+	case TCP_REPAIR_WINDOW: {
+		struct tcp_repair_window opt;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		if (len != sizeof(opt))
+			return -EINVAL;
+
+		if (!tp->repair)
+			return -EPERM;
+
+		opt.snd_wl1	= tp->snd_wl1;
+		opt.snd_wnd	= tp->snd_wnd;
+		opt.max_window	= tp->max_window;
+		opt.rcv_wnd	= tp->rcv_wnd;
+		opt.rcv_wup	= tp->rcv_wup;
+
+		if (copy_to_user(optval, &opt, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_QUEUE_SEQ:
 		if (tp->repair_queue == TCP_SEND_QUEUE)
 			val = tp->write_seq;