If the receive queue contains payload that was already received, __tcp_close() can send an unexpected RST. Refine the code to take tp->copied_seq into account, as we already do in tcp recvmsg(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet --- net/ipv4/tcp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 40b774b4f587..39eb03f6d07f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3099,8 +3099,8 @@ bool tcp_check_oom(const struct sock *sk, int shift) void __tcp_close(struct sock *sk, long timeout) { + bool data_was_unread = false; struct sk_buff *skb; - int data_was_unread = 0; int state; WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); @@ -3119,11 +3119,12 @@ void __tcp_close(struct sock *sk, long timeout) * reader process may not have drained the data yet! */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - len--; - data_was_unread += len; + end_seq--; + if (after(end_seq, tcp_sk(sk)->copied_seq)) + data_was_unread = true; __kfree_skb(skb); } -- 2.51.0.338.gd7d06c2dae-goog This test makes sure we do send a FIN on close() if the receive queue contains data that was consumed. Signed-off-by: Eric Dumazet --- .../net/packetdrill/tcp_close_no_rst.pkt | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 -- 2.51.0.338.gd7d06c2dae-goog Small change to use tcp_eat_recv_skb() instead of __kfree_skb(). This can help if an application under attack has to close many sockets with unread data. Signed-off-by: Eric Dumazet --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39eb03f6d07f..588932c3cf1d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3118,14 +3118,14 @@ void __tcp_close(struct sock *sk, long timeout) * descriptor close, not protocol-sourced closes, because the * reader process may not have drained the data yet! */ - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) end_seq--; if (after(end_seq, tcp_sk(sk)->copied_seq)) data_was_unread = true; - __kfree_skb(skb); + tcp_eat_recv_skb(sk, skb); } /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ -- 2.51.0.338.gd7d06c2dae-goog