ms/tcp: fix potential huge kmalloc() calls in TCP_REPAIR

Submitted by Konstantin Khorenko on April 9, 2019, 9:34 a.m.

Details

Message ID 20190409093444.23482-1-khorenko@virtuozzo.com
State New
Series "ms/tcp: fix potential huge kmalloc() calls in TCP_REPAIR"
Headers show

Commit Message

Konstantin Khorenko April 9, 2019, 9:34 a.m.
From: Eric Dumazet <edumazet@google.com>

tcp_send_rcvq() is used for re-injecting data into tcp receive queue.

Problems :

- No check against size is performed, allowing a user to fool the kernel
  into attempting very large memory allocations, eventually triggering
  an OOM when memory is fragmented.

- In case of a fault during the copy we do not return the correct errno.

Let's use alloc_skb_with_frags() to cook optimal skbs.

Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c")
Fixes: c0e88ff0f256 ("tcp: Repair socket queues")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

CRIU often triggers an order-8 page allocation while restoring TCP sockets
without this patch.
https://jira.sw.ru/browse/PSBM-93672

(cherry picked from commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9)
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 net/ipv4/tcp_input.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 015c6fd7ec83..d0a6f767189d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4470,19 +4470,34 @@  static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_buff *skb;
+	int err = -ENOMEM;
+	int data_len = 0;
 	bool fragstolen;
 
 	if (size == 0)
 		return 0;
 
-	skb = alloc_skb(size, sk->sk_allocation);
+	if (size > PAGE_SIZE) {
+		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+		data_len = npages << PAGE_SHIFT;
+		size = data_len + (size & ~PAGE_MASK);
+	}
+	skb = alloc_skb_with_frags(size - data_len, data_len,
+				   PAGE_ALLOC_COSTLY_ORDER,
+				   &err, sk->sk_allocation);
 	if (!skb)
 		goto err;
 
+	skb_put(skb, size - data_len);
+	skb->data_len = data_len;
+	skb->len = size;
+
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+	if (err)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4498,7 +4513,8 @@  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 err_free:
 	kfree_skb(skb);
 err:
-	return -ENOMEM;
+	return err;
+
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)

Comments

Andrey Ryabinin April 9, 2019, 10:19 a.m.
On 4/9/19 12:34 PM, Konstantin Khorenko wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> tcp_send_rcvq() is used for re-injecting data into tcp receive queue.
> 
> Problems :
> 
> - No check against size is performed, allowed user to fool kernel in
>   attempting very large memory allocations, eventually triggering
>   OOM when memory is fragmented.
> 
> - In case of fault during the copy we do not return correct errno.
> 
> Lets use alloc_skb_with_frags() to cook optimal skbs.
> 
> Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c")
> Fixes: c0e88ff0f256 ("tcp: Repair socket queues")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Pavel Emelyanov <xemul@parallels.com>
> Acked-by: Pavel Emelyanov <xemul@parallels.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> CRIU often triggers 8 order page allocation while restoring TCP sockets
> without this patch.
> https://jira.sw.ru/browse/PSBM-93672
> 
> (cherry picked from commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9)
> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
> ---
>  net/ipv4/tcp_input.c | 22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 015c6fd7ec83..d0a6f767189d 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4470,19 +4470,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
>  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>  {
>  	struct sk_buff *skb;
> +	int err = -ENOMEM;
> +	int data_len = 0;
>  	bool fragstolen;
>  
>  	if (size == 0)
>  		return 0;
>  
> -	skb = alloc_skb(size, sk->sk_allocation);
> +	if (size > PAGE_SIZE) {
> +		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
> +
> +		data_len = npages << PAGE_SHIFT;
> +		size = data_len + (size & ~PAGE_MASK);
> +	}
> +	skb = alloc_skb_with_frags(size - data_len, data_len,
> +				   PAGE_ALLOC_COSTLY_ORDER,
> +				   &err, sk->sk_allocation);
>  	if (!skb)
>  		goto err;
>  
> +	skb_put(skb, size - data_len);
> +	skb->data_len = data_len;
> +	skb->len = size;
> +
>  	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
>  		goto err_free;
>  
> -	if (memcpy_from_msg(skb_put(skb, size), msg, size))
> +	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);

             skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size) ?


> +	if (err)
>  		goto err_free;
>  
>  	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
> @@ -4498,7 +4513,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>  err_free:
>  	kfree_skb(skb);
>  err:
> -	return -ENOMEM;
> +	return err;
> +
>  }
>  
>  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>
Andrey Ryabinin April 9, 2019, 10:23 a.m.
On 4/9/19 1:19 PM, Andrey Ryabinin wrote:
> 
> 
> On 4/9/19 12:34 PM, Konstantin Khorenko wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> tcp_send_rcvq() is used for re-injecting data into tcp receive queue.
>>
>> Problems :
>>
>> - No check against size is performed, allowed user to fool kernel in
>>   attempting very large memory allocations, eventually triggering
>>   OOM when memory is fragmented.
>>
>> - In case of fault during the copy we do not return correct errno.
>>
>> Lets use alloc_skb_with_frags() to cook optimal skbs.
>>
>> Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c")
>> Fixes: c0e88ff0f256 ("tcp: Repair socket queues")
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Pavel Emelyanov <xemul@parallels.com>
>> Acked-by: Pavel Emelyanov <xemul@parallels.com>
>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>
>> CRIU often triggers 8 order page allocation while restoring TCP sockets
>> without this patch.
>> https://jira.sw.ru/browse/PSBM-93672
>>
>> (cherry picked from commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9)
>> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
>> ---
>>  net/ipv4/tcp_input.c | 22 +++++++++++++++++++---
>>  1 file changed, 19 insertions(+), 3 deletions(-)
>>
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index 015c6fd7ec83..d0a6f767189d 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -4470,19 +4470,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
>>  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>>  {
>>  	struct sk_buff *skb;
>> +	int err = -ENOMEM;
>> +	int data_len = 0;
>>  	bool fragstolen;
>>  
>>  	if (size == 0)
>>  		return 0;
>>  
>> -	skb = alloc_skb(size, sk->sk_allocation);
>> +	if (size > PAGE_SIZE) {
>> +		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
>> +
>> +		data_len = npages << PAGE_SHIFT;
>> +		size = data_len + (size & ~PAGE_MASK);
>> +	}
>> +	skb = alloc_skb_with_frags(size - data_len, data_len,
>> +				   PAGE_ALLOC_COSTLY_ORDER,
>> +				   &err, sk->sk_allocation);
>>  	if (!skb)
>>  		goto err;
>>  
>> +	skb_put(skb, size - data_len);
>> +	skb->data_len = data_len;
>> +	skb->len = size;
>> +
>>  	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
>>  		goto err_free;
>>  
>> -	if (memcpy_from_msg(skb_put(skb, size), msg, size))
>> +	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
> 
>              skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size) ?
       
         Correction: skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, size)
> 
> 
>> +	if (err)
>>  		goto err_free;
>>  
>>  	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
>> @@ -4498,7 +4513,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>>  err_free:
>>  	kfree_skb(skb);
>>  err:
>> -	return -ENOMEM;
>> +	return err;
>> +
>>  }
>>  
>>  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>>
Konstantin Khorenko April 9, 2019, 11:12 a.m.
On 04/09/2019 01:23 PM, Andrey Ryabinin wrote:
>
>
> On 4/9/19 1:19 PM, Andrey Ryabinin wrote:
>>
>>
>> On 4/9/19 12:34 PM, Konstantin Khorenko wrote:
>>> From: Eric Dumazet <edumazet@google.com>
>>>
>>> tcp_send_rcvq() is used for re-injecting data into tcp receive queue.
>>>
>>> Problems :
>>>
>>> - No check against size is performed, allowed user to fool kernel in
>>>   attempting very large memory allocations, eventually triggering
>>>   OOM when memory is fragmented.
>>>
>>> - In case of fault during the copy we do not return correct errno.
>>>
>>> Lets use alloc_skb_with_frags() to cook optimal skbs.
>>>
>>> Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c")
>>> Fixes: c0e88ff0f256 ("tcp: Repair socket queues")
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>>> Cc: Pavel Emelyanov <xemul@parallels.com>
>>> Acked-by: Pavel Emelyanov <xemul@parallels.com>
>>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>>
>>> CRIU often triggers 8 order page allocation while restoring TCP sockets
>>> without this patch.
>>> https://jira.sw.ru/browse/PSBM-93672
>>>
>>> (cherry picked from commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9)
>>> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
>>> ---
>>>  net/ipv4/tcp_input.c | 22 +++++++++++++++++++---
>>>  1 file changed, 19 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>>> index 015c6fd7ec83..d0a6f767189d 100644
>>> --- a/net/ipv4/tcp_input.c
>>> +++ b/net/ipv4/tcp_input.c
>>> @@ -4470,19 +4470,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
>>>  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>>>  {
>>>  	struct sk_buff *skb;
>>> +	int err = -ENOMEM;
>>> +	int data_len = 0;
>>>  	bool fragstolen;
>>>
>>>  	if (size == 0)
>>>  		return 0;
>>>
>>> -	skb = alloc_skb(size, sk->sk_allocation);
>>> +	if (size > PAGE_SIZE) {
>>> +		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
>>> +
>>> +		data_len = npages << PAGE_SHIFT;
>>> +		size = data_len + (size & ~PAGE_MASK);
>>> +	}
>>> +	skb = alloc_skb_with_frags(size - data_len, data_len,
>>> +				   PAGE_ALLOC_COSTLY_ORDER,
>>> +				   &err, sk->sk_allocation);
>>>  	if (!skb)
>>>  		goto err;
>>>
>>> +	skb_put(skb, size - data_len);
>>> +	skb->data_len = data_len;
>>> +	skb->len = size;
>>> +
>>>  	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
>>>  		goto err_free;
>>>
>>> -	if (memcpy_from_msg(skb_put(skb, size), msg, size))
>>> +	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
>>
>>              skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size) ?
>
>          Correction: skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, size)

Andrey, thank you!
Haste would make waste...

>>
>>
>>> +	if (err)
>>>  		goto err_free;
>>>
>>>  	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
>>> @@ -4498,7 +4513,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>>>  err_free:
>>>  	kfree_skb(skb);
>>>  err:
>>> -	return -ENOMEM;
>>> +	return err;
>>> +
>>>  }
>>>
>>>  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>>>
> .
>