[rh7] fs/fuse/dev: improve ->splice() with fragmented memory

Submitted by Andrey Ryabinin on Nov. 29, 2017, 1:59 p.m.

Details

Message ID 20171129135901.7689-1-aryabinin@virtuozzo.com
State New
Series "fs/fuse/dev: improve ->splice() with fragmented memory"
Headers show

Commit Message

Andrey Ryabinin Nov. 29, 2017, 1:59 p.m.
fuse_dev_splice_[read,write]() temporary allocates array of pipe_buffer
structs. Depending on pipe size it could be quite large, thus we stall
in high order allocation request. Use kvmalloc() instead of kmalloc()
to fallback in vmalloc() if high order page is not available at the moment.

https://jira.sw.ru/browse/PSBM-77949
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 fs/fuse/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 3427eddcfb17..83c30e51dfca 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1353,7 +1353,7 @@  static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
 	if (!fud)
 		return -EPERM;
 
-	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
+	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
 	if (!bufs)
 		return -ENOMEM;
 
@@ -1410,7 +1410,7 @@  out:
 	for (; page_nr < cs.nr_segs; page_nr++)
 		page_cache_release(bufs[page_nr].page);
 
-	kfree(bufs);
+	kvfree(bufs);
 	return ret;
 }
 
@@ -1991,7 +1991,7 @@  static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	if (!fud)
 		return -EPERM;
 
-	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
+	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
 	if (!bufs)
 		return -ENOMEM;
 
@@ -2049,7 +2049,7 @@  static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 		buf->ops->release(pipe, buf);
 	}
 out:
-	kfree(bufs);
+	kvfree(bufs);
 	return ret;
 }
 

Comments

Vasily Averin Nov. 29, 2017, 2:31 p.m.
Could you please elaborate, why it should help in reported case?
It seems for me kvmalloc will push reclaimer first exactly like kmalloc does right now.

On 2017-11-29 16:59, Andrey Ryabinin wrote:
> fuse_dev_splice_[read,write]() temporary allocates array of pipe_buffer
> structs. Depending on pipe size it could be quite large, thus we stall
> in high order allocation request. Use kvmalloc() instead of kmalloc()
> to fallback in vmalloc() if high order page is not available at the moment.
> 
> https://jira.sw.ru/browse/PSBM-77949
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>  fs/fuse/dev.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 3427eddcfb17..83c30e51dfca 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1353,7 +1353,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
>  	if (!fud)
>  		return -EPERM;
>  
> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>  	if (!bufs)
>  		return -ENOMEM;
>  
> @@ -1410,7 +1410,7 @@ out:
>  	for (; page_nr < cs.nr_segs; page_nr++)
>  		page_cache_release(bufs[page_nr].page);
>  
> -	kfree(bufs);
> +	kvfree(bufs);
>  	return ret;
>  }
>  
> @@ -1991,7 +1991,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>  	if (!fud)
>  		return -EPERM;
>  
> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>  	if (!bufs)
>  		return -ENOMEM;
>  
> @@ -2049,7 +2049,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>  		buf->ops->release(pipe, buf);
>  	}
>  out:
> -	kfree(bufs);
> +	kvfree(bufs);
>  	return ret;
>  }
>  
>
Vasily Averin Nov. 29, 2017, 2:37 p.m.
got it,
kvmalloc does not use kmalloc for size <= (16*PAGE_SIZE)

On 2017-11-29 17:31, Vasily Averin wrote:
> Could you please elaborate, why it should help in reported case?
> It seems for me kvmalloc will push reclaimer first exactly like kmalloc does right now.
> 
> On 2017-11-29 16:59, Andrey Ryabinin wrote:
>> fuse_dev_splice_[read,write]() temporary allocates array of pipe_buffer
>> structs. Depending on pipe size it could be quite large, thus we stall
>> in high order allocation request. Use kvmalloc() instead of kmalloc()
>> to fallback in vmalloc() if high order page is not available at the moment.
>>
>> https://jira.sw.ru/browse/PSBM-77949
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>  fs/fuse/dev.c | 8 ++++----
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
>> index 3427eddcfb17..83c30e51dfca 100644
>> --- a/fs/fuse/dev.c
>> +++ b/fs/fuse/dev.c
>> @@ -1353,7 +1353,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
>>  	if (!fud)
>>  		return -EPERM;
>>  
>> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>>  	if (!bufs)
>>  		return -ENOMEM;
>>  
>> @@ -1410,7 +1410,7 @@ out:
>>  	for (; page_nr < cs.nr_segs; page_nr++)
>>  		page_cache_release(bufs[page_nr].page);
>>  
>> -	kfree(bufs);
>> +	kvfree(bufs);
>>  	return ret;
>>  }
>>  
>> @@ -1991,7 +1991,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>>  	if (!fud)
>>  		return -EPERM;
>>  
>> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>>  	if (!bufs)
>>  		return -ENOMEM;
>>  
>> @@ -2049,7 +2049,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>>  		buf->ops->release(pipe, buf);
>>  	}
>>  out:
>> -	kfree(bufs);
>> +	kvfree(bufs);
>>  	return ret;
>>  }
>>  
>>
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
>
Andrey Ryabinin Nov. 29, 2017, 2:42 p.m.
On 11/29/2017 05:31 PM, Vasily Averin wrote:
> Could you please elaborate, why it should help in reported case?
> It seems for me kvmalloc will push reclaimer first exactly like kmalloc does right now.
> 

Currently we try to allocate possibly high-order page with GFP_KERNEL flags. For order <= PAGE_ALLOC_COSTLY_ORDER
this will loop indefinitely until succeed.

kvmalloc set __GFP_NORETRY, so if high order page isn't available we bail out immediately and try vmalloc()
which will use 0-order pages.
Andrey Ryabinin Nov. 29, 2017, 2:43 p.m.
On 11/29/2017 05:37 PM, Vasily Averin wrote:
> got it,
> kvmalloc does not use kmalloc for size <= (16*PAGE_SIZE)
> 

No, it does use kmalloc() first:

void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
	gfp_t kmalloc_flags = flags;

....
	/*
	 * Make sure that larger requests are not too disruptive - no OOM
	 * killer and no allocation failure warnings as we have a fallback
	 */
	if (size > PAGE_SIZE)
		kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN;

	ret = kmalloc_node(size, kmalloc_flags, node);

	/*
	 * It doesn't really make sense to fallback to vmalloc for sub page
	 * requests
	 */
	if (ret || size <= PAGE_SIZE)
		return ret;

	return __vmalloc_node_flags(size, node, flags | __GFP_HIGHMEM);
Konstantin Khorenko Dec. 7, 2017, 11:09 a.m.
Please consider to RK this.

https://readykernel.com/


--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 11/29/2017 04:59 PM, Andrey Ryabinin wrote:
> fuse_dev_splice_[read,write]() temporary allocates array of pipe_buffer
> structs. Depending on pipe size it could be quite large, thus we stall
> in high order allocation request. Use kvmalloc() instead of kmalloc()
> to fallback in vmalloc() if high order page is not available at the moment.
>
> https://jira.sw.ru/browse/PSBM-77949
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>  fs/fuse/dev.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 3427eddcfb17..83c30e51dfca 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1353,7 +1353,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
>  	if (!fud)
>  		return -EPERM;
>
> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>  	if (!bufs)
>  		return -ENOMEM;
>
> @@ -1410,7 +1410,7 @@ out:
>  	for (; page_nr < cs.nr_segs; page_nr++)
>  		page_cache_release(bufs[page_nr].page);
>
> -	kfree(bufs);
> +	kvfree(bufs);
>  	return ret;
>  }
>
> @@ -1991,7 +1991,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>  	if (!fud)
>  		return -EPERM;
>
> -	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
> +	bufs = kvmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
>  	if (!bufs)
>  		return -ENOMEM;
>
> @@ -2049,7 +2049,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>  		buf->ops->release(pipe, buf);
>  	}
>  out:
> -	kfree(bufs);
> +	kvfree(bufs);
>  	return ret;
>  }
>
>