[Devel,vz7] fuse: optimize writepages search

Submitted by Maxim Patlasov on April 7, 2017, 3:35 a.m.

Details

Message ID 149153608981.7655.17228861235877876121.stgit@maxim-thinkpad
State New
Series "fuse: optimize writepages search"
Headers show

Commit Message

Maxim Patlasov April 7, 2017, 3:35 a.m.
The patch re-works fi->writepages replacing list with rb-tree.
This improves performance because kernel fuse iterates through
fi->writepages for each writeback page and typical number of
entries is about 800 (for 100MB of fuse writeback).

Before patch:

# dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

# vmstat 10
 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

# perf top
  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

# dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

# vmstat 10
 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

# perf top
  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

https://jira.sw.ru/browse/PSBM-59254

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
---
 fs/fuse/file.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++++--------
 fs/fuse/fuse_i.h |    4 +--
 fs/fuse/inode.c  |    2 +
 3 files changed, 73 insertions(+), 16 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 41ed6f0..eab5030 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -435,7 +435,7 @@  static int fuse_release(struct inode *inode, struct file *file)
 		 * enqueued on this file and it is not going to get new one: it is closing.
 		 */
 		if (!ff->fc->close_wait)
-			wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
+			wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
 		else
 			wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
 
@@ -501,17 +501,30 @@  static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct fuse_req *req;
 	bool found = false;
+	struct rb_node *n;
 
 	spin_lock(&fc->lock);
-	list_for_each_entry(req, &fi->writepages, writepages_entry) {
+
+	n = fi->writepages.rb_node;
+	if (!n) {
+		spin_unlock(&fc->lock);
+		return false;
+	}
+
+	while (n) {
+		struct fuse_req *req;
 		pgoff_t curr_index;
 
+		req = rb_entry(n, struct fuse_req, writepages_entry);
 		BUG_ON(req->inode != inode);
 		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
-		if (!(idx_from >= curr_index + req->num_pages ||
-		      idx_to < curr_index)) {
+
+		if (idx_from >= curr_index + req->num_pages)
+			n = n->rb_right;
+		else if (idx_to < curr_index)
+			n = n->rb_left;
+		else {
 			found = true;
 			break;
 		}
@@ -531,17 +544,31 @@  static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct fuse_req *req;
 	bool found = false;
+	struct rb_node *n;
 
 	spin_lock(&fc->lock);
-	list_for_each_entry(req, &fi->writepages, writepages_entry) {
+
+	n = fi->writepages.rb_node;
+	if (!n) {
+		spin_unlock(&fc->lock);
+		return false;
+	}
+
+
+	while (n) {
+		struct fuse_req *req;
 		pgoff_t curr_index;
 
+		req = rb_entry(n, struct fuse_req, writepages_entry);
 		BUG_ON(req->inode != inode);
 		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
-		if (curr_index <= index &&
-		    index < curr_index + req->num_pages) {
+
+		if (index >= curr_index + req->num_pages)
+			n = n->rb_right;
+		else if (index < curr_index)
+			n = n->rb_left;
+		else {
 			found = true;
 			break;
 		}
@@ -1868,7 +1895,7 @@  static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
 	int i;
 
-	list_del(&req->writepages_entry);
+	rb_erase(&req->writepages_entry, &fi->writepages);
 	if (fc->writeback_cache || fc->close_wait)
 		__fuse_file_put(req->ff);
 	for (i = 0; i < req->num_pages; i++) {
@@ -1964,6 +1991,36 @@  static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
 	return ff;
 }
 
+static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
+{
+	unsigned i = ins_req->num_pages - 1;
+	pgoff_t idx_from = ins_req->pages[0]->index;
+	pgoff_t idx_to   = ins_req->pages[i]->index;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	while (*p) {
+		struct fuse_req *req;
+		pgoff_t curr_index;
+
+		parent = *p;
+		req = rb_entry(parent, struct fuse_req, writepages_entry);
+		BUG_ON(req->inode != ins_req->inode);
+		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+
+		if (idx_from >= curr_index + req->num_pages)
+			p = &(*p)->rb_right;
+		else if (idx_to < curr_index)
+			p = &(*p)->rb_left;
+		else
+			BUG();
+	}
+
+	rb_link_node(&ins_req->writepages_entry, parent, p);
+	rb_insert_color(&ins_req->writepages_entry, root);
+	return 0;
+}
+
 static int fuse_writepage_locked(struct page *page,
 				 struct writeback_control *wbc,
 				 struct fuse_file **ff_pp)
@@ -2034,7 +2091,7 @@  static int fuse_writepage_locked(struct page *page,
 	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fc->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	tree_insert(&fi->writepages, req);
 	list_add_tail(&req->list, &fi->queued_writes);
 	fuse_flush_writepages(inode);
 	spin_unlock(&fc->lock);
@@ -2110,7 +2167,7 @@  static int fuse_send_writepages(struct fuse_fill_data *data)
 	req->misc.write.in.offset = page_offset(req->pages[0]);
 
 	spin_lock(&fc->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	tree_insert(&fi->writepages, req);
 	spin_unlock(&fc->lock);
 
 	for (i = 0; i < req->num_pages; i++) {
@@ -2143,7 +2200,7 @@  static int fuse_send_writepages(struct fuse_fill_data *data)
 		}
 
 		spin_lock(&fc->lock);
-		list_del(&req->writepages_entry);
+		rb_erase(&req->writepages_entry, &fi->writepages);
 		wake_up(&fi->page_waitq);
 		spin_unlock(&fc->lock);
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fefa8ff..4601571 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -117,7 +117,7 @@  struct fuse_inode {
 	wait_queue_head_t page_waitq;
 
 	/** List of writepage requestst (pending or sent) */
-	struct list_head writepages;
+	struct rb_root writepages;
 
 	/** Miscellaneous bits describing inode state */
 	unsigned long state;
@@ -404,7 +404,7 @@  struct fuse_req {
 	struct fuse_io_priv *io;
 
 	/** Link on fi->writepages */
-	struct list_head writepages_entry;
+	struct rb_node writepages_entry;
 
 	/** Request completion callback */
 	void (*end)(struct fuse_conn *, struct fuse_req *);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ddd858c..0da2623 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -101,7 +101,7 @@  static struct inode *fuse_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->rw_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
-	INIT_LIST_HEAD(&fi->writepages);
+	fi->writepages = RB_ROOT;
 	init_waitqueue_head(&fi->page_waitq);
 	fi->forget = fuse_alloc_forget();
 	if (!fi->forget) {

Comments

Konstantin Khorenko April 7, 2017, 10:26 a.m.
Kirill, please review the patch.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 04/07/2017 06:35 AM, Maxim Patlasov wrote:
> The patch re-works fi->writepages replacing list with rb-tree.
> This improves performance because kernel fuse iterates through
> fi->writepages for each writeback page and typical number of
> entries is about 800 (for 100MB of fuse writeback).
>
> Before patch:
>
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s
>
> # vmstat 10
>  2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0
>
> # perf top
>   29.86%  [kernel]               [k] _raw_spin_lock
>   26.62%  [fuse]                 [k] fuse_page_is_writeback
>
> After patch:
>
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s
>
> # vmstat 10
>  2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0
>
> # perf top
>   23.55%  [kernel]             [k] copy_user_enhanced_fast_string
>    9.87%  [kernel]             [k] __memcpy
>    3.10%  [kernel]             [k] _raw_spin_lock
>
> https://jira.sw.ru/browse/PSBM-59254
>
> Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
> ---
>  fs/fuse/file.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++++--------
>  fs/fuse/fuse_i.h |    4 +--
>  fs/fuse/inode.c  |    2 +
>  3 files changed, 73 insertions(+), 16 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 41ed6f0..eab5030 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -435,7 +435,7 @@ static int fuse_release(struct inode *inode, struct file *file)
>  		 * enqueued on this file and it is not going to get new one: it is closing.
>  		 */
>  		if (!ff->fc->close_wait)
> -			wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
> +			wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
>  		else
>  			wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
>
> @@ -501,17 +501,30 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct fuse_req *req;
>  	bool found = false;
> +	struct rb_node *n;
>
>  	spin_lock(&fc->lock);
> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> +	n = fi->writepages.rb_node;
> +	if (!n) {
> +		spin_unlock(&fc->lock);
> +		return false;
> +	}
> +
> +	while (n) {
> +		struct fuse_req *req;
>  		pgoff_t curr_index;
>
> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>  		BUG_ON(req->inode != inode);
>  		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> -		if (!(idx_from >= curr_index + req->num_pages ||
> -		      idx_to < curr_index)) {
> +
> +		if (idx_from >= curr_index + req->num_pages)
> +			n = n->rb_right;
> +		else if (idx_to < curr_index)
> +			n = n->rb_left;
> +		else {
>  			found = true;
>  			break;
>  		}
> @@ -531,17 +544,31 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct fuse_req *req;
>  	bool found = false;
> +	struct rb_node *n;
>
>  	spin_lock(&fc->lock);
> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> +	n = fi->writepages.rb_node;
> +	if (!n) {
> +		spin_unlock(&fc->lock);
> +		return false;
> +	}
> +
> +
> +	while (n) {
> +		struct fuse_req *req;
>  		pgoff_t curr_index;
>
> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>  		BUG_ON(req->inode != inode);
>  		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> -		if (curr_index <= index &&
> -		    index < curr_index + req->num_pages) {
> +
> +		if (index >= curr_index + req->num_pages)
> +			n = n->rb_right;
> +		else if (index < curr_index)
> +			n = n->rb_left;
> +		else {
>  			found = true;
>  			break;
>  		}
> @@ -1868,7 +1895,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
>  	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
>  	int i;
>
> -	list_del(&req->writepages_entry);
> +	rb_erase(&req->writepages_entry, &fi->writepages);
>  	if (fc->writeback_cache || fc->close_wait)
>  		__fuse_file_put(req->ff);
>  	for (i = 0; i < req->num_pages; i++) {
> @@ -1964,6 +1991,36 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
>  	return ff;
>  }
>
> +static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
> +{
> +	unsigned i = ins_req->num_pages - 1;
> +	pgoff_t idx_from = ins_req->pages[0]->index;
> +	pgoff_t idx_to   = ins_req->pages[i]->index;
> +	struct rb_node **p = &root->rb_node;
> +	struct rb_node  *parent = NULL;
> +
> +	while (*p) {
> +		struct fuse_req *req;
> +		pgoff_t curr_index;
> +
> +		parent = *p;
> +		req = rb_entry(parent, struct fuse_req, writepages_entry);
> +		BUG_ON(req->inode != ins_req->inode);
> +		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> +
> +		if (idx_from >= curr_index + req->num_pages)
> +			p = &(*p)->rb_right;
> +		else if (idx_to < curr_index)
> +			p = &(*p)->rb_left;
> +		else
> +			BUG();
> +	}
> +
> +	rb_link_node(&ins_req->writepages_entry, parent, p);
> +	rb_insert_color(&ins_req->writepages_entry, root);
> +	return 0;
> +}
> +
>  static int fuse_writepage_locked(struct page *page,
>  				 struct writeback_control *wbc,
>  				 struct fuse_file **ff_pp)
> @@ -2034,7 +2091,7 @@ static int fuse_writepage_locked(struct page *page,
>  	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>
>  	spin_lock(&fc->lock);
> -	list_add(&req->writepages_entry, &fi->writepages);
> +	tree_insert(&fi->writepages, req);
>  	list_add_tail(&req->list, &fi->queued_writes);
>  	fuse_flush_writepages(inode);
>  	spin_unlock(&fc->lock);
> @@ -2110,7 +2167,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>  	req->misc.write.in.offset = page_offset(req->pages[0]);
>
>  	spin_lock(&fc->lock);
> -	list_add(&req->writepages_entry, &fi->writepages);
> +	tree_insert(&fi->writepages, req);
>  	spin_unlock(&fc->lock);
>
>  	for (i = 0; i < req->num_pages; i++) {
> @@ -2143,7 +2200,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>  		}
>
>  		spin_lock(&fc->lock);
> -		list_del(&req->writepages_entry);
> +		rb_erase(&req->writepages_entry, &fi->writepages);
>  		wake_up(&fi->page_waitq);
>  		spin_unlock(&fc->lock);
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index fefa8ff..4601571 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -117,7 +117,7 @@ struct fuse_inode {
>  	wait_queue_head_t page_waitq;
>
>  	/** List of writepage requestst (pending or sent) */
> -	struct list_head writepages;
> +	struct rb_root writepages;
>
>  	/** Miscellaneous bits describing inode state */
>  	unsigned long state;
> @@ -404,7 +404,7 @@ struct fuse_req {
>  	struct fuse_io_priv *io;
>
>  	/** Link on fi->writepages */
> -	struct list_head writepages_entry;
> +	struct rb_node writepages_entry;
>
>  	/** Request completion callback */
>  	void (*end)(struct fuse_conn *, struct fuse_req *);
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ddd858c..0da2623 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -101,7 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
>  	INIT_LIST_HEAD(&fi->write_files);
>  	INIT_LIST_HEAD(&fi->rw_files);
>  	INIT_LIST_HEAD(&fi->queued_writes);
> -	INIT_LIST_HEAD(&fi->writepages);
> +	fi->writepages = RB_ROOT;
>  	init_waitqueue_head(&fi->page_waitq);
>  	fi->forget = fuse_alloc_forget();
>  	if (!fi->forget) {
>
> .
>
Kirill Tkhai April 7, 2017, 2:28 p.m.
Everything looks good for me except two moments. Please see comments below.

On 07.04.2017 06:35, Maxim Patlasov wrote:
> The patch re-works fi->writepages replacing list with rb-tree.
> This improves performance because kernel fuse iterates through
> fi->writepages for each writeback page and typical number of
> entries is about 800 (for 100MB of fuse writeback).
> 
> Before patch:
> 
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s
> 
> # vmstat 10
>  2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0
> 
> # perf top
>   29.86%  [kernel]               [k] _raw_spin_lock
>   26.62%  [fuse]                 [k] fuse_page_is_writeback
> 
> After patch:
> 
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s
> 
> # vmstat 10
>  2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0
> 
> # perf top
>   23.55%  [kernel]             [k] copy_user_enhanced_fast_string
>    9.87%  [kernel]             [k] __memcpy
>    3.10%  [kernel]             [k] _raw_spin_lock
> 
> https://jira.sw.ru/browse/PSBM-59254
> 
> Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
> ---
>  fs/fuse/file.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++++--------
>  fs/fuse/fuse_i.h |    4 +--
>  fs/fuse/inode.c  |    2 +
>  3 files changed, 73 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 41ed6f0..eab5030 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -435,7 +435,7 @@ static int fuse_release(struct inode *inode, struct file *file)
>  		 * enqueued on this file and it is not going to get new one: it is closing.
>  		 */
>  		if (!ff->fc->close_wait)
> -			wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
> +			wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
>  		else
>  			wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
>  
> @@ -501,17 +501,30 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct fuse_req *req;
>  	bool found = false;
> +	struct rb_node *n;
>  
>  	spin_lock(&fc->lock);
> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> +	n = fi->writepages.rb_node;
> +	if (!n) {
> +		spin_unlock(&fc->lock);
> +		return false;
> +	}

You don't need this "if" branch, because this case is handled in "while".

> +
> +	while (n) {
> +		struct fuse_req *req;
>  		pgoff_t curr_index;
>  
> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>  		BUG_ON(req->inode != inode);
>  		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> -		if (!(idx_from >= curr_index + req->num_pages ||
> -		      idx_to < curr_index)) {
> +
> +		if (idx_from >= curr_index + req->num_pages)
> +			n = n->rb_right;
> +		else if (idx_to < curr_index)
> +			n = n->rb_left;
> +		else {
>  			found = true;
>  			break;
>  		}
> @@ -531,17 +544,31 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct fuse_req *req;
>  	bool found = false;
> +	struct rb_node *n;
>  
>  	spin_lock(&fc->lock);
> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> +	n = fi->writepages.rb_node;
> +	if (!n) {
> +		spin_unlock(&fc->lock);
> +		return false;
> +	}

The same as above.

> +
> +
> +	while (n) {
> +		struct fuse_req *req;
>  		pgoff_t curr_index;
>  
> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>  		BUG_ON(req->inode != inode);
>  		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> -		if (curr_index <= index &&
> -		    index < curr_index + req->num_pages) {
> +
> +		if (index >= curr_index + req->num_pages)
> +			n = n->rb_right;
> +		else if (index < curr_index)
> +			n = n->rb_left;
> +		else {
>  			found = true;
>  			break;
>  		}
> @@ -1868,7 +1895,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
>  	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
>  	int i;
>  
> -	list_del(&req->writepages_entry);
> +	rb_erase(&req->writepages_entry, &fi->writepages);
>  	if (fc->writeback_cache || fc->close_wait)
>  		__fuse_file_put(req->ff);
>  	for (i = 0; i < req->num_pages; i++) {
> @@ -1964,6 +1991,36 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
>  	return ff;
>  }
>  
> +static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
> +{
> +	unsigned i = ins_req->num_pages - 1;
> +	pgoff_t idx_from = ins_req->pages[0]->index;
> +	pgoff_t idx_to   = ins_req->pages[i]->index;
> +	struct rb_node **p = &root->rb_node;
> +	struct rb_node  *parent = NULL;
> +
> +	while (*p) {
> +		struct fuse_req *req;
> +		pgoff_t curr_index;
> +
> +		parent = *p;
> +		req = rb_entry(parent, struct fuse_req, writepages_entry);
> +		BUG_ON(req->inode != ins_req->inode);
> +		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> +
> +		if (idx_from >= curr_index + req->num_pages)
> +			p = &(*p)->rb_right;
> +		else if (idx_to < curr_index)
> +			p = &(*p)->rb_left;
> +		else
> +			BUG();
> +	}
> +
> +	rb_link_node(&ins_req->writepages_entry, parent, p);
> +	rb_insert_color(&ins_req->writepages_entry, root);
> +	return 0;
> +}
> +
>  static int fuse_writepage_locked(struct page *page,
>  				 struct writeback_control *wbc,
>  				 struct fuse_file **ff_pp)
> @@ -2034,7 +2091,7 @@ static int fuse_writepage_locked(struct page *page,
>  	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>  
>  	spin_lock(&fc->lock);
> -	list_add(&req->writepages_entry, &fi->writepages);
> +	tree_insert(&fi->writepages, req);
>  	list_add_tail(&req->list, &fi->queued_writes);
>  	fuse_flush_writepages(inode);
>  	spin_unlock(&fc->lock);
> @@ -2110,7 +2167,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>  	req->misc.write.in.offset = page_offset(req->pages[0]);
>  
>  	spin_lock(&fc->lock);
> -	list_add(&req->writepages_entry, &fi->writepages);
> +	tree_insert(&fi->writepages, req);
>  	spin_unlock(&fc->lock);
>  
>  	for (i = 0; i < req->num_pages; i++) {
> @@ -2143,7 +2200,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>  		}
>  
>  		spin_lock(&fc->lock);
> -		list_del(&req->writepages_entry);
> +		rb_erase(&req->writepages_entry, &fi->writepages);
>  		wake_up(&fi->page_waitq);
>  		spin_unlock(&fc->lock);
>  
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index fefa8ff..4601571 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -117,7 +117,7 @@ struct fuse_inode {
>  	wait_queue_head_t page_waitq;
>  
>  	/** List of writepage requestst (pending or sent) */
> -	struct list_head writepages;
> +	struct rb_root writepages;
>  
>  	/** Miscellaneous bits describing inode state */
>  	unsigned long state;
> @@ -404,7 +404,7 @@ struct fuse_req {
>  	struct fuse_io_priv *io;
>  
>  	/** Link on fi->writepages */
> -	struct list_head writepages_entry;
> +	struct rb_node writepages_entry;
>  
>  	/** Request completion callback */
>  	void (*end)(struct fuse_conn *, struct fuse_req *);
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ddd858c..0da2623 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -101,7 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
>  	INIT_LIST_HEAD(&fi->write_files);
>  	INIT_LIST_HEAD(&fi->rw_files);
>  	INIT_LIST_HEAD(&fi->queued_writes);
> -	INIT_LIST_HEAD(&fi->writepages);
> +	fi->writepages = RB_ROOT;
>  	init_waitqueue_head(&fi->page_waitq);
>  	fi->forget = fuse_alloc_forget();
>  	if (!fi->forget) {
>
Maxim Patlasov April 7, 2017, 6:01 p.m.
Thank you for review, Kirill!


On 04/07/2017 07:28 AM, Kirill Tkhai wrote:
> Everything looks good for me except two moments. Please see comments below.
>
> On 07.04.2017 06:35, Maxim Patlasov wrote:
>> The patch re-works fi->writepages replacing list with rb-tree.
>> This improves performance because kernel fuse iterates through
>> fi->writepages for each writeback page and typical number of
>> entries is about 800 (for 100MB of fuse writeback).
>>
>> Before patch:
>>
>> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
>> 10240+0 records in
>> 10240+0 records out
>> 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s
>>
>> # vmstat 10
>>   2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0
>>
>> # perf top
>>    29.86%  [kernel]               [k] _raw_spin_lock
>>    26.62%  [fuse]                 [k] fuse_page_is_writeback
>>
>> After patch:
>>
>> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
>> 10240+0 records in
>> 10240+0 records out
>> 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s
>>
>> # vmstat 10
>>   2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0
>>
>> # perf top
>>    23.55%  [kernel]             [k] copy_user_enhanced_fast_string
>>     9.87%  [kernel]             [k] __memcpy
>>     3.10%  [kernel]             [k] _raw_spin_lock
>>
>> https://jira.sw.ru/browse/PSBM-59254
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
>> ---
>>   fs/fuse/file.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++++--------
>>   fs/fuse/fuse_i.h |    4 +--
>>   fs/fuse/inode.c  |    2 +
>>   3 files changed, 73 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> index 41ed6f0..eab5030 100644
>> --- a/fs/fuse/file.c
>> +++ b/fs/fuse/file.c
>> @@ -435,7 +435,7 @@ static int fuse_release(struct inode *inode, struct file *file)
>>   		 * enqueued on this file and it is not going to get new one: it is closing.
>>   		 */
>>   		if (!ff->fc->close_wait)
>> -			wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
>> +			wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
>>   		else
>>   			wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
>>   
>> @@ -501,17 +501,30 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
>>   {
>>   	struct fuse_conn *fc = get_fuse_conn(inode);
>>   	struct fuse_inode *fi = get_fuse_inode(inode);
>> -	struct fuse_req *req;
>>   	bool found = false;
>> +	struct rb_node *n;
>>   
>>   	spin_lock(&fc->lock);
>> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
>> +
>> +	n = fi->writepages.rb_node;
>> +	if (!n) {
>> +		spin_unlock(&fc->lock);
>> +		return false;
>> +	}
> You don't need this "if" branch, because this case is handled in "while".
>
>> +
>> +	while (n) {
>> +		struct fuse_req *req;
>>   		pgoff_t curr_index;
>>   
>> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>>   		BUG_ON(req->inode != inode);
>>   		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> -		if (!(idx_from >= curr_index + req->num_pages ||
>> -		      idx_to < curr_index)) {
>> +
>> +		if (idx_from >= curr_index + req->num_pages)
>> +			n = n->rb_right;
>> +		else if (idx_to < curr_index)
>> +			n = n->rb_left;
>> +		else {
>>   			found = true;
>>   			break;
>>   		}
>> @@ -531,17 +544,31 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
>>   {
>>   	struct fuse_conn *fc = get_fuse_conn(inode);
>>   	struct fuse_inode *fi = get_fuse_inode(inode);
>> -	struct fuse_req *req;
>>   	bool found = false;
>> +	struct rb_node *n;
>>   
>>   	spin_lock(&fc->lock);
>> -	list_for_each_entry(req, &fi->writepages, writepages_entry) {
>> +
>> +	n = fi->writepages.rb_node;
>> +	if (!n) {
>> +		spin_unlock(&fc->lock);
>> +		return false;
>> +	}
> The same as above.
>
>> +
>> +
>> +	while (n) {
>> +		struct fuse_req *req;
>>   		pgoff_t curr_index;
>>   
>> +		req = rb_entry(n, struct fuse_req, writepages_entry);
>>   		BUG_ON(req->inode != inode);
>>   		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> -		if (curr_index <= index &&
>> -		    index < curr_index + req->num_pages) {
>> +
>> +		if (index >= curr_index + req->num_pages)
>> +			n = n->rb_right;
>> +		else if (index < curr_index)
>> +			n = n->rb_left;
>> +		else {
>>   			found = true;
>>   			break;
>>   		}
>> @@ -1868,7 +1895,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
>>   	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
>>   	int i;
>>   
>> -	list_del(&req->writepages_entry);
>> +	rb_erase(&req->writepages_entry, &fi->writepages);
>>   	if (fc->writeback_cache || fc->close_wait)
>>   		__fuse_file_put(req->ff);
>>   	for (i = 0; i < req->num_pages; i++) {
>> @@ -1964,6 +1991,36 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
>>   	return ff;
>>   }
>>   
>> +static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
>> +{
>> +	unsigned i = ins_req->num_pages - 1;
>> +	pgoff_t idx_from = ins_req->pages[0]->index;
>> +	pgoff_t idx_to   = ins_req->pages[i]->index;
>> +	struct rb_node **p = &root->rb_node;
>> +	struct rb_node  *parent = NULL;
>> +
>> +	while (*p) {
>> +		struct fuse_req *req;
>> +		pgoff_t curr_index;
>> +
>> +		parent = *p;
>> +		req = rb_entry(parent, struct fuse_req, writepages_entry);
>> +		BUG_ON(req->inode != ins_req->inode);
>> +		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> +
>> +		if (idx_from >= curr_index + req->num_pages)
>> +			p = &(*p)->rb_right;
>> +		else if (idx_to < curr_index)
>> +			p = &(*p)->rb_left;
>> +		else
>> +			BUG();
>> +	}
>> +
>> +	rb_link_node(&ins_req->writepages_entry, parent, p);
>> +	rb_insert_color(&ins_req->writepages_entry, root);
>> +	return 0;
>> +}
>> +
>>   static int fuse_writepage_locked(struct page *page,
>>   				 struct writeback_control *wbc,
>>   				 struct fuse_file **ff_pp)
>> @@ -2034,7 +2091,7 @@ static int fuse_writepage_locked(struct page *page,
>>   	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>>   
>>   	spin_lock(&fc->lock);
>> -	list_add(&req->writepages_entry, &fi->writepages);
>> +	tree_insert(&fi->writepages, req);
>>   	list_add_tail(&req->list, &fi->queued_writes);
>>   	fuse_flush_writepages(inode);
>>   	spin_unlock(&fc->lock);
>> @@ -2110,7 +2167,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>>   	req->misc.write.in.offset = page_offset(req->pages[0]);
>>   
>>   	spin_lock(&fc->lock);
>> -	list_add(&req->writepages_entry, &fi->writepages);
>> +	tree_insert(&fi->writepages, req);
>>   	spin_unlock(&fc->lock);
>>   
>>   	for (i = 0; i < req->num_pages; i++) {
>> @@ -2143,7 +2200,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>>   		}
>>   
>>   		spin_lock(&fc->lock);
>> -		list_del(&req->writepages_entry);
>> +		rb_erase(&req->writepages_entry, &fi->writepages);
>>   		wake_up(&fi->page_waitq);
>>   		spin_unlock(&fc->lock);
>>   
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index fefa8ff..4601571 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -117,7 +117,7 @@ struct fuse_inode {
>>   	wait_queue_head_t page_waitq;
>>   
>>   	/** List of writepage requestst (pending or sent) */
>> -	struct list_head writepages;
>> +	struct rb_root writepages;
>>   
>>   	/** Miscellaneous bits describing inode state */
>>   	unsigned long state;
>> @@ -404,7 +404,7 @@ struct fuse_req {
>>   	struct fuse_io_priv *io;
>>   
>>   	/** Link on fi->writepages */
>> -	struct list_head writepages_entry;
>> +	struct rb_node writepages_entry;
>>   
>>   	/** Request completion callback */
>>   	void (*end)(struct fuse_conn *, struct fuse_req *);
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index ddd858c..0da2623 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -101,7 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
>>   	INIT_LIST_HEAD(&fi->write_files);
>>   	INIT_LIST_HEAD(&fi->rw_files);
>>   	INIT_LIST_HEAD(&fi->queued_writes);
>> -	INIT_LIST_HEAD(&fi->writepages);
>> +	fi->writepages = RB_ROOT;
>>   	init_waitqueue_head(&fi->page_waitq);
>>   	fi->forget = fuse_alloc_forget();
>>   	if (!fi->forget) {
>>
Kirill Tkhai April 7, 2017, 6:58 p.m.
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>

On 07.04.2017 13:26, Konstantin Khorenko wrote:
> Kirill, please review the patch.
> 
> -- 
> Best regards,
> 
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
> 
> On 04/07/2017 06:35 AM, Maxim Patlasov wrote:
>> The patch re-works fi->writepages replacing list with rb-tree.
>> This improves performance because kernel fuse iterates through
>> fi->writepages for each writeback page and typical number of
>> entries is about 800 (for 100MB of fuse writeback).
>>
>> Before patch:
>>
>> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
>> 10240+0 records in
>> 10240+0 records out
>> 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s
>>
>> # vmstat 10
>>  2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0
>>
>> # perf top
>>   29.86%  [kernel]               [k] _raw_spin_lock
>>   26.62%  [fuse]                 [k] fuse_page_is_writeback
>>
>> After patch:
>>
>> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
>> 10240+0 records in
>> 10240+0 records out
>> 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s
>>
>> # vmstat 10
>>  2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0
>>
>> # perf top
>>   23.55%  [kernel]             [k] copy_user_enhanced_fast_string
>>    9.87%  [kernel]             [k] __memcpy
>>    3.10%  [kernel]             [k] _raw_spin_lock
>>
>> https://jira.sw.ru/browse/PSBM-59254
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
>> ---
>>  fs/fuse/file.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++++--------
>>  fs/fuse/fuse_i.h |    4 +--
>>  fs/fuse/inode.c  |    2 +
>>  3 files changed, 73 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> index 41ed6f0..eab5030 100644
>> --- a/fs/fuse/file.c
>> +++ b/fs/fuse/file.c
>> @@ -435,7 +435,7 @@ static int fuse_release(struct inode *inode, struct file *file)
>>           * enqueued on this file and it is not going to get new one: it is closing.
>>           */
>>          if (!ff->fc->close_wait)
>> -            wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
>> +            wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
>>          else
>>              wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
>>
>> @@ -501,17 +501,30 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
>>  {
>>      struct fuse_conn *fc = get_fuse_conn(inode);
>>      struct fuse_inode *fi = get_fuse_inode(inode);
>> -    struct fuse_req *req;
>>      bool found = false;
>> +    struct rb_node *n;
>>
>>      spin_lock(&fc->lock);
>> -    list_for_each_entry(req, &fi->writepages, writepages_entry) {
>> +
>> +    n = fi->writepages.rb_node;
>> +    if (!n) {
>> +        spin_unlock(&fc->lock);
>> +        return false;
>> +    }
>> +
>> +    while (n) {
>> +        struct fuse_req *req;
>>          pgoff_t curr_index;
>>
>> +        req = rb_entry(n, struct fuse_req, writepages_entry);
>>          BUG_ON(req->inode != inode);
>>          curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> -        if (!(idx_from >= curr_index + req->num_pages ||
>> -              idx_to < curr_index)) {
>> +
>> +        if (idx_from >= curr_index + req->num_pages)
>> +            n = n->rb_right;
>> +        else if (idx_to < curr_index)
>> +            n = n->rb_left;
>> +        else {
>>              found = true;
>>              break;
>>          }
>> @@ -531,17 +544,31 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
>>  {
>>      struct fuse_conn *fc = get_fuse_conn(inode);
>>      struct fuse_inode *fi = get_fuse_inode(inode);
>> -    struct fuse_req *req;
>>      bool found = false;
>> +    struct rb_node *n;
>>
>>      spin_lock(&fc->lock);
>> -    list_for_each_entry(req, &fi->writepages, writepages_entry) {
>> +
>> +    n = fi->writepages.rb_node;
>> +    if (!n) {
>> +        spin_unlock(&fc->lock);
>> +        return false;
>> +    }
>> +
>> +
>> +    while (n) {
>> +        struct fuse_req *req;
>>          pgoff_t curr_index;
>>
>> +        req = rb_entry(n, struct fuse_req, writepages_entry);
>>          BUG_ON(req->inode != inode);
>>          curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> -        if (curr_index <= index &&
>> -            index < curr_index + req->num_pages) {
>> +
>> +        if (index >= curr_index + req->num_pages)
>> +            n = n->rb_right;
>> +        else if (index < curr_index)
>> +            n = n->rb_left;
>> +        else {
>>              found = true;
>>              break;
>>          }
>> @@ -1868,7 +1895,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
>>      struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
>>      int i;
>>
>> -    list_del(&req->writepages_entry);
>> +    rb_erase(&req->writepages_entry, &fi->writepages);
>>      if (fc->writeback_cache || fc->close_wait)
>>          __fuse_file_put(req->ff);
>>      for (i = 0; i < req->num_pages; i++) {
>> @@ -1964,6 +1991,36 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
>>      return ff;
>>  }
>>
>> +static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
>> +{
>> +    unsigned i = ins_req->num_pages - 1;
>> +    pgoff_t idx_from = ins_req->pages[0]->index;
>> +    pgoff_t idx_to   = ins_req->pages[i]->index;
>> +    struct rb_node **p = &root->rb_node;
>> +    struct rb_node  *parent = NULL;
>> +
>> +    while (*p) {
>> +        struct fuse_req *req;
>> +        pgoff_t curr_index;
>> +
>> +        parent = *p;
>> +        req = rb_entry(parent, struct fuse_req, writepages_entry);
>> +        BUG_ON(req->inode != ins_req->inode);
>> +        curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
>> +
>> +        if (idx_from >= curr_index + req->num_pages)
>> +            p = &(*p)->rb_right;
>> +        else if (idx_to < curr_index)
>> +            p = &(*p)->rb_left;
>> +        else
>> +            BUG();
>> +    }
>> +
>> +    rb_link_node(&ins_req->writepages_entry, parent, p);
>> +    rb_insert_color(&ins_req->writepages_entry, root);
>> +    return 0;
>> +}
>> +
>>  static int fuse_writepage_locked(struct page *page,
>>                   struct writeback_control *wbc,
>>                   struct fuse_file **ff_pp)
>> @@ -2034,7 +2091,7 @@ static int fuse_writepage_locked(struct page *page,
>>      inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>>
>>      spin_lock(&fc->lock);
>> -    list_add(&req->writepages_entry, &fi->writepages);
>> +    tree_insert(&fi->writepages, req);
>>      list_add_tail(&req->list, &fi->queued_writes);
>>      fuse_flush_writepages(inode);
>>      spin_unlock(&fc->lock);
>> @@ -2110,7 +2167,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>>      req->misc.write.in.offset = page_offset(req->pages[0]);
>>
>>      spin_lock(&fc->lock);
>> -    list_add(&req->writepages_entry, &fi->writepages);
>> +    tree_insert(&fi->writepages, req);
>>      spin_unlock(&fc->lock);
>>
>>      for (i = 0; i < req->num_pages; i++) {
>> @@ -2143,7 +2200,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
>>          }
>>
>>          spin_lock(&fc->lock);
>> -        list_del(&req->writepages_entry);
>> +        rb_erase(&req->writepages_entry, &fi->writepages);
>>          wake_up(&fi->page_waitq);
>>          spin_unlock(&fc->lock);
>>
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index fefa8ff..4601571 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -117,7 +117,7 @@ struct fuse_inode {
>>      wait_queue_head_t page_waitq;
>>
>>      /** List of writepage requestst (pending or sent) */
>> -    struct list_head writepages;
>> +    struct rb_root writepages;
>>
>>      /** Miscellaneous bits describing inode state */
>>      unsigned long state;
>> @@ -404,7 +404,7 @@ struct fuse_req {
>>      struct fuse_io_priv *io;
>>
>>      /** Link on fi->writepages */
>> -    struct list_head writepages_entry;
>> +    struct rb_node writepages_entry;
>>
>>      /** Request completion callback */
>>      void (*end)(struct fuse_conn *, struct fuse_req *);
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index ddd858c..0da2623 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -101,7 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
>>      INIT_LIST_HEAD(&fi->write_files);
>>      INIT_LIST_HEAD(&fi->rw_files);
>>      INIT_LIST_HEAD(&fi->queued_writes);
>> -    INIT_LIST_HEAD(&fi->writepages);
>> +    fi->writepages = RB_ROOT;
>>      init_waitqueue_head(&fi->page_waitq);
>>      fi->forget = fuse_alloc_forget();
>>      if (!fi->forget) {
>>
>> .
>>