[RHEL7,COMMIT] fs/fuse kio_pcs: drop old mapping after size growth

Submitted by Konstantin Khorenko on July 25, 2018, 3:40 p.m.

Details

Message ID 201807251540.w6PFeLFR018994@finist_ce7.work
State New
Series "fs/fuse kio_pcs: drop old mapping after size growth"
Headers show

Commit Message

Konstantin Khorenko July 25, 2018, 3:40 p.m.
The commit is pushed to "branch-rh7-3.10.0-862.9.1.vz7.70.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.9.1.vz7.63.2
------>
commit 137e83c45f9955073c14f53403d95bb1b48fee46
Author: Pavel Butsykin <pbutsykin@virtuozzo.com>
Date:   Wed Jul 25 18:40:21 2018 +0300

    fs/fuse kio_pcs: drop old mapping after size growth
    
    With kio, FUSE_SETATTR sends a request to the MDS to expand the file size.
    Then vstorage-mount queries the new map from the MDS. The MDS pushes updates
    to the CSes, which invalidates the map currently cached in kio, so the CSes
    reject all IO with PCS_ERR_CSD_STALE_MAP.
    
    As a result, every request waiting for the size grow does extra work and is
    sent to the CS twice. Let's drop the mapping from the cache immediately after
    submit_size_grow() to avoid slow writes during file growth.
    
    Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 30 ++++++++++++++++++++++++++----
 fs/fuse/kio/pcs/pcs_req.h          |  1 +
 2 files changed, 27 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 8a6f9e60fece..82d514f4332a 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -657,18 +657,29 @@  static int submit_size_grow(struct inode *inode, unsigned long long size)
 
 }
 
+static void truncate_complete(struct pcs_int_request *treq)
+{
+	if (pcs_if_error(&treq->error)) {
+		TRACE("truncate offs: %llu error: %d \n",
+		      treq->truncreq.offset, treq->error.value);
+	}
+	pcs_cc_requeue(treq->cc, &treq->truncreq.waiters);
+	ireq_destroy(treq);
+}
+
 static void fuse_size_grow_work(struct work_struct *w)
 {
 	struct pcs_dentry_info* di = container_of(w, struct pcs_dentry_info, size.work);
 	struct inode *inode = &di->inode->inode;
-	struct pcs_int_request* ireq, *next;
-	unsigned long long size;
+	struct pcs_int_request *ireq, *next, *treq;
+	u64 size, old_size;
 	int err;
 	LIST_HEAD(pending_reqs);
 
 	spin_lock(&di->lock);
 	BUG_ON(di->size.op != PCS_SIZE_INACTION);
 
+	old_size = DENTRY_SIZE(di);
 	size = di->size.required;
 	if (!size) {
 		BUG_ON(!list_empty(&di->size.queue));
@@ -676,7 +687,7 @@  static void fuse_size_grow_work(struct work_struct *w)
 		TRACE("No more pending writes\n");
 		return;
 	}
-	BUG_ON(di->fileinfo.attr.size >= size);
+	BUG_ON(old_size >= size);
 
 	list_splice_tail_init(&di->size.queue, &pending_reqs);
 	di->size.op = PCS_SIZE_GROW;
@@ -713,7 +724,18 @@  static void fuse_size_grow_work(struct work_struct *w)
 		di->size.required = 0;
 	spin_unlock(&di->lock);
 
-	pcs_cc_requeue(di->cluster, &pending_reqs);
+	treq = ireq_alloc(di);
+	if (!treq) {
+		TRACE("Can't allocate treq\n");
+		pcs_cc_requeue(di->cluster, &pending_reqs);
+		return;
+	}
+	/* Drop old mapping from cache */
+	treq->type = PCS_IREQ_TRUNCATE;
+	treq->complete_cb = truncate_complete;
+	INIT_LIST_HEAD(&treq->truncreq.waiters);
+	list_splice(&pending_reqs, &treq->truncreq.waiters);
+	pcs_mapping_truncate(treq, old_size);
 }
 
 static void wait_grow(struct pcs_fuse_req *r, struct pcs_dentry_info *di, unsigned long long required)
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index fec6c1e1575c..d27943675776 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -131,6 +131,7 @@  struct pcs_int_request
 			u64			offset;
 			int			phase;
 			PCS_MAP_VERSION_T	version;
+			struct list_head	waiters;
 		} truncreq;
 
 		struct {

Comments

Konstantin Khorenko July 25, 2018, 3:41 p.m.
Note: for now, this patch goes to vz7 update 9 only.

If we need it in vz7 update 8, please let me know.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 07/25/2018 06:40 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-862.9.1.vz7.70.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-862.9.1.vz7.63.2
> ------>
> commit 137e83c45f9955073c14f53403d95bb1b48fee46
> Author: Pavel Butsykin <pbutsykin@virtuozzo.com>
> Date:   Wed Jul 25 18:40:21 2018 +0300
>
>     fs/fuse kio_pcs: drop old mapping after size growth
>
>     With kio, FUSE_SETATTR sends a request to the MDS to expand the file size.
>     Then vstorage-mount queries the new map from the MDS. The MDS pushes updates
>     to the CSes, which invalidates the map currently cached in kio, so the CSes
>     reject all IO with PCS_ERR_CSD_STALE_MAP.
>
>     As a result, every request waiting for the size grow does extra work and is
>     sent to the CS twice. Let's drop the mapping from the cache immediately after
>     submit_size_grow() to avoid slow writes during file growth.
>
>     Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> ---
>  fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 30 ++++++++++++++++++++++++++----
>  fs/fuse/kio/pcs/pcs_req.h          |  1 +
>  2 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
> index 8a6f9e60fece..82d514f4332a 100644
> --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
> +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
> @@ -657,18 +657,29 @@ static int submit_size_grow(struct inode *inode, unsigned long long size)
>
>  }
>
> +static void truncate_complete(struct pcs_int_request *treq)
> +{
> +	if (pcs_if_error(&treq->error)) {
> +		TRACE("truncate offs: %llu error: %d \n",
> +		      treq->truncreq.offset, treq->error.value);
> +	}
> +	pcs_cc_requeue(treq->cc, &treq->truncreq.waiters);
> +	ireq_destroy(treq);
> +}
> +
>  static void fuse_size_grow_work(struct work_struct *w)
>  {
>  	struct pcs_dentry_info* di = container_of(w, struct pcs_dentry_info, size.work);
>  	struct inode *inode = &di->inode->inode;
> -	struct pcs_int_request* ireq, *next;
> -	unsigned long long size;
> +	struct pcs_int_request *ireq, *next, *treq;
> +	u64 size, old_size;
>  	int err;
>  	LIST_HEAD(pending_reqs);
>
>  	spin_lock(&di->lock);
>  	BUG_ON(di->size.op != PCS_SIZE_INACTION);
>
> +	old_size = DENTRY_SIZE(di);
>  	size = di->size.required;
>  	if (!size) {
>  		BUG_ON(!list_empty(&di->size.queue));
> @@ -676,7 +687,7 @@ static void fuse_size_grow_work(struct work_struct *w)
>  		TRACE("No more pending writes\n");
>  		return;
>  	}
> -	BUG_ON(di->fileinfo.attr.size >= size);
> +	BUG_ON(old_size >= size);
>
>  	list_splice_tail_init(&di->size.queue, &pending_reqs);
>  	di->size.op = PCS_SIZE_GROW;
> @@ -713,7 +724,18 @@ static void fuse_size_grow_work(struct work_struct *w)
>  		di->size.required = 0;
>  	spin_unlock(&di->lock);
>
> -	pcs_cc_requeue(di->cluster, &pending_reqs);
> +	treq = ireq_alloc(di);
> +	if (!treq) {
> +		TRACE("Can't allocate treq\n");
> +		pcs_cc_requeue(di->cluster, &pending_reqs);
> +		return;
> +	}
> +	/* Drop old mapping from cache */
> +	treq->type = PCS_IREQ_TRUNCATE;
> +	treq->complete_cb = truncate_complete;
> +	INIT_LIST_HEAD(&treq->truncreq.waiters);
> +	list_splice(&pending_reqs, &treq->truncreq.waiters);
> +	pcs_mapping_truncate(treq, old_size);
>  }
>
>  static void wait_grow(struct pcs_fuse_req *r, struct pcs_dentry_info *di, unsigned long long required)
> diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
> index fec6c1e1575c..d27943675776 100644
> --- a/fs/fuse/kio/pcs/pcs_req.h
> +++ b/fs/fuse/kio/pcs/pcs_req.h
> @@ -131,6 +131,7 @@ struct pcs_int_request
>  			u64			offset;
>  			int			phase;
>  			PCS_MAP_VERSION_T	version;
> +			struct list_head	waiters;
>  		} truncreq;
>
>  		struct {
> .
>