[RHEL7,COMMIT] ms/nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

Submitted by Konstantin Khorenko on Feb. 27, 2020, 10:01 a.m.

Details

Message ID 202002271001.01RA1PHc024953@finist-ce7.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko Feb. 27, 2020, 10:01 a.m.
The commit is pushed to "branch-rh7-3.10.0-1062.12.1.vz7.131.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.12.1.vz7.131.4
------>
commit 72d4c25c68761713eafa79cd96be05ddd83a544d
Author: Trond Myklebust <trondmy@gmail.com>
Date:   Thu Feb 27 13:01:25 2020 +0300

    ms/nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()
    
    nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()
    
    When we're destroying the client lease, and we call
    nfsd4_shutdown_callback(), we must ensure that we do not return
    before all outstanding callbacks have terminated and have
    released their payloads.
    
    Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
    Signed-off-by: J. Bruce Fields <bfields@redhat.com>
    
    ML commit 2bbfed98a4d82 ("nfsd: Fix races between nfsd4_cb_release() and
    nfsd4_shutdown_callback()")
    
    Taken from https://bugzilla.redhat.com/show_bug.cgi?id=1448750
    https://jira.sw.ru/browse/PSBM-100902
    Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 fs/nfsd/nfs4callback.c | 67 +++++++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/state.h        |  1 +
 2 files changed, 59 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d692b9756e847..76e75a6ac0989 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -744,6 +744,31 @@  static int max_cb_time(struct net *net)
 	return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
 }
 
+static struct workqueue_struct *callback_wq;
+
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+	return queue_work(callback_wq, &cb->cb_work);
+}
+
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+	atomic_inc(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+	if (atomic_dec_and_test(&clp->cl_cb_inflight))
+		wake_up_var(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+	wait_var_event(&clp->cl_cb_inflight,
+			!atomic_read(&clp->cl_cb_inflight));
+}
+
 static struct rpc_cred *callback_cred;
 
 int set_callback_cred(void)
@@ -860,14 +885,21 @@  static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
 		clp->cl_cb_state = NFSD4_CB_UP;
 }
 
+static void nfsd4_cb_probe_release(void *calldata)
+{
+	struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+	nfsd41_cb_inflight_end(clp);
+
+}
+
 static const struct rpc_call_ops nfsd4_cb_probe_ops = {
 	/* XXX: release method to ensure we set the cb channel down if
 	 * necessary on early failure? */
 	.rpc_call_done = nfsd4_cb_probe_done,
+	.rpc_release = nfsd4_cb_probe_release,
 };
 
-static struct workqueue_struct *callback_wq;
-
 /*
  * Poke the callback thread to process any updates to the callback
  * parameters, and send a null probe.
@@ -927,6 +959,16 @@  static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
 	}
 }
 
+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+	struct nfs4_client *clp = cb->cb_clp;
+
+	nfsd41_cb_release_slot(cb);
+	if (cb->cb_ops && cb->cb_ops->release)
+		cb->cb_ops->release(cb);
+	nfsd41_cb_inflight_end(clp);
+}
+
 /*
  * TODO: cb_sequence should support referring call lists, cachethis, multiple
  * slots, and mark callback channel down on communication errors.
@@ -1024,8 +1066,10 @@  static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
 		ret = false;
 	goto out;
 need_restart:
-	task->tk_status = 0;
-	cb->cb_need_restart = true;
+	if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+		task->tk_status = 0;
+		cb->cb_need_restart = true;
+	}
 	return false;
 }
 
@@ -1066,9 +1110,9 @@  static void nfsd4_cb_release(void *calldata)
 	struct nfsd4_callback *cb = calldata;
 
 	if (cb->cb_need_restart)
-		nfsd4_run_cb(cb);
+		nfsd4_queue_cb(cb);
 	else
-		cb->cb_ops->release(cb);
+		nfsd41_destroy_cb(cb);
 
 }
 
@@ -1102,6 +1146,7 @@  void nfsd4_shutdown_callback(struct nfs4_client *clp)
 	 */
 	nfsd4_run_cb(&clp->cl_cb_null);
 	flush_workqueue(callback_wq);
+	nfsd41_cb_inflight_wait_complete(clp);
 }
 
 /* requires cl_lock: */
@@ -1187,8 +1232,7 @@  nfsd4_run_cb_work(struct work_struct *work)
 	clnt = clp->cl_cb_client;
 	if (!clnt) {
 		/* Callback channel broken, or client killed; give up: */
-		if (cb->cb_ops && cb->cb_ops->release)
-			cb->cb_ops->release(cb);
+		nfsd41_destroy_cb(cb);
 		return;
 	}
 
@@ -1197,6 +1241,7 @@  nfsd4_run_cb_work(struct work_struct *work)
 	 */
 	if (!cb->cb_ops && clp->cl_minorversion) {
 		clp->cl_cb_state = NFSD4_CB_UP;
+		nfsd41_destroy_cb(cb);
 		return;
 	}
 
@@ -1222,5 +1267,9 @@  void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 
 void nfsd4_run_cb(struct nfsd4_callback *cb)
 {
-	queue_work(callback_wq, &cb->cb_work);
+	struct nfs4_client *clp = cb->cb_clp;
+
+	nfsd41_cb_inflight_begin(clp);
+	if (!nfsd4_queue_cb(cb))
+		nfsd41_cb_inflight_end(clp);
 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d8bac46b3573a..3e11dad72eba4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -350,6 +350,7 @@  struct nfs4_client {
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
 	struct net		*net;
+	atomic_t		cl_cb_inflight;	/* Outstanding callbacks */
 };
 
 /* struct nfs4_client_reset