[rh7] fs/nfs: don't use delayed unmount for nfs.

Submitted by Andrey Ryabinin on Oct. 27, 2017, 3:31 p.m.

Details

Message ID 20171027153118.17547-1-aryabinin@virtuozzo.com
State New
Series "fs/nfs: don't use delayed unmount for nfs."
Headers show

Commit Message

Andrey Ryabinin Oct. 27, 2017, 3:31 p.m.
Delayed nfs unmount causes too much PITA. We must destroy VENET ip after
unmount, but in that case we can't reuse that IP on restarted container
because it migh be still alive.

So let's just unmount NFS synchronously and destroy veip after it.

https://jira.sw.ru/browse/PSBM-76086
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 drivers/net/venetdev.c | 9 ++-------
 fs/namespace.c         | 3 ++-
 fs/nfs/super.c         | 1 +
 include/linux/fs.h     | 4 ++++
 4 files changed, 9 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/net/venetdev.c b/drivers/net/venetdev.c
index 1c4ae90b7ba8..11f4a66aaf3d 100644
--- a/drivers/net/venetdev.c
+++ b/drivers/net/venetdev.c
@@ -765,7 +765,7 @@  static void venet_dellink(struct net_device *dev, struct list_head *head)
 	 * has VE_FEATURE_NFS enabled. Thus here we have to destroy veip in
 	 * this case.
 	 */
-	if (env->ve_netns || (env->features & VE_FEATURE_NFS))
+	if (env->ve_netns)
 		veip_shutdown(env);
 
 	env->_venet_dev = NULL;
@@ -1182,12 +1182,7 @@  static struct rtnl_link_ops venet_link_ops = {
 
 static void veip_shutdown_fini(void *data)
 {
-	struct ve_struct *ve = data;
-
-	if (ve->features & VE_FEATURE_NFS)
-		return;
-
-	veip_shutdown(ve);
+	veip_shutdown(data);
 }
 
 static struct ve_hook veip_shutdown_hook = {
diff --git a/fs/namespace.c b/fs/namespace.c
index 2c9824985bc5..c2489dd2f520 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1134,7 +1134,8 @@  static void mntput_no_expire(struct mount *mnt)
 	}
 	unlock_mount_hash();
 
-	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
+	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))
+	    && !(mnt->mnt.mnt_sb->s_iflags & SB_I_UMOUNT_SYNC)) {
 		struct task_struct *task = current;
 		if (likely(!(task->flags & PF_KTHREAD))) {
 			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8f29ad17e29e..65a0ac8a3d16 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2414,6 +2414,7 @@  static int nfs_set_super(struct super_block *s, void *data)
 	int ret;
 
 	s->s_flags = sb_mntdata->mntflags;
+	s->s_iflags |= SB_I_UMOUNT_SYNC;
 	s->s_fs_info = server;
 	s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
 	ret = set_anon_super(s, server);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79011b4bc040..2f3a983741f8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1526,6 +1526,9 @@  struct mm_struct;
 #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
 #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
+/* sb->s_iflags */
+#define SB_I_UMOUNT_SYNC		0x10000000 /* don't use delayed unmount */
+
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
@@ -1566,6 +1569,7 @@  struct super_block {
 	const struct quotactl_ops	*s_qcop;
 	const struct export_operations *s_export_op;
 	unsigned long		s_flags;
+	unsigned long           s_iflags;       /* internal SB_I_* flags */
 	unsigned long		s_magic;
 	struct dentry		*s_root;
 	struct rw_semaphore	s_umount;

Comments

Andrey Vagin Oct. 27, 2017, 5:45 p.m.
On Fri, Oct 27, 2017 at 06:31:18PM +0300, Andrey Ryabinin wrote:
> Delayed nfs unmount causes too much PITA. We must destroy VENET ip after
> unmount, but in that case we can't reuse that IP on restarted container
> because it migh be still alive.
> 
> So let's just unmount NFS synchronously and destroy veip after it.

You change a general scenario to fix your small case. For users, it will
be unexpected behaviour. They call umount -l and don't expect any
delays.

How nfs mounts are umounted when a host is shutdowned? I think they are
umounted from init scripts (systemd). Why we can't umount nfs mounts
with the force flag when we stop a container?

> 
> https://jira.sw.ru/browse/PSBM-76086
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>  drivers/net/venetdev.c | 9 ++-------
>  fs/namespace.c         | 3 ++-
>  fs/nfs/super.c         | 1 +
>  include/linux/fs.h     | 4 ++++
>  4 files changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/venetdev.c b/drivers/net/venetdev.c
> index 1c4ae90b7ba8..11f4a66aaf3d 100644
> --- a/drivers/net/venetdev.c
> +++ b/drivers/net/venetdev.c
> @@ -765,7 +765,7 @@ static void venet_dellink(struct net_device *dev, struct list_head *head)
>  	 * has VE_FEATURE_NFS enabled. Thus here we have to destroy veip in
>  	 * this case.
>  	 */
> -	if (env->ve_netns || (env->features & VE_FEATURE_NFS))
> +	if (env->ve_netns)
>  		veip_shutdown(env);
>  
>  	env->_venet_dev = NULL;
> @@ -1182,12 +1182,7 @@ static struct rtnl_link_ops venet_link_ops = {
>  
>  static void veip_shutdown_fini(void *data)
>  {
> -	struct ve_struct *ve = data;
> -
> -	if (ve->features & VE_FEATURE_NFS)
> -		return;
> -
> -	veip_shutdown(ve);
> +	veip_shutdown(data);
>  }
>  
>  static struct ve_hook veip_shutdown_hook = {
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 2c9824985bc5..c2489dd2f520 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1134,7 +1134,8 @@ static void mntput_no_expire(struct mount *mnt)
>  	}
>  	unlock_mount_hash();
>  
> -	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
> +	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))
> +	    && !(mnt->mnt.mnt_sb->s_iflags & SB_I_UMOUNT_SYNC)) {
>  		struct task_struct *task = current;
>  		if (likely(!(task->flags & PF_KTHREAD))) {
>  			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index 8f29ad17e29e..65a0ac8a3d16 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -2414,6 +2414,7 @@ static int nfs_set_super(struct super_block *s, void *data)
>  	int ret;
>  
>  	s->s_flags = sb_mntdata->mntflags;
> +	s->s_iflags |= SB_I_UMOUNT_SYNC;
>  	s->s_fs_info = server;
>  	s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
>  	ret = set_anon_super(s, server);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 79011b4bc040..2f3a983741f8 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1526,6 +1526,9 @@ struct mm_struct;
>  #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
>  #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
>  
> +/* sb->s_iflags */
> +#define SB_I_UMOUNT_SYNC		0x10000000 /* don't use delayed unmount */
> +
>  extern struct list_head super_blocks;
>  extern spinlock_t sb_lock;
>  
> @@ -1566,6 +1569,7 @@ struct super_block {
>  	const struct quotactl_ops	*s_qcop;
>  	const struct export_operations *s_export_op;
>  	unsigned long		s_flags;
> +	unsigned long           s_iflags;       /* internal SB_I_* flags */
>  	unsigned long		s_magic;
>  	struct dentry		*s_root;
>  	struct rw_semaphore	s_umount;
> -- 
> 2.13.6
> 
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
Andrey Ryabinin Oct. 30, 2017, 9:15 a.m.
On 10/27/2017 08:45 PM, Andrei Vagin wrote:
> On Fri, Oct 27, 2017 at 06:31:18PM +0300, Andrey Ryabinin wrote:
>> Delayed nfs unmount causes too much PITA. We must destroy VENET ip after
>> unmount, but in that case we can't reuse that IP on restarted container
>> because it migh be still alive.
>>
>> So let's just unmount NFS synchronously and destroy veip after it.
> 
> You change a general scenario to fix your small case. For users, it will
> be unexpected behaviour. They call umount -l and don't expect any
> delays.


This has nothing to do with the MNT_DETACH (umount -l) at all. This more affects ordinary umount.
Currently, due to delayed mntput thing (see commit 9ea459e110df32e6 upstream), successful  umount /mnt
doesn't mean that umount actually finished. So this patch only brings back pre 9ea459e110df32e6 behaviour
for NFS.



> How nfs mounts are umounted when a host is shutdowned? I think they are
> umounted from init scripts (systemd). Why we can't umount nfs mounts
> with the force flag when we stop a container?
> 

It doesn't really matter how umount triggered. Currently there is no way to do unmount synchronously.
You may call "umount /mnt" whenever you want, but successfully finished umount doesn't guarantee that delayed_mntput
finished or even started.


FYI, Stas suggested another possible way to fix this:
 "Bring back that tricky logic, allowing catching falling VEIP object and reuse it."

But it's unclear to me, how is this supposed to work. This basically means that we might have two interfaces with the same IP address.
So, when packet arrives it's unclear to which interface we supposed to redirect it.
Thus, bringing  back pre 9ea459e110df32e6 behaviour seems like the only way to me.


>>
>> https://jira.sw.ru/browse/PSBM-76086
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>  drivers/net/venetdev.c | 9 ++-------
>>  fs/namespace.c         | 3 ++-
>>  fs/nfs/super.c         | 1 +
>>  include/linux/fs.h     | 4 ++++
>>  4 files changed, 9 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/venetdev.c b/drivers/net/venetdev.c
>> index 1c4ae90b7ba8..11f4a66aaf3d 100644
>> --- a/drivers/net/venetdev.c
>> +++ b/drivers/net/venetdev.c
>> @@ -765,7 +765,7 @@ static void venet_dellink(struct net_device *dev, struct list_head *head)
>>  	 * has VE_FEATURE_NFS enabled. Thus here we have to destroy veip in
>>  	 * this case.
>>  	 */
>> -	if (env->ve_netns || (env->features & VE_FEATURE_NFS))
>> +	if (env->ve_netns)
>>  		veip_shutdown(env);
>>  
>>  	env->_venet_dev = NULL;
>> @@ -1182,12 +1182,7 @@ static struct rtnl_link_ops venet_link_ops = {
>>  
>>  static void veip_shutdown_fini(void *data)
>>  {
>> -	struct ve_struct *ve = data;
>> -
>> -	if (ve->features & VE_FEATURE_NFS)
>> -		return;
>> -
>> -	veip_shutdown(ve);
>> +	veip_shutdown(data);
>>  }
>>  
>>  static struct ve_hook veip_shutdown_hook = {
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index 2c9824985bc5..c2489dd2f520 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -1134,7 +1134,8 @@ static void mntput_no_expire(struct mount *mnt)
>>  	}
>>  	unlock_mount_hash();
>>  
>> -	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
>> +	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))
>> +	    && !(mnt->mnt.mnt_sb->s_iflags & SB_I_UMOUNT_SYNC)) {
>>  		struct task_struct *task = current;
>>  		if (likely(!(task->flags & PF_KTHREAD))) {
>>  			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>> index 8f29ad17e29e..65a0ac8a3d16 100644
>> --- a/fs/nfs/super.c
>> +++ b/fs/nfs/super.c
>> @@ -2414,6 +2414,7 @@ static int nfs_set_super(struct super_block *s, void *data)
>>  	int ret;
>>  
>>  	s->s_flags = sb_mntdata->mntflags;
>> +	s->s_iflags |= SB_I_UMOUNT_SYNC;
>>  	s->s_fs_info = server;
>>  	s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
>>  	ret = set_anon_super(s, server);
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 79011b4bc040..2f3a983741f8 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1526,6 +1526,9 @@ struct mm_struct;
>>  #define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
>>  #define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
>>  
>> +/* sb->s_iflags */
>> +#define SB_I_UMOUNT_SYNC		0x10000000 /* don't use delayed unmount */
>> +
>>  extern struct list_head super_blocks;
>>  extern spinlock_t sb_lock;
>>  
>> @@ -1566,6 +1569,7 @@ struct super_block {
>>  	const struct quotactl_ops	*s_qcop;
>>  	const struct export_operations *s_export_op;
>>  	unsigned long		s_flags;
>> +	unsigned long           s_iflags;       /* internal SB_I_* flags */
>>  	unsigned long		s_magic;
>>  	struct dentry		*s_root;
>>  	struct rw_semaphore	s_umount;
>> -- 
>> 2.13.6
>>
>> _______________________________________________
>> Devel mailing list
>> Devel@openvz.org
>> https://lists.openvz.org/mailman/listinfo/devel