[Devel,RHEL7,COMMIT] ve/fs/mount: pin mnt to a VE for correct number of mounts accounting

Submitted by Konstantin Khorenko on Aug. 11, 2017, 2:50 p.m.

Details

Message ID 201708111450.v7BEoCdw005688@finist_cl7.x64_64.work.ct
State New
Series "ve/fs/mount: make work with ve->mnt_nr race free"
Headers show

Commit Message

Konstantin Khorenko Aug. 11, 2017, 2:50 p.m.
The commit is pushed to "branch-rh7-3.10.0-514.26.1.vz7.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.26.1.vz7.35.3
------>
commit 049382b868b944f91c154bab238eb94c45ce476a
Author: Konstantin Khorenko <khorenko@virtuozzo.com>
Date:   Fri Aug 11 18:50:12 2017 +0400

    ve/fs/mount: pin mnt to a VE for correct number of mounts accounting
    
    If a mount is done in one VE context and the umount in another,
    the ve->mnt_nr counters become unbalanced, and this can cause new
    mounts to be denied due to the per-VE limit on the number of mounts.
    
    Additionally, move the related functions to fs/namespace.c to avoid
    including fs/mount.h in ve.h:
    	ve_mount_nr_inc()
    	ve_mount_nr_dec()
    	ve_mount_allowed()
    
    Example: a mount is done from inside a CT and the umount from the host =>
    ve->mnt_nr of the CT is never decremented and stays incorrectly increased.
    
    https://jira.sw.ru/browse/PSBM-69880
    
    Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
    Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/mount.h         |  3 +++
 fs/namespace.c     | 41 ++++++++++++++++++++++++++++++++++++++---
 include/linux/ve.h | 27 +--------------------------
 3 files changed, 42 insertions(+), 29 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/mount.h b/fs/mount.h
index 013152b..77f50cc 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -41,6 +41,9 @@  struct mount {
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	struct mountpoint *mnt_mp;      /* where is it mounted */
+#ifdef CONFIG_VE
+	struct ve_struct *ve_owner;	/* VE in which this mount was created */
+#endif /* CONFIG_VE */
 #ifdef CONFIG_FSNOTIFY
 	struct hlist_head mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index f2d1f84..2cf7f9a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -163,6 +163,10 @@  unsigned int mnt_get_count(struct mount *mnt)
 #endif
 }
 
+static inline int ve_mount_allowed(void);
+static inline void ve_mount_nr_inc(struct mount *mnt);
+static inline void ve_mount_nr_dec(struct mount *mnt);
+
 static struct mount *alloc_vfsmnt(const char *name)
 {
 	struct mount *mnt;
@@ -211,7 +215,7 @@  static struct mount *alloc_vfsmnt(const char *name)
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
 #endif
 	}
-	ve_mount_nr_inc();
+	ve_mount_nr_inc(mnt);
 	return mnt;
 
 #ifdef CONFIG_SMP
@@ -552,7 +556,7 @@  int sb_prepare_remount_readonly(struct super_block *sb)
 
 static void free_vfsmnt(struct mount *mnt)
 {
-	ve_mount_nr_dec();
+	ve_mount_nr_dec(mnt);
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
@@ -2012,7 +2016,38 @@  int ve_devmnt_process(struct ve_struct *ve, dev_t dev, void **data_pp, int remou
 
 	return err;
 }
-#endif
+
+static inline int ve_mount_allowed(void)
+{
+	struct ve_struct *ve = get_exec_env();
+
+	return ve_is_super(ve) ||
+		atomic_read(&ve->mnt_nr) < (int)sysctl_ve_mount_nr;
+}
+
+static inline void ve_mount_nr_inc(struct mount *mnt)
+{
+	struct ve_struct *ve = get_exec_env();
+
+	mnt->ve_owner = get_ve(ve);
+	atomic_inc(&ve->mnt_nr);
+}
+
+static inline void ve_mount_nr_dec(struct mount *mnt)
+{
+	struct ve_struct *ve = mnt->ve_owner;
+
+	atomic_dec(&ve->mnt_nr);
+	put_ve(ve);
+	mnt->ve_owner = NULL;
+}
+
+#else /* CONFIG_VE */
+
+static inline int ve_mount_allowed(void) { return 1; }
+static inline void ve_mount_nr_inc(struct mount *mnt) { }
+static inline void ve_mount_nr_dec(struct mount *mnt) { }
+#endif /* CONFIG_VE */
 
 static int do_check_and_remount_sb(struct super_block *sb, int flags, void *data)
 {
diff --git a/include/linux/ve.h b/include/linux/ve.h
index f430ccd..c9b0af4 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -119,10 +119,7 @@  struct ve_struct {
 	int			netns_max_nr;
 	atomic_t		netif_avail_nr;
 	int			netif_max_nr;
-	/* Number of mounts. May become unbalanced if VE0 mounts something
-	 * and the VE unmounts it. This is acceptable.
-	 */
-	atomic_t		mnt_nr;
+	atomic_t		mnt_nr;	/* number of present VE mounts */
 #ifdef CONFIG_COREDUMP
 	char 			core_pattern[CORENAME_MAX_SIZE];
 #endif
@@ -228,24 +225,6 @@  extern void vtty_release(struct tty_struct *tty, struct tty_struct *o_tty,
 extern bool vtty_is_master(struct tty_struct *tty);
 #endif /* CONFIG_TTY */
 
-static inline int ve_mount_allowed(void)
-{
-	struct ve_struct *ve = get_exec_env();
-
-	return ve_is_super(ve) ||
-		atomic_read(&ve->mnt_nr) < (int)sysctl_ve_mount_nr;
-}
-
-static inline void ve_mount_nr_inc(void)
-{
-	atomic_inc(&get_exec_env()->mnt_nr);
-}
-
-static inline void ve_mount_nr_dec(void)
-{
-	atomic_dec(&get_exec_env()->mnt_nr);
-}
-
 #else	/* CONFIG_VE */
 
 #define ve_uevent_seqnum uevent_seqnum
@@ -279,10 +258,6 @@  static inline void monotonic_abs_to_ve(clockid_t which_clock,
 				struct timespec *tp) { }
 static inline void monotonic_ve_to_abs(clockid_t which_clock,
 				struct timepsec *tp) { }
-
-static inline int ve_mount_allowed(void) { return 1; }
-static inline void ve_mount_nr_inc(void) { }
-static inline void ve_mount_nr_dec(void) { }
 #endif	/* CONFIG_VE */
 
 struct seq_file;