[RHEL8,COMMIT] ve/fs/devmnt: process mount options

Submitted by Konstantin Khorenko on Oct. 29, 2020, 12:09 p.m.

Details

Message ID 202010291209.09TC9diw1546045@finist-co8.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko Oct. 29, 2020, 12:09 p.m.
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.14
------>
commit 6b6cdbe6a4a5c84b741cf4b1fa5fa398a7278418
Author: Kirill Tkhai <ktkhai@odin.com>
Date:   Thu Oct 29 15:09:39 2020 +0300

    ve/fs/devmnt: process mount options
    
    Port patch diff-ve-fs-process-mount-options-check-and-insert by Maxim Patlasov:
    
    The patch implements two kinds of processing mount options: check and insert.
    Check is OK if and only if each option supplied by CT-user is present
    among options listed in allowed_options.
    
    Insert transforms mount options supplied by CT-user like this:
    
    <mount_options> = <hidden_options> + <user_supplied_mount_options>
    
    Check is performed both for mount and remount. Insert - only for mount. All
    this happens only for mount/remount inside CT and if proper ve_devmnt struct
    is found in ve->devmnt_list (searched by 'dev').
    
    https://jira.sw.ru/browse/PSBM-32273
    
    Signed-off-by: Kirill Tkhai <ktkhai@odin.com>
    
    Acked-by: Maxim Patlasov <mpatlasov@openvz.org>
    
    +++
    ve/fs/devmnt: allow more than one mount option inside a CT
    
    strsep() changes the provided string: it puts '\0' instead of separators,
    thus after a successful call to ve_devmnt_check() we insert
    only the first provided mount option, ignoring the others.
    
    Fixes: bc4143b ("ve/fs/devmnt: process mount options")
    
    Found during implementation of
    https://jira.sw.ru/browse/PSBM-40075
    
    Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
    
    Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    
    https://jira.sw.ru/browse/PSBM-108196
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 fs/namespace.c     | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/super.c         |  16 ++++++
 include/linux/fs.h |   2 +
 3 files changed, 163 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/fs/namespace.c b/fs/namespace.c
index d355b5921d1e..c24ab7597a39 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -28,6 +28,8 @@ 
 #include <linux/task_work.h>
 #include <linux/sched/task.h>
 
+#include <linux/ve.h>
+
 #include "pnode.h"
 #include "internal.h"
 
@@ -2344,6 +2346,148 @@  static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
 	return error;
 }
 
+#ifdef CONFIG_VE
+/*
+ * Looks up needle as a whole sep-delimited entry of haystack (a match must be
+ * followed by sep or end-of-string); returns the first match or NULL.
+ */
+static char *strstr_separated(char *haystack, char *needle, char sep)
+{
+	int len = strlen(needle);
+	char *entry, *next;
+
+	for (entry = haystack; entry; entry = next) {
+		if (strncmp(entry, needle, len) == 0 &&
+		    (entry[len] == sep || entry[len] == '\0'))
+			return entry;
+
+		next = strchr(entry, sep);
+		if (next)
+			next++;
+	}
+
+	return NULL;
+}
+
+/* Returns 0 if every option in options is allowed, else -EPERM or -ENOMEM */
+static int ve_devmnt_check(char *options, char *allowed)
+{
+	char *p, *cursor, *tmp_options;
+	int err = 0;
+
+	if (!options || !*options)
+		return 0;
+	if (!allowed)
+		return -EPERM;
+
+	/*
+	 * strsep() puts '\0' instead of separators, so work on a copy. It also
+	 * advances its cursor (NULL at the end): keep the base for kfree().
+	 */
+	cursor = tmp_options = kstrdup(options, GFP_KERNEL);
+	if (!tmp_options)
+		return -ENOMEM;
+
+	while ((p = strsep(&cursor, ",")) != NULL) {
+		if (*p && !strstr_separated(allowed, p, ',')) {
+			err = -EPERM;
+			break;
+		}
+	}
+
+	kfree(tmp_options);
+	return err;
+}
+
+/*
+ * Rewrites options in place as "<hidden>,<options>". Returns 0 on success or
+ * if hidden is NULL, -EAGAIN if options is NULL, -EPERM if the result with its
+ * terminator exceeds PAGE_SIZE (options is assumed to be a page-sized buffer).
+ */
+static int ve_devmnt_insert(char *options, char *hidden)
+{
+	int user_len, prefix_len;
+
+	if (!hidden)
+		return 0;
+	if (!options)
+		return -EAGAIN;
+
+	user_len = strlen(options);
+	prefix_len = strlen(hidden);
+	if (prefix_len + user_len + 2 > PAGE_SIZE)
+		return -EPERM;
+
+	/* shift the user options, terminator included, then fill the gap */
+	memmove(options + prefix_len + 1, options, user_len + 1);
+	memcpy(options, hidden, prefix_len);
+	options[prefix_len] = ',';
+	return 0;
+}
+
+/*
+ * Checks the mount options in *data_pp against the ve_devmnt entry for dev
+ * and, unless remount is set, prepends that entry's hidden options.
+ * Returns 0 or a negative errno; on success *data_pp may have been switched
+ * to a page allocated here (mount_bdev() frees it when the pointer changed).
+ */
+int ve_devmnt_process(struct ve_struct *ve, dev_t dev, void **data_pp, int remount)
+{
+	void *data = *data_pp;
+	struct ve_devmnt *devmnt;
+	int err;
+again:
+	err = 1;	/* stays 1 when dev has no entry in the list */
+	mutex_lock(&ve->devmnt_mutex);
+	list_for_each_entry(devmnt, &ve->devmnt_list, link) {
+		if (devmnt->dev != dev)
+			continue;
+		err = ve_devmnt_check(data, devmnt->allowed_options);
+		if (!err && !remount)
+			err = ve_devmnt_insert(data, devmnt->hidden_options);
+		break;
+	}
+	mutex_unlock(&ve->devmnt_mutex);
+
+	if (err == -EAGAIN) {
+		/* hidden options need a buffer: retry with an empty string */
+		data = (void *)__get_free_page(GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		*(char *)data = 0; /* the string must be zero-terminated */
+		goto again;
+	}
+	if (err == 1) {
+		err = *data_pp ? -EPERM : 0;
+		if (err)
+			ve_printk(VE_LOG_BOTH, KERN_WARNING "VE%s: no allowed "
+				  "mount options found for device %u:%u\n",
+				  ve->ve_name, MAJOR(dev), MINOR(dev));
+	} else if (!err) {
+		*data_pp = data;
+	}
+
+	if (data && data != *data_pp)
+		free_page((unsigned long)data);
+	return err;
+}
+#endif
+
+/* Remount sb; for a non-super VE, vet data via ve_devmnt_process() first */
+static int do_check_and_remount_sb(struct super_block *sb, int flags, void *data)
+{
+#ifdef CONFIG_VE
+	struct ve_struct *ve = get_exec_env();
+	int err = 0;
+
+	/* only block-device superblocks remounted with options are checked */
+	if (sb->s_bdev && data && !ve_is_super(ve))
+		err = ve_devmnt_process(ve, sb->s_bdev->bd_dev, &data, 1);
+	if (err)
+		return err;
+#endif
+	return do_remount_sb(sb, flags, data, 0);
+}
+
 /*
  * change filesystem flags. dir should be a physical root of filesystem.
  * If you've mounted a non-root directory somewhere and want to do remount
@@ -2399,7 +2543,7 @@  static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		err = -EPERM;
 	else
-		err = do_remount_sb(sb, sb_flags, data, 0);
+		err = do_check_and_remount_sb(sb, sb_flags, data);
 	if (!err) {
 		lock_mount_hash();
 		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
diff --git a/fs/super.c b/fs/super.c
index 3af82d38d3d1..b921e7a2b111 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1175,10 +1175,26 @@  struct dentry *mount_bdev(struct file_system_type *fs_type,
 		blkdev_put(bdev, mode);
 		down_write(&s->s_umount);
 	} else {
+#ifdef CONFIG_VE
+		void *data_orig = data;
+		struct ve_struct *ve = get_exec_env();
+
+		if (!ve_is_super(ve)) {
+			error = ve_devmnt_process(ve, bdev->bd_dev, &data, 0);
+			if (error) {
+				deactivate_locked_super(s);
+				goto error;
+			}
+		}
+#endif
 		s->s_mode = mode;
 		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
 		sb_set_blocksize(s, block_size(bdev));
 		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+#ifdef CONFIG_VE
+		if (data_orig != data)
+			free_page((unsigned long)data);
+#endif
 		if (error) {
 			deactivate_locked_super(s);
 			goto error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a47765206391..826d99d6d962 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2316,6 +2316,8 @@  extern int super_setup_bdi(struct super_block *sb);
 
 extern int current_umask(void);
 
+extern int ve_devmnt_process(struct ve_struct *, dev_t, void **, int);
+
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 extern int generic_update_time(struct inode *, struct timespec64 *, int);