[08/11] fuse: Support fuse filesystems outside of init_user_ns

Submitted by Dongsu Park on Dec. 22, 2017, 2:32 p.m.

Details

Message ID c85c293e19a478353aba8e6e3ee39e5914f798d5.1512041070.git.dongsu@kinvolk.io
State New
Series "FUSE mounts from non-init user namespaces"
Headers show

Commit Message

Dongsu Park Dec. 22, 2017, 2:32 p.m.
From: Seth Forshee <seth.forshee@canonical.com>

In order to support mounts from namespaces other than
init_user_ns, fuse must translate uids and gids to/from the
userns of the process servicing requests on /dev/fuse. This
patch does that, with a couple of restrictions on the namespace:

 - The userns for the fuse connection is fixed to the namespace
   from which /dev/fuse is opened.

 - The namespace must be the same as s_user_ns.

These restrictions simplify the implementation by avoiding the
need to pass around userns references and by allowing fuse to
rely on the checks in inode_change_ok for ownership changes.
Either restriction could be relaxed in the future if needed.

For cuse the namespace used for the connection is also simply
current_user_ns() at the time /dev/cuse is opened.

Patch v4 is available: https://patchwork.kernel.org/patch/8944661/

Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Dongsu Park <dongsu@kinvolk.io>
---
 fs/fuse/cuse.c   |  3 ++-
 fs/fuse/dev.c    | 11 ++++++++---
 fs/fuse/dir.c    | 14 +++++++-------
 fs/fuse/fuse_i.h |  6 +++++-
 fs/fuse/inode.c  | 31 +++++++++++++++++++------------
 5 files changed, 41 insertions(+), 24 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index e9e97803..b1b83259 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,7 @@ 
 #include <linux/stat.h>
 #include <linux/module.h>
 #include <linux/uio.h>
+#include <linux/user_namespace.h>
 
 #include "fuse_i.h"
 
@@ -498,7 +499,7 @@  static int cuse_channel_open(struct inode *inode, struct file *file)
 	if (!cc)
 		return -ENOMEM;
 
-	fuse_conn_init(&cc->fc);
+	fuse_conn_init(&cc->fc, current_user_ns());
 
 	fud = fuse_dev_alloc(&cc->fc);
 	if (!fud) {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 17f0d05b..0f780e16 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -114,8 +114,8 @@  static void __fuse_put_request(struct fuse_req *req)
 
 static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
 {
-	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
-	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
+	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
+	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
 }
 
@@ -167,6 +167,10 @@  static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 	__set_bit(FR_WAITING, &req->flags);
 	if (for_background)
 		__set_bit(FR_BACKGROUND, &req->flags);
+	if (req->in.h.uid == (uid_t)-1 || req->in.h.gid == (gid_t)-1) {
+		fuse_put_request(fc, req);
+		return ERR_PTR(-EOVERFLOW);
+	}
 
 	return req;
 
@@ -1260,7 +1264,8 @@  static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	in = &req->in;
 	reqsize = in->h.len;
 
-	if (task_active_pid_ns(current) != fc->pid_ns) {
+	if (task_active_pid_ns(current) != fc->pid_ns ||
+	    current_user_ns() != fc->user_ns) {
 		rcu_read_lock();
 		in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
 		rcu_read_unlock();
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 24967382..ad1cfac1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -858,8 +858,8 @@  static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 	stat->ino = attr->ino;
 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	stat->nlink = attr->nlink;
-	stat->uid = make_kuid(&init_user_ns, attr->uid);
-	stat->gid = make_kgid(&init_user_ns, attr->gid);
+	stat->uid = make_kuid(fc->user_ns, attr->uid);
+	stat->gid = make_kgid(fc->user_ns, attr->gid);
 	stat->rdev = inode->i_rdev;
 	stat->atime.tv_sec = attr->atime;
 	stat->atime.tv_nsec = attr->atimensec;
@@ -1475,17 +1475,17 @@  static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
 	return true;
 }
 
-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
-			   bool trust_local_cmtime)
+static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
+			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
 {
 	unsigned ivalid = iattr->ia_valid;
 
 	if (ivalid & ATTR_MODE)
 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
 	if (ivalid & ATTR_UID)
-		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
+		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
 	if (ivalid & ATTR_GID)
-		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
+		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
 	if (ivalid & ATTR_SIZE)
 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
 	if (ivalid & ATTR_ATIME) {
@@ -1646,7 +1646,7 @@  int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 
 	memset(&inarg, 0, sizeof(inarg));
 	memset(&outarg, 0, sizeof(outarg));
-	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
+	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
 	if (file) {
 		struct fuse_file *ff = file->private_data;
 		inarg.valid |= FATTR_FH;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d5773ca6..364e65c8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -26,6 +26,7 @@ 
 #include <linux/xattr.h>
 #include <linux/pid_namespace.h>
 #include <linux/refcount.h>
+#include <linux/user_namespace.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -466,6 +467,9 @@  struct fuse_conn {
 	/** The pid namespace for this mount */
 	struct pid_namespace *pid_ns;
 
+	/** The user namespace for this mount */
+	struct user_namespace *user_ns;
+
 	/** Maximum read size */
 	unsigned max_read;
 
@@ -870,7 +874,7 @@  struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-void fuse_conn_init(struct fuse_conn *fc);
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
 
 /**
  * Release reference to fuse_conn
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2f504d61..7f6b2e55 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -171,8 +171,8 @@  void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 	inode->i_ino     = fuse_squash_ino(attr->ino);
 	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	set_nlink(inode, attr->nlink);
-	inode->i_uid     = make_kuid(&init_user_ns, attr->uid);
-	inode->i_gid     = make_kgid(&init_user_ns, attr->gid);
+	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
+	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
 	inode->i_blocks  = attr->blocks;
 	inode->i_atime.tv_sec   = attr->atime;
 	inode->i_atime.tv_nsec  = attr->atimensec;
@@ -477,7 +477,8 @@  static int fuse_match_uint(substring_t *s, unsigned int *res)
 	return err;
 }
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
+			  struct user_namespace *user_ns)
 {
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
@@ -513,7 +514,7 @@  static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 		case OPT_USER_ID:
 			if (fuse_match_uint(&args[0], &uv))
 				return 0;
-			d->user_id = make_kuid(current_user_ns(), uv);
+			d->user_id = make_kuid(user_ns, uv);
 			if (!uid_valid(d->user_id))
 				return 0;
 			d->user_id_present = 1;
@@ -522,7 +523,7 @@  static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 		case OPT_GROUP_ID:
 			if (fuse_match_uint(&args[0], &uv))
 				return 0;
-			d->group_id = make_kgid(current_user_ns(), uv);
+			d->group_id = make_kgid(user_ns, uv);
 			if (!gid_valid(d->group_id))
 				return 0;
 			d->group_id_present = 1;
@@ -565,8 +566,8 @@  static int fuse_show_options(struct seq_file *m, struct dentry *root)
 	struct super_block *sb = root->d_sb;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
-	seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
-	seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
+	seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
+	seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
 	if (fc->default_permissions)
 		seq_puts(m, ",default_permissions");
 	if (fc->allow_other)
@@ -597,7 +598,7 @@  static void fuse_pqueue_init(struct fuse_pqueue *fpq)
 	fpq->connected = 1;
 }
 
-void fuse_conn_init(struct fuse_conn *fc)
+void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
 {
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
@@ -621,6 +622,7 @@  void fuse_conn_init(struct fuse_conn *fc)
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
 	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
+	fc->user_ns = get_user_ns(user_ns);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -630,6 +632,7 @@  void fuse_conn_put(struct fuse_conn *fc)
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
 		put_pid_ns(fc->pid_ns);
+		put_user_ns(fc->user_ns);
 		fc->release(fc);
 	}
 }
@@ -1061,7 +1064,7 @@  static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
 
-	if (!parse_fuse_opt(data, &d, is_bdev))
+	if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
 		goto err;
 
 	if (is_bdev) {
@@ -1086,8 +1089,12 @@  static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!file)
 		goto err;
 
-	if ((file->f_op != &fuse_dev_operations) ||
-	    (file->f_cred->user_ns != &init_user_ns))
+	/*
+	 * Require mount to happen from the same user namespace which
+	 * opened /dev/fuse to prevent potential attacks.
+	 */
+	if (file->f_op != &fuse_dev_operations ||
+	    file->f_cred->user_ns != sb->s_user_ns)
 		goto err_fput;
 
 	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
@@ -1095,7 +1102,7 @@  static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!fc)
 		goto err_fput;
 
-	fuse_conn_init(fc);
+	fuse_conn_init(fc, sb->s_user_ns);
 	fc->release = fuse_free_conn;
 
 	fud = fuse_dev_alloc(fc);

Comments

Serge E. Hallyn Dec. 23, 2017, 3:46 a.m.
On Fri, Dec 22, 2017 at 03:32:32PM +0100, Dongsu Park wrote:
> From: Seth Forshee <seth.forshee@canonical.com>
> 
> In order to support mounts from namespaces other than
> init_user_ns, fuse must translate uids and gids to/from the
> userns of the process servicing requests on /dev/fuse. This
> patch does that, with a couple of restrictions on the namespace:
> 
>  - The userns for the fuse connection is fixed to the namespace
>    from which /dev/fuse is opened.
> 
>  - The namespace must be the same as s_user_ns.
> 
> These restrictions simplify the implementation by avoiding the
> need to pass around userns references and by allowing fuse to
> rely on the checks in inode_change_ok for ownership changes.
> Either restriction could be relaxed in the future if needed.
> 
> For cuse the namespace used for the connection is also simply
> current_user_ns() at the time /dev/cuse is opened.
> 
> Patch v4 is available: https://patchwork.kernel.org/patch/8944661/
> 
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: Miklos Szeredi <mszeredi@redhat.com>
> Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
> Signed-off-by: Dongsu Park <dongsu@kinvolk.io>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  fs/fuse/cuse.c   |  3 ++-
>  fs/fuse/dev.c    | 11 ++++++++---
>  fs/fuse/dir.c    | 14 +++++++-------
>  fs/fuse/fuse_i.h |  6 +++++-
>  fs/fuse/inode.c  | 31 +++++++++++++++++++------------
>  5 files changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index e9e97803..b1b83259 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,7 @@
>  #include <linux/stat.h>
>  #include <linux/module.h>
>  #include <linux/uio.h>
> +#include <linux/user_namespace.h>
>  
>  #include "fuse_i.h"
>  
> @@ -498,7 +499,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
>  	if (!cc)
>  		return -ENOMEM;
>  
> -	fuse_conn_init(&cc->fc);
> +	fuse_conn_init(&cc->fc, current_user_ns());
>  
>  	fud = fuse_dev_alloc(&cc->fc);
>  	if (!fud) {
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 17f0d05b..0f780e16 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -114,8 +114,8 @@ static void __fuse_put_request(struct fuse_req *req)
>  
>  static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
>  {
> -	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
> -	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
> +	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
> +	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
>  	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
>  }
>  
> @@ -167,6 +167,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
>  	__set_bit(FR_WAITING, &req->flags);
>  	if (for_background)
>  		__set_bit(FR_BACKGROUND, &req->flags);
> +	if (req->in.h.uid == (uid_t)-1 || req->in.h.gid == (gid_t)-1) {
> +		fuse_put_request(fc, req);
> +		return ERR_PTR(-EOVERFLOW);
> +	}
>  
>  	return req;
>  
> @@ -1260,7 +1264,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
>  	in = &req->in;
>  	reqsize = in->h.len;
>  
> -	if (task_active_pid_ns(current) != fc->pid_ns) {
> +	if (task_active_pid_ns(current) != fc->pid_ns ||
> +	    current_user_ns() != fc->user_ns) {
>  		rcu_read_lock();
>  		in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
>  		rcu_read_unlock();
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 24967382..ad1cfac1 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -858,8 +858,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
>  	stat->ino = attr->ino;
>  	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
>  	stat->nlink = attr->nlink;
> -	stat->uid = make_kuid(&init_user_ns, attr->uid);
> -	stat->gid = make_kgid(&init_user_ns, attr->gid);
> +	stat->uid = make_kuid(fc->user_ns, attr->uid);
> +	stat->gid = make_kgid(fc->user_ns, attr->gid);
>  	stat->rdev = inode->i_rdev;
>  	stat->atime.tv_sec = attr->atime;
>  	stat->atime.tv_nsec = attr->atimensec;
> @@ -1475,17 +1475,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
>  	return true;
>  }
>  
> -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
> -			   bool trust_local_cmtime)
> +static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
> +			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
>  {
>  	unsigned ivalid = iattr->ia_valid;
>  
>  	if (ivalid & ATTR_MODE)
>  		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
>  	if (ivalid & ATTR_UID)
> -		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
> +		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
>  	if (ivalid & ATTR_GID)
> -		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
> +		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
>  	if (ivalid & ATTR_SIZE)
>  		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
>  	if (ivalid & ATTR_ATIME) {
> @@ -1646,7 +1646,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>  
>  	memset(&inarg, 0, sizeof(inarg));
>  	memset(&outarg, 0, sizeof(outarg));
> -	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
> +	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
>  	if (file) {
>  		struct fuse_file *ff = file->private_data;
>  		inarg.valid |= FATTR_FH;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index d5773ca6..364e65c8 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -26,6 +26,7 @@
>  #include <linux/xattr.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/refcount.h>
> +#include <linux/user_namespace.h>
>  
>  /** Max number of pages that can be used in a single read request */
>  #define FUSE_MAX_PAGES_PER_REQ 32
> @@ -466,6 +467,9 @@ struct fuse_conn {
>  	/** The pid namespace for this mount */
>  	struct pid_namespace *pid_ns;
>  
> +	/** The user namespace for this mount */
> +	struct user_namespace *user_ns;
> +
>  	/** Maximum read size */
>  	unsigned max_read;
>  
> @@ -870,7 +874,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
>  /**
>   * Initialize fuse_conn
>   */
> -void fuse_conn_init(struct fuse_conn *fc);
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
>  
>  /**
>   * Release reference to fuse_conn
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 2f504d61..7f6b2e55 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -171,8 +171,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
>  	inode->i_ino     = fuse_squash_ino(attr->ino);
>  	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
>  	set_nlink(inode, attr->nlink);
> -	inode->i_uid     = make_kuid(&init_user_ns, attr->uid);
> -	inode->i_gid     = make_kgid(&init_user_ns, attr->gid);
> +	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
> +	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
>  	inode->i_blocks  = attr->blocks;
>  	inode->i_atime.tv_sec   = attr->atime;
>  	inode->i_atime.tv_nsec  = attr->atimensec;
> @@ -477,7 +477,8 @@ static int fuse_match_uint(substring_t *s, unsigned int *res)
>  	return err;
>  }
>  
> -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
> +			  struct user_namespace *user_ns)
>  {
>  	char *p;
>  	memset(d, 0, sizeof(struct fuse_mount_data));
> @@ -513,7 +514,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
>  		case OPT_USER_ID:
>  			if (fuse_match_uint(&args[0], &uv))
>  				return 0;
> -			d->user_id = make_kuid(current_user_ns(), uv);
> +			d->user_id = make_kuid(user_ns, uv);
>  			if (!uid_valid(d->user_id))
>  				return 0;
>  			d->user_id_present = 1;
> @@ -522,7 +523,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
>  		case OPT_GROUP_ID:
>  			if (fuse_match_uint(&args[0], &uv))
>  				return 0;
> -			d->group_id = make_kgid(current_user_ns(), uv);
> +			d->group_id = make_kgid(user_ns, uv);
>  			if (!gid_valid(d->group_id))
>  				return 0;
>  			d->group_id_present = 1;
> @@ -565,8 +566,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
>  	struct super_block *sb = root->d_sb;
>  	struct fuse_conn *fc = get_fuse_conn_super(sb);
>  
> -	seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
> -	seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
> +	seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
> +	seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
>  	if (fc->default_permissions)
>  		seq_puts(m, ",default_permissions");
>  	if (fc->allow_other)
> @@ -597,7 +598,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
>  	fpq->connected = 1;
>  }
>  
> -void fuse_conn_init(struct fuse_conn *fc)
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
>  {
>  	memset(fc, 0, sizeof(*fc));
>  	spin_lock_init(&fc->lock);
> @@ -621,6 +622,7 @@ void fuse_conn_init(struct fuse_conn *fc)
>  	fc->attr_version = 1;
>  	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
>  	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
> +	fc->user_ns = get_user_ns(user_ns);
>  }
>  EXPORT_SYMBOL_GPL(fuse_conn_init);
>  
> @@ -630,6 +632,7 @@ void fuse_conn_put(struct fuse_conn *fc)
>  		if (fc->destroy_req)
>  			fuse_request_free(fc->destroy_req);
>  		put_pid_ns(fc->pid_ns);
> +		put_user_ns(fc->user_ns);
>  		fc->release(fc);
>  	}
>  }
> @@ -1061,7 +1064,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>  
>  	sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
>  
> -	if (!parse_fuse_opt(data, &d, is_bdev))
> +	if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
>  		goto err;
>  
>  	if (is_bdev) {
> @@ -1086,8 +1089,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>  	if (!file)
>  		goto err;
>  
> -	if ((file->f_op != &fuse_dev_operations) ||
> -	    (file->f_cred->user_ns != &init_user_ns))
> +	/*
> +	 * Require mount to happen from the same user namespace which
> +	 * opened /dev/fuse to prevent potential attacks.
> +	 */
> +	if (file->f_op != &fuse_dev_operations ||
> +	    file->f_cred->user_ns != sb->s_user_ns)
>  		goto err_fput;
>  
>  	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
> @@ -1095,7 +1102,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>  	if (!fc)
>  		goto err_fput;
>  
> -	fuse_conn_init(fc);
> +	fuse_conn_init(fc, sb->s_user_ns);
>  	fc->release = fuse_free_conn;
>  
>  	fud = fuse_dev_alloc(fc);
> -- 
> 2.13.6
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers
Alban Crequy Jan. 17, 2018, 10:59 a.m.
[Adding Tejun, David, Tom for question about cuse]

On Fri, Dec 22, 2017 at 3:32 PM, Dongsu Park <dongsu@kinvolk.io> wrote:
> From: Seth Forshee <seth.forshee@canonical.com>
>
> In order to support mounts from namespaces other than
> init_user_ns, fuse must translate uids and gids to/from the
> userns of the process servicing requests on /dev/fuse. This
> patch does that, with a couple of restrictions on the namespace:
>
>  - The userns for the fuse connection is fixed to the namespace
>    from which /dev/fuse is opened.
>
>  - The namespace must be the same as s_user_ns.
>
> These restrictions simplify the implementation by avoiding the
> need to pass around userns references and by allowing fuse to
> rely on the checks in inode_change_ok for ownership changes.
> Either restriction could be relaxed in the future if needed.
>
> For cuse the namespace used for the connection is also simply
> current_user_ns() at the time /dev/cuse is opened.

Was a use case discussed for using cuse in a new unprivileged userns?

I ran some tests yesterday with cusexmp [1] and I could add a new char
device as an unprivileged user with:

$ unshare -U -r -m sh -c 'mount --bind /mnt/cuse /dev/cuse ; cusexmp
--maj=99 --min=30 --name=foo

where /mnt/cuse is previously mknod'ed correctly and chmod'ed 777.
Then, I could see the new device:

$ cat /proc/devices | grep foo
 99 foo

On normal distros, we don't have a /mnt/cuse chmod'ed 777 but still it
seems dangerous if the dev node can be provided otherwise and if we
don't have a use case for it.

Thoughts?

[1] https://github.com/fuse4x/fuse/blob/master/example/cusexmp.c#L9

Cheers,
Alban


> Patch v4 is available: https://patchwork.kernel.org/patch/8944661/
>
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: Miklos Szeredi <mszeredi@redhat.com>
> Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
> Signed-off-by: Dongsu Park <dongsu@kinvolk.io>
> ---
>  fs/fuse/cuse.c   |  3 ++-
>  fs/fuse/dev.c    | 11 ++++++++---
>  fs/fuse/dir.c    | 14 +++++++-------
>  fs/fuse/fuse_i.h |  6 +++++-
>  fs/fuse/inode.c  | 31 +++++++++++++++++++------------
>  5 files changed, 41 insertions(+), 24 deletions(-)
>
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index e9e97803..b1b83259 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,7 @@
>  #include <linux/stat.h>
>  #include <linux/module.h>
>  #include <linux/uio.h>
> +#include <linux/user_namespace.h>
>
>  #include "fuse_i.h"
>
> @@ -498,7 +499,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
>         if (!cc)
>                 return -ENOMEM;
>
> -       fuse_conn_init(&cc->fc);
> +       fuse_conn_init(&cc->fc, current_user_ns());
>
>         fud = fuse_dev_alloc(&cc->fc);
>         if (!fud) {
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 17f0d05b..0f780e16 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -114,8 +114,8 @@ static void __fuse_put_request(struct fuse_req *req)
>
>  static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
>  {
> -       req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
> -       req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
> +       req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
> +       req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
>         req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
>  }
>
> @@ -167,6 +167,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
>         __set_bit(FR_WAITING, &req->flags);
>         if (for_background)
>                 __set_bit(FR_BACKGROUND, &req->flags);
> +       if (req->in.h.uid == (uid_t)-1 || req->in.h.gid == (gid_t)-1) {
> +               fuse_put_request(fc, req);
> +               return ERR_PTR(-EOVERFLOW);
> +       }
>
>         return req;
>
> @@ -1260,7 +1264,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
>         in = &req->in;
>         reqsize = in->h.len;
>
> -       if (task_active_pid_ns(current) != fc->pid_ns) {
> +       if (task_active_pid_ns(current) != fc->pid_ns ||
> +           current_user_ns() != fc->user_ns) {
>                 rcu_read_lock();
>                 in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
>                 rcu_read_unlock();
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 24967382..ad1cfac1 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -858,8 +858,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
>         stat->ino = attr->ino;
>         stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
>         stat->nlink = attr->nlink;
> -       stat->uid = make_kuid(&init_user_ns, attr->uid);
> -       stat->gid = make_kgid(&init_user_ns, attr->gid);
> +       stat->uid = make_kuid(fc->user_ns, attr->uid);
> +       stat->gid = make_kgid(fc->user_ns, attr->gid);
>         stat->rdev = inode->i_rdev;
>         stat->atime.tv_sec = attr->atime;
>         stat->atime.tv_nsec = attr->atimensec;
> @@ -1475,17 +1475,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
>         return true;
>  }
>
> -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
> -                          bool trust_local_cmtime)
> +static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
> +                          struct fuse_setattr_in *arg, bool trust_local_cmtime)
>  {
>         unsigned ivalid = iattr->ia_valid;
>
>         if (ivalid & ATTR_MODE)
>                 arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
>         if (ivalid & ATTR_UID)
> -               arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
> +               arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
>         if (ivalid & ATTR_GID)
> -               arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
> +               arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
>         if (ivalid & ATTR_SIZE)
>                 arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
>         if (ivalid & ATTR_ATIME) {
> @@ -1646,7 +1646,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>
>         memset(&inarg, 0, sizeof(inarg));
>         memset(&outarg, 0, sizeof(outarg));
> -       iattr_to_fattr(attr, &inarg, trust_local_cmtime);
> +       iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
>         if (file) {
>                 struct fuse_file *ff = file->private_data;
>                 inarg.valid |= FATTR_FH;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index d5773ca6..364e65c8 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -26,6 +26,7 @@
>  #include <linux/xattr.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/refcount.h>
> +#include <linux/user_namespace.h>
>
>  /** Max number of pages that can be used in a single read request */
>  #define FUSE_MAX_PAGES_PER_REQ 32
> @@ -466,6 +467,9 @@ struct fuse_conn {
>         /** The pid namespace for this mount */
>         struct pid_namespace *pid_ns;
>
> +       /** The user namespace for this mount */
> +       struct user_namespace *user_ns;
> +
>         /** Maximum read size */
>         unsigned max_read;
>
> @@ -870,7 +874,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
>  /**
>   * Initialize fuse_conn
>   */
> -void fuse_conn_init(struct fuse_conn *fc);
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
>
>  /**
>   * Release reference to fuse_conn
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 2f504d61..7f6b2e55 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -171,8 +171,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
>         inode->i_ino     = fuse_squash_ino(attr->ino);
>         inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
>         set_nlink(inode, attr->nlink);
> -       inode->i_uid     = make_kuid(&init_user_ns, attr->uid);
> -       inode->i_gid     = make_kgid(&init_user_ns, attr->gid);
> +       inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
> +       inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
>         inode->i_blocks  = attr->blocks;
>         inode->i_atime.tv_sec   = attr->atime;
>         inode->i_atime.tv_nsec  = attr->atimensec;
> @@ -477,7 +477,8 @@ static int fuse_match_uint(substring_t *s, unsigned int *res)
>         return err;
>  }
>
> -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
> +                         struct user_namespace *user_ns)
>  {
>         char *p;
>         memset(d, 0, sizeof(struct fuse_mount_data));
> @@ -513,7 +514,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
>                 case OPT_USER_ID:
>                         if (fuse_match_uint(&args[0], &uv))
>                                 return 0;
> -                       d->user_id = make_kuid(current_user_ns(), uv);
> +                       d->user_id = make_kuid(user_ns, uv);
>                         if (!uid_valid(d->user_id))
>                                 return 0;
>                         d->user_id_present = 1;
> @@ -522,7 +523,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
>                 case OPT_GROUP_ID:
>                         if (fuse_match_uint(&args[0], &uv))
>                                 return 0;
> -                       d->group_id = make_kgid(current_user_ns(), uv);
> +                       d->group_id = make_kgid(user_ns, uv);
>                         if (!gid_valid(d->group_id))
>                                 return 0;
>                         d->group_id_present = 1;
> @@ -565,8 +566,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
>         struct super_block *sb = root->d_sb;
>         struct fuse_conn *fc = get_fuse_conn_super(sb);
>
> -       seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
> -       seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
> +       seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
> +       seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
>         if (fc->default_permissions)
>                 seq_puts(m, ",default_permissions");
>         if (fc->allow_other)
> @@ -597,7 +598,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
>         fpq->connected = 1;
>  }
>
> -void fuse_conn_init(struct fuse_conn *fc)
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
>  {
>         memset(fc, 0, sizeof(*fc));
>         spin_lock_init(&fc->lock);
> @@ -621,6 +622,7 @@ void fuse_conn_init(struct fuse_conn *fc)
>         fc->attr_version = 1;
>         get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
>         fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
> +       fc->user_ns = get_user_ns(user_ns);
>  }
>  EXPORT_SYMBOL_GPL(fuse_conn_init);
>
> @@ -630,6 +632,7 @@ void fuse_conn_put(struct fuse_conn *fc)
>                 if (fc->destroy_req)
>                         fuse_request_free(fc->destroy_req);
>                 put_pid_ns(fc->pid_ns);
> +               put_user_ns(fc->user_ns);
>                 fc->release(fc);
>         }
>  }
> @@ -1061,7 +1064,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>
>         sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
>
> -       if (!parse_fuse_opt(data, &d, is_bdev))
> +       if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
>                 goto err;
>
>         if (is_bdev) {
> @@ -1086,8 +1089,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>         if (!file)
>                 goto err;
>
> -       if ((file->f_op != &fuse_dev_operations) ||
> -           (file->f_cred->user_ns != &init_user_ns))
> +       /*
> +        * Require mount to happen from the same user namespace which
> +        * opened /dev/fuse to prevent potential attacks.
> +        */
> +       if (file->f_op != &fuse_dev_operations ||
> +           file->f_cred->user_ns != sb->s_user_ns)
>                 goto err_fput;
>
>         fc = kmalloc(sizeof(*fc), GFP_KERNEL);
> @@ -1095,7 +1102,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>         if (!fc)
>                 goto err_fput;
>
> -       fuse_conn_init(fc);
> +       fuse_conn_init(fc, sb->s_user_ns);
>         fc->release = fuse_free_conn;
>
>         fud = fuse_dev_alloc(fc);
> --
> 2.13.6
>
Seth Forshee Jan. 17, 2018, 2:29 p.m.
On Wed, Jan 17, 2018 at 11:59:06AM +0100, Alban Crequy wrote:
> [Adding Tejun, David, Tom for question about cuse]
> 
> On Fri, Dec 22, 2017 at 3:32 PM, Dongsu Park <dongsu@kinvolk.io> wrote:
> > From: Seth Forshee <seth.forshee@canonical.com>
> >
> > In order to support mounts from namespaces other than
> > init_user_ns, fuse must translate uids and gids to/from the
> > userns of the process servicing requests on /dev/fuse. This
> > patch does that, with a couple of restrictions on the namespace:
> >
> >  - The userns for the fuse connection is fixed to the namespace
> >    from which /dev/fuse is opened.
> >
> >  - The namespace must be the same as s_user_ns.
> >
> > These restrictions simplify the implementation by avoiding the
> > need to pass around userns references and by allowing fuse to
> > rely on the checks in inode_change_ok for ownership changes.
> > Either restriction could be relaxed in the future if needed.
> >
> > For cuse the namespace used for the connection is also simply
> > current_user_ns() at the time /dev/cuse is opened.
> 
> Was a use case discussed for using cuse in a new unprivileged userns?
> 
> I ran some tests yesterday with cusexmp [1] and I could add a new char
> device as an unprivileged user with:
> 
> $ unshare -U -r -m sh -c 'mount --bind /mnt/cuse /dev/cuse ; cusexmp
> --maj=99 --min=30 --name=foo
> 
> where /mnt/cuse is previously mknod'ed correctly and chmod'ed 777.
> Then, I could see the new device:
> 
> $ cat /proc/devices | grep foo
>  99 foo
> 
> On normal distros, we don't have a /mnt/cuse chmod'ed 777 but still it
> seems dangerous if the dev node can be provided otherwise and if we
> don't have a use case for it.
> 
> Thoughts?

I can't remember the specific reasons, but I had concluded that letting
unprivileged users use cuse within a user namespace isn't safe. But
having a cuse device node usable by regular users at all is equally
unsafe I suspect, so I don't think your example demonstrates any problem
specific to user namespaces. There shouldn't be any way to use a user
namespace to gain access permissions towards /dev/cuse, otherwise we
have bigger problems than cuse to worry about.

Seth
Alban Crequy Jan. 17, 2018, 6:56 p.m.
On Wed, Jan 17, 2018 at 3:29 PM, Seth Forshee
<seth.forshee@canonical.com> wrote:
> On Wed, Jan 17, 2018 at 11:59:06AM +0100, Alban Crequy wrote:
>> [Adding Tejun, David, Tom for question about cuse]
>>
>> On Fri, Dec 22, 2017 at 3:32 PM, Dongsu Park <dongsu@kinvolk.io> wrote:
>> > From: Seth Forshee <seth.forshee@canonical.com>
>> >
>> > In order to support mounts from namespaces other than
>> > init_user_ns, fuse must translate uids and gids to/from the
>> > userns of the process servicing requests on /dev/fuse. This
>> > patch does that, with a couple of restrictions on the namespace:
>> >
>> >  - The userns for the fuse connection is fixed to the namespace
>> >    from which /dev/fuse is opened.
>> >
>> >  - The namespace must be the same as s_user_ns.
>> >
>> > These restrictions simplify the implementation by avoiding the
>> > need to pass around userns references and by allowing fuse to
>> > rely on the checks in inode_change_ok for ownership changes.
>> > Either restriction could be relaxed in the future if needed.
>> >
>> > For cuse the namespace used for the connection is also simply
>> > current_user_ns() at the time /dev/cuse is opened.
>>
>> Was a use case discussed for using cuse in a new unprivileged userns?
>>
>> I ran some tests yesterday with cusexmp [1] and I could add a new char
>> device as an unprivileged user with:
>>
>> $ unshare -U -r -m sh -c 'mount --bind /mnt/cuse /dev/cuse ; cusexmp
>> --maj=99 --min=30 --name=foo
>>
>> where /mnt/cuse is previously mknod'ed correctly and chmod'ed 777.
>> Then, I could see the new device:
>>
>> $ cat /proc/devices | grep foo
>>  99 foo
>>
>> On normal distros, we don't have a /mnt/cuse chmod'ed 777 but still it
>> seems dangerous if the dev node can be provided otherwise and if we
>> don't have a use case for it.
>>
>> Thoughts?
>
> I can't remember the specific reasons, but I had concluded that letting
> unprivileged users use cuse within a user namespace isn't safe. But
> having a cuse device node usable by regular users at all is equally
> unsafe I suspect,

This makes sense.

> so I don't think your example demonstrates any problem
> specific to user namespaces. There shouldn't be any way to use a user
> namespace to gain access permissions towards /dev/cuse, otherwise we
> have bigger problems than cuse to worry about.

From my tests, the patch seem safe but I don't fully understand why that is.

I am not trying to gain more permissions towards /dev/cuse but to
create another cuse char file from within the unprivileged userns. I
tested the scenario by patching the memfs userspace FUSE driver to
generate the char device whenever the file is named "cuse" (turning
the regular file into a char device with the cuse major/minor behind
the scene):

$ unshare -U -r -m
# memfs /mnt/memfs &
# ls -l /mnt/memfs
# echo -n > /mnt/memfs/cuse
-bash: /mnt/memfs/cuse: Input/output error
# ls -l /mnt/memfs/cuse
crwxrwxrwx. 1 root root 10, 203 Jan 17 18:24 /mnt/memfs/cuse
# cat /mnt/memfs/cuse
cat: /mnt/memfs/cuse: Permission denied

But then, I could not use that char device, even though it seems to
have the correct major/minor and permissions. The kernel FUSE code
seems to call init_special_inode() to handle character devices. I
don't understand why it seems to be safe.

Thanks!
Alban
Seth Forshee Jan. 17, 2018, 7:31 p.m.
On Wed, Jan 17, 2018 at 07:56:59PM +0100, Alban Crequy wrote:
> On Wed, Jan 17, 2018 at 3:29 PM, Seth Forshee
> <seth.forshee@canonical.com> wrote:
> > On Wed, Jan 17, 2018 at 11:59:06AM +0100, Alban Crequy wrote:
> >> [Adding Tejun, David, Tom for question about cuse]
> >>
> >> On Fri, Dec 22, 2017 at 3:32 PM, Dongsu Park <dongsu@kinvolk.io> wrote:
> >> > From: Seth Forshee <seth.forshee@canonical.com>
> >> >
> >> > In order to support mounts from namespaces other than
> >> > init_user_ns, fuse must translate uids and gids to/from the
> >> > userns of the process servicing requests on /dev/fuse. This
> >> > patch does that, with a couple of restrictions on the namespace:
> >> >
> >> >  - The userns for the fuse connection is fixed to the namespace
> >> >    from which /dev/fuse is opened.
> >> >
> >> >  - The namespace must be the same as s_user_ns.
> >> >
> >> > These restrictions simplify the implementation by avoiding the
> >> > need to pass around userns references and by allowing fuse to
> >> > rely on the checks in inode_change_ok for ownership changes.
> >> > Either restriction could be relaxed in the future if needed.
> >> >
> >> > For cuse the namespace used for the connection is also simply
> >> > current_user_ns() at the time /dev/cuse is opened.
> >>
> >> Was a use case discussed for using cuse in a new unprivileged userns?
> >>
> >> I ran some tests yesterday with cusexmp [1] and I could add a new char
> >> device as an unprivileged user with:
> >>
> >> $ unshare -U -r -m sh -c 'mount --bind /mnt/cuse /dev/cuse ; cusexmp
> >> --maj=99 --min=30 --name=foo
> >>
> >> where /mnt/cuse is previously mknod'ed correctly and chmod'ed 777.
> >> Then, I could see the new device:
> >>
> >> $ cat /proc/devices | grep foo
> >>  99 foo
> >>
> >> On normal distros, we don't have a /mnt/cuse chmod'ed 777 but still it
> >> seems dangerous if the dev node can be provided otherwise and if we
> >> don't have a use case for it.
> >>
> >> Thoughts?
> >
> > I can't remember the specific reasons, but I had concluded that letting
> > unprivileged users use cuse within a user namespace isn't safe. But
> > having a cuse device node usable by regular users at all is equally
> > unsafe I suspect,
> 
> This makes sense.
> 
> > so I don't think your example demonstrates any problem
> > specific to user namespaces. There shouldn't be any way to use a user
> > namespace to gain access permissions towards /dev/cuse, otherwise we
> > have bigger problems than cuse to worry about.
> 
> From my tests, the patch seem safe but I don't fully understand why that is.
> 
> I am not trying to gain more permissions towards /dev/cuse but to
> create another cuse char file from within the unprivileged userns. I
> tested the scenario by patching the memfs userspace FUSE driver to
> generate the char device whenever the file is named "cuse" (turning
> the regular file into a char device with the cuse major/minor behind
> the scene):
> 
> $ unshare -U -r -m
> # memfs /mnt/memfs &
> # ls -l /mnt/memfs
> # echo -n > /mnt/memfs/cuse
> -bash: /mnt/memfs/cuse: Input/output error
> # ls -l /mnt/memfs/cuse
> crwxrwxrwx. 1 root root 10, 203 Jan 17 18:24 /mnt/memfs/cuse
> # cat /mnt/memfs/cuse
> cat: /mnt/memfs/cuse: Permission denied
> 
> But then, I could not use that char device, even though it seems to
> have the correct major/minor and permissions. The kernel FUSE code
> seems to call init_special_inode() to handle character devices. I
> don't understand why it seems to be safe.

Because for new mounts in non-init user namespaces alloc_super() sets
SB_I_NODEV flag in s_iflags, which disallows opening device nodes in
that filesystem.

Seth
Alban Crequy Jan. 18, 2018, 10:29 a.m.
On Wed, Jan 17, 2018 at 8:31 PM, Seth Forshee
<seth.forshee@canonical.com> wrote:
> On Wed, Jan 17, 2018 at 07:56:59PM +0100, Alban Crequy wrote:
>> On Wed, Jan 17, 2018 at 3:29 PM, Seth Forshee
>> <seth.forshee@canonical.com> wrote:
>> > On Wed, Jan 17, 2018 at 11:59:06AM +0100, Alban Crequy wrote:
>> >> [Adding Tejun, David, Tom for question about cuse]
>> >>
>> >> On Fri, Dec 22, 2017 at 3:32 PM, Dongsu Park <dongsu@kinvolk.io> wrote:
>> >> > From: Seth Forshee <seth.forshee@canonical.com>
>> >> >
>> >> > In order to support mounts from namespaces other than
>> >> > init_user_ns, fuse must translate uids and gids to/from the
>> >> > userns of the process servicing requests on /dev/fuse. This
>> >> > patch does that, with a couple of restrictions on the namespace:
>> >> >
>> >> >  - The userns for the fuse connection is fixed to the namespace
>> >> >    from which /dev/fuse is opened.
>> >> >
>> >> >  - The namespace must be the same as s_user_ns.
>> >> >
>> >> > These restrictions simplify the implementation by avoiding the
>> >> > need to pass around userns references and by allowing fuse to
>> >> > rely on the checks in inode_change_ok for ownership changes.
>> >> > Either restriction could be relaxed in the future if needed.
>> >> >
>> >> > For cuse the namespace used for the connection is also simply
>> >> > current_user_ns() at the time /dev/cuse is opened.
>> >>
>> >> Was a use case discussed for using cuse in a new unprivileged userns?
>> >>
>> >> I ran some tests yesterday with cusexmp [1] and I could add a new char
>> >> device as an unprivileged user with:
>> >>
>> >> $ unshare -U -r -m sh -c 'mount --bind /mnt/cuse /dev/cuse ; cusexmp
>> >> --maj=99 --min=30 --name=foo
>> >>
>> >> where /mnt/cuse is previously mknod'ed correctly and chmod'ed 777.
>> >> Then, I could see the new device:
>> >>
>> >> $ cat /proc/devices | grep foo
>> >>  99 foo
>> >>
>> >> On normal distros, we don't have a /mnt/cuse chmod'ed 777 but still it
>> >> seems dangerous if the dev node can be provided otherwise and if we
>> >> don't have a use case for it.
>> >>
>> >> Thoughts?
>> >
>> > I can't remember the specific reasons, but I had concluded that letting
>> > unprivileged users use cuse within a user namespace isn't safe. But
>> > having a cuse device node usable by regular users at all is equally
>> > unsafe I suspect,
>>
>> This makes sense.
>>
>> > so I don't think your example demonstrates any problem
>> > specific to user namespaces. There shouldn't be any way to use a user
>> > namespace to gain access permissions towards /dev/cuse, otherwise we
>> > have bigger problems than cuse to worry about.
>>
>> From my tests, the patch seem safe but I don't fully understand why that is.
>>
>> I am not trying to gain more permissions towards /dev/cuse but to
>> create another cuse char file from within the unprivileged userns. I
>> tested the scenario by patching the memfs userspace FUSE driver to
>> generate the char device whenever the file is named "cuse" (turning
>> the regular file into a char device with the cuse major/minor behind
>> the scene):
>>
>> $ unshare -U -r -m
>> # memfs /mnt/memfs &
>> # ls -l /mnt/memfs
>> # echo -n > /mnt/memfs/cuse
>> -bash: /mnt/memfs/cuse: Input/output error
>> # ls -l /mnt/memfs/cuse
>> crwxrwxrwx. 1 root root 10, 203 Jan 17 18:24 /mnt/memfs/cuse
>> # cat /mnt/memfs/cuse
>> cat: /mnt/memfs/cuse: Permission denied
>>
>> But then, I could not use that char device, even though it seems to
>> have the correct major/minor and permissions. The kernel FUSE code
>> seems to call init_special_inode() to handle character devices. I
>> don't understand why it seems to be safe.
>
> Because for new mounts in non-init user namespaces alloc_super() sets
> SB_I_NODEV flag in s_iflags, which disallows opening device nodes in
> that filesystem.

I see. Thanks for the explanation!