[RH7] overlayfs: avoid permission check for priveleged processes

Submitted by Andrey Zhadchenko on Oct. 13, 2020, 11:05 p.m.

Details

Message ID 1602630321-729514-1-git-send-email-andrey.zhadchenko@virtuozzo.com
State New
Series "overlayfs: avoid permission check for priveleged processes"
Headers show

Commit Message

Andrey Zhadchenko Oct. 13, 2020, 11:05 p.m.
Overlayfs temporary override credentials in copy_up function to ones which was
used to create mount. Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
capability in current user namespace. This leads to strange situations.
For example, if overlayfs mount was made inside ve it is impossible to use
copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
credentials are not sufficient in init_user_ns to set xattr to file.
This is also required for criu since copy_up can be triggered on dump stage:
reading inotify fhandle from /proc may start copy_up.

Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.

https://jira.sw.ru/browse/PSBM-108122
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
---
 fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
 fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
 fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
 fs/xattr.c               |  2 +-
 4 files changed, 74 insertions(+), 24 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1564a35..d6b285f 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@ 
 #include <linux/fdtable.h>
 #include <linux/ratelimit.h>
 #include <linux/exportfs.h>
+#include <linux/capability.h>
 #include "overlayfs.h"
 
 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -321,8 +322,8 @@  out:
 	return fh;
 }
 
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-		   struct dentry *upper)
+int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper, int propagate_cap)
 {
 	const struct ovl_fh *fh = NULL;
 	int err;
@@ -341,8 +342,8 @@  int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	/*
 	 * Do not fail when upper doesn't support xattrs.
 	 */
-	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
-				 fh ? fh->len : 0, 0);
+	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
+				 fh ? fh->len : 0, 0, propagate_cap);
 	kfree(fh);
 
 	return err;
@@ -433,6 +434,7 @@  struct ovl_copy_up_ctx {
 	struct dentry *destdir;
 	struct qstr destname;
 	struct dentry *workdir;
+	int propagate_cap;
 	bool tmpfile;
 	bool origin;
 	bool indexed;
@@ -711,7 +713,7 @@  out:
 }
 
 static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-			   int flags)
+			   int flags, int propagate_cap)
 {
 	int err;
 	struct path parentpath;
@@ -719,6 +721,7 @@  static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		.parent = parent,
 		.dentry = dentry,
 		.workdir = ovl_workdir(dentry),
+		.propagate_cap = propagate_cap,
 	};
 
 	if (WARN_ON(!ctx.workdir))
@@ -768,9 +771,19 @@  static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	return err;
 }
 
+static int ovl_can_propagate_cap(struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
+
+	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
+}
+
 int ovl_copy_up_flags(struct dentry *dentry, int flags)
 {
 	int err = 0;
+	int propagate_cap = ovl_can_propagate_cap(dentry);
 	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
 	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
 
@@ -815,7 +828,7 @@  int ovl_copy_up_flags(struct dentry *dentry, int flags)
 			next = parent;
 		}
 
-		err = ovl_copy_up_one(parent, next, flags);
+		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
 
 		dput(parent);
 		dput(next);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7052938..6917acd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -149,15 +149,6 @@  static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
-				  const void *value, size_t size, int flags)
-{
-	int err = vfs_setxattr(dentry, name, value, size, flags);
-	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
-		 dentry, name, min((int)size, 48), value, size, flags, err);
-	return err;
-}
-
 static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
 {
 	int err = vfs_removexattr(dentry, name);
@@ -245,9 +236,12 @@  int ovl_copy_up_start(struct dentry *dentry);
 void ovl_copy_up_end(struct dentry *dentry);
 bool ovl_check_origin_xattr(struct dentry *dentry);
 bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
 		       const char *name, const void *value, size_t size,
-		       int xerr);
+		       int xerr, int propagate_cap);
+int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
+			const void *value, size_t size, int flags,
+			int propagate_cap);
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
 void ovl_set_flag(unsigned long flag, struct inode *inode);
 void ovl_clear_flag(unsigned long flag, struct inode *inode);
@@ -279,6 +273,19 @@  static inline unsigned int ovl_xino_bits(struct super_block *sb)
 	return ofs->xino_bits;
 }
 
+static inline int ovl_check_setxattr(struct dentry *dentry,
+		struct dentry *upperdentry, const char *name,
+		const void *value, size_t size, int xerr)
+{
+	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
+				      xerr, 0);
+}
+
+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
+{
+	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
+}
 
 /* namei.c */
 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
@@ -400,8 +407,14 @@  int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
 struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-		   struct dentry *upper);
+int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper, int propagate_cap);
+
+static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper)
+{
+	return ovl_set_origin_ext(dentry, lower, upper, 0);
+}
 
 /* export.c */
 extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 52a5116..30c10f1 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -419,9 +419,32 @@  bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
 	return false;
 }
 
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
-		       const char *name, const void *value, size_t size,
-		       int xerr)
+int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int propagate_cap)
+{
+	int err = vfs_setxattr(dentry, name, value, size, flags);
+
+	if (propagate_cap && err == -EPERM) {
+		struct inode *ino = dentry->d_inode;
+
+		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
+			goto out;
+
+		inode_lock(ino);
+		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+		inode_unlock(ino);
+	}
+
+out:
+	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
+		 dentry, name, min((int)size, 48), value, size, flags, err,
+		 propagate_cap);
+	return err;
+}
+
+int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
+			   const char *name, const void *value, size_t size,
+			   int xerr, int propagate_cap)
 {
 	int err;
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
@@ -429,7 +452,8 @@  int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 	if (ofs->noxattr)
 		return xerr;
 
-	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
+			      propagate_cap);
 
 	if (err == -EOPNOTSUPP) {
 		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
diff --git a/fs/xattr.c b/fs/xattr.c
index 9c24acc..c164e83 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -124,7 +124,7 @@  int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
 
 	return error;
 }
-
+EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
 
 int
 vfs_setxattr(struct dentry *dentry, const char *name, const void *value,

Comments

Vasily Averin Oct. 14, 2020, 4:52 a.m.
Pavel,
please review

On 10/14/20 2:05 AM, Andrey Zhadchenko wrote:
> Overlayfs temporary override credentials in copy_up function to ones which was
> used to create mount. Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> capability in current user namespace. This leads to strange situations.
> For example, if overlayfs mount was made inside ve it is impossible to use
> copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
> credentials are not sufficient in init_user_ns to set xattr to file.
> This is also required for criu since copy_up can be triggered on dump stage:
> reading inotify fhandle from /proc may start copy_up.
> 
> Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
> have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.
> 
> https://jira.sw.ru/browse/PSBM-108122
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
> ---
>  fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
>  fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
>  fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
>  fs/xattr.c               |  2 +-
>  4 files changed, 74 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index 1564a35..d6b285f 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -20,6 +20,7 @@
>  #include <linux/fdtable.h>
>  #include <linux/ratelimit.h>
>  #include <linux/exportfs.h>
> +#include <linux/capability.h>
>  #include "overlayfs.h"
>  
>  #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> @@ -321,8 +322,8 @@ out:
>  	return fh;
>  }
>  
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper)
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap)
>  {
>  	const struct ovl_fh *fh = NULL;
>  	int err;
> @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
>  	/*
>  	 * Do not fail when upper doesn't support xattrs.
>  	 */
> -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
> -				 fh ? fh->len : 0, 0);
> +	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
> +				 fh ? fh->len : 0, 0, propagate_cap);
>  	kfree(fh);
>  
>  	return err;
> @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
>  	struct dentry *destdir;
>  	struct qstr destname;
>  	struct dentry *workdir;
> +	int propagate_cap;
>  	bool tmpfile;
>  	bool origin;
>  	bool indexed;
> @@ -711,7 +713,7 @@ out:
>  }
>  
>  static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> -			   int flags)
> +			   int flags, int propagate_cap)
>  {
>  	int err;
>  	struct path parentpath;
> @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>  		.parent = parent,
>  		.dentry = dentry,
>  		.workdir = ovl_workdir(dentry),
> +		.propagate_cap = propagate_cap,
>  	};
>  
>  	if (WARN_ON(!ctx.workdir))
> @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>  	return err;
>  }
>  
> +static int ovl_can_propagate_cap(struct dentry *dentry)
> +{
> +	struct super_block *sb = dentry->d_sb;
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> +
> +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> +}
> +
>  int ovl_copy_up_flags(struct dentry *dentry, int flags)
>  {
>  	int err = 0;
> +	int propagate_cap = ovl_can_propagate_cap(dentry);
>  	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
>  	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
>  
> @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
>  			next = parent;
>  		}
>  
> -		err = ovl_copy_up_one(parent, next, flags);
> +		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
>  
>  		dput(parent);
>  		dput(next);
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 7052938..6917acd 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
>  	return err;
>  }
>  
> -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> -				  const void *value, size_t size, int flags)
> -{
> -	int err = vfs_setxattr(dentry, name, value, size, flags);
> -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
> -		 dentry, name, min((int)size, 48), value, size, flags, err);
> -	return err;
> -}
> -
>  static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
>  {
>  	int err = vfs_removexattr(dentry, name);
> @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
>  void ovl_copy_up_end(struct dentry *dentry);
>  bool ovl_check_origin_xattr(struct dentry *dentry);
>  bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
>  		       const char *name, const void *value, size_t size,
> -		       int xerr);
> +		       int xerr, int propagate_cap);
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +			const void *value, size_t size, int flags,
> +			int propagate_cap);
>  int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
>  void ovl_set_flag(unsigned long flag, struct inode *inode);
>  void ovl_clear_flag(unsigned long flag, struct inode *inode);
> @@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
>  	return ofs->xino_bits;
>  }
>  
> +static inline int ovl_check_setxattr(struct dentry *dentry,
> +		struct dentry *upperdentry, const char *name,
> +		const void *value, size_t size, int xerr)
> +{
> +	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
> +				      xerr, 0);
> +}
> +
> +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags)
> +{
> +	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
> +}
>  
>  /* namei.c */
>  int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
>  int ovl_copy_xattr(struct dentry *old, struct dentry *new);
>  int ovl_set_attr(struct dentry *upper, struct kstat *stat);
>  struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper);
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap);
> +
> +static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper)
> +{
> +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> +}
>  
>  /* export.c */
>  extern const struct export_operations ovl_export_operations;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 52a5116..30c10f1 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
>  	return false;
>  }
>  
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> -		       const char *name, const void *value, size_t size,
> -		       int xerr)
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int propagate_cap)
> +{
> +	int err = vfs_setxattr(dentry, name, value, size, flags);
> +
> +	if (propagate_cap && err == -EPERM) {
> +		struct inode *ino = dentry->d_inode;
> +
> +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> +			goto out;
> +
> +		inode_lock(ino);
> +		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
> +		inode_unlock(ino);
> +	}
> +
> +out:
> +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
> +		 dentry, name, min((int)size, 48), value, size, flags, err,
> +		 propagate_cap);
> +	return err;
> +}
> +
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> +			   const char *name, const void *value, size_t size,
> +			   int xerr, int propagate_cap)
>  {
>  	int err;
>  	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
>  	if (ofs->noxattr)
>  		return xerr;
>  
> -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> +	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
> +			      propagate_cap);
>  
>  	if (err == -EOPNOTSUPP) {
>  		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 9c24acc..c164e83 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
>  
>  	return error;
>  }
> -
> +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
>  
>  int
>  vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
>
Pavel Tikhomirov Oct. 14, 2020, 9:33 a.m.
On 10/14/20 2:05 AM, Andrey Zhadchenko wrote:
> Overlayfs temporary override credentials in copy_up function to ones which was
> used to create mount.

> Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> capability in current user namespace.

No, if it was so, it would be no error =) To be correct we should say:

Function vfs_setxattr for "trusted." attrs requires CAP_SYS_ADMIN in 
current ve's userns.

It is done so to mimic mainstream behaviour for containers(ves), so that 
container user can't set "trusted." xattrs if it is in non-root 
container userns.

> This leads to strange situations.
> For example, if overlayfs mount was made inside ve it is impossible to use
> copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
> credentials are not sufficient in init_user_ns to set xattr to file.
> This is also required for criu since copy_up can be triggered on dump stage:
> reading inotify fhandle from /proc may start copy_up.

I hope that overlayfs overrides credentials exactly to be able to pass 
those checks. In mainstream kernel overlayfs can used from any userns, 
but can be only mounted from init_user_ns, so credentials always change 
to "more permissive". So it should be safe to skip override in case we 
are already "more permissive" than superblocks credentials.

> 
> Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
> have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.

Sorry but looking on the code I don't see how it works... There are only 
three codepaths here:

   +-< ovl_do_setxattr_ext
     +-< ovl_do_setxattr #1 sets propagate_cap to false
     +-< ovl_check_setxattr_ext
     | +-< ovl_set_origin_ext
     | | +-< ovl_set_origin #2 sets propagate_cap to false
     | +-< ovl_check_setxattr #3 sets propagate_cap to false

And on all of them we don't "propagate_cap". Probably I'm missing 
something though.

> 
> https://jira.sw.ru/browse/PSBM-108122
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
> ---
>   fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
>   fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
>   fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
>   fs/xattr.c               |  2 +-
>   4 files changed, 74 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index 1564a35..d6b285f 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -20,6 +20,7 @@
>   #include <linux/fdtable.h>
>   #include <linux/ratelimit.h>
>   #include <linux/exportfs.h>
> +#include <linux/capability.h>
>   #include "overlayfs.h"
>   
>   #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> @@ -321,8 +322,8 @@ out:
>   	return fh;
>   }
>   
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper)
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap)
>   {
>   	const struct ovl_fh *fh = NULL;
>   	int err;
> @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
>   	/*
>   	 * Do not fail when upper doesn't support xattrs.
>   	 */
> -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
> -				 fh ? fh->len : 0, 0);
> +	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
> +				 fh ? fh->len : 0, 0, propagate_cap);
>   	kfree(fh);
>   
>   	return err;
> @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
>   	struct dentry *destdir;
>   	struct qstr destname;
>   	struct dentry *workdir;
> +	int propagate_cap;
>   	bool tmpfile;
>   	bool origin;
>   	bool indexed;
> @@ -711,7 +713,7 @@ out:
>   }
>   
>   static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> -			   int flags)
> +			   int flags, int propagate_cap)
>   {
>   	int err;
>   	struct path parentpath;
> @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>   		.parent = parent,
>   		.dentry = dentry,
>   		.workdir = ovl_workdir(dentry),
> +		.propagate_cap = propagate_cap,
>   	};
>   
>   	if (WARN_ON(!ctx.workdir))
> @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>   	return err;
>   }
>   
> +static int ovl_can_propagate_cap(struct dentry *dentry)
> +{
> +	struct super_block *sb = dentry->d_sb;
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> +
> +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> +}
> +
>   int ovl_copy_up_flags(struct dentry *dentry, int flags)
>   {
>   	int err = 0;
> +	int propagate_cap = ovl_can_propagate_cap(dentry);
>   	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
>   	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
>   
> @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
>   			next = parent;
>   		}
>   
> -		err = ovl_copy_up_one(parent, next, flags);
> +		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
>   
>   		dput(parent);
>   		dput(next);
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 7052938..6917acd 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
>   	return err;
>   }
>   
> -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> -				  const void *value, size_t size, int flags)
> -{
> -	int err = vfs_setxattr(dentry, name, value, size, flags);
> -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
> -		 dentry, name, min((int)size, 48), value, size, flags, err);
> -	return err;
> -}
> -
>   static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
>   {
>   	int err = vfs_removexattr(dentry, name);
> @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
>   void ovl_copy_up_end(struct dentry *dentry);
>   bool ovl_check_origin_xattr(struct dentry *dentry);
>   bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
>   		       const char *name, const void *value, size_t size,
> -		       int xerr);
> +		       int xerr, int propagate_cap);
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +			const void *value, size_t size, int flags,
> +			int propagate_cap);
>   int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
>   void ovl_set_flag(unsigned long flag, struct inode *inode);
>   void ovl_clear_flag(unsigned long flag, struct inode *inode);
> @@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
>   	return ofs->xino_bits;
>   }
>   
> +static inline int ovl_check_setxattr(struct dentry *dentry,
> +		struct dentry *upperdentry, const char *name,
> +		const void *value, size_t size, int xerr)
> +{
> +	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
> +				      xerr, 0);
> +}
> +
> +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags)
> +{
> +	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
> +}
>   
>   /* namei.c */
>   int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
>   int ovl_copy_xattr(struct dentry *old, struct dentry *new);
>   int ovl_set_attr(struct dentry *upper, struct kstat *stat);
>   struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper);
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap);
> +
> +static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper)
> +{
> +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> +}
>   
>   /* export.c */
>   extern const struct export_operations ovl_export_operations;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 52a5116..30c10f1 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
>   	return false;
>   }
>   
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> -		       const char *name, const void *value, size_t size,
> -		       int xerr)
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int propagate_cap)
> +{
> +	int err = vfs_setxattr(dentry, name, value, size, flags);
> +
> +	if (propagate_cap && err == -EPERM) {
> +		struct inode *ino = dentry->d_inode;
> +
> +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> +			goto out;
> +
> +		inode_lock(ino);
> +		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
> +		inode_unlock(ino);

I don't like this method. We have different permission checks in 
vfs_setxattr (e.g.: xattr_permission and security_inode_setxattr), by 
this we just ignore all of them, but actually need only 
xattr_permission's trusted part.

I would feel much better if we just don't override creds in overlay if 
our current cred is already more permissive then the overriding one. 
Because we can face ve_capable checks somewhere else later unexpectedly 
and will need to debug it all again. Probably it's better to allow 
everything for a host user?

Maybe we need to discuss this with company of @den @khorenko @vvs.

> +	}
> +
> +out:
> +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
> +		 dentry, name, min((int)size, 48), value, size, flags, err,
> +		 propagate_cap);
> +	return err;
> +}
> +
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> +			   const char *name, const void *value, size_t size,
> +			   int xerr, int propagate_cap)
>   {
>   	int err;
>   	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
>   	if (ofs->noxattr)
>   		return xerr;
>   
> -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> +	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
> +			      propagate_cap);
>   
>   	if (err == -EOPNOTSUPP) {
>   		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 9c24acc..c164e83 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
>   
>   	return error;
>   }
> -
> +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
>   
>   int
>   vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
>
Andrey Zhadchenko Oct. 14, 2020, 12:56 p.m.
On Wed, 14 Oct 2020 12:33:53 +0300
Pavel Tikhomirov <ptikhomirov@virtuozzo.com> wrote:

> On 10/14/20 2:05 AM, Andrey Zhadchenko wrote:
> > Overlayfs temporary override credentials in copy_up function to
> > ones which was used to create mount.  
> 
> > Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> > capability in current user namespace.  
> 
> No, if it was so, it would be no error =) To be correct we should say:
> 
> Function vfs_setxattr for "trusted." attrs requires CAP_SYS_ADMIN in 
> current ve's userns.
> 

Yes, it is exactly as you say. "trusted" prefix requires CAP_SYS_ADMIN.
I will fix commit message.

> It is done so to mimic mainstream behaviour for containers(ves), so
> that container user can't set "trusted." xattrs if it is in non-root 
> container userns.
> 
> > This leads to strange situations.
> > For example, if overlayfs mount was made inside ve it is impossible
> > to use copy_up from init_user_ns even with CAP_SYS_ADMIN. This is
> > because overriden credentials are not sufficient in init_user_ns to
> > set xattr to file. This is also required for criu since copy_up can
> > be triggered on dump stage: reading inotify fhandle from /proc may
> > start copy_up.  
> 
> I hope that overlayfs overrides credentials exactly to be able to
> pass those checks. In mainstream kernel overlayfs can used from any
> userns, but can be only mounted from init_user_ns, so credentials
> always change to "more permissive". So it should be safe to skip
> override in case we are already "more permissive" than superblocks
> credentials.
> 

At least in vz7 currently you can unshare + mount overlayfs.
But it will disable nfs_export (dmesg below), since overlayfs won't be
able so set trusted xattr.

unshare -m -p -f -U -r bash
mkdir lower upper merged work
mount -t overlay overlay -o
lowerdir=./lower,upperdir=./upper,workdir=./work ./merged
dmesg | tail -n 10
[708824.300397] overlayfs: upper fs does not support xattr, falling back to index=off. 
[708824.300399] overlayfs: NFS export requires "index=on", falling back to nfs_export=off.

> > 
> > Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current
> > credentials have CAP_SYS_ADMIN in namespace that is recorded in
> > overlayfs mount superblock.  
> 
> Sorry but looking on the code I don't see how it works... There are
> only three codepaths here:
> 
>    +-< ovl_do_setxattr_ext
>      +-< ovl_do_setxattr #1 sets propagate_cap to false
>      +-< ovl_check_setxattr_ext
>      | +-< ovl_set_origin_ext
>      | | +-< ovl_set_origin #2 sets propagate_cap to false
>      | +-< ovl_check_setxattr #3 sets propagate_cap to false
> 
> And on all of them we don't "propagate_cap". Probably I'm missing 
> something though.
> 

Yes, I really lost this during polishing.
This should be like that:

+-< ovl_copy_up_flags #checks for propagate_cap, overrides creds
  +-< ovl_copy_up_one #fills ctx with propagate_cap
  | +-< ovl_copy_up_locked
  | | +-< ovl_copy_up_inode # calls ovl_do_setxattr_ext with propagate_cap from ctx <- I missed that
  | | | +-< ovl_do_setxattr_ext

> > 
> > https://jira.sw.ru/browse/PSBM-108122
> > Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
> > ---
> >   fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
> >   fs/overlayfs/overlayfs.h | 39
> > ++++++++++++++++++++++++++------------- fs/overlayfs/util.c      |
> > 32 ++++++++++++++++++++++++++++---- fs/xattr.c               |  2 +-
> >   4 files changed, 74 insertions(+), 24 deletions(-)
> > 
> > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> > index 1564a35..d6b285f 100644
> > --- a/fs/overlayfs/copy_up.c
> > +++ b/fs/overlayfs/copy_up.c
> > @@ -20,6 +20,7 @@
> >   #include <linux/fdtable.h>
> >   #include <linux/ratelimit.h>
> >   #include <linux/exportfs.h>
> > +#include <linux/capability.h>
> >   #include "overlayfs.h"
> >   
> >   #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> > @@ -321,8 +322,8 @@ out:
> >   	return fh;
> >   }
> >   
> > -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> > -		   struct dentry *upper)
> > +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> > +		   struct dentry *upper, int propagate_cap)
> >   {
> >   	const struct ovl_fh *fh = NULL;
> >   	int err;
> > @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry,
> > struct dentry *lower, /*
> >   	 * Do not fail when upper doesn't support xattrs.
> >   	 */
> > -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN,
> > fh,
> > -				 fh ? fh->len : 0, 0);
> > +	err = ovl_check_setxattr_ext(dentry, upper,
> > OVL_XATTR_ORIGIN, fh,
> > +				 fh ? fh->len : 0, 0,
> > propagate_cap); kfree(fh);
> >   
> >   	return err;
> > @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
> >   	struct dentry *destdir;
> >   	struct qstr destname;
> >   	struct dentry *workdir;
> > +	int propagate_cap;
> >   	bool tmpfile;
> >   	bool origin;
> >   	bool indexed;
> > @@ -711,7 +713,7 @@ out:
> >   }
> >   
> >   static int ovl_copy_up_one(struct dentry *parent, struct dentry
> > *dentry,
> > -			   int flags)
> > +			   int flags, int propagate_cap)
> >   {
> >   	int err;
> >   	struct path parentpath;
> > @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry
> > *parent, struct dentry *dentry, .parent = parent,
> >   		.dentry = dentry,
> >   		.workdir = ovl_workdir(dentry),
> > +		.propagate_cap = propagate_cap,
> >   	};
> >   
> >   	if (WARN_ON(!ctx.workdir))
> > @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry
> > *parent, struct dentry *dentry, return err;
> >   }
> >   
> > +static int ovl_can_propagate_cap(struct dentry *dentry)
> > +{
> > +	struct super_block *sb = dentry->d_sb;
> > +	struct ovl_fs *ofs = sb->s_fs_info;
> > +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> > +
> > +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> > +}
> > +
> >   int ovl_copy_up_flags(struct dentry *dentry, int flags)
> >   {
> >   	int err = 0;
> > +	int propagate_cap = ovl_can_propagate_cap(dentry);
> >   	const struct cred *old_cred =
> > ovl_override_creds(dentry->d_sb); bool disconnected =
> > (dentry->d_flags & DCACHE_DISCONNECTED); 
> > @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry,
> > int flags) next = parent;
> >   		}
> >   
> > -		err = ovl_copy_up_one(parent, next, flags);
> > +		err = ovl_copy_up_one(parent, next, flags,
> > propagate_cap); 
> >   		dput(parent);
> >   		dput(next);
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index 7052938..6917acd 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode
> > *dir, struct dentry *dentry, return err;
> >   }
> >   
> > -static inline int ovl_do_setxattr(struct dentry *dentry, const
> > char *name,
> > -				  const void *value, size_t size,
> > int flags) -{
> > -	int err = vfs_setxattr(dentry, name, value, size, flags);
> > -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) =
> > %i\n",
> > -		 dentry, name, min((int)size, 48), value, size,
> > flags, err);
> > -	return err;
> > -}
> > -
> >   static inline int ovl_do_removexattr(struct dentry *dentry, const
> > char *name) {
> >   	int err = vfs_removexattr(dentry, name);
> > @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
> >   void ovl_copy_up_end(struct dentry *dentry);
> >   bool ovl_check_origin_xattr(struct dentry *dentry);
> >   bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> > -int ovl_check_setxattr(struct dentry *dentry, struct dentry
> > *upperdentry, +int ovl_check_setxattr_ext(struct dentry *dentry,
> > struct dentry *upperdentry, const char *name, const void *value,
> > size_t size,
> > -		       int xerr);
> > +		       int xerr, int propagate_cap);
> > +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> > +			const void *value, size_t size, int flags,
> > +			int propagate_cap);
> >   int ovl_set_impure(struct dentry *dentry, struct dentry
> > *upperdentry); void ovl_set_flag(unsigned long flag, struct inode
> > *inode); void ovl_clear_flag(unsigned long flag, struct inode
> > *inode); @@ -279,6 +273,19 @@ static inline unsigned int
> > ovl_xino_bits(struct super_block *sb) return ofs->xino_bits;
> >   }
> >   
> > +static inline int ovl_check_setxattr(struct dentry *dentry,
> > +		struct dentry *upperdentry, const char *name,
> > +		const void *value, size_t size, int xerr)
> > +{
> > +	return ovl_check_setxattr_ext(dentry, upperdentry, name,
> > value, size,
> > +				      xerr, 0);
> > +}
> > +
> > +static inline int ovl_do_setxattr(struct dentry *dentry, const
> > char *name,
> > +		const void *value, size_t size, int flags)
> > +{
> > +	return ovl_do_setxattr_ext(dentry, name, value, size,
> > flags, 0); +}
> >   
> >   /* namei.c */
> >   int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> > @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry,
> > int flags); int ovl_copy_xattr(struct dentry *old, struct dentry
> > *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat);
> >   struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool
> > is_upper); -int ovl_set_origin(struct dentry *dentry, struct dentry
> > *lower,
> > -		   struct dentry *upper);
> > +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> > +		   struct dentry *upper, int propagate_cap);
> > +
> > +static inline int ovl_set_origin(struct dentry *dentry, struct
> > dentry *lower,
> > +		   struct dentry *upper)
> > +{
> > +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> > +}
> >   
> >   /* export.c */
> >   extern const struct export_operations ovl_export_operations;
> > diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> > index 52a5116..30c10f1 100644
> > --- a/fs/overlayfs/util.c
> > +++ b/fs/overlayfs/util.c
> > @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry
> > *dentry, const char *name) return false;
> >   }
> >   
> > -int ovl_check_setxattr(struct dentry *dentry, struct dentry
> > *upperdentry,
> > -		       const char *name, const void *value, size_t
> > size,
> > -		       int xerr)
> > +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> > +		const void *value, size_t size, int flags, int
> > propagate_cap) +{
> > +	int err = vfs_setxattr(dentry, name, value, size, flags);
> > +
> > +	if (propagate_cap && err == -EPERM) {
> > +		struct inode *ino = dentry->d_inode;
> > +
> > +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> > +			goto out;
> > +
> > +		inode_lock(ino);
> > +		err = __vfs_setxattr_noperm(dentry, name, value,
> > size, flags);
> > +		inode_unlock(ino);  
> 
> I don't like this method. We have different permission checks in 
> vfs_setxattr (e.g.: xattr_permission and security_inode_setxattr), by 
> this we just ignore all of them, but actually need only 
> xattr_permission's trusted part.

Well I do think it is sane for CAP_SYS_ADMIN to ignore any checks
except things like IS_IMMUTABLE (I bet __vfs_setxattr_noperm would fail
in that case anyway).

As far as I saw security_inode_setxattr checks for different prefixes
like evm, etc. They also require CAP_SYS_ADMIN.

This code would be called only to set ovl origin, which is
trusted.overlay.<id> which is not the case. Of course in the worst
case we may forget about it later and explode somewhere.
Maybe add check to ensure trusted prefix before setting xattr.

> 
> I would feel much better if we just don't override creds in overlay
> if our current cred is already more permissive then the overriding
> one. Because we can face ve_capable checks somewhere else later
> unexpectedly and will need to debug it all again. Probably it's
> better to allow everything for a host user?

Perhaps you are right, but I have no idea if and how credentials affects
inode creation which is done during copy_up. Won't there be a situation
when triggered copy_up from init_user_ns create inode that won't be
accessible/deletable inside ve? That would be even worse. 

Overall do not overriding credentials look more dangerous and
unpredictable to me. Also there were one rejected patch which would
allow to disable credentials override. There may be some useful thought
https://lkml.org/lkml/2018/6/22/675

> Maybe we need to discuss this with company of @den @khorenko @vvs.
Yes, I would would like to hear more opinions too.

> 
> > +	}
> > +
> > +out:
> > +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) =
> > %i, propagate_cap = %d\n",
> > +		 dentry, name, min((int)size, 48), value, size,
> > flags, err,
> > +		 propagate_cap);
> > +	return err;
> > +}
> > +
> > +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry
> > *upperdentry,
> > +			   const char *name, const void *value,
> > size_t size,
> > +			   int xerr, int propagate_cap)
> >   {
> >   	int err;
> >   	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> > @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry,
> > struct dentry *upperdentry, if (ofs->noxattr)
> >   		return xerr;
> >   
> > -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> > +	err = ovl_do_setxattr_ext(upperdentry, name, value, size,
> > 0,
> > +			      propagate_cap);
> >   
> >   	if (err == -EOPNOTSUPP) {
> >   		pr_warn("overlayfs: cannot set %s xattr on
> > upper\n", name); diff --git a/fs/xattr.c b/fs/xattr.c
> > index 9c24acc..c164e83 100644
> > --- a/fs/xattr.c
> > +++ b/fs/xattr.c
> > @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry
> > *dentry, const char *name, 
> >   	return error;
> >   }
> > -
> > +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
> >   
> >   int
> >   vfs_setxattr(struct dentry *dentry, const char *name, const void
> > *value, 
>
Alexander Mikhalitsyn Oct. 15, 2020, 1:03 p.m.
Please, see comments inline

On Wed, 14 Oct 2020 02:05:21 +0300
Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com> wrote:

> Overlayfs temporary override credentials in copy_up function to ones which was
> used to create mount. Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> capability in current user namespace. This leads to strange situations.
> For example, if overlayfs mount was made inside ve it is impossible to use
> copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
> credentials are not sufficient in init_user_ns to set xattr to file.
> This is also required for criu since copy_up can be triggered on dump stage:
> reading inotify fhandle from /proc may start copy_up.
> 
> Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
> have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.
> 
> https://jira.sw.ru/browse/PSBM-108122
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
> ---
>  fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
>  fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
>  fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
>  fs/xattr.c               |  2 +-
>  4 files changed, 74 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index 1564a35..d6b285f 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -20,6 +20,7 @@
>  #include <linux/fdtable.h>
>  #include <linux/ratelimit.h>
>  #include <linux/exportfs.h>
> +#include <linux/capability.h>
>  #include "overlayfs.h"
>  
>  #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> @@ -321,8 +322,8 @@ out:
>  	return fh;
>  }
>  
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper)
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap)
>  {
>  	const struct ovl_fh *fh = NULL;
>  	int err;
> @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
>  	/*
>  	 * Do not fail when upper doesn't support xattrs.
>  	 */
> -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
> -				 fh ? fh->len : 0, 0);
> +	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
> +				 fh ? fh->len : 0, 0, propagate_cap);
>  	kfree(fh);
>  
>  	return err;
> @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
>  	struct dentry *destdir;
>  	struct qstr destname;
>  	struct dentry *workdir;
> +	int propagate_cap;
>  	bool tmpfile;
>  	bool origin;
>  	bool indexed;
> @@ -711,7 +713,7 @@ out:
>  }
>  
>  static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> -			   int flags)
> +			   int flags, int propagate_cap)
>  {
>  	int err;
>  	struct path parentpath;
> @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>  		.parent = parent,
>  		.dentry = dentry,
>  		.workdir = ovl_workdir(dentry),
> +		.propagate_cap = propagate_cap,

Maybe I've missed something but I can't see where this field is used.

>  	};
>  
>  	if (WARN_ON(!ctx.workdir))
> @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>  	return err;
>  }
>  
> +static int ovl_can_propagate_cap(struct dentry *dentry)
> +{
> +	struct super_block *sb = dentry->d_sb;
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> +
> +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> +}
> +
>  int ovl_copy_up_flags(struct dentry *dentry, int flags)
>  {
>  	int err = 0;
> +	int propagate_cap = ovl_can_propagate_cap(dentry);
>  	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
>  	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
>  
> @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
>  			next = parent;
>  		}
>  
> -		err = ovl_copy_up_one(parent, next, flags);
> +		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
>  
>  		dput(parent);
>  		dput(next);
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 7052938..6917acd 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
>  	return err;
>  }
>  
> -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> -				  const void *value, size_t size, int flags)
> -{
> -	int err = vfs_setxattr(dentry, name, value, size, flags);
> -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
> -		 dentry, name, min((int)size, 48), value, size, flags, err);
> -	return err;
> -}
> -
>  static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
>  {
>  	int err = vfs_removexattr(dentry, name);
> @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
>  void ovl_copy_up_end(struct dentry *dentry);
>  bool ovl_check_origin_xattr(struct dentry *dentry);
>  bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
>  		       const char *name, const void *value, size_t size,
> -		       int xerr);
> +		       int xerr, int propagate_cap);
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +			const void *value, size_t size, int flags,
> +			int propagate_cap);
>  int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
>  void ovl_set_flag(unsigned long flag, struct inode *inode);
>  void ovl_clear_flag(unsigned long flag, struct inode *inode);
> @@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
>  	return ofs->xino_bits;
>  }
>  
> +static inline int ovl_check_setxattr(struct dentry *dentry,
> +		struct dentry *upperdentry, const char *name,
> +		const void *value, size_t size, int xerr)
> +{
> +	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
> +				      xerr, 0);
> +}
> +
> +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags)
> +{
> +	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
> +}
>  
>  /* namei.c */
>  int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
>  int ovl_copy_xattr(struct dentry *old, struct dentry *new);
>  int ovl_set_attr(struct dentry *upper, struct kstat *stat);
>  struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper);
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap);
> +
> +static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper)
> +{
> +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> +}
>  
>  /* export.c */
>  extern const struct export_operations ovl_export_operations;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 52a5116..30c10f1 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
>  	return false;
>  }
>  
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> -		       const char *name, const void *value, size_t size,
> -		       int xerr)
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int propagate_cap)
> +{
> +	int err = vfs_setxattr(dentry, name, value, size, flags);
> +
> +	if (propagate_cap && err == -EPERM) {
> +		struct inode *ino = dentry->d_inode;
> +
> +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> +			goto out;
> +
> +		inode_lock(ino);
> +		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);

Speaking honestly, this is looks as very unsafe solution because
you have exported function that was "private" and as I can see you
have copied some checks from xattr_permission function (fs/xattr.c) also.
What if design will change and we will need to have some extra checks
here?

As I can see in xattr_permission function we have also:
		/*
		 * Updating an xattr will likely cause i_uid and i_gid
		 * to be writen back improperly if their true value is
		 * unknown to the vfs.
		 */
		if (HAS_UNMAPPED_ID(inode))
			return -EPERM;

Do we need to have this check here too?

> +		inode_unlock(ino);
> +	}
> +
> +out:
> +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
> +		 dentry, name, min((int)size, 48), value, size, flags, err,
> +		 propagate_cap);
> +	return err;
> +}
> +
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> +			   const char *name, const void *value, size_t size,
> +			   int xerr, int propagate_cap)
>  {
>  	int err;
>  	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
>  	if (ofs->noxattr)
>  		return xerr;
>  
> -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> +	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
> +			      propagate_cap);
>  
>  	if (err == -EOPNOTSUPP) {
>  		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 9c24acc..c164e83 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
>  
>  	return error;
>  }
> -
> +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
>  
>  int
>  vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
> -- 
> 1.8.3.1
> 
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
Alexander Mikhalitsyn Oct. 15, 2020, 1:07 p.m.
On Thu, 15 Oct 2020 16:03:52 +0300
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> wrote:

> Please, see comments inline
> 
> On Wed, 14 Oct 2020 02:05:21 +0300
> Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com> wrote:
> 
> > Overlayfs temporary override credentials in copy_up function to ones which was
> > used to create mount. Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> > capability in current user namespace. This leads to strange situations.
> > For example, if overlayfs mount was made inside ve it is impossible to use
> > copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
> > credentials are not sufficient in init_user_ns to set xattr to file.
> > This is also required for criu since copy_up can be triggered on dump stage:
> > reading inotify fhandle from /proc may start copy_up.
> > 
> > Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
> > have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.
> > 
> > https://jira.sw.ru/browse/PSBM-108122
> > Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>
> > ---
> >  fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
> >  fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
> >  fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
> >  fs/xattr.c               |  2 +-
> >  4 files changed, 74 insertions(+), 24 deletions(-)
> > 
> > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> > index 1564a35..d6b285f 100644
> > --- a/fs/overlayfs/copy_up.c
> > +++ b/fs/overlayfs/copy_up.c
> > @@ -20,6 +20,7 @@
> >  #include <linux/fdtable.h>
> >  #include <linux/ratelimit.h>
> >  #include <linux/exportfs.h>
> > +#include <linux/capability.h>
> >  #include "overlayfs.h"
> >  
> >  #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> > @@ -321,8 +322,8 @@ out:
> >  	return fh;
> >  }
> >  
> > -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> > -		   struct dentry *upper)
> > +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> > +		   struct dentry *upper, int propagate_cap)
> >  {
> >  	const struct ovl_fh *fh = NULL;
> >  	int err;
> > @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> >  	/*
> >  	 * Do not fail when upper doesn't support xattrs.
> >  	 */
> > -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
> > -				 fh ? fh->len : 0, 0);
> > +	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
> > +				 fh ? fh->len : 0, 0, propagate_cap);
> >  	kfree(fh);
> >  
> >  	return err;
> > @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
> >  	struct dentry *destdir;
> >  	struct qstr destname;
> >  	struct dentry *workdir;
> > +	int propagate_cap;
> >  	bool tmpfile;
> >  	bool origin;
> >  	bool indexed;
> > @@ -711,7 +713,7 @@ out:
> >  }
> >  
> >  static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> > -			   int flags)
> > +			   int flags, int propagate_cap)
> >  {
> >  	int err;
> >  	struct path parentpath;
> > @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> >  		.parent = parent,
> >  		.dentry = dentry,
> >  		.workdir = ovl_workdir(dentry),
> > +		.propagate_cap = propagate_cap,
> 
> Maybe I've missed something but I can't see where this field is used.
> 
> >  	};
> >  
> >  	if (WARN_ON(!ctx.workdir))
> > @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> >  	return err;
> >  }
> >  
> > +static int ovl_can_propagate_cap(struct dentry *dentry)
> > +{
> > +	struct super_block *sb = dentry->d_sb;
> > +	struct ovl_fs *ofs = sb->s_fs_info;
> > +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> > +
> > +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> > +}
> > +
> >  int ovl_copy_up_flags(struct dentry *dentry, int flags)
> >  {
> >  	int err = 0;
> > +	int propagate_cap = ovl_can_propagate_cap(dentry);
> >  	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
> >  	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
> >  
> > @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
> >  			next = parent;
> >  		}
> >  
> > -		err = ovl_copy_up_one(parent, next, flags);
> > +		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
> >  
> >  		dput(parent);
> >  		dput(next);
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index 7052938..6917acd 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
> >  	return err;
> >  }
> >  
> > -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> > -				  const void *value, size_t size, int flags)
> > -{
> > -	int err = vfs_setxattr(dentry, name, value, size, flags);
> > -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
> > -		 dentry, name, min((int)size, 48), value, size, flags, err);
> > -	return err;
> > -}
> > -
> >  static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
> >  {
> >  	int err = vfs_removexattr(dentry, name);
> > @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
> >  void ovl_copy_up_end(struct dentry *dentry);
> >  bool ovl_check_origin_xattr(struct dentry *dentry);
> >  bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> > -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> > +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> >  		       const char *name, const void *value, size_t size,
> > -		       int xerr);
> > +		       int xerr, int propagate_cap);
> > +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> > +			const void *value, size_t size, int flags,
> > +			int propagate_cap);
> >  int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
> >  void ovl_set_flag(unsigned long flag, struct inode *inode);
> >  void ovl_clear_flag(unsigned long flag, struct inode *inode);
> > @@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
> >  	return ofs->xino_bits;
> >  }
> >  
> > +static inline int ovl_check_setxattr(struct dentry *dentry,
> > +		struct dentry *upperdentry, const char *name,
> > +		const void *value, size_t size, int xerr)
> > +{
> > +	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
> > +				      xerr, 0);
> > +}
> > +
> > +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> > +		const void *value, size_t size, int flags)
> > +{
> > +	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
> > +}
> >  
> >  /* namei.c */
> >  int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> > @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
> >  int ovl_copy_xattr(struct dentry *old, struct dentry *new);
> >  int ovl_set_attr(struct dentry *upper, struct kstat *stat);
> >  struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
> > -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> > -		   struct dentry *upper);
> > +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> > +		   struct dentry *upper, int propagate_cap);
> > +
> > +static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> > +		   struct dentry *upper)
> > +{
> > +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> > +}
> >  
> >  /* export.c */
> >  extern const struct export_operations ovl_export_operations;
> > diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> > index 52a5116..30c10f1 100644
> > --- a/fs/overlayfs/util.c
> > +++ b/fs/overlayfs/util.c
> > @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
> >  	return false;
> >  }
> >  
> > -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> > -		       const char *name, const void *value, size_t size,
> > -		       int xerr)
> > +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> > +		const void *value, size_t size, int flags, int propagate_cap)
> > +{
> > +	int err = vfs_setxattr(dentry, name, value, size, flags);
> > +
> > +	if (propagate_cap && err == -EPERM) {
> > +		struct inode *ino = dentry->d_inode;
> > +
> > +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> > +			goto out;
> > +
> > +		inode_lock(ino);
> > +		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
> 
> Speaking honestly, this is looks as very unsafe solution because
> you have exported function that was "private" and as I can see you
> have copied some checks from xattr_permission function (fs/xattr.c) also.
> What if design will change and we will need to have some extra checks
> here?

I can also add here, that as an option we may create special function in
fs/xattr.c something like vfs_ovl_setxattr_noperm() and keep this function close
to xattr_permission() and __vfs_setxattr_noperm() and during rebase we have minimal
probability of errors here if design will change.

> 
> As I can see in xattr_permission function we have also:
> 		/*
> 		 * Updating an xattr will likely cause i_uid and i_gid
> 		 * to be writen back improperly if their true value is
> 		 * unknown to the vfs.
> 		 */
> 		if (HAS_UNMAPPED_ID(inode))
> 			return -EPERM;
> 
> Do we need to have this check here too?
> 
> > +		inode_unlock(ino);
> > +	}
> > +
> > +out:
> > +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
> > +		 dentry, name, min((int)size, 48), value, size, flags, err,
> > +		 propagate_cap);
> > +	return err;
> > +}
> > +
> > +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> > +			   const char *name, const void *value, size_t size,
> > +			   int xerr, int propagate_cap)
> >  {
> >  	int err;
> >  	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> > @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> >  	if (ofs->noxattr)
> >  		return xerr;
> >  
> > -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> > +	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
> > +			      propagate_cap);
> >  
> >  	if (err == -EOPNOTSUPP) {
> >  		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
> > diff --git a/fs/xattr.c b/fs/xattr.c
> > index 9c24acc..c164e83 100644
> > --- a/fs/xattr.c
> > +++ b/fs/xattr.c
> > @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
> >  
> >  	return error;
> >  }
> > -
> > +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
> >  
> >  int
> >  vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
> > -- 
> > 1.8.3.1
> > 
> > _______________________________________________
> > Devel mailing list
> > Devel@openvz.org
> > https://lists.openvz.org/mailman/listinfo/devel
> 
> 
> -- 
> Regards,
> Alex.