[2/3] image/dedup: use userns_call to be able to open images for write

Submitted by Pavel Tikhomirov on Sept. 28, 2018, 1:11 p.m.

Details

Message ID 20180928131135.19046-3-ptikhomirov@virtuozzo.com
State Accepted
Series "dedup: fix opening pages images for write"
Headers show

Commit Message

Pavel Tikhomirov Sept. 28, 2018, 1:11 p.m.
When restoring a task in its user namespace we had: "Unable to open
pages-3.img: Permission denied" when trying to open pages images for
write. (Write is needed for memory images deduplication feature to
work.)

This patch makes do_open_image open the image fd through usernsd, so that
the open is performed with root permissions.

Also increase MAX_UNSFD_MSG_SIZE as path and metadata don't fit, and
remove a workaround in prepare_vma_ios.

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 criu/image.c              | 42 +++++++++++++++++++++++++++++++++++++--
 criu/include/namespaces.h |  2 +-
 criu/mem.c                | 14 +++----------
 3 files changed, 44 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/image.c b/criu/image.c
index 3ca41c002..e99453659 100644
--- a/criu/image.c
+++ b/criu/image.c
@@ -18,6 +18,7 @@ 
 #include "images/pagemap.pb-c.h"
 #include "proc_parse.h"
 #include "img-remote.h"
+#include "namespaces.h"
 
 bool ns_per_id = false;
 bool img_common_magic = true;
@@ -412,6 +413,25 @@  int do_open_remote_image(int dfd, char *path, int flags)
 	return ret;
 }
 
+struct openat_args {
+	char	path[PATH_MAX];
+	int	flags;
+	int	err;
+	int	mode;
+};
+
+static int userns_openat(void *arg, int dfd, int pid)
+{
+	struct openat_args *pa = (struct openat_args *)arg;
+	int ret;
+
+	ret = openat(dfd, pa->path, pa->flags, pa->mode);
+	if (ret < 0)
+		pa->err = errno;
+
+	return ret;
+}
+
 static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long oflags, char *path)
 {
 	int ret, flags;
@@ -420,8 +440,26 @@  static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
 
 	if (opts.remote && !(oflags & O_FORCE_LOCAL))
 		ret = do_open_remote_image(dfd, path, flags);
-	else
-		ret = openat(dfd, path, flags, CR_FD_PERM);
+	else {
+		/*
+		 * For pages images dedup we need to open images read-write on
+		 * restore, that may require proper capabilities, so we ask
+		 * usernsd to do it for us
+		 */
+		if (root_ns_mask & CLONE_NEWUSER &&
+		    type == CR_FD_PAGES && oflags & O_RDWR) {
+			struct openat_args pa = {
+				.flags = flags,
+				.err = 0,
+				.mode = CR_FD_PERM,
+			};
+			snprintf(pa.path, PATH_MAX, "%s", path);
+			ret = userns_call(userns_openat, UNS_FDOUT, &pa, sizeof(struct openat_args), dfd);
+			if (ret < 0)
+				errno = pa.err;
+		} else
+			ret = openat(dfd, path, flags, CR_FD_PERM);
+	}
 	if (ret < 0) {
 		if (!(flags & O_CREAT) && (errno == ENOENT || ret == -ENOENT)) {
 			pr_info("No %s image\n", path);
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 5fe8038bf..287abb3c8 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -198,7 +198,7 @@  typedef int (*uns_call_t)(void *arg, int fd, pid_t pid);
  */
 #define UNS_FDOUT	0x2
 
-#define MAX_UNSFD_MSG_SIZE 4096
+#define MAX_UNSFD_MSG_SIZE 8192
 
 /*
  * When we're restoring inside user namespace, some things are
diff --git a/criu/mem.c b/criu/mem.c
index 284af2954..8015a7e4e 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -1299,20 +1299,12 @@  static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta)
 {
 	struct cr_img *pages;
 
-	/* if auto-dedup is on we need RDWR mode to be able to punch holes
-	 * in the input files (in restorer.c)
+	/*
+	 * If auto-dedup is on we need RDWR mode to be able to punch holes in
+	 * the input files (in restorer.c)
 	 */
 	pages = open_image(CR_FD_PAGES, opts.auto_dedup ? O_RDWR : O_RSTR,
 				rsti(t)->pages_img_id);
-	/* When running inside namespace we might lack privileges to open the file
-	 * for writing.
-	 * TODO: use userns_call to do the opening instead of downgrading to opening
-	 * read-only.
-	 */
-	if (!pages && opts.auto_dedup) {
-		pr_warn("Failed to open image read-write, trying read-only instead. auto-dedup won't work\n");
-		pages = open_image(CR_FD_PAGES, O_RSTR, rsti(t)->pages_img_id);
-	}
 	if (!pages)
 		return -1;