[v2,2/5] mount: delay restoring readonly mount flag until all files ready

Submitted by Pavel Tikhomirov on Sept. 27, 2018, 9:17 a.m.

Details

Message ID 20180927091736.10727-3-ptikhomirov@virtuozzo.com
State New
Series "support restoring ghost files on readonly mounts"
Headers show

Commit Message

Pavel Tikhomirov Sept. 27, 2018, 9:17 a.m.
We can have ghost-files on readonly mounts, for them we will need to
recreate the file on restore, and we can't do that if mount is readonly,
so the idea is to move restoring readonly mount flags after all files are
restored, before that restore process will see mounts writable.

There is an exception for overmounted mounts as it is not so easy to
set flags on them at these late point. Other exception is if the mount
has readonly superblock - there can be no ghost-files on such a mount.

The first point where we delay readonly is do_new_mount and the second
is do_bind_mount. The latter is a bit more complex as we need to handle
nesting from source mount which can be also delayed/undelayed.

I prefer leaving all mounts writable where possible as we might need
write access not only for ghost files but also link-remaps and may be
something else.

v2: minor commit message cleanup and remove warn

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 criu/cr-restore.c    |   2 +
 criu/include/mount.h |   2 +
 criu/mount.c         | 110 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 112 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 804a33f83..efefd2d79 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3266,6 +3266,8 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		/* Wait when all tasks restored all files */
 		if (restore_wait_other_tasks())
 			goto err_nv;
+		if (remount_readonly_mounts())
+			goto err_nv;
 	}
 
 	/*
diff --git a/criu/include/mount.h b/criu/include/mount.h
index e7d026264..776c348cd 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -128,4 +128,6 @@  extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool fo
 
 extern int check_mnt_id(void);
 
+extern int remount_readonly_mounts(void);
+
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 29370afc7..0bcc3a66d 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -2105,6 +2105,14 @@  static int do_new_mount(struct mount_info *mi)
 		close(fd);
 	}
 
+	/*
+	 * Restoring ghost files on readonly mounts requires write access,
+	 * remount_readonly_mounts() will set these flags after all files
+	 * restored
+	 */
+	if (!mnt_is_overmounted(mi))
+		mflags &= ~MS_RDONLY;
+
 	if (mflags && mount(NULL, mi->mountpoint, NULL,
 				MS_REMOUNT | MS_BIND | mflags, NULL)) {
 		pr_perror("Unable to apply bind-mount options");
@@ -2182,7 +2190,7 @@  static int do_bind_mount(struct mount_info *mi)
 {
 	char mnt_fd_path[PSFDS];
 	char *root, *cut_root, rpath[PATH_MAX];
-	unsigned long mflags;
+	unsigned long mflags, bmflags = 0;
 	int exit_code = -1, mp_len;
 	bool shared = false;
 	bool master = false;
@@ -2302,7 +2310,15 @@  static int do_bind_mount(struct mount_info *mi)
 	}
 
 	mflags = mi->flags & (~MS_PROPAGATE);
-	if (!mi->bind || mflags != (mi->bind->flags & (~MS_PROPAGATE)))
+	if (!mnt_is_overmounted(mi) && !(mi->sb_flags & MS_RDONLY))
+		mflags &= ~MS_RDONLY;
+	if (mi->bind) {
+		bmflags = mi->bind->flags & (~MS_PROPAGATE);
+		if (!mnt_is_overmounted(mi->bind) && !(mi->bind->sb_flags & MS_RDONLY))
+			bmflags &= ~MS_RDONLY;
+	}
+
+	if (!mi->bind || mflags != bmflags)
 		if (mount(NULL, mi->mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
 			pr_perror("Can't mount at %s", mi->mountpoint);
 			goto err;
@@ -3682,3 +3698,93 @@  void clean_cr_time_mounts(void)
 }
 
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
+
+int __remount_readonly_mounts(struct ns_id *ns)
+{
+	struct mount_info *mnt;
+
+	for (mnt = mntinfo; mnt; mnt = mnt->next) {
+		if (ns && mnt->nsid != ns)
+			continue;
+
+		if (mnt_is_overmounted(mnt))
+			continue;
+
+		if (mnt->sb_flags & MS_RDONLY)
+			continue;
+
+		if (!(mnt->flags & MS_RDONLY))
+			continue;
+
+		if (mount(NULL, mnt->ns_mountpoint, NULL,
+			  MS_REMOUNT | MS_BIND | (mnt->flags & (~MS_PROPAGATE)),
+			  NULL)) {
+			pr_perror("Failed to restore %d:%s mount flags %x",
+				  mnt->mnt_id, mnt->mountpoint, mnt->flags);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int ns_remount_readonly_mounts(void *arg)
+{
+	struct ns_id *nsid;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		int mntns_fd;
+
+		if (nsid->nd != &mnt_ns_desc)
+			continue;
+
+		mntns_fd = fdstore_get(nsid->mnt.nsfd_id);
+		if (mntns_fd < 0)
+			return 1;
+
+		if (setns(mntns_fd, CLONE_NEWNS) < 0) {
+			pr_perror("`- Can't switch");
+			close(mntns_fd);
+			return 1;
+		}
+		close(mntns_fd);
+
+		pr_info("Switched to mntns %u:%u/n", nsid->id, nsid->kid);
+
+		if (__remount_readonly_mounts(nsid))
+			return 1;
+	}
+
+	return 0;
+}
+
+int remount_readonly_mounts(void)
+{
+	int pid, status;
+
+	if (!(root_ns_mask & CLONE_NEWNS))
+		return __remount_readonly_mounts(NULL);
+
+	/*
+	 * Need a helper process because the root task can share fs via
+	 * CLONE_FS and we would not be able to enter mount namespaces
+	 */
+	pid = clone_noasan(ns_remount_readonly_mounts,
+			   CLONE_VFORK | CLONE_VM | CLONE_FILES
+			   | CLONE_IO | CLONE_SIGHAND
+			   | CLONE_SYSVSEM, NULL);
+	if (pid == -1) {
+		pr_perror("Can't clone helper process");
+		return -1;
+	}
+
+	errno = 0;
+	if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status)
+	    || WEXITSTATUS(status)) {
+		pr_err("Can't wait or bad status: errno=%d, status=%d\n",
+		       errno, status);
+		return -1;
+	}
+
+	return 0;
+}