criu: always enable the userfaultfd support

Submitted by Andrei Vagin on April 20, 2016, 5:20 a.m.

Details

Message ID 1461129652-1133-1-git-send-email-avagin@openvz.org
State Rejected, archived
Series "criu: always enable the userfaultfd support"
Headers show

Patch hide | download patch | download mbox

diff --git a/criu/Makefile.config b/criu/Makefile.config
index 326356e..15af65f 100644
--- a/criu/Makefile.config
+++ b/criu/Makefile.config
@@ -14,10 +14,6 @@  ifeq ($(call pkg-config-check,libselinux),y)
         DEFINES	+= -DCONFIG_HAS_SELINUX
 endif
 
-ifeq ($(call try-cc,$(FEATURE_TEST_UFFD)),y)
-	export UFFD := 1
-endif
-
 FEATURES_LIST	:= TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
 	SETPROCTITLE_INIT MEMFD
 
@@ -42,10 +38,6 @@  ifeq ($$(VDSO),y)
 	$(Q) @echo '#define CONFIG_VDSO'				>> $$@
 	$(Q) @echo ''							>> $$@
 endif
-ifeq ($$(UFFD),1)
-	$(Q) @echo '#define CONFIG_HAS_UFFD'				>> $$@
-	$(Q) @echo ''							>> $$@
-endif
 ifeq ($$(piegen-y),y)
 	$(Q) @echo '#define CONFIG_PIEGEN'				>> $$@
 	$(Q) @echo ''							>> $$@
diff --git a/criu/arch/arm/syscalls/syscall.def b/criu/arch/arm/syscalls/syscall.def
index a42c1b7..ffa807d 100644
--- a/criu/arch/arm/syscalls/syscall.def
+++ b/criu/arch/arm/syscalls/syscall.def
@@ -106,3 +106,4 @@  io_setup			0	243	(unsigned nr_events, aio_context_t *ctx)
 io_submit			2	246	(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
 io_getevents			4	245	(aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
 seccomp				277	383	(unsigned int op, unsigned int flags, const char *uargs)
+userfaultfd			282	388	(int flags)
diff --git a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
index 5087037..d629d5a 100644
--- a/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
+++ b/criu/arch/ppc64/syscalls/syscall-ppc64.tbl
@@ -103,3 +103,4 @@  __NR_io_setup		227		sys_io_setup		(unsigned nr_events, aio_context_t *ctx_idp)
 __NR_io_getevents	229		sys_io_getevents	(aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout)
 __NR_io_submit		230		sys_io_submit		(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
 __NR_ipc		117		sys_ipc			(unsigned int call, int first, unsigned long second, unsigned long third, const void *ptr, long fifth)
+__NR_userfaultfd	364		sys_userfaultfd		(int flags)
diff --git a/criu/arch/x86/syscalls/syscall_32.tbl b/criu/arch/x86/syscalls/syscall_32.tbl
index 53970f4..7c4867c 100644
--- a/criu/arch/x86/syscalls/syscall_32.tbl
+++ b/criu/arch/x86/syscalls/syscall_32.tbl
@@ -91,3 +91,4 @@  __NR_setns		346		sys_setns		(int fd, int nstype)
 __NR_kcmp		349		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
 __NR_seccomp		354		sys_seccomp		(unsigned int op, unsigned int flags, const char *uargs)
 __NR_memfd_create	356		sys_memfd_create	(const char *name, unsigned int flags)
+__NR_userfaultfd	374		sys_userfaultfd		(int flags)
diff --git a/criu/arch/x86/syscalls/syscall_64.tbl b/criu/arch/x86/syscalls/syscall_64.tbl
index 543e851..c0b8d8c 100644
--- a/criu/arch/x86/syscalls/syscall_64.tbl
+++ b/criu/arch/x86/syscalls/syscall_64.tbl
@@ -101,3 +101,4 @@  __NR_open_by_handle_at		304		sys_open_by_handle_at	(int mountdirfd, struct file_
 __NR_setns			308		sys_setns		(int fd, int nstype)
 __NR_kcmp			312		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
 __NR_memfd_create		319		sys_memfd_create	(const char *name, unsigned int flags)
+__NR_userfaultfd		323		sys_userfaultfd		(int flags)
diff --git a/criu/include/linux/userfaultfd.h b/criu/include/linux/userfaultfd.h
new file mode 100644
index 0000000..9057d7a
--- /dev/null
+++ b/criu/include/linux/userfaultfd.h
@@ -0,0 +1,167 @@ 
+/*
+ *  include/linux/userfaultfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *  Copyright (C) 2015  Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+#define UFFD_API ((__u64)0xAA)
+/*
+ * After implementing the respective features it will become:
+ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ *			      UFFD_FEATURE_EVENT_FORK)
+ */
+#define UFFD_API_FEATURES (0)
+#define UFFD_API_IOCTLS				\
+	((__u64)1 << _UFFDIO_REGISTER |		\
+	 (__u64)1 << _UFFDIO_UNREGISTER |	\
+	 (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS			\
+	((__u64)1 << _UFFDIO_WAKE |		\
+	 (__u64)1 << _UFFDIO_COPY |		\
+	 (__u64)1 << _UFFDIO_ZEROPAGE)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F.  UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctls can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER		(0x00)
+#define _UFFDIO_UNREGISTER		(0x01)
+#define _UFFDIO_WAKE			(0x02)
+#define _UFFDIO_COPY			(0x03)
+#define _UFFDIO_ZEROPAGE		(0x04)
+#define _UFFDIO_API			(0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API		_IOWR(UFFDIO, _UFFDIO_API,	\
+				      struct uffdio_api)
+#define UFFDIO_REGISTER		_IOWR(UFFDIO, _UFFDIO_REGISTER, \
+				      struct uffdio_register)
+#define UFFDIO_UNREGISTER	_IOR(UFFDIO, _UFFDIO_UNREGISTER,	\
+				     struct uffdio_range)
+#define UFFDIO_WAKE		_IOR(UFFDIO, _UFFDIO_WAKE,	\
+				     struct uffdio_range)
+#define UFFDIO_COPY		_IOWR(UFFDIO, _UFFDIO_COPY,	\
+				      struct uffdio_copy)
+#define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
+				      struct uffdio_zeropage)
+
+/* read() structure; attribute spelled out as __packed may be undefined in userspace */
+struct uffd_msg {
+	__u8	event;
+
+	__u8	reserved1;
+	__u16	reserved2;
+	__u32	reserved3;
+
+	union {
+		struct {
+			__u64	flags;
+			__u64	address;
+		} pagefault;
+
+		struct {
+			/* unused reserved fields */
+			__u64	reserved1;
+			__u64	reserved2;
+			__u64	reserved3;
+		} reserved;
+	} arg;
+} __attribute__((packed));
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT	0x12
+#if 0 /* not available yet */
+#define UFFD_EVENT_FORK		0x13
+#endif
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
+
+struct uffdio_api {
+	/* userland asks for an API number and the features to enable */
+	__u64 api;
+	/*
+	 * Kernel answers below with all the available features for
+	 * the API, this notifies userland of which events and/or
+	 * which flags for each event are enabled in the current
+	 * kernel.
+	 *
+	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+	 * are to be considered implicitly always enabled in all kernels as
+	 * long as the uffdio_api.api requested matches UFFD_API.
+	 */
+#if 0 /* not available yet */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
+#define UFFD_FEATURE_EVENT_FORK			(1<<1)
+#endif
+	__u64 features;
+
+	__u64 ioctls;
+};
+
+struct uffdio_range {
+	__u64 start;
+	__u64 len;
+};
+
+struct uffdio_register {
+	struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING	((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP		((__u64)1<<1)
+	__u64 mode;
+
+	/*
+	 * kernel answers which ioctl commands are available for the
+	 * range, keep at the end as the last 8 bytes aren't read.
+	 */
+	__u64 ioctls;
+};
+
+struct uffdio_copy {
+	__u64 dst;
+	__u64 src;
+	__u64 len;
+	/*
+	 * There will be a write-protection flag later that allows mapping
+	 * pages write-protected on the fly. Such a flag will be
+	 * available if the write-protection ioctls are implemented for the
+	 * range according to the uffdio_register.ioctls.
+	 */
+#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "copy" is written by the ioctl and must be at the end: the
+	 * copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 copy;
+};
+
+struct uffdio_zeropage {
+	struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "zeropage" is written by the ioctl and must be at the end:
+	 * the copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 zeropage;
+};
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/criu/include/uffd.h b/criu/include/uffd.h
index 6c931e2..4e1ba4a 100644
--- a/criu/include/uffd.h
+++ b/criu/include/uffd.h
@@ -1,22 +1,7 @@ 
 #ifndef __CR_UFFD_H_
 #define __CR_UFFD_H_
 
-#include "config.h"
-#include "restorer.h"
-
-#ifdef CONFIG_HAS_UFFD
-
-#include <syscall.h>
-#include <linux/userfaultfd.h>
-
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
-
+struct task_restore_args;
 extern int setup_uffd(struct task_restore_args *task_args, int pid);
-#else
-static inline int setup_uffd(struct task_restore_args *task_args, int pid) { return 0; }
-
-#endif /* CONFIG_HAS_UFFD */
 
 #endif /* __CR_UFFD_H_ */
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 59e1723..5fc8949 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -16,6 +16,8 @@ 
 #include <sys/resource.h>
 #include <signal.h>
 
+#include "linux/userfaultfd.h"
+
 #include "compiler.h"
 #include "asm/string.h"
 #include "asm/types.h"
@@ -704,16 +706,16 @@  static void rst_tcp_socks_all(struct task_restore_args *ta)
 
 static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
 {
+	int rc;
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+
 	/*
 	 * If uffd == -1, this means that userfaultfd is not enabled
 	 * or it is not available.
 	 */
 	if (uffd == -1)
 		return 0;
-#ifdef CONFIG_HAS_UFFD
-	int rc;
-	struct uffdio_register uffdio_register;
-	unsigned long expected_ioctls;
 
 	uffdio_register.range.start = addr;
 	uffdio_register.range.len = len;
@@ -733,7 +735,6 @@  static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
 		pr_err("lazy-pages: unexpected missing uffd ioctl for anon memory\n");
 	}
 
-#endif
 	return 0;
 }
 
diff --git a/criu/uffd.c b/criu/uffd.c
index 254a5b3..6f5e774 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -15,21 +15,23 @@ 
 #include <sys/un.h>
 #include <sys/socket.h>
 
+#include "linux/userfaultfd.h"
+
 #include "asm/page.h"
-#include "include/log.h"
-#include "include/criu-plugin.h"
-#include "include/page-read.h"
-#include "include/files-reg.h"
-#include "include/kerndat.h"
-#include "include/mem.h"
-#include "include/uffd.h"
-#include "include/util-pie.h"
-#include "include/pstree.h"
-#include "include/crtools.h"
-#include "include/cr_options.h"
+#include "log.h"
+#include "criu-plugin.h"
+#include "page-read.h"
+#include "files-reg.h"
+#include "kerndat.h"
+#include "mem.h"
+#include "uffd.h"
+#include "util-pie.h"
+#include "pstree.h"
+#include "crtools.h"
+#include "cr_options.h"
 #include "xmalloc.h"
-
-#ifdef CONFIG_HAS_UFFD
+#include "syscall-codes.h"
+#include "restorer.h"
 
 #undef  LOG_PREFIX
 #define LOG_PREFIX "lazy-pages: "
@@ -94,7 +96,7 @@  int setup_uffd(struct task_restore_args *task_args, int pid)
 	 * Open userfaulfd FD which is passed to the restorer blob and
 	 * to a second process handling the userfaultfd page faults.
 	 */
-	task_args->uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	task_args->uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 
 	/*
 	 * Check if the UFFD_API is the one which is expected
@@ -680,13 +682,3 @@  int cr_lazy_pages()
 
 	return ret;
 }
-
-#else /* CONFIG_HAS_UFFD */
-
-int cr_lazy_pages()
-{
-	pr_msg("userfaultfd system call is not supported, cannot start lazy-pages daemon\n");
-	return -1;
-}
-
-#endif /* CONFIG_HAS_UFFD */
diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
index c48b52e..c3dc85e 100644
--- a/scripts/feature-tests.mak
+++ b/scripts/feature-tests.mak
@@ -90,18 +90,3 @@  int main(void)
 }
 
 endef
-
-define FEATURE_TEST_UFFD
-
-#include <syscall.h>
-#include <linux/userfaultfd.h>
-
-int main(void)
-{
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
-	return 0;
-}
-
-endef