[v8,3/3] test: Add test for pidfd getfd

Submitted by Sargun Dhillon on Jan. 3, 2020, 4:29 p.m.

Details

Message ID 20200103162928.5271-4-sargun@sargun.me
State New
Series "Add pidfd_getfd syscall"
Headers show

Commit Message

Sargun Dhillon Jan. 3, 2020, 4:29 p.m.
The following tests:
  * Fetch FD, and then compare via kcmp
  * Make sure getfd can be blocked by blocking ptrace_may_access
  * Making sure fetching bad FDs fails
  * Make sure trying to set flags to non-zero results in an EINVAL

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
---
 tools/testing/selftests/pidfd/.gitignore      |   1 +
 tools/testing/selftests/pidfd/Makefile        |   4 +-
 .../selftests/pidfd/pidfd_getfd_test.c        | 227 ++++++++++++++++++
 3 files changed, 230 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/pidfd/pidfd_getfd_test.c

Patch hide | download patch | download mbox

diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 8d069490e17b..3a779c084d96 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -2,3 +2,4 @@  pidfd_open_test
 pidfd_poll_test
 pidfd_test
 pidfd_wait
+pidfd_getfd_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 43db1b98e845..2071f7ab5dc9 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,7 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/ -pthread
+CFLAGS += -g -I../../../../usr/include/ -pthread -Wall
 
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
new file mode 100644
index 000000000000..26ca75597812
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -0,0 +1,227 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/*
+ * UNKNOWN_FD is an fd number that should never exist in the child, as it is
+ * used to check the negative case.
+ */
+#define UNKNOWN_FD 111
+
+static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
+		    unsigned long idx2)
+{
+	return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
+}
+
+static int sys_pidfd_getfd(int pidfd, int fd, int flags)
+{
+	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
+}
+
+static int sys_memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
+static int __child(int sk, int memfd)
+{
+	int ret;
+	char buf;
+
+	/*
+	 * Ensure we don't leave around a bunch of orphaned children if our
+	 * tests fail.
+	 */
+	ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
+	if (ret) {
+		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
+			strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	ret = send(sk, &memfd, sizeof(memfd), 0);
+	if (ret != sizeof(memfd)) {
+		fprintf(stderr, "%s: Child failed to send fd number\n",
+			strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
+		if (buf == 'P') {
+			ret = prctl(PR_SET_DUMPABLE, 0);
+			if (ret < 0) {
+				fprintf(stderr,
+					"%s: Child failed to disable ptrace\n",
+					strerror(errno));
+				return EXIT_FAILURE;
+			}
+		} else {
+			fprintf(stderr, "Child received unknown command %c\n",
+				buf);
+			return EXIT_FAILURE;
+		}
+		ret = send(sk, &buf, sizeof(buf), 0);
+		if (ret != 1) {
+			fprintf(stderr, "%s: Child failed to ack\n",
+				strerror(errno));
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (ret < 0) {
+		fprintf(stderr, "%s: Child failed to read from socket\n",
+			strerror(errno));
+	}
+
+	return EXIT_SUCCESS;
+}
+
+static int child(int sk)
+{
+	int memfd, ret;
+
+	memfd = sys_memfd_create("test", 0);
+	if (memfd < 0) {
+		fprintf(stderr, "%s: Child could not create memfd\n",
+			strerror(errno));
+		ret = EXIT_FAILURE;
+	} else {
+		ret = __child(sk, memfd);
+		close(memfd);
+	}
+
+	close(sk);
+	return ret;
+}
+
+FIXTURE(child)
+{
+	pid_t pid;
+	int pidfd, sk, remote_fd;
+};
+
+FIXTURE_SETUP(child)
+{
+	int ret, sk_pair[2];
+
+	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair))
+	{
+		TH_LOG("%s: failed to create socketpair", strerror(errno));
+	}
+	self->sk = sk_pair[0];
+
+	self->pid = fork();
+	ASSERT_GE(self->pid, 0);
+
+	if (self->pid == 0) {
+		close(sk_pair[0]);
+		exit(child(sk_pair[1]));
+	}
+
+	close(sk_pair[1]);
+
+	self->pidfd = sys_pidfd_open(self->pid, 0);
+	ASSERT_GE(self->pidfd, 0);
+
+	/*
+	 * Wait for the child to complete setup. It'll send the remote memfd's
+	 * number when ready.
+	 */
+	ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
+	ASSERT_EQ(sizeof(self->remote_fd), ret);
+}
+
+FIXTURE_TEARDOWN(child)
+{
+	int status;
+
+	EXPECT_EQ(0, close(self->pidfd));
+	EXPECT_EQ(0, close(self->sk));
+
+	EXPECT_EQ(waitpid(self->pid, &status, 0), self->pid);
+	EXPECT_EQ(true, WIFEXITED(status));
+	EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST_F(child, disable_ptrace)
+{
+	int uid, fd;
+	char c;
+
+	/*
+	 * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
+	 *
+	 * The tests should run in their own process, so even this test fails,
+	 * it shouldn't result in subsequent tests failing.
+	 */
+	uid = getuid();
+	if (uid == 0)
+		ASSERT_EQ(0, seteuid(USHRT_MAX));
+
+	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
+	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
+
+	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
+	EXPECT_EQ(-1, fd);
+	EXPECT_EQ(EPERM, errno);
+
+	if (uid == 0)
+		ASSERT_EQ(0, seteuid(0));
+}
+
+TEST_F(child, fetch_fd)
+{
+	int fd, ret;
+
+	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
+	ASSERT_GE(fd, 0);
+
+	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
+
+	ret = fcntl(fd, F_GETFD);
+	ASSERT_GE(ret, 0);
+	EXPECT_GE(ret & FD_CLOEXEC, 0);
+
+	close(fd);
+}
+
+TEST_F(child, test_unknown_fd)
+{
+	int fd;
+
+	fd = sys_pidfd_getfd(self->pidfd, UNKNOWN_FD, 0);
+	EXPECT_EQ(-1, fd) {
+		TH_LOG("getfd succeeded while fetching unknown fd");
+	};
+	EXPECT_EQ(EBADF, errno) {
+		TH_LOG("%s: getfd did not get EBADF", strerror(errno));
+	}
+}
+
+TEST(flags_set)
+{
+	ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1));
+	EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_HARNESS_MAIN

Comments

Christian Brauner Jan. 5, 2020, 2:20 p.m.
On Fri, Jan 03, 2020 at 08:29:28AM -0800, Sargun Dhillon wrote:
> The following tests:
>   * Fetch FD, and then compare via kcmp
>   * Make sure getfd can be blocked by blocking ptrace_may_access
>   * Making sure fetching bad FDs fails
>   * Make sure trying to set flags to non-zero results in an EINVAL
> 
> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> ---
>  tools/testing/selftests/pidfd/.gitignore      |   1 +
>  tools/testing/selftests/pidfd/Makefile        |   4 +-
>  .../selftests/pidfd/pidfd_getfd_test.c        | 227 ++++++++++++++++++
>  3 files changed, 230 insertions(+), 2 deletions(-)
>  create mode 100644 tools/testing/selftests/pidfd/pidfd_getfd_test.c
> 
> diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
> index 8d069490e17b..3a779c084d96 100644
> --- a/tools/testing/selftests/pidfd/.gitignore
> +++ b/tools/testing/selftests/pidfd/.gitignore
> @@ -2,3 +2,4 @@ pidfd_open_test
>  pidfd_poll_test
>  pidfd_test
>  pidfd_wait
> +pidfd_getfd_test
> diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
> index 43db1b98e845..2071f7ab5dc9 100644
> --- a/tools/testing/selftests/pidfd/Makefile
> +++ b/tools/testing/selftests/pidfd/Makefile
> @@ -1,7 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -CFLAGS += -g -I../../../../usr/include/ -pthread
> +CFLAGS += -g -I../../../../usr/include/ -pthread -Wall
>  
> -TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait
> +TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
>  
>  include ../lib.mk
>  
> diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
> new file mode 100644
> index 000000000000..26ca75597812
> --- /dev/null
> +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
> @@ -0,0 +1,227 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#define _GNU_SOURCE
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <linux/types.h>
> +#include <sched.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <syscall.h>
> +#include <sys/prctl.h>
> +#include <sys/wait.h>
> +#include <unistd.h>
> +#include <sys/socket.h>
> +#include <linux/kcmp.h>
> +
> +#include "pidfd.h"
> +#include "../kselftest.h"
> +#include "../kselftest_harness.h"
> +
> +/*
> + * UNKNOWN_FD is an fd number that should never exist in the child, as it is
> + * used to check the negative case.
> + */
> +#define UNKNOWN_FD 111
> +
> +static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
> +		    unsigned long idx2)
> +{
> +	return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
> +}
> +
> +static int sys_pidfd_getfd(int pidfd, int fd, int flags)
> +{
> +	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> +}

I think you can move this to the pidfd.h header as:

static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
{
	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
}

Note, this also needs an

#ifndef __NR_pidfd_getfd
__NR_pidfd_getfd -1
#endif
so that compilation doesn't fail.

> +
> +static int sys_memfd_create(const char *name, unsigned int flags)
> +{
> +	return syscall(__NR_memfd_create, name, flags);
> +}
> +
> +static int __child(int sk, int memfd)
> +{
> +	int ret;
> +	char buf;
> +
> +	/*
> +	 * Ensure we don't leave around a bunch of orphaned children if our
> +	 * tests fail.
> +	 */
> +	ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
> +	if (ret) {
> +		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
> +			strerror(errno));
> +		return EXIT_FAILURE;

return -1

> +	}
> +
> +	ret = send(sk, &memfd, sizeof(memfd), 0);
> +	if (ret != sizeof(memfd)) {
> +		fprintf(stderr, "%s: Child failed to send fd number\n",
> +			strerror(errno));
> +		return EXIT_FAILURE;

return -1

> +	}
> +
> +	while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
> +		if (buf == 'P') {
> +			ret = prctl(PR_SET_DUMPABLE, 0);
> +			if (ret < 0) {
> +				fprintf(stderr,
> +					"%s: Child failed to disable ptrace\n",
> +					strerror(errno));
> +				return EXIT_FAILURE;

return -1

> +			}
> +		} else {
> +			fprintf(stderr, "Child received unknown command %c\n",
> +				buf);
> +			return EXIT_FAILURE;

return -1

> +		}
> +		ret = send(sk, &buf, sizeof(buf), 0);
> +		if (ret != 1) {
> +			fprintf(stderr, "%s: Child failed to ack\n",
> +				strerror(errno));
> +			return EXIT_FAILURE;

return -1

> +		}
> +	}
> +
> +	if (ret < 0) {
> +		fprintf(stderr, "%s: Child failed to read from socket\n",
> +			strerror(errno));

Is this intentional that this is no failure?

> +	}
> +
> +	return EXIT_SUCCESS;

return 0

> +}
> +
> +static int child(int sk)
> +{
> +	int memfd, ret;
> +
> +	memfd = sys_memfd_create("test", 0);
> +	if (memfd < 0) {
> +		fprintf(stderr, "%s: Child could not create memfd\n",
> +			strerror(errno));
> +		ret = EXIT_FAILURE;

ret = -1;

> +	} else {
> +		ret = __child(sk, memfd);
> +		close(memfd);
> +	}
> +
> +	close(sk);
> +	return ret;
> +}
> +
> +FIXTURE(child)
> +{
> +	pid_t pid;
> +	int pidfd, sk, remote_fd;
> +};
> +
> +FIXTURE_SETUP(child)
> +{
> +	int ret, sk_pair[2];
> +
> +	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair))
> +	{
> +		TH_LOG("%s: failed to create socketpair", strerror(errno));
> +	}
> +	self->sk = sk_pair[0];
> +
> +	self->pid = fork();
> +	ASSERT_GE(self->pid, 0);
> +
> +	if (self->pid == 0) {
> +		close(sk_pair[0]);
> +		exit(child(sk_pair[1]));

if (child(sk_pair[1]))
	_exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);

I would like to only use exit macros where one actually calls
{_}exit()s. It makes the logic easier to follow and ensures that one
doesn't accidently do an exit(-21345) or something (e.g. when adding new
code).

> +	}
> +
> +	close(sk_pair[1]);
> +
> +	self->pidfd = sys_pidfd_open(self->pid, 0);
> +	ASSERT_GE(self->pidfd, 0);
> +
> +	/*
> +	 * Wait for the child to complete setup. It'll send the remote memfd's
> +	 * number when ready.
> +	 */
> +	ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
> +	ASSERT_EQ(sizeof(self->remote_fd), ret);
> +}
> +
> +FIXTURE_TEARDOWN(child)
> +{
> +	int status;
> +
> +	EXPECT_EQ(0, close(self->pidfd));
> +	EXPECT_EQ(0, close(self->sk));
> +
> +	EXPECT_EQ(waitpid(self->pid, &status, 0), self->pid);
> +	EXPECT_EQ(true, WIFEXITED(status));
> +	EXPECT_EQ(0, WEXITSTATUS(status));
> +}
> +
> +TEST_F(child, disable_ptrace)
> +{
> +	int uid, fd;
> +	char c;
> +
> +	/*
> +	 * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
> +	 *
> +	 * The tests should run in their own process, so even this test fails,
> +	 * it shouldn't result in subsequent tests failing.
> +	 */
> +	uid = getuid();
> +	if (uid == 0)
> +		ASSERT_EQ(0, seteuid(USHRT_MAX));

Hm, isn't it safer to do 65535 explicitly? Since USHRT_MAX can
technically be greater than 65535.

> +
> +	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
> +	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
> +
> +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> +	EXPECT_EQ(-1, fd);
> +	EXPECT_EQ(EPERM, errno);
> +
> +	if (uid == 0)
> +		ASSERT_EQ(0, seteuid(0));
> +}
> +
> +TEST_F(child, fetch_fd)
> +{
> +	int fd, ret;
> +
> +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> +	ASSERT_GE(fd, 0);
> +
> +	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));

So most of these tests seem to take place when the child has already
called exit() - or at least it's very likely that the child has already
called exit() - and remains a zombie. That's not ideal because
that's not the common scenario/use-case. Usually the task of which we
want to get an fd will be alive. Also, if the child has already called
exit(), by the time it returns to userspace it should have already
called exit_files() and so I wonder whether this test would fail if it's
run after the child has exited. Maybe I'm missing something here... Is
there some ordering enforced by TEST_F()?

Also, what does self->pid point to? The fd of the already exited child?
Sargun Dhillon Jan. 5, 2020, 7:08 p.m.
On Sun, Jan 05, 2020 at 03:20:23PM +0100, Christian Brauner wrote:
> On Fri, Jan 03, 2020 at 08:29:28AM -0800, Sargun Dhillon wrote:
> > +static int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > +{
> > +	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > +}
> 
> I think you can move this to the pidfd.h header as:
> 
> static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
> {
> 	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> }
> 
> Note, this also needs an
> 
> #ifndef __NR_pidfd_getfd
> __NR_pidfd_getfd -1
> #endif
> so that compilation doesn't fail.
> 
I'll go ahead and move this into pidfd.h, and follow the pattern there. I
don't think it's worth checking if each time the return code is ENOSYS.

Does it make sense to add something like:
#ifdef __NR_pidfd_getfd
TEST_HARNESS_MAIN
#else
int main(void)
{
	fprintf(stderr, "pidfd_getfd syscall not supported\n");
	return KSFT_SKIP;
}
#endif

to short-circuit the entire test suite?


> > +
> > +static int sys_memfd_create(const char *name, unsigned int flags)
> > +{
> > +	return syscall(__NR_memfd_create, name, flags);
> > +}
> > +
> > +static int __child(int sk, int memfd)
> > +{
> > +	int ret;
> > +	char buf;
> > +
> > +	/*
> > +	 * Ensure we don't leave around a bunch of orphaned children if our
> > +	 * tests fail.
> > +	 */
> > +	ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
> > +	if (ret) {
> > +		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
> > +			strerror(errno));
> > +		return EXIT_FAILURE;
> 
> return -1
> 
> > +	}
> > +
> > +	ret = send(sk, &memfd, sizeof(memfd), 0);
> > +	if (ret != sizeof(memfd)) {
> > +		fprintf(stderr, "%s: Child failed to send fd number\n",
> > +			strerror(errno));
> > +		return EXIT_FAILURE;
> 
> return -1
> 
> > +	}
> > +
> > +	while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
> > +		if (buf == 'P') {
> > +			ret = prctl(PR_SET_DUMPABLE, 0);
> > +			if (ret < 0) {
> > +				fprintf(stderr,
> > +					"%s: Child failed to disable ptrace\n",
> > +					strerror(errno));
> > +				return EXIT_FAILURE;
> 
> return -1
> 
> > +			}
> > +		} else {
> > +			fprintf(stderr, "Child received unknown command %c\n",
> > +				buf);
> > +			return EXIT_FAILURE;
> 
> return -1
> 
> > +		}
> > +		ret = send(sk, &buf, sizeof(buf), 0);
> > +		if (ret != 1) {
> > +			fprintf(stderr, "%s: Child failed to ack\n",
> > +				strerror(errno));
> > +			return EXIT_FAILURE;
> 
> return -1
> 
> > +		}
> > +	}
> > +
> > +	if (ret < 0) {
> > +		fprintf(stderr, "%s: Child failed to read from socket\n",
> > +			strerror(errno));
> 
> Is this intentional that this is no failure?
> 
My thought here, is the only case where this should happen is if the "ptrace 
command" was not properly "transmitted", and the ptrace test itself would fail.

I can add an explicit exit failure here.

> > +	}
> > +
> > +	return EXIT_SUCCESS;
> 
> return 0
> 
> > +}
> > +
> > +static int child(int sk)
> > +{
> > +	int memfd, ret;
> > +
> > +	memfd = sys_memfd_create("test", 0);
> > +	if (memfd < 0) {
> > +		fprintf(stderr, "%s: Child could not create memfd\n",
> > +			strerror(errno));
> > +		ret = EXIT_FAILURE;
> 
> ret = -1;
> 
> > +	} else {
> > +		ret = __child(sk, memfd);
> > +		close(memfd);
> > +	}
> > +
> > +	close(sk);
> > +	return ret;
> > +}
> > +
> > +FIXTURE(child)
> > +{
> > +	pid_t pid;
> > +	int pidfd, sk, remote_fd;
> > +};
> > +
> > +FIXTURE_SETUP(child)
> > +{
> > +	int ret, sk_pair[2];
> > +
> > +	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair))
> > +	{
> > +		TH_LOG("%s: failed to create socketpair", strerror(errno));
> > +	}
> > +	self->sk = sk_pair[0];
> > +
> > +	self->pid = fork();
> > +	ASSERT_GE(self->pid, 0);
> > +
> > +	if (self->pid == 0) {
> > +		close(sk_pair[0]);
> > +		exit(child(sk_pair[1]));
> 
> if (child(sk_pair[1]))
> 	_exit(EXIT_FAILURE);
> _exit(EXIT_SUCCESS);
> 
> I would like to only use exit macros where one actually calls
> {_}exit()s. It makes the logic easier to follow and ensures that one
> doesn't accidently do an exit(-21345) or something (e.g. when adding new
> code).
> 
> > +	}
> > +
> > +	close(sk_pair[1]);
> > +
> > +	self->pidfd = sys_pidfd_open(self->pid, 0);
> > +	ASSERT_GE(self->pidfd, 0);
> > +
> > +	/*
> > +	 * Wait for the child to complete setup. It'll send the remote memfd's
> > +	 * number when ready.
> > +	 */
> > +	ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
> > +	ASSERT_EQ(sizeof(self->remote_fd), ret);
> > +}
> > +
> > +FIXTURE_TEARDOWN(child)
> > +{
> > +	int status;
> > +
> > +	EXPECT_EQ(0, close(self->pidfd));
> > +	EXPECT_EQ(0, close(self->sk));
> > +
> > +	EXPECT_EQ(waitpid(self->pid, &status, 0), self->pid);
> > +	EXPECT_EQ(true, WIFEXITED(status));
> > +	EXPECT_EQ(0, WEXITSTATUS(status));
> > +}
> > +
> > +TEST_F(child, disable_ptrace)
> > +{
> > +	int uid, fd;
> > +	char c;
> > +
> > +	/*
> > +	 * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
> > +	 *
> > +	 * The tests should run in their own process, so even this test fails,
> > +	 * it shouldn't result in subsequent tests failing.
> > +	 */
> > +	uid = getuid();
> > +	if (uid == 0)
> > +		ASSERT_EQ(0, seteuid(USHRT_MAX));
> 
> Hm, isn't it safer to do 65535 explicitly? Since USHRT_MAX can
> technically be greater than 65535.
> 
I borrowed this from the BPF tests. I can hardcode something like:
#define NOBODY_UID 65535
and setuid to that, if you think it's safer?

> > +
> > +	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
> > +	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
> > +
> > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > +	EXPECT_EQ(-1, fd);
> > +	EXPECT_EQ(EPERM, errno);
> > +
> > +	if (uid == 0)
> > +		ASSERT_EQ(0, seteuid(0));
> > +}
> > +
> > +TEST_F(child, fetch_fd)
> > +{
> > +	int fd, ret;
> > +
> > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > +	ASSERT_GE(fd, 0);
> > +
> > +	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
> 
> So most of these tests seem to take place when the child has already
> called exit() - or at least it's very likely that the child has already
> called exit() - and remains a zombie. That's not ideal because
> that's not the common scenario/use-case. Usually the task of which we
> want to get an fd will be alive. Also, if the child has already called
> exit(), by the time it returns to userspace it should have already
> called exit_files() and so I wonder whether this test would fail if it's
> run after the child has exited. Maybe I'm missing something here... Is
> there some ordering enforced by TEST_F()?
Yeah, I think perhaps I was being too clever.
The timeline roughly goes something like this:

# Fixture bringup
[parent] creates socket_pair
[parent] forks, and passes pair down to child
[parent] waits to read sizeof(int) from the sk_pair
[child] creates memfd 
[__child] sends local memfd number to parent via sk_pair
[__child] waits to read from sk_pair
[parent] reads remote memfd number from socket
# Test
[parent] performs tests
# Fixture teardown
[parent] closes sk_pair
[__child] reads 0 from recv on sk_pair, implies the other end is closed
[__child] Returns / exits 0
[parent] Reaps child / reads exit code

---
The one case where this is not true, is if the parent sends 'P' to the sk pair,
it triggers setting PR_SET_DUMPABLE to 0, and then resumes waiting for the fd to 
close.

Maybe I'm being too clever? Instead, the alternative was to send explicit stop / 
start messages across the sk_pair, but that got kind of ugly. Do you have a 
better suggestion?

> 
> Also, what does self->pid point to? The fd of the already exited child?
It's just the pid of the child. pidfd is the fd of the (unexited) child.
Christian Brauner Jan. 6, 2020, 5:19 p.m.
On Sun, Jan 05, 2020 at 07:08:13PM +0000, Sargun Dhillon wrote:
> On Sun, Jan 05, 2020 at 03:20:23PM +0100, Christian Brauner wrote:
> > On Fri, Jan 03, 2020 at 08:29:28AM -0800, Sargun Dhillon wrote:
> > > +static int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > > +{
> > > +	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > > +}
> > 
> > I think you can move this to the pidfd.h header as:
> > 
> > static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > {
> > 	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > }
> > 
> > Note, this also needs an
> > 
> > #ifndef __NR_pidfd_getfd
> > __NR_pidfd_getfd -1
> > #endif
> > so that compilation doesn't fail.
> > 
> I'll go ahead and move this into pidfd.h, and follow the pattern there. I
> don't think it's worth checking if each time the return code is ENOSYS.
> 
> Does it make sense to add something like:
> #ifdef __NR_pidfd_getfd
> TEST_HARNESS_MAIN
> #else
> int main(void)
> {
> 	fprintf(stderr, "pidfd_getfd syscall not supported\n");
> 	return KSFT_SKIP;
> }
> #endif
> 
> to short-circuit the entire test suite?

You mean the getfd testsuite? If so and that works, then sounds like a
good idea to me.

> 
> 
> > > +
> > > +static int sys_memfd_create(const char *name, unsigned int flags)
> > > +{
> > > +	return syscall(__NR_memfd_create, name, flags);
> > > +}
> > > +
> > > +static int __child(int sk, int memfd)
> > > +{
> > > +	int ret;
> > > +	char buf;
> > > +
> > > +	/*
> > > +	 * Ensure we don't leave around a bunch of orphaned children if our
> > > +	 * tests fail.
> > > +	 */
> > > +	ret = prctl(PR_SET_PDEATHSIG, SIGKILL);
> > > +	if (ret) {
> > > +		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
> > > +			strerror(errno));
> > > +		return EXIT_FAILURE;
> > 
> > return -1
> > 
> > > +	}
> > > +
> > > +	ret = send(sk, &memfd, sizeof(memfd), 0);
> > > +	if (ret != sizeof(memfd)) {
> > > +		fprintf(stderr, "%s: Child failed to send fd number\n",
> > > +			strerror(errno));
> > > +		return EXIT_FAILURE;
> > 
> > return -1
> > 
> > > +	}
> > > +
> > > +	while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
> > > +		if (buf == 'P') {
> > > +			ret = prctl(PR_SET_DUMPABLE, 0);
> > > +			if (ret < 0) {
> > > +				fprintf(stderr,
> > > +					"%s: Child failed to disable ptrace\n",
> > > +					strerror(errno));
> > > +				return EXIT_FAILURE;
> > 
> > return -1
> > 
> > > +			}
> > > +		} else {
> > > +			fprintf(stderr, "Child received unknown command %c\n",
> > > +				buf);
> > > +			return EXIT_FAILURE;
> > 
> > return -1
> > 
> > > +		}
> > > +		ret = send(sk, &buf, sizeof(buf), 0);
> > > +		if (ret != 1) {
> > > +			fprintf(stderr, "%s: Child failed to ack\n",
> > > +				strerror(errno));
> > > +			return EXIT_FAILURE;
> > 
> > return -1
> > 
> > > +		}
> > > +	}
> > > +
> > > +	if (ret < 0) {
> > > +		fprintf(stderr, "%s: Child failed to read from socket\n",
> > > +			strerror(errno));
> > 
> > Is this intentional that this is no failure?
> > 
> My thought here, is the only case where this should happen is if the "ptrace 
> command" was not properly "transmitted", and the ptrace test itself would fail.
> 
> I can add an explicit exit failure here.

Ok.

> 
> > > +	}
> > > +
> > > +	return EXIT_SUCCESS;
> > 
> > return 0
> > 
> > > +}
> > > +
> > > +static int child(int sk)
> > > +{
> > > +	int memfd, ret;
> > > +
> > > +	memfd = sys_memfd_create("test", 0);
> > > +	if (memfd < 0) {
> > > +		fprintf(stderr, "%s: Child could not create memfd\n",
> > > +			strerror(errno));
> > > +		ret = EXIT_FAILURE;
> > 
> > ret = -1;
> > 
> > > +	} else {
> > > +		ret = __child(sk, memfd);
> > > +		close(memfd);
> > > +	}
> > > +
> > > +	close(sk);
> > > +	return ret;
> > > +}
> > > +
> > > +FIXTURE(child)
> > > +{
> > > +	pid_t pid;
> > > +	int pidfd, sk, remote_fd;
> > > +};
> > > +
> > > +FIXTURE_SETUP(child)
> > > +{
> > > +	int ret, sk_pair[2];
> > > +
> > > +	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair))
> > > +	{
> > > +		TH_LOG("%s: failed to create socketpair", strerror(errno));
> > > +	}
> > > +	self->sk = sk_pair[0];
> > > +
> > > +	self->pid = fork();
> > > +	ASSERT_GE(self->pid, 0);
> > > +
> > > +	if (self->pid == 0) {
> > > +		close(sk_pair[0]);
> > > +		exit(child(sk_pair[1]));
> > 
> > if (child(sk_pair[1]))
> > 	_exit(EXIT_FAILURE);
> > _exit(EXIT_SUCCESS);
> > 
> > I would like to only use exit macros where one actually calls
> > {_}exit()s. It makes the logic easier to follow and ensures that one
> > doesn't accidently do an exit(-21345) or something (e.g. when adding new
> > code).
> > 
> > > +	}
> > > +
> > > +	close(sk_pair[1]);
> > > +
> > > +	self->pidfd = sys_pidfd_open(self->pid, 0);
> > > +	ASSERT_GE(self->pidfd, 0);
> > > +
> > > +	/*
> > > +	 * Wait for the child to complete setup. It'll send the remote memfd's
> > > +	 * number when ready.
> > > +	 */
> > > +	ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0);
> > > +	ASSERT_EQ(sizeof(self->remote_fd), ret);
> > > +}
> > > +
> > > +FIXTURE_TEARDOWN(child)
> > > +{
> > > +	int status;
> > > +
> > > +	EXPECT_EQ(0, close(self->pidfd));
> > > +	EXPECT_EQ(0, close(self->sk));
> > > +
> > > +	EXPECT_EQ(waitpid(self->pid, &status, 0), self->pid);
> > > +	EXPECT_EQ(true, WIFEXITED(status));
> > > +	EXPECT_EQ(0, WEXITSTATUS(status));
> > > +}
> > > +
> > > +TEST_F(child, disable_ptrace)
> > > +{
> > > +	int uid, fd;
> > > +	char c;
> > > +
> > > +	/*
> > > +	 * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE
> > > +	 *
> > > +	 * The tests should run in their own process, so even this test fails,
> > > +	 * it shouldn't result in subsequent tests failing.
> > > +	 */
> > > +	uid = getuid();
> > > +	if (uid == 0)
> > > +		ASSERT_EQ(0, seteuid(USHRT_MAX));
> > 
> > Hm, isn't it safer to do 65535 explicitly? Since USHRT_MAX can
> > technically be greater than 65535.
> > 
> I borrowed this from the BPF tests. I can hardcode something like:
> #define NOBODY_UID 65535
> and setuid to that, if you think it's safer?

If you want to specifically seteuid() to 65535 then yes, using the
hard-coded number or using a dedicated macro seems better.

> 
> > > +
> > > +	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
> > > +	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
> > > +
> > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > +	EXPECT_EQ(-1, fd);
> > > +	EXPECT_EQ(EPERM, errno);
> > > +
> > > +	if (uid == 0)
> > > +		ASSERT_EQ(0, seteuid(0));
> > > +}
> > > +
> > > +TEST_F(child, fetch_fd)
> > > +{
> > > +	int fd, ret;
> > > +
> > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > +	ASSERT_GE(fd, 0);
> > > +
> > > +	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
> > 
> > So most of these tests seem to take place when the child has already
> > called exit() - or at least it's very likely that the child has already
> > called exit() - and remains a zombie. That's not ideal because
> > that's not the common scenario/use-case. Usually the task of which we
> > want to get an fd will be alive. Also, if the child has already called
> > exit(), by the time it returns to userspace it should have already
> > called exit_files() and so I wonder whether this test would fail if it's
> > run after the child has exited. Maybe I'm missing something here... Is
> > there some ordering enforced by TEST_F()?
> Yeah, I think perhaps I was being too clever.
> The timeline roughly goes something like this:
> 
> # Fixture bringup
> [parent] creates socket_pair
> [parent] forks, and passes pair down to child
> [parent] waits to read sizeof(int) from the sk_pair
> [child] creates memfd 
> [__child] sends local memfd number to parent via sk_pair
> [__child] waits to read from sk_pair
> [parent] reads remote memfd number from socket
> # Test
> [parent] performs tests
> # Fixture teardown
> [parent] closes sk_pair
> [__child] reads 0 from recv on sk_pair, implies the other end is closed
> [__child] Returns / exits 0
> [parent] Reaps child / reads exit code
> 
> ---
> The one case where this is not true, is if the parent sends 'P' to the sk pair,
> it triggers setting PR_SET_DUMPABLE to 0, and then resumes waiting for the fd to 
> close.
> 
> Maybe I'm being too clever? Instead, the alternative was to send explicit stop / 
> start messages across the sk_pair, but that got kind of ugly. Do you have a 
> better suggestion?

If I understand correctly you just need to block the child to stop it
from exiting. Couldn't you do this by simply calling recv() on the
socket in the child thereby blocking it? At the end you just send a
final message to proceed and if that doesn't work SIGKILL it?

> 
> > 
> > Also, what does self->pid point to? The fd of the already exited child?
> It's just the pid of the child. pidfd is the fd of the (unexited) child.

Ah, thanks!
Christian
Sargun Dhillon Jan. 6, 2020, 9:06 p.m.
On Mon, Jan 06, 2020 at 06:19:41PM +0100, Christian Brauner wrote:
> On Sun, Jan 05, 2020 at 07:08:13PM +0000, Sargun Dhillon wrote:
> > On Sun, Jan 05, 2020 at 03:20:23PM +0100, Christian Brauner wrote:
> > > On Fri, Jan 03, 2020 at 08:29:28AM -0800, Sargun Dhillon wrote:
> > > > +static int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > > > +{
> > > > +	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > > > +}
> > > 
> > > I think you can move this to the pidfd.h header as:
> > > 
> > > static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > > {
> > > 	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > > }
> > > 
> > > Note, this also needs an
> > > 
> > > #ifndef __NR_pidfd_getfd
> > > __NR_pidfd_getfd -1
> > > #endif
> > > so that compilation doesn't fail.
> > > 
> > I'll go ahead and move this into pidfd.h, and follow the pattern there. I
> > don't think it's worth checking if each time the return code is ENOSYS.
> > 
> > Does it make sense to add something like:
> > #ifdef __NR_pidfd_getfd
> > TEST_HARNESS_MAIN
> > #else
> > int main(void)
> > {
> > 	fprintf(stderr, "pidfd_getfd syscall not supported\n");
> > 	return KSFT_SKIP;
> > }
> > #endif
> > 
> > to short-circuit the entire test suite?
> 
> You mean the getfd testsuite? If so and that works, then sounds like a
> good idea to me.
> 
> > 
> > 
> > 
> > > 
> > > Hm, isn't it safer to do 65535 explicitly? Since USHRT_MAX can
> > > technically be greater than 65535.
> > > 
> > I borrowed this from the BPF tests. I can hardcode something like:
> > #define NOBODY_UID 65535
> > and setuid to that, if you think it's safer?
> 
> If you want to specifically seteuid() to 65535 then yes, using the
> hard-coded number or using a dedicated macro seems better.
> 
> > 
> > > > +
> > > > +	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
> > > > +	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
> > > > +
> > > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > > +	EXPECT_EQ(-1, fd);
> > > > +	EXPECT_EQ(EPERM, errno);
> > > > +
> > > > +	if (uid == 0)
> > > > +		ASSERT_EQ(0, seteuid(0));
> > > > +}
> > > > +
> > > > +TEST_F(child, fetch_fd)
> > > > +{
> > > > +	int fd, ret;
> > > > +
> > > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > > +	ASSERT_GE(fd, 0);
> > > > +
> > > > +	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
> > > 
> > > So most of these tests seem to take place when the child has already
> > > called exit() - or at least it's very likely that the child has already
> > > called exit() - and remains a zombie. That's not ideal because
> > > that's not the common scenario/use-case. Usually the task of which we
> > > want to get an fd will be alive. Also, if the child has already called
> > > exit(), by the time it returns to userspace it should have already
> > > called exit_files() and so I wonder whether this test would fail if it's
> > > run after the child has exited. Maybe I'm missing something here... Is
> > > there some ordering enforced by TEST_F()?
> > Yeah, I think perhaps I was being too clever.
> > The timeline roughly goes something like this:
> > 
> > # Fixture bringup
> > [parent] creates socket_pair
> > [parent] forks, and passes pair down to child
> > [parent] waits to read sizeof(int) from the sk_pair
> > [child] creates memfd 
> > [__child] sends local memfd number to parent via sk_pair
> > [__child] waits to read from sk_pair
> > [parent] reads remote memfd number from socket
> > # Test
> > [parent] performs tests
> > # Fixture teardown
> > [parent] closes sk_pair
> > [__child] reads 0 from recv on sk_pair, implies the other end is closed
> > [__child] Returns / exits 0
> > [parent] Reaps child / reads exit code
> > 
> > ---
> > The one case where this is not true, is if the parent sends 'P' to the sk pair,
> > it triggers setting PR_SET_DUMPABLE to 0, and then resumes waiting for the fd to 
> > close.
> > 
> > Maybe I'm being too clever? Instead, the alternative was to send explicit stop / 
> > start messages across the sk_pair, but that got kind of ugly. Do you have a 
> > better suggestion?
> 
> If I understand correctly you just need to block the child to stop it
> from exiting. Couldn't you do this by simply calling recv() on the
> socket in the child thereby blocking it? At the end you just send a
> final message to proceed and if that doesn't work SIGKILL it?
> 
This already exists in:
while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
	if (buf == 'P') {
		ret = prctl(PR_SET_DUMPABLE, 0);
		if (ret < 0) {
			fprintf(stderr,
				"%s: Child failed to disable ptrace\n",
				strerror(errno));
			return -1;
		}
	} else {
		fprintf(stderr, "Child received unknown command %c\n",
			buf);
		return -1;
	}
	ret = send(sk, &buf, sizeof(buf), 0);
	if (ret != 1) {
		fprintf(stderr, "%s: Child failed to ack\n",
			strerror(errno));
		return -1;
	}
}
----
This will block until the close(self->sk) in the fixture teardown. Then ret
returns 0, and the child should exit. Maybe a comment like:
/*
 * The fixture setup is completed at this point. The tests will run.
 *
 * Either we will read 'P' off of the sk, indicating that we need
 * to disable ptrace, or if the other side of the socket is closed
 * recv will return 0-bytes. This indicates that the fixture teardown
 * has occured, and the child should exit.
 */
would be useful?

> > 
> > > 
> > > Also, what does self->pid point to? The fd of the already exited child?
> > It's just the pid of the child. pidfd is the fd of the (unexited) child.
I have no idea if it's pro / against the commenting style to blow up that
structure:
FIXTURE(child)
{
	/* pid points to the child which we are fetching FDs from */
	pid_t pid;
	/* pidfd is the pidfd of the child */
	int pidfd;
	/*
	 * sk is our side of the socketpair used to communicate with the child.
	 * When it is closed, the child will exit.
	 */
	int sk;
	/*
	 * remote_fd is the number of the FD which we are trying to retrieve
	 * from the child.
	 */
	int remote_fd;
};

> 
> Ah, thanks!
> Christian
Christian Brauner Jan. 7, 2020, 8:55 a.m.
On Mon, Jan 06, 2020 at 09:06:47PM +0000, Sargun Dhillon wrote:
> On Mon, Jan 06, 2020 at 06:19:41PM +0100, Christian Brauner wrote:
> > On Sun, Jan 05, 2020 at 07:08:13PM +0000, Sargun Dhillon wrote:
> > > On Sun, Jan 05, 2020 at 03:20:23PM +0100, Christian Brauner wrote:
> > > > On Fri, Jan 03, 2020 at 08:29:28AM -0800, Sargun Dhillon wrote:
> > > > > +static int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > > > > +{
> > > > > +	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > > > > +}
> > > > 
> > > > I think you can move this to the pidfd.h header as:
> > > > 
> > > > static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
> > > > {
> > > > 	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
> > > > }
> > > > 
> > > > Note, this also needs an
> > > > 
> > > > #ifndef __NR_pidfd_getfd
> > > > __NR_pidfd_getfd -1
> > > > #endif
> > > > so that compilation doesn't fail.
> > > > 
> > > I'll go ahead and move this into pidfd.h, and follow the pattern there. I
> > > don't think it's worth checking if each time the return code is ENOSYS.
> > > 
> > > Does it make sense to add something like:
> > > #ifdef __NR_pidfd_getfd
> > > TEST_HARNESS_MAIN
> > > #else
> > > int main(void)
> > > {
> > > 	fprintf(stderr, "pidfd_getfd syscall not supported\n");
> > > 	return KSFT_SKIP;
> > > }
> > > #endif
> > > 
> > > to short-circuit the entire test suite?
> > 
> > You mean the getfd testsuite? If so and that works, then sounds like a
> > good idea to me.
> > 
> > > 
> > > 
> > > 
> > > > 
> > > > Hm, isn't it safer to do 65535 explicitly? Since USHRT_MAX can
> > > > technically be greater than 65535.
> > > > 
> > > I borrowed this from the BPF tests. I can hardcode something like:
> > > #define NOBODY_UID 65535
> > > and setuid to that, if you think it's safer?
> > 
> > If you want to specifically seteuid() to 65535 then yes, using the
> > hard-coded number or using a dedicated macro seems better.
> > 
> > > 
> > > > > +
> > > > > +	ASSERT_EQ(1, send(self->sk, "P", 1, 0));
> > > > > +	ASSERT_EQ(1, recv(self->sk, &c, 1, 0));
> > > > > +
> > > > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > > > +	EXPECT_EQ(-1, fd);
> > > > > +	EXPECT_EQ(EPERM, errno);
> > > > > +
> > > > > +	if (uid == 0)
> > > > > +		ASSERT_EQ(0, seteuid(0));
> > > > > +}
> > > > > +
> > > > > +TEST_F(child, fetch_fd)
> > > > > +{
> > > > > +	int fd, ret;
> > > > > +
> > > > > +	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
> > > > > +	ASSERT_GE(fd, 0);
> > > > > +
> > > > > +	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
> > > > 
> > > > So most of these tests seem to take place when the child has already
> > > > called exit() - or at least it's very likely that the child has already
> > > > called exit() - and remains a zombie. That's not ideal because
> > > > that's not the common scenario/use-case. Usually the task of which we
> > > > want to get an fd will be alive. Also, if the child has already called
> > > > exit(), by the time it returns to userspace it should have already
> > > > called exit_files() and so I wonder whether this test would fail if it's
> > > > run after the child has exited. Maybe I'm missing something here... Is
> > > > there some ordering enforced by TEST_F()?
> > > Yeah, I think perhaps I was being too clever.
> > > The timeline roughly goes something like this:
> > > 
> > > # Fixture bringup
> > > [parent] creates socket_pair
> > > [parent] forks, and passes pair down to child
> > > [parent] waits to read sizeof(int) from the sk_pair
> > > [child] creates memfd 
> > > [__child] sends local memfd number to parent via sk_pair
> > > [__child] waits to read from sk_pair
> > > [parent] reads remote memfd number from socket
> > > # Test
> > > [parent] performs tests
> > > # Fixture teardown
> > > [parent] closes sk_pair
> > > [__child] reads 0 from recv on sk_pair, implies the other end is closed
> > > [__child] Returns / exits 0
> > > [parent] Reaps child / reads exit code
> > > 
> > > ---
> > > The one case where this is not true, is if the parent sends 'P' to the sk pair,
> > > it triggers setting PR_SET_DUMPABLE to 0, and then resumes waiting for the fd to 
> > > close.
> > > 
> > > Maybe I'm being too clever? Instead, the alternative was to send explicit stop / 
> > > start messages across the sk_pair, but that got kind of ugly. Do you have a 
> > > better suggestion?
> > 
> > If I understand correctly you just need to block the child to stop it
> > from exiting. Couldn't you do this by simply calling recv() on the
> > socket in the child thereby blocking it? At the end you just send a
> > final message to proceed and if that doesn't work SIGKILL it?
> > 
> This already exists in:
> while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) {
> 	if (buf == 'P') {
> 		ret = prctl(PR_SET_DUMPABLE, 0);
> 		if (ret < 0) {
> 			fprintf(stderr,
> 				"%s: Child failed to disable ptrace\n",
> 				strerror(errno));
> 			return -1;
> 		}
> 	} else {
> 		fprintf(stderr, "Child received unknown command %c\n",
> 			buf);
> 		return -1;
> 	}
> 	ret = send(sk, &buf, sizeof(buf), 0);
> 	if (ret != 1) {
> 		fprintf(stderr, "%s: Child failed to ack\n",
> 			strerror(errno));
> 		return -1;
> 	}
> }
> ----
> This will block until the close(self->sk) in the fixture teardown. Then ret
> returns 0, and the child should exit. Maybe a comment like:
> /*
>  * The fixture setup is completed at this point. The tests will run.
>  *
>  * Either we will read 'P' off of the sk, indicating that we need
>  * to disable ptrace, or if the other side of the socket is closed
>  * recv will return 0-bytes. This indicates that the fixture teardown
>  * has occured, and the child should exit.
>  */
> would be useful?

Ah yeah, that would be helpful. I missed that while reading the code. So
the child is definitely still alive when te tests are run, it seems.
That explains why this didn't fail. :)

> 
> > > 
> > > > 
> > > > Also, what does self->pid point to? The fd of the already exited child?
> > > It's just the pid of the child. pidfd is the fd of the (unexited) child.
> I have no idea if it's pro / against the commenting style to blow up that
> structure:

I think it's fine to comment it like that.

Thanks!
Christian