From patchwork Mon Jul 10 09:37:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [01/18] zdtm: Test descriptor sent over unix and kept open From: Pavel Emelyanov X-Patchwork-Id: 5840 Message-Id: <6229dbad-4666-78f6-7fed-0780de10d148@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:37:50 +0300 Signed-off-by: Pavel Emelyanov --- test/zdtm/static/Makefile | 2 ++ test/zdtm/static/scm00.c | 26 +++++++++++++++++++++----- test/zdtm/static/scm01.c | 1 + test/zdtm/static/scm01.desc | 1 + 4 files changed, 25 insertions(+), 5 deletions(-) create mode 120000 test/zdtm/static/scm01.c create mode 100644 test/zdtm/static/scm01.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 6947e05..661ff7f 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -154,6 +154,7 @@ TST_NOFILE := \ remap_dead_pid \ remap_dead_pid_root \ scm00 \ + scm01 \ aio00 \ aio01 \ fd \ @@ -442,6 +443,7 @@ socket-tcp-closed-last-ack: CFLAGS += -D ZDTM_TCP_LAST_ACK mnt_ext_manual: CFLAGS += -D ZDTM_EXTMAP_MANUAL sigpending: LDLIBS += -lrt vdso01: LDLIBS += -lrt +scm01: CFLAGS += -DKEEP_SENT_FD mntns_link_remap: CFLAGS += -DZDTM_LINK_REMAP mntns_shared_bind02: CFLAGS += -DSHARED_BIND02 mntns_root_bind02: CFLAGS += -DROOT_BIND02 diff --git a/test/zdtm/static/scm00.c b/test/zdtm/static/scm00.c index 6f0c7b7..39c1473 100644 --- a/test/zdtm/static/scm00.c +++ b/test/zdtm/static/scm00.c @@ -68,7 +68,7 @@ static int recv_fd(int via) int main(int argc, char **argv) { - int sk[2], p[2]; + int sk[2], p[2], rfd; #define MSG "HELLO" char buf[8]; /* bigger than the MSG to check boundaries */ @@ -89,6 +89,7 @@ int main(int argc, char **argv) exit(1); } +#ifndef KEEP_SENT_FD close(p[0]); /* Swap pipe ends to make scm recv put pipe into different place */ @@ -96,22 +97,29 @@ int main(int argc, char **argv) close(p[1]); p[1] = p[0]; p[0] = -1; +#endif test_daemon(); test_waitsig(); - p[0] = recv_fd(sk[1]); - if (p[0] < 0) 
{ + rfd = recv_fd(sk[1]); + if (rfd < 0) { fail("Can't recv pipe back (%d)", p[0]); goto out; } - +#ifdef KEEP_SENT_FD + if (rfd == p[0]) { + fail("Original descriptor not kept"); + goto out; + } +again: +#endif if (write(p[1], MSG, sizeof(MSG)) != sizeof(MSG)) { fail("Pipe write-broken"); goto out; } - if (read(p[0], buf, sizeof(buf)) != sizeof(MSG)) { + if (read(rfd, buf, sizeof(buf)) != sizeof(MSG)) { fail("Pipe read-broken"); goto out; } @@ -122,6 +130,14 @@ int main(int argc, char **argv) goto out; } +#ifdef KEEP_SENT_FD + if (rfd != p[0]) { + test_msg("Check kept\n"); + rfd = p[0]; + goto again; + } +#endif + pass(); out: return 0; diff --git a/test/zdtm/static/scm01.c b/test/zdtm/static/scm01.c new file mode 120000 index 0000000..4cab0ed --- /dev/null +++ b/test/zdtm/static/scm01.c @@ -0,0 +1 @@ +scm00.c \ No newline at end of file diff --git a/test/zdtm/static/scm01.desc b/test/zdtm/static/scm01.desc new file mode 100644 index 0000000..ded8987 --- /dev/null +++ b/test/zdtm/static/scm01.desc @@ -0,0 +1 @@ +{'flags': 'crfail'} From patchwork Mon Jul 10 09:38:04 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [02/18] zdtm: Send two descriptors via unix socket From: Pavel Emelyanov X-Patchwork-Id: 5841 Message-Id: <2fc0ae63-8961-c696-8ca4-2d09c7373357@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:38:04 +0300 Signed-off-by: Pavel Emelyanov --- test/zdtm/static/Makefile | 2 ++ test/zdtm/static/scm00.c | 20 +++++++++++++++++++- test/zdtm/static/scm02.c | 1 + test/zdtm/static/scm02.desc | 1 + 4 files changed, 23 insertions(+), 1 deletion(-) create mode 120000 test/zdtm/static/scm02.c create mode 100644 test/zdtm/static/scm02.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 661ff7f..f0c0c17 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -155,6 +155,7 @@ TST_NOFILE := \ remap_dead_pid_root \ scm00 \ scm01 \ + scm02 \ aio00 \ aio01 \ fd \ @@ -444,6 
+445,7 @@ mnt_ext_manual: CFLAGS += -D ZDTM_EXTMAP_MANUAL sigpending: LDLIBS += -lrt vdso01: LDLIBS += -lrt scm01: CFLAGS += -DKEEP_SENT_FD +scm02: CFLAGS += -DSEND_BOTH mntns_link_remap: CFLAGS += -DZDTM_LINK_REMAP mntns_shared_bind02: CFLAGS += -DSHARED_BIND02 mntns_root_bind02: CFLAGS += -DROOT_BIND02 diff --git a/test/zdtm/static/scm00.c b/test/zdtm/static/scm00.c index 39c1473..de626d9 100644 --- a/test/zdtm/static/scm00.c +++ b/test/zdtm/static/scm00.c @@ -91,13 +91,21 @@ int main(int argc, char **argv) #ifndef KEEP_SENT_FD close(p[0]); - +#ifdef SEND_BOTH + if (send_fd(sk[0], p[1]) < 0) { + pr_perror("Can't send 2nd descriptor"); + exit(1); + } + close(p[1]); + p[0] = p[1] = -1; +#else /* Swap pipe ends to make scm recv put pipe into different place */ dup2(p[1], p[0]); close(p[1]); p[1] = p[0]; p[0] = -1; #endif +#endif test_daemon(); test_waitsig(); @@ -107,6 +115,16 @@ int main(int argc, char **argv) fail("Can't recv pipe back (%d)", p[0]); goto out; } + +#ifdef SEND_BOTH + test_msg("Recv 2nd end\n"); + p[1] = recv_fd(sk[1]); + if (p[1] < 0) { + fail("Can't recv 2nd pipe back (%d)", p[1]); + goto out; + } +#endif + #ifdef KEEP_SENT_FD if (rfd == p[0]) { fail("Original descriptor not kept"); diff --git a/test/zdtm/static/scm02.c b/test/zdtm/static/scm02.c new file mode 120000 index 0000000..4cab0ed --- /dev/null +++ b/test/zdtm/static/scm02.c @@ -0,0 +1 @@ +scm00.c \ No newline at end of file diff --git a/test/zdtm/static/scm02.desc b/test/zdtm/static/scm02.desc new file mode 100644 index 0000000..ded8987 --- /dev/null +++ b/test/zdtm/static/scm02.desc @@ -0,0 +1 @@ +{'flags': 'crfail'} From patchwork Mon Jul 10 09:38:20 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [03/18] zdtm: Send two descriptors in one message From: Pavel Emelyanov X-Patchwork-Id: 5842 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:38:20 +0300 Signed-off-by: Pavel Emelyanov --- test/zdtm/static/Makefile | 1 + 
test/zdtm/static/scm03.c | 126 ++++++++++++++++++++++++++++++++++++++++++++ test/zdtm/static/scm03.desc | 1 + 3 files changed, 128 insertions(+) create mode 100644 test/zdtm/static/scm03.c create mode 100644 test/zdtm/static/scm03.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index f0c0c17..79963f3 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -156,6 +156,7 @@ TST_NOFILE := \ scm00 \ scm01 \ scm02 \ + scm03 \ aio00 \ aio01 \ fd \ diff --git a/test/zdtm/static/scm03.c b/test/zdtm/static/scm03.c new file mode 100644 index 0000000..9e89628 --- /dev/null +++ b/test/zdtm/static/scm03.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that SCM_RIGHTS are preserved"; +const char *test_author = "Pavel Emelyanov "; + +static int send_fd(int via, int fd1, int fd2) +{ + struct msghdr h = {}; + struct cmsghdr *ch; + struct iovec iov; + char buf[CMSG_SPACE(2 * sizeof(int))], c = '\0'; + int *fdp; + + h.msg_control = buf; + h.msg_controllen = sizeof(buf); + ch = CMSG_FIRSTHDR(&h); + ch->cmsg_level = SOL_SOCKET; + ch->cmsg_type = SCM_RIGHTS; + ch->cmsg_len = CMSG_LEN(2 * sizeof(int)); + fdp = (int *)CMSG_DATA(ch); + fdp[0] = fd1; + fdp[1] = fd2; + h.msg_iov = &iov; + h.msg_iovlen = 1; + iov.iov_base = &c; + iov.iov_len = sizeof(c); + + if (sendmsg(via, &h, 0) <= 0) + return -1; + + return 0; +} + +static int recv_fd(int via, int *fd1, int *fd2) +{ + struct msghdr h = {}; + struct cmsghdr *ch; + struct iovec iov; + char buf[CMSG_SPACE(2 * sizeof(int))], c; + int *fdp; + + h.msg_control = buf; + h.msg_controllen = sizeof(buf); + h.msg_iov = &iov; + h.msg_iovlen = 1; + iov.iov_base = &c; + iov.iov_len = sizeof(c); + + if (recvmsg(via, &h, 0) <= 0) + return -1; + + ch = CMSG_FIRSTHDR(&h); + if (h.msg_flags & MSG_TRUNC) + return -2; + if (ch == NULL) + return -3; + if (ch->cmsg_type != SCM_RIGHTS) + return -4; + + fdp = (int *)CMSG_DATA(ch); + *fd1 = 
fdp[0]; + *fd2 = fdp[1]; + return 0; +} + +int main(int argc, char **argv) +{ + int sk[2], p[2]; +#define MSG "HELLO" + char buf[8]; /* bigger than the MSG to check boundaries */ + + test_init(argc, argv); + + if (socketpair(PF_UNIX, SOCK_DGRAM, 0, sk) < 0) { + pr_perror("Can't make unix pair"); + exit(1); + } + + if (pipe(p) < 0) { + pr_perror("Can't make pipe"); + exit(1); + } + + if (send_fd(sk[0], p[0], p[1]) < 0) { + pr_perror("Can't send descriptor"); + exit(1); + } + + close(p[0]); + close(p[1]); + p[0] = p[1] = -1; + + test_daemon(); + test_waitsig(); + + if (recv_fd(sk[1], &p[0], &p[1]) < 0) { + fail("Can't recv pipes back"); + goto out; + } + + if (write(p[1], MSG, sizeof(MSG)) != sizeof(MSG)) { + fail("Pipe write-broken"); + goto out; + } + + if (read(p[0], buf, sizeof(buf)) != sizeof(MSG)) { + fail("Pipe read-broken"); + goto out; + } + + if (strcmp(buf, MSG)) { + buf[sizeof(buf) - 1] = '\0'; + fail("Pipe read-broken (%s)", buf); + goto out; + } + + pass(); +out: + return 0; +} diff --git a/test/zdtm/static/scm03.desc b/test/zdtm/static/scm03.desc new file mode 100644 index 0000000..ded8987 --- /dev/null +++ b/test/zdtm/static/scm03.desc @@ -0,0 +1 @@ +{'flags': 'crfail'} From patchwork Mon Jul 10 09:38:36 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [04/18] zdtm: Send two descriptors in two SCMs From: Pavel Emelyanov X-Patchwork-Id: 5843 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:38:36 +0300 Only the send code is altered, as upon receiving kernel merges all scm_rights into one. CRIU relies on this merge and this is to catch situations if the kernel suddenly stops doing this. 
Signed-off-by: Pavel Emelyanov --- test/zdtm/static/Makefile | 2 ++ test/zdtm/static/scm03.c | 31 +++++++++++++++++++++++++++++-- test/zdtm/static/scm04.c | 1 + test/zdtm/static/scm04.desc | 1 + 4 files changed, 33 insertions(+), 2 deletions(-) create mode 120000 test/zdtm/static/scm04.c create mode 100644 test/zdtm/static/scm04.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 79963f3..dd77768 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -157,6 +157,7 @@ TST_NOFILE := \ scm01 \ scm02 \ scm03 \ + scm04 \ aio00 \ aio01 \ fd \ @@ -447,6 +448,7 @@ sigpending: LDLIBS += -lrt vdso01: LDLIBS += -lrt scm01: CFLAGS += -DKEEP_SENT_FD scm02: CFLAGS += -DSEND_BOTH +scm04: CFLAGS += -DSEPARATE mntns_link_remap: CFLAGS += -DZDTM_LINK_REMAP mntns_shared_bind02: CFLAGS += -DSHARED_BIND02 mntns_root_bind02: CFLAGS += -DROOT_BIND02 diff --git a/test/zdtm/static/scm03.c b/test/zdtm/static/scm03.c index 9e89628..881bdf8 100644 --- a/test/zdtm/static/scm03.c +++ b/test/zdtm/static/scm03.c @@ -14,11 +14,30 @@ static int send_fd(int via, int fd1, int fd2) struct msghdr h = {}; struct cmsghdr *ch; struct iovec iov; - char buf[CMSG_SPACE(2 * sizeof(int))], c = '\0'; +#ifdef SEPARATE + char buf[2 * CMSG_SPACE(sizeof(int))]; +#else + char buf[CMSG_SPACE(2 * sizeof(int))]; +#endif + char c = '\0'; int *fdp; h.msg_control = buf; h.msg_controllen = sizeof(buf); +#ifdef SEPARATE + ch = CMSG_FIRSTHDR(&h); + ch->cmsg_level = SOL_SOCKET; + ch->cmsg_type = SCM_RIGHTS; + ch->cmsg_len = CMSG_LEN(sizeof(int)); + fdp = (int *)CMSG_DATA(ch); + fdp[0] = fd1; + ch = CMSG_NXTHDR(&h, ch); + ch->cmsg_level = SOL_SOCKET; + ch->cmsg_type = SCM_RIGHTS; + ch->cmsg_len = CMSG_LEN(sizeof(int)); + fdp = (int *)CMSG_DATA(ch); + fdp[0] = fd2; +#else ch = CMSG_FIRSTHDR(&h); ch->cmsg_level = SOL_SOCKET; ch->cmsg_type = SCM_RIGHTS; @@ -26,6 +45,7 @@ static int send_fd(int via, int fd1, int fd2) fdp = (int *)CMSG_DATA(ch); fdp[0] = fd1; fdp[1] = fd2; +#endif 
h.msg_iov = &iov; h.msg_iovlen = 1; iov.iov_base = &c; @@ -42,7 +62,8 @@ static int recv_fd(int via, int *fd1, int *fd2) struct msghdr h = {}; struct cmsghdr *ch; struct iovec iov; - char buf[CMSG_SPACE(2 * sizeof(int))], c; + char buf[CMSG_SPACE(2 * sizeof(int))]; + char c; int *fdp; h.msg_control = buf; @@ -55,6 +76,12 @@ static int recv_fd(int via, int *fd1, int *fd2) if (recvmsg(via, &h, 0) <= 0) return -1; + if (h.msg_flags & MSG_CTRUNC) { + test_msg("CTR\n"); + return -2; + } + + /* No 2 SCM-s here, kernel merges them upon send */ ch = CMSG_FIRSTHDR(&h); if (h.msg_flags & MSG_TRUNC) return -2; diff --git a/test/zdtm/static/scm04.c b/test/zdtm/static/scm04.c new file mode 120000 index 0000000..f1f86dd --- /dev/null +++ b/test/zdtm/static/scm04.c @@ -0,0 +1 @@ +scm03.c \ No newline at end of file diff --git a/test/zdtm/static/scm04.desc b/test/zdtm/static/scm04.desc new file mode 100644 index 0000000..ded8987 --- /dev/null +++ b/test/zdtm/static/scm04.desc @@ -0,0 +1 @@ +{'flags': 'crfail'} From patchwork Mon Jul 10 09:38:51 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [05/18] tty: Prepare for no parasite_ctl From: Pavel Emelyanov X-Patchwork-Id: 5844 Message-Id: <6fe3f32d-0f42-de3c-7b0f-674492aa1fc4@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:38:51 +0300 SCM-dumping code won't provide the ctl for TTY dump. To make this work we'll have to peek required tty info from current (see the SCM patch), but I doubt that anyone sends TTYs via sockets ... so keep this in TODO list. 
Signed-off-by: Pavel Emelyanov --- criu/tty.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/tty.c b/criu/tty.c index f674f25..a967ee3 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -1764,6 +1764,11 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d int ret = -1; + if (!p->fd_ctl) { + pr_err("No CTL for TTY dump, likely SCM case\n"); + return -1; + } + /* * Make sure the structures the system provides us * correlates well with protobuf templates. From patchwork Mon Jul 10 09:39:05 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [06/18] fifo: Generate unique ID for satellite regfile entry From: Pavel Emelyanov X-Patchwork-Id: 5845 Message-Id: <7ad1a8a5-ed22-3883-2fb6-e4139228c543@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:39:05 +0300 The plan is to have all file entries have unique ID. Fifo generates a reg file entry to reuse the whole reg-files c/r-ing engine (ghosts, open-by-path, etc.) and right now ID for this entry is the same as for fifo entry. Signed-off-by: Pavel Emelyanov --- criu/fifo.c | 14 +++++++++++--- images/fifo.proto | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/criu/fifo.c b/criu/fifo.c index d0728e1..a269343 100644 --- a/criu/fifo.c +++ b/criu/fifo.c @@ -8,6 +8,7 @@ #include "image.h" #include "files.h" #include "files-reg.h" +#include "file-ids.h" #include "pipes.h" #include "fifo.h" @@ -43,13 +44,16 @@ static int dump_one_fifo(int lfd, u32 id, const struct fd_parms *p) struct cr_img *img = img_from_set(glob_imgset, CR_FD_FILES); FileEntry fe = FILE_ENTRY__INIT; FifoEntry e = FIFO_ENTRY__INIT; + u32 rf_id; + + fd_id_generate_special(NULL, &rf_id); /* * It's a trick here, we use regular files dumping * code to save path to a fifo, then we reuse it * on restore. 
*/ - if (dump_one_reg_file(lfd, id, p)) + if (dump_one_reg_file(lfd, rf_id, p)) return -1; pr_info("Dumping fifo %d with id %#x pipe_id %#x\n", @@ -57,6 +61,8 @@ static int dump_one_fifo(int lfd, u32 id, const struct fd_parms *p) e.id = id; e.pipe_id = pipe_id(p); + e.has_regf_id = true; + e.regf_id = rf_id; fe.type = FD_TYPES__FIFO; fe.id = e.id; @@ -116,8 +122,10 @@ static int open_fifo_fd(struct file_desc *d, int *new_fd) struct file_desc *reg_d; int fd; - reg_d = find_file_desc_raw(FD_TYPES__REG, info->fe->id); - BUG_ON(!reg_d); + reg_d = collect_special_file(info->fe->has_regf_id ? + info->fe->regf_id : info->fe->id); + if (!reg_d) + return -1; fd = open_path(reg_d, do_open_fifo, info); if (fd < 0) diff --git a/images/fifo.proto b/images/fifo.proto index 9d5b953..f5b3283 100644 --- a/images/fifo.proto +++ b/images/fifo.proto @@ -3,4 +3,5 @@ syntax = "proto2"; message fifo_entry { required uint32 id = 1; required uint32 pipe_id = 2; + optional uint32 regf_id = 3; } From patchwork Mon Jul 10 09:39:19 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [07/18] tty: Generate unique ID for satellite regfile entry From: Pavel Emelyanov X-Patchwork-Id: 5846 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:39:19 +0300 Same thing as for fifo-s. Signed-off-by: Pavel Emelyanov --- criu/tty.c | 18 ++++++++++++++---- images/tty.proto | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/criu/tty.c b/criu/tty.c index a967ee3..b42e54a 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -1619,7 +1619,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i) * reg file rectord because they are inherited from * command line on restore. */ - info->reg_d = try_collect_special_file(info->tfe->id, 1); + info->reg_d = try_collect_special_file( info->tfe->has_regf_id ? 
+ info->tfe->regf_id : info->tfe->id, 1); if (!info->reg_d) { if (info->driver->type != TTY_TYPE__EXT_TTY) { if (!deprecated_ok("TTY w/o regfile")) @@ -1908,14 +1909,23 @@ static int dump_one_tty(int lfd, u32 id, const struct fd_parms *p) return -1; } - if (driver->type != TTY_TYPE__EXT_TTY && dump_one_reg_file(lfd, id, p)) - return -1; - e.id = id; e.tty_info_id = tty_gen_id(driver, index); e.flags = p->flags; e.fown = (FownEntry *)&p->fown; + if (driver->type != TTY_TYPE__EXT_TTY) { + u32 rf_id; + + fd_id_generate_special(NULL, &rf_id); + if (dump_one_reg_file(lfd, rf_id, p)) + return -1; + + e.has_regf_id = true; + e.regf_id = rf_id; + } + + /* * FIXME * diff --git a/images/tty.proto b/images/tty.proto index 8ae804a..12c6166 100644 --- a/images/tty.proto +++ b/images/tty.proto @@ -87,4 +87,5 @@ message tty_file_entry { required fown_entry fown = 4; // optional sint32 mnt_id = 5 [default = 0]; + optional uint32 regf_id = 6; } From patchwork Mon Jul 10 09:39:35 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [08/18] fdinfo: Extract FdinfoEntry from dump_one_file From: Pavel Emelyanov X-Patchwork-Id: 5847 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:39:35 +0300 To support SCMs we'll need to receive them into criu task (see the SCM patch for details), then dump the received file as if it was in the dumpee. Then the info about received descriptor will be written into packet entry. For this we'll need to perform all the regular file dumping code BUT not write the FdinfoEntry into image, so shuffle the code for that. The gist of the patch is in two changes -- one in the do_dump_gen_file(), the other in dump_task_files_seized(). The rest is just tossing the arguments of the functions relevant to that change. 
Signed-off-by: Pavel Emelyanov --- criu/files-ext.c | 4 ++-- criu/files.c | 57 +++++++++++++++++++++++++------------------------- criu/include/files.h | 4 ++-- criu/include/sockets.h | 3 ++- criu/sockets.c | 4 ++-- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/criu/files-ext.c b/criu/files-ext.c index af9c268..a6247d6 100644 --- a/criu/files-ext.c +++ b/criu/files-ext.c @@ -84,11 +84,11 @@ struct collect_image_info ext_file_cinfo = { }; int dump_unsupp_fd(struct fd_parms *p, int lfd, - struct cr_img *img, char *more, char *info) + char *more, char *info, FdinfoEntry *e) { int ret; - ret = do_dump_gen_file(p, lfd, &ext_dump_ops, img); + ret = do_dump_gen_file(p, lfd, &ext_dump_ops, e); if (ret == 0) return 0; if (ret == -ENOTSUP) diff --git a/criu/files.c b/criu/files.c index 36bb9e6..affdac0 100644 --- a/criu/files.c +++ b/criu/files.c @@ -290,27 +290,20 @@ static u32 make_gen_id(const struct fd_parms *p) } int do_dump_gen_file(struct fd_parms *p, int lfd, - const struct fdtype_ops *ops, struct cr_img *img) + const struct fdtype_ops *ops, FdinfoEntry *e) { - FdinfoEntry e = FDINFO_ENTRY__INIT; int ret = -1; - e.type = ops->type; - e.id = make_gen_id(p); - e.fd = p->fd; - e.flags = p->fd_flags; + e->type = ops->type; + e->id = make_gen_id(p); + e->fd = p->fd; + e->flags = p->fd_flags; - ret = fd_id_generate(p->pid, &e, p); + ret = fd_id_generate(p->pid, e, p); if (ret == 1) /* new ID generated */ - ret = ops->dump(lfd, e.id, p); + ret = ops->dump(lfd, e->id, p); - if (ret < 0) - return ret; - - pr_info("fdinfo: type: %#2x flags: %#o/%#o pos: %#8"PRIx64" fd: %d\n", - ops->type, p->flags, (int)p->fd_flags, p->pos, p->fd); - - return pb_write_one(img, &e, PB_FDINFO); + return ret; } int fill_fdlink(int lfd, const struct fd_parms *p, struct fd_link *link) @@ -419,7 +412,7 @@ static const struct fdtype_ops *get_mem_dev_ops(struct fd_parms *p, int minor) return ops; } -static int dump_chrdev(struct fd_parms *p, int lfd, struct cr_img *img) +static int 
dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e) { int maj = major(p->stat.st_rdev); const struct fdtype_ops *ops; @@ -447,15 +440,15 @@ static int dump_chrdev(struct fd_parms *p, int lfd, struct cr_img *img) } sprintf(more, "%d:%d", maj, minor(p->stat.st_rdev)); - return dump_unsupp_fd(p, lfd, img, "chr", more); + return dump_unsupp_fd(p, lfd, "chr", more, e); } } - return do_dump_gen_file(p, lfd, ops, img); + return do_dump_gen_file(p, lfd, ops, e); } static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, - struct cr_img *img, struct parasite_ctl *ctl) + struct parasite_ctl *ctl, FdinfoEntry *e) { struct fd_parms p = FD_PARMS_INIT; const struct fdtype_ops *ops; @@ -472,10 +465,10 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, p.fd_ctl = ctl; /* Some dump_opts require this to talk to parasite */ if (S_ISSOCK(p.stat.st_mode)) - return dump_socket(&p, lfd, img); + return dump_socket(&p, lfd, e); if (S_ISCHR(p.stat.st_mode)) - return dump_chrdev(&p, lfd, img); + return dump_chrdev(&p, lfd, e); if (p.fs_type == ANON_INODE_FS_MAGIC) { char link[32]; @@ -496,9 +489,9 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, else if (is_timerfd_link(link)) ops = &timerfd_dump_ops; else - return dump_unsupp_fd(&p, lfd, img, "anon", link); + return dump_unsupp_fd(&p, lfd, "anon", link, e); - return do_dump_gen_file(&p, lfd, ops, img); + return do_dump_gen_file(&p, lfd, ops, e); } if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode)) { @@ -507,12 +500,12 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, p.link = &link; if (link.name[1] == '/') - return do_dump_gen_file(&p, lfd, &regfile_dump_ops, img); + return do_dump_gen_file(&p, lfd, &regfile_dump_ops, e); if (check_ns_proc(&link)) - return do_dump_gen_file(&p, lfd, &nsfile_dump_ops, img); + return do_dump_gen_file(&p, lfd, &nsfile_dump_ops, e); - return dump_unsupp_fd(&p, lfd, img, "reg", 
link.name + 1); + return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); } if (S_ISFIFO(p.stat.st_mode)) { @@ -521,7 +514,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, else ops = &fifo_dump_ops; - return do_dump_gen_file(&p, lfd, ops, img); + return do_dump_gen_file(&p, lfd, ops, e); } /* @@ -532,7 +525,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, if (fill_fdlink(lfd, &p, &link)) memzero(&link, sizeof(link)); - return dump_unsupp_fd(&p, lfd, img, "unknown", link.name + 1); + return dump_unsupp_fd(&p, lfd, "unknown", link.name + 1, e); } int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, @@ -571,11 +564,17 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, goto err; for (i = 0; i < nr_fds; i++) { + FdinfoEntry e = FDINFO_ENTRY__INIT; + ret = dump_one_file(item->pid, dfds->fds[i + off], - lfds[i], opts + i, img, ctl); + lfds[i], opts + i, ctl, &e); close(lfds[i]); if (ret) break; + + ret = pb_write_one(img, &e, PB_FDINFO); + if (ret) + break; } } diff --git a/criu/include/files.h b/criu/include/files.h index 22d5bc8..a96b744 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -133,7 +133,7 @@ struct cr_img; extern int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, - struct cr_img *); + FdinfoEntry *e); struct parasite_drain_fd; int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds); @@ -174,7 +174,7 @@ extern int shared_fdt_prepare(struct pstree_item *item); extern struct collect_image_info ext_file_cinfo; extern int dump_unsupp_fd(struct fd_parms *p, int lfd, - struct cr_img *, char *more, char *info); + char *more, char *info, FdinfoEntry *); extern int inherit_fd_parse(char *optarg); extern int inherit_fd_add(int fd, char *key); diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 9881d5b..3fa8017 100644 --- 
a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -5,6 +5,7 @@ #include #include "images/sk-opts.pb-c.h" +#include "images/fdinfo.pb-c.h" struct fdinfo_list_entry; struct sk_opts_entry; @@ -22,7 +23,7 @@ struct socket_desc { int already_dumped; }; -extern int dump_socket(struct fd_parms *p, int lfd, struct cr_img *); +extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *); extern int dump_socket_opts(int sk, SkOptsEntry *soe); extern int restore_socket_opts(int sk, SkOptsEntry *soe); extern void release_skopts(SkOptsEntry *); diff --git a/criu/sockets.c b/criu/sockets.c index 9b0c4df..852663e 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -557,7 +557,7 @@ void release_skopts(SkOptsEntry *soe) xfree(soe->so_bound_dev); } -int dump_socket(struct fd_parms *p, int lfd, struct cr_img *img) +int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *e) { int family; const struct fdtype_ops *ops; @@ -586,7 +586,7 @@ int dump_socket(struct fd_parms *p, int lfd, struct cr_img *img) return -1; } - return do_dump_gen_file(p, lfd, ops, img); + return do_dump_gen_file(p, lfd, ops, e); } static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg) From patchwork Mon Jul 10 09:39:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [09/18] file: Find descs by ID only (v2) From: Pavel Emelyanov X-Patchwork-Id: 5848 Message-Id: <9fe34230-0d45-dba8-1b25-6e343d31919d@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:39:50 +0300 Actually all file-s we dump have unique IDs, regardless of their types. This fact will be used to reduce complexity of the SCM code -- instead of keeping TYPE:ID pair it'll save only the ID. Said that -- we will need the way to lookup desc by ID only. 
v2: Older images had fifo-s and tty-s having matching IDs with respective reg-file entries Signed-off-by: Pavel Emelyanov --- criu/files.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index affdac0..a0e40f1 100644 --- a/criu/files.c +++ b/criu/files.c @@ -101,7 +101,15 @@ struct file_desc *find_file_desc_raw(int type, u32 id) chain = &file_desc_hash[id % FDESC_HASH_SIZE]; hlist_for_each_entry(d, chain, hash) - if (d->ops->type == type && d->id == id) + if ((d->id == id) && + (d->ops->type == type || type == FD_TYPES__UND)) + /* + * Warning -- old CRIU might generate matching IDs + * for different file types! So any code that uses + * FD_TYPES__UND for fdesc search MUST make sure it's + * dealing with the merged files images where all + * descs are forced to have different IDs. + */ return d; return NULL; From patchwork Mon Jul 10 09:40:06 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [10/18] fd: Helper for local fd dump From: Pavel Emelyanov X-Patchwork-Id: 5849 Message-Id: <9ddd0e14-2c17-35c9-de0c-790ed3a7621e@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:40:06 +0300 An aux code to dump the received file and report back its ID and type. 
Signed-off-by: Pavel Emelyanov --- criu/files.c | 17 +++++++++++++++++ criu/include/files.h | 1 + 2 files changed, 18 insertions(+) diff --git a/criu/files.c b/criu/files.c index a0e40f1..0387803 100644 --- a/criu/files.c +++ b/criu/files.c @@ -536,6 +536,23 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return dump_unsupp_fd(&p, lfd, "unknown", link.name + 1, e); } +int dump_my_file(int lfd, u32 *id, int *type) +{ + struct pid me = {}; + struct fd_opts fo = {}; + FdinfoEntry e = FDINFO_ENTRY__INIT; + + me.real = getpid(); + me.ns[0].virt = -1; /* FIXME */ + + if (dump_one_file(&me, lfd, lfd, &fo, NULL, &e)) + return -1; + + *id = e.id; + *type = e.type; + return 0; +} + int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds) { diff --git a/criu/include/files.h b/criu/include/files.h index a96b744..3dc18df 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -131,6 +131,7 @@ struct fdtype_ops { struct cr_img; +extern int dump_my_file(int lfd, u32 *, int *type); extern int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, FdinfoEntry *e); From patchwork Mon Jul 10 09:40:21 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [11/18] fd: Split file_master From: Pavel Emelyanov X-Patchwork-Id: 5850 Message-Id: <1a8dd826-f81b-08e2-0ad7-004584d7ab08@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:40:21 +0300 In SCM patch we'll need to generate temporary fdinfo entry for a task that will send descriptors via sockets. Sometimes the files in questions will NOT have any other fdinfo-s (if they are SCM-sent and closed), so we need a helper that would either give us existing fdinfo or say there's no such. 
Signed-off-by: Pavel Emelyanov --- criu/files.c | 17 ++++++++++++++--- criu/include/files.h | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/criu/files.c b/criu/files.c index 0387803..bd137df 100644 --- a/criu/files.c +++ b/criu/files.c @@ -206,16 +206,27 @@ void wait_fds_event(void) clear_fds_event(); } +struct fdinfo_list_entry *try_file_master(struct file_desc *d) +{ + if (list_empty(&d->fd_info_head)) + return NULL; + + return list_first_entry(&d->fd_info_head, + struct fdinfo_list_entry, desc_list); +} + struct fdinfo_list_entry *file_master(struct file_desc *d) { - if (list_empty(&d->fd_info_head)) { + struct fdinfo_list_entry *fle; + + fle = try_file_master(d); + if (!fle) { pr_err("Empty list on file desc id %#x(%d)\n", d->id, d->ops ? d->ops->type : -1); BUG(); } - return list_first_entry(&d->fd_info_head, - struct fdinfo_list_entry, desc_list); + return fle; } void show_saved_files(void) diff --git a/criu/include/files.h b/criu/include/files.h index 3dc18df..eeae8e0 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -142,6 +142,7 @@ int predump_task_files(int pid); extern void file_desc_init(struct file_desc *d, u32 id, struct file_desc_ops *ops); extern int file_desc_add(struct file_desc *d, u32 id, struct file_desc_ops *ops); +extern struct fdinfo_list_entry *try_file_master(struct file_desc *d); extern struct fdinfo_list_entry *file_master(struct file_desc *d); extern struct file_desc *find_file_desc_raw(int type, u32 id); From patchwork Mon Jul 10 09:40:36 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [12/18] fd: Split fdinfo collect routine From: Pavel Emelyanov X-Patchwork-Id: 5851 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:40:36 +0300 In SCM code there will be the need to attach a newly created FdinfoEntry to a particular file desc object, then get the created fdinfo helper object back. 
Current code only allows this via two lookup calls -- first one to attach entry to desc by ID (lookup #1), then get the fdinfo helper by FD (lookup #2). Fortunately, the existing code allows a simple split that gives us the optimized routine. Signed-off-by: Pavel Emelyanov --- criu/files.c | 26 ++++++++++++++++++-------- criu/include/files.h | 2 ++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/criu/files.c b/criu/files.c index bd137df..91f7c85 100644 --- a/criu/files.c +++ b/criu/files.c @@ -802,26 +802,36 @@ compare_pid: list_add_tail(&new_le->desc_list, &le->desc_list); } -int collect_fd(int pid, FdinfoEntry *e, struct rst_info *rst_info, bool fake) +struct fdinfo_list_entry *collect_fd_to(int pid, FdinfoEntry *e, + struct rst_info *rst_info, struct file_desc *fdesc, bool fake) { struct fdinfo_list_entry *new_le; + + new_le = alloc_fle(pid, e); + if (new_le) { + new_le->fake = (!!fake); + collect_desc_fle(new_le, fdesc); + collect_task_fd(new_le, rst_info); + } + + return new_le; +} + +int collect_fd(int pid, FdinfoEntry *e, struct rst_info *rst_info, bool fake) +{ struct file_desc *fdesc; pr_info("Collect fdinfo pid=%d fd=%d id=%#x\n", pid, e->fd, e->id); - new_le = alloc_fle(pid, e); - if (!new_le) - return -1; - new_le->fake = (!!fake); - fdesc = find_file_desc(e); if (fdesc == NULL) { pr_err("No file for fd %d id %#x\n", e->fd, e->id); return -1; } - collect_desc_fle(new_le, fdesc); - collect_task_fd(new_le, rst_info); + + if (!collect_fd_to(pid, e, rst_info, fdesc, fake)) + return -1; return 0; } diff --git a/criu/include/files.h b/criu/include/files.h index eeae8e0..eb32065 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -109,6 +109,8 @@ struct file_desc_ops { }; int collect_fd(int pid, FdinfoEntry *e, struct rst_info *rst_info, bool ghost); +struct fdinfo_list_entry *collect_fd_to(int pid, FdinfoEntry *e, + struct rst_info *rst_info, struct file_desc *fdesc, bool fake); unsigned int find_unused_fd(struct pstree_item *, int 
hint_fd); struct fdinfo_list_entry *find_used_fd(struct pstree_item *, int fd); From patchwork Mon Jul 10 09:40:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [13/18] unix: Set queuer in set_peer From: Pavel Emelyanov X-Patchwork-Id: 5852 Message-Id: <22a933e4-d4ae-d31c-540e-ec6dd099218c@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:40:50 +0300 For SCM restore we need to know all queuers, even if they are socketpair-s, but current code skips this setting for one half of them. So set this info for every socket out there. Signed-off-by: Pavel Emelyanov --- criu/sk-unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 765d9ae..901c74b 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1488,6 +1488,8 @@ static void set_peer(struct unix_sk_info *ui, struct unix_sk_info *peer) { ui->peer = peer; list_add(&ui->node, &peer->connected); + if (!peer->queuer) + peer->queuer = ui->ue->ino; } static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *peer) @@ -1527,8 +1529,6 @@ static int resolve_unix_peer(struct unix_sk_info *ui) } set_peer(ui, peer); - if (!peer->queuer) - peer->queuer = ui->ue->ino; if (ui == peer) /* socket connected to self %) */ goto out; From patchwork Mon Jul 10 09:41:04 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [14/18] unix: Use queuer id, not ino From: Pavel Emelyanov X-Patchwork-Id: 5853 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:41:04 +0300 The sk-queue.img keeps socket IDs as recipient descriptor. For SCM code we'll need to find the unix sk info who is the sender for a particular other socket by this ID. There's already the queuer field in the unix_sk_info, but it needs to be an ID rather than inode. Fortunately, id-inode pairs are unique.
Signed-off-by: Pavel Emelyanov --- criu/sk-unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 901c74b..5665bec 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -985,7 +985,7 @@ static int post_open_unix_sk(struct file_desc *d, int fd) revert_unix_sk_cwd(&cwd_fd, &root_fd); - if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id)) + if (peer->queuer == ui->ue->id && restore_sk_queue(fd, peer->ue->id)) return -1; return restore_sk_common(fd, ui); @@ -1489,7 +1489,7 @@ static void set_peer(struct unix_sk_info *ui, struct unix_sk_info *peer) ui->peer = peer; list_add(&ui->node, &peer->connected); if (!peer->queuer) - peer->queuer = ui->ue->ino; + peer->queuer = ui->ue->id; } static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *peer) From patchwork Mon Jul 10 09:41:19 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [15/18] unix: Split resolv and interconnect (v2) From: Pavel Emelyanov X-Patchwork-Id: 5854 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:41:19 +0300 In order to make sender of a descriptor (unix socket) be restored _after_ the descriptor in question, we need to find out those sockets early (before post-prep calls). The problem is that current code gives us info about who's the queuer for who only in post-prep hooks, so the peer resolution should happen right in collect callback. At the same time we need to make sure that all peers configured in the image exist, as well as need to put master/slave flags for socketpairs. Both these actions can only happen in post-prep. Said that -- the current peer resolve routine should be split into two steps. 
v2: Handle the socket connected to self (dgram) Signed-off-by: Pavel Emelyanov --- criu/sk-unix.c | 62 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 5665bec..42ce1bb 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1400,14 +1400,15 @@ static void unlink_stale(struct unix_sk_info *ui) revert_unix_sk_cwd(&cwd_fd, &root_fd); } -static int resolve_unix_peer(struct unix_sk_info *ui); +static void try_resolve_unix_peer(struct unix_sk_info *ui); +static int fixup_unix_peer(struct unix_sk_info *ui); static int post_prepare_unix_sk(struct pprep_head *ph) { struct unix_sk_info *ui; ui = container_of(ph, struct unix_sk_info, peer_resolve); - if (ui->ue->peer && resolve_unix_peer(ui)) + if (ui->ue->peer && fixup_unix_peer(ui)) return -1; if (ui->name) unlink_stale(ui); @@ -1468,6 +1469,9 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) ui->name_dir ? 
ui->name_dir : "-"); if (ui->ue->peer || ui->name) { + if (ui->ue->peer) + try_resolve_unix_peer(ui); + ui->peer_resolve.actor = post_prepare_unix_sk; add_post_prepare_cb(&ui->peer_resolve); } @@ -1512,38 +1516,50 @@ static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *pe } } -static int resolve_unix_peer(struct unix_sk_info *ui) +static int fixup_unix_peer(struct unix_sk_info *ui) { - struct unix_sk_info *peer; - - if (ui->peer) - goto out; - - BUG_ON(!ui->ue->peer); + struct unix_sk_info *peer = ui->peer; - peer = find_unix_sk_by_ino(ui->ue->peer); if (!peer) { pr_err("FATAL: Peer %#x unresolved for %#x\n", ui->ue->peer, ui->ue->ino); return -1; } - set_peer(ui, peer); - if (ui == peer) - /* socket connected to self %) */ - goto out; - if (peer->ue->peer != ui->ue->ino) - goto out; - - pr_info("Connected %#x -> %#x (%#x) flags %#x\n", - ui->ue->ino, ui->ue->peer, peer->ue->ino, ui->flags); - set_peer(peer, ui); - /* socketpair or interconnected sockets */ - interconnected_pair(ui, peer); -out: + if (peer != ui && peer->peer == ui && + !(ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE))) { + pr_info("Connected %#x -> %#x (%#x) flags %#x\n", + ui->ue->ino, ui->ue->peer, peer->ue->ino, ui->flags); + /* socketpair or interconnected sockets */ + interconnected_pair(ui, peer); + } + return 0; } +static void try_resolve_unix_peer(struct unix_sk_info *ui) +{ + struct unix_sk_info *peer; + + if (ui->peer) + return; + + BUG_ON(!ui->ue->peer); + + if (ui->ue->peer == ui->ue->ino) { + /* socket connected to self %) */ + set_peer(ui, ui); + return; + } + + peer = find_unix_sk_by_ino(ui->ue->peer); + if (peer) { + set_peer(ui, peer); + if (peer->ue->peer == ui->ue->ino) + set_peer(peer, ui); + } /* else -- maybe later */ +} + int unix_sk_id_add(unsigned int ino) { char *e_str; From patchwork Mon Jul 10 09:41:33 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [16/18] skqueue: Split the packets 
sending code From: Pavel Emelyanov X-Patchwork-Id: 5855 Message-Id: To: CRIU Date: Mon, 10 Jul 2017 12:41:33 +0300 Signed-off-by: Pavel Emelyanov --- criu/sk-queue.c | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 28578b4..057c0bf 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -197,6 +197,34 @@ err_brk: return ret; } +static int send_one_pkt(int fd, struct sk_packet *pkt) +{ + int ret; + SkPacketEntry *entry = pkt->entry; + + /* + * Don't try to use sendfile here, because it use sendpage() and + * all data are split on pages and a new skb is allocated for + * each page. It creates a big overhead on SNDBUF. + * sendfile() isn't suitable for DGRAM sockets, because message + * boundaries messages should be saved. + */ + + ret = write(fd, pkt->data, entry->length); + xfree(pkt->data); + if (ret < 0) { + pr_perror("Failed to send packet"); + return -1; + } + if (ret != entry->length) { + pr_err("Restored skb trimmed to %d/%d\n", + ret, (unsigned int)entry->length); + return -1; + } + + return 0; +} + int restore_sk_queue(int fd, unsigned int peer_id) { struct sk_packet *pkt, *tmp; @@ -216,26 +244,10 @@ int restore_sk_queue(int fd, unsigned int peer_id) pr_info("\tRestoring %d-bytes skb for %u\n", (unsigned int)entry->length, peer_id); - /* - * Don't try to use sendfile here, because it use sendpage() and - * all data are split on pages and a new skb is allocated for - * each page. It creates a big overhead on SNDBUF. - * sendfile() isn't suitable for DGRAM sockets, because message - * boundaries messages should be saved. 
- */ - - ret = write(fd, pkt->data, entry->length); - xfree(pkt->data); - if (ret < 0) { - pr_perror("Failed to send packet"); - goto out; - } - if (ret != entry->length) { - pr_err("Restored skb trimmed to %d/%d\n", - ret, (unsigned int)entry->length); - ret = -1; + ret = send_one_pkt(fd, pkt); + if (ret) goto out; - } + list_del(&pkt->list); sk_packet_entry__free_unpacked(entry, NULL); xfree(pkt); From patchwork Mon Jul 10 09:41:47 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [17/18] skqueue: Use sendmsg() to send data From: Pavel Emelyanov X-Patchwork-Id: 5856 Message-Id: <6f35d6a8-690c-7856-5d1f-5f19ee77eba6@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:41:47 +0300 Signed-off-by: Pavel Emelyanov --- criu/sk-queue.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 057c0bf..77e203e 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -201,6 +201,13 @@ static int send_one_pkt(int fd, struct sk_packet *pkt) { int ret; SkPacketEntry *entry = pkt->entry; + struct msghdr mh = {}; + struct iovec iov; + + mh.msg_iov = &iov; + mh.msg_iovlen = 1; + iov.iov_base = pkt->data; + iov.iov_len = entry->length; /* * Don't try to use sendfile here, because it use sendpage() and @@ -210,7 +217,7 @@ static int send_one_pkt(int fd, struct sk_packet *pkt) * boundaries messages should be saved. 
*/ - ret = write(fd, pkt->data, entry->length); + ret = sendmsg(fd, &mh, 0); xfree(pkt->data); if (ret < 0) { pr_perror("Failed to send packet"); From patchwork Mon Jul 10 09:42:39 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [18/18] SCM: Dump and restore SCM_RIGHTs From: Pavel Emelyanov X-Patchwork-Id: 5857 Message-Id: <28a366b7-4d90-583e-4c23-d0f563187c4d@virtuozzo.com> To: CRIU Date: Mon, 10 Jul 2017 12:42:39 +0300 Most of the pieces have already been described in the previous patches :) so here's the summary. * Dump: When receiving a message, also receive any SCM-s (already there) and when SCM_RIGHTs one is met -- go ahead and just dump received descriptors using regular code, but taking current as the victim task. Few words about file paths resolution -- since we do dump path-ed files by receiving them from victim's parasite, such files sent via sockets should still work OK, as we still receive them, just from another socket. Several problems here: 1. Unix sockets sent via unix sockets form knots. Not supported. 2. Eventpolls sent via unix might themselves poll unix sockets. Knots again. Not supported either. * Restore: On restore we need to make unix socket wait for the soon-to-be-scm-sent descriptors to get restored, so we need to find them, then put a dependency. After that, the fake fdinfo entry is attached to the respective file descs, when sent the respective descriptors are closed.
https://github.com/xemul/criu/issues/251 Signed-off-by: Pavel Emelyanov --- criu/cr-restore.c | 8 +++ criu/include/sockets.h | 2 + criu/sk-queue.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++++- criu/sk-unix.c | 127 ++++++++++++++++++++++++++++++++++++++- images/sk-packet.proto | 6 ++ 5 files changed, 295 insertions(+), 5 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e14fa06..b9ef49c 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -364,6 +364,14 @@ static int root_prepare_shared(void) if (ret) goto err; + /* + * This should be called with all packets collected AND all + * fdescs and fles prepared BUT post-prep-s not run. + */ + ret = prepare_scms(); + if (ret) + goto err; + ret = run_post_prepare(); if (ret) goto err; diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 3fa8017..1bd5c67 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *); extern struct collect_image_info inet_sk_cinfo; extern struct collect_image_info unix_sk_cinfo; extern int fix_external_unix_sockets(void); +extern int prepare_scms(void); +extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); extern struct collect_image_info netlink_sk_cinfo; diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 77e203e..953db66 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -18,9 +18,9 @@ #include "util.h" #include "util-pie.h" #include "sockets.h" - +#include "xmalloc.h" #include "sk-queue.h" - +#include "files.h" #include "protobuf.h" #include "images/sk-packet.pb-c.h" @@ -28,6 +28,8 @@ struct sk_packet { struct list_head list; SkPacketEntry *entry; char *data; + unsigned scm_len; + int *scm; }; static LIST_HEAD(packets_list); @@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i struct sk_packet *pkt = obj; pkt->entry = pb_msg(msg, SkPacketEntry); - + pkt->scm = NULL; pkt->data = 
xmalloc(pkt->entry->length); if (pkt->data ==NULL) return -1; /* + * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and + * only 1 of that kind is possible, thus not more than 1 SCMs + * on a packet. + */ + if (pkt->entry->n_scm > 1) { + pr_err("More than 1 SCM is not possible\n"); + return -1; + } + + /* * NOTE: packet must be added to the tail. Otherwise sequence * will be broken. */ @@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = { .collect = collect_one_packet, }; +static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe) +{ + int nr_fds, *fds, i; + void *buf; + ScmEntry *scme; + + nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int); + fds = (int *)CMSG_DATA(ch); + + buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t)); + if (!buf) + return -1; + + scme = xptr_pull(&buf, ScmEntry); + scm_entry__init(scme); + scme->type = SCM_RIGHTS; + scme->n_rights = nr_fds; + scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t)); + + for (i = 0; i < nr_fds; i++) { + int ftyp; + + if (dump_my_file(fds[i], &scme->rights[i], &ftyp)) + return -1; + + /* + * Unix sent over Unix or Epoll with some other sh*t + * sent over unix (maybe with this very unix polled) + * are tricky and not supported for now. (XXX -- todo) + */ + if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) { + pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp); + return -1; + } + } + + i = pe->n_scm++; + if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*))) + return -1; + + pe->scm[i] = scme; + return 0; +} + /* * Maximum size of the control messages. XXX -- is there any * way to get this value out of the kernel? 
@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = { static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) { struct cmsghdr *ch; + int n_rights = 0; for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) { + if (ch->cmsg_type == SCM_RIGHTS) { + if (n_rights) { + /* + * Even if user is sending more than one cmsg with + * rights, kernel merges them alltogether on recv. + */ + pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n"); + return -1; + } + + if (dump_scm_rights(ch, pe)) + return -1; + + n_rights++; + continue; + } + pr_err("Control messages in queue, not supported\n"); return -1; } @@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) return 0; } +static void release_cmsg(SkPacketEntry *pe) +{ + int i; + + for (i = 0; i < pe->n_scm; i++) + xfree(pe->scm[i]); + xfree(pe->scm); + + pe->n_scm = 0; + pe->scm = NULL; +} + int dump_sk_queue(int sock_fd, int sock_id) { SkPacketEntry pe = SK_PACKET_ENTRY__INIT; @@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id) ret = -EIO; goto err_set_sock; } + + if (pe.scm) + release_cmsg(&pe); } ret = 0; @@ -197,6 +286,20 @@ err_brk: return ret; } +static void close_scm_fds(struct sk_packet *pkt) +{ + int i, *fds, n_fds; + struct cmsghdr *ch = (struct cmsghdr *)pkt->scm; + + fds = (int *)CMSG_DATA(ch); + n_fds = (ch->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int); + + for (i = 0; i < n_fds; i++) { + if (close(fds[i])) + pr_warn("scm: Error closing sent fd\n"); + } +} + static int send_one_pkt(int fd, struct sk_packet *pkt) { int ret; @@ -209,6 +312,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt) iov.iov_base = pkt->data; iov.iov_len = entry->length; + if (pkt->scm != NULL) { + mh.msg_controllen = pkt->scm_len; + mh.msg_control = pkt->scm; + } + /* * Don't try to use sendfile here, because it use sendpage() and * all data are split on pages and a new skb is allocated for @@ -229,6 +337,9 @@ static int send_one_pkt(int fd, struct sk_packet 
*pkt) return -1; } + if (pkt->scm != NULL) + close_scm_fds(pkt); + return 0; } @@ -264,3 +375,43 @@ int restore_sk_queue(int fd, unsigned int peer_id) out: return ret; } + +int prepare_scms(void) +{ + struct sk_packet *pkt; + + pr_info("Preparing SCMs\n"); + list_for_each_entry(pkt, &packets_list, list) { + SkPacketEntry *pe = pkt->entry; + ScmEntry *se; + struct cmsghdr *ch; + + if (!pe->n_scm) + continue; + + se = pe->scm[0]; /* Only 1 SCM is possible */ + + if (se->type == SCM_RIGHTS) { + pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int)); + pkt->scm = xmalloc(pkt->scm_len); + if (!pkt->scm) + return -1; + + ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */ + ch->cmsg_level = SOL_SOCKET; + ch->cmsg_type = SCM_RIGHTS; + ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int)); + + if (unix_note_scm_rights(pe->id_for, se->rights, + (int *)CMSG_DATA(ch), se->n_rights)) + return -1; + + continue; + } + + pr_err("Unsupported scm %d in image\n", se->type); + return -1; + } + + return 0; +} diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 42ce1bb..165adc9 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -798,6 +798,7 @@ struct unix_sk_info { struct file_desc d; struct list_head connected; /* List of sockets, connected to me */ struct list_head node; /* To link in peer's connected list */ + struct list_head scm_fles; /* * For DGRAM sockets with queues, we should only restore the queue @@ -809,6 +810,11 @@ struct unix_sk_info { u8 listen:1; }; +struct scm_fle { + struct list_head l; + struct fdinfo_list_entry *fle; +}; + #define USK_PAIR_MASTER 0x1 #define USK_PAIR_SLAVE 0x2 @@ -824,6 +830,116 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino) return NULL; } +static struct unix_sk_info *find_queuer_for(int id) +{ + struct unix_sk_info *ui; + + list_for_each_entry(ui, &unix_sockets, list) { + if (ui->queuer == id) + return ui; + } + + return NULL; +} + +int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids) +{ + struct 
unix_sk_info *ui; + struct pstree_item *owner; + int i; + + ui = find_queuer_for(id_for); + if (!ui) { + pr_err("Can't find sender for %d\n", id_for); + return -1; + } + + pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id); + /* + * This is the task that will restore this socket + */ + owner = file_master(&ui->d)->task; + + pr_info("-> will set up deps\n"); + /* + * The ui will send data to the rights receiver. Add a fake fle + * for the file and a dependency. + */ + for (i = 0; i < n_ids; i++) { + struct file_desc *tgt; + struct fdinfo_list_entry *fle; + struct scm_fle *sfle; + FdinfoEntry *e; + int fd; + + tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]); + if (!tgt) { + pr_err("Can't find fdesc to send\n"); + return -1; + } + + pr_info("`- Found %d file\n", file_ids[i]); + fd = find_unused_fd(owner, -1); + + fle = try_file_master(tgt); + if (fle) { + e = dup_fdinfo(fle->fe, fd, 0); + if (!e) { + pr_err("Can't duplicate fdinfo for scm\n"); + return -1; + } + } else { + /* + * This can happen if the file in question is + * sent over the socket and closed. In this case + * we need to ... invent a new one! 
+ */ + + e = xmalloc(sizeof(*e)); + if (!e) + return -1; + + fdinfo_entry__init(e); + e->id = tgt->id; + e->type = tgt->ops->type; + e->fd = fd; + e->flags = 0; + } + + pr_info("scm: add %d -> %d.fd[%d]\n", tgt->id, vpid(owner), fd); + sfle = xmalloc(sizeof(*sfle)); + if (!sfle) + return -1; + + sfle->fle = collect_fd_to(vpid(owner), e, rsti(owner), tgt, false); + if (!sfle->fle) { + pr_err("Can't request new fle for scm\n"); + return -1; + } + + list_add_tail(&sfle->l, &ui->scm_fles); + fds[i] = fd; + } + + return 0; +} + +static int chk_restored_scms(struct unix_sk_info *ui) +{ + struct scm_fle *sf, *n; + + list_for_each_entry_safe(sf, n, &ui->scm_fles, l) { + if (sf->fle->stage != FLE_RESTORED) + return 1; + + /* Optimization for the next pass */ + list_del(&sf->l); + xfree(sf); + } + + return 0; +} + static int wake_connected_sockets(struct unix_sk_info *ui) { struct fdinfo_list_entry *fle; @@ -1322,12 +1438,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd) struct unix_sk_info *ui; int ret; + ui = container_of(d, struct unix_sk_info, d); + + /* FIXME -- only queue restore may be postponed */ + if (chk_restored_scms(ui)) { + pr_info("scm: Wait for tgt to restore\n"); + return 1; + } + fle = file_master(d); if (fle->stage >= FLE_OPEN) return post_open_unix_sk(d, fle->fe->fd); - ui = container_of(d, struct unix_sk_info, d); - if (inherited_fd(d, new_fd)) { ui->ue->uflags |= USK_INHERIT; ret = *new_fd >= 0 ? 
0 : -1; @@ -1440,6 +1562,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) ui->listen = 0; INIT_LIST_HEAD(&ui->connected); INIT_LIST_HEAD(&ui->node); + INIT_LIST_HEAD(&ui->scm_fles); ui->flags = 0; fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id); diff --git a/images/sk-packet.proto b/images/sk-packet.proto index 27b48e4..009b461 100644 --- a/images/sk-packet.proto +++ b/images/sk-packet.proto @@ -1,8 +1,14 @@ syntax = "proto2"; +message scm_entry { + required uint32 type = 1; + repeated uint32 rights = 2; +} + message sk_packet_entry { required uint32 id_for = 1; required uint32 length = 2; // optional bytes addr = 3; // optional sk_ucred_entry ucred = 128; + repeated scm_entry scm = 4; }