Message ID | 20180214112903.13460-5-ptikhomirov@virtuozzo.com |
---|---|
State | New |
Series | "don't use wrong pagemap (from other task) on pid reuse" |
Headers | show |
diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 9e8425504..d0cf922b7 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1290,7 +1290,7 @@ static int pre_dump_one_task(struct pstree_item *item) mdc.pre_dump = true; mdc.lazy = false; - ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); + ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl, NULL); if (ret) goto err_cure; @@ -1449,7 +1449,7 @@ static int dump_one_task(struct pstree_item *item) mdc.pre_dump = false; mdc.lazy = opts.lazy_pages; - ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); + ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl, &pps_buf); if (ret) goto err_cure; diff --git a/criu/include/mem.h b/criu/include/mem.h index bb897c599..aab8e183f 100644 --- a/criu/include/mem.h +++ b/criu/include/mem.h @@ -4,6 +4,8 @@ #include <stdbool.h> #include "int.h" #include "vma.pb-c.h" +#include "pid.h" +#include "proc_parse.h" struct parasite_ctl; struct vm_area_list; @@ -26,7 +28,8 @@ extern unsigned long dump_pages_args_size(struct vm_area_list *vmas); extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, - struct parasite_ctl *ctl); + struct parasite_ctl *ctl, + struct proc_pid_stat* stat); #define PME_PRESENT (1ULL << 63) #define PME_SWAP (1ULL << 62) diff --git a/criu/mem.c b/criu/mem.c index 4c6942a11..a1646fd7d 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -30,9 +30,11 @@ #include "fault-injection.h" #include "prctl.h" #include <compel/compel.h> +#include "proc_parse.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" +#include "images/stats.pb-c.h" static int task_reset_dirty_track(int pid) { @@ -294,7 +296,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasite_dump_pages_args *args, struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, - struct parasite_ctl *ctl) + struct parasite_ctl *ctl, + struct proc_pid_stat *stat) { pmc_t pmc = PMC_INIT; struct page_pipe *pp; @@ -303,6 +306,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, int ret = -1; unsigned cpp_flags = 0; unsigned long pmc_size; + bool possible_pid_reuse = false; if (opts.check_only) return 0; @@ -360,6 +364,49 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, xfer.parent = NULL + 1; } + if (xfer.parent) { + struct proc_pid_stat pps_buf; + static StatsEntry *stats; + unsigned long dump_ticks; + unsigned long clock_ticks; + + clock_ticks = sysconf(_SC_CLK_TCK); + if (clock_ticks == -1) { + pr_perror("Failed to get clock ticks via sysconf"); + goto out_xfer; + } + + if (stat) { + pps_buf = *stat; + } else { + ret = parse_pid_stat(item->pid->real, &pps_buf); + if (ret < 0) + goto out_xfer; + } + + if (!stats) { + stats = get_parent_stats(); + if (!stats) + goto out_xfer; + } + + if (stats->dump->has_dump_uptime) { + dump_ticks = stats->dump->dump_uptime/(USEC_PER_SEC / clock_ticks); + + if (pps_buf.start_time >= dump_ticks) { + /* Print warning when we are not sure */ + if (pps_buf.start_time == dump_ticks) + pr_warn("Will do full redump for pid=%d due " \ + "to possible pid reuse\n", + item->pid->real); + possible_pid_reuse = true; + } + } else + pr_warn_once("Parent image has no dump timestamp, " \ + "pid reuse detection OFF!\n"); + } + + /* * Step 1 -- generate the pagemap */ @@ -386,7 +433,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, else { again: ret = generate_iovs(vma_area, pp, map, &off, - has_parent); + has_parent && !possible_pid_reuse); if (ret == -EAGAIN) { BUG_ON(!(pp->flags & PP_CHUNK_MODE)); @@ -436,7 +483,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, - struct parasite_ctl *ctl) + struct parasite_ctl *ctl, + struct proc_pid_stat* stat) { int ret; struct parasite_dump_pages_args *pargs; @@ -463,7 +511,7 @@ int parasite_dump_pages_seized(struct pstree_item *item, return -1; } - ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl); + ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl, stat); if (ret) { pr_err("Can't dump page with parasite\n"); /* Parasite will unprotect VMAs after fail in fini() */ diff --git a/criu/stats.c b/criu/stats.c index 4823e48de..4d646c104 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -213,7 +213,7 @@ void write_stats(int what) display_stats(what, &stats); } -__maybe_unused StatsEntry *get_parent_stats(void) +StatsEntry *get_parent_stats(void) { struct cr_img *img; StatsEntry *se;
On Wed, Feb 14, 2018 at 02:28:59PM +0300, Pavel Tikhomirov wrote: > From: ptikhomirov <ptikhomirov@virtuozzo.com> > > We have a problem when a pid is reused between consequent dumps we can't > understand if pagemap and pages from images of parent dump are invalid > to restore these pid already. That can lead even to wrong memory > restored for these pid, see the test in last patch. > > So these is a try do separate processes with (likely) invalid previous > memory dump from processes with 100% valid previous dump. > > For that we use the value of /proc/<pid>/stat's start_time and also the > timestamp of each (pre)dump. If the start time is strictly less than the > timestamp, that means that the pagemap for these pid from previous dump > is valid - was done for exactly the same process. > > Creation time is in centiseconds by default so if predump is really fast > (<1csec) we can have false negative decisions for some processes, but in > case of long running processes we are fine. > > https://jira.sw.ru/browse/PSBM-67502 > > v2: remove __maybe_unused for get_parent_stats; fix get_parent_stats to > have static typing; print warning only if unsure; check has_dump_uptime > v3: read parent stats from image only once; reuse stat from previous > parse_pid_stat call on dump > > Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com> > --- > criu/cr-dump.c | 4 ++-- > criu/include/mem.h | 5 ++++- > criu/mem.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > criu/stats.c | 2 +- > 4 files changed, 59 insertions(+), 8 deletions(-) > > diff --git a/criu/cr-dump.c b/criu/cr-dump.c > index 9e8425504..d0cf922b7 100644 > --- a/criu/cr-dump.c > +++ b/criu/cr-dump.c > @@ -1290,7 +1290,7 @@ static int pre_dump_one_task(struct pstree_item *item) > mdc.pre_dump = true; > mdc.lazy = false; > > - ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); > + ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl, NULL); > if (ret) > goto err_cure; > > @@ -1449,7 +1449,7 @@ static int dump_one_task(struct pstree_item *item) > mdc.pre_dump = false; > mdc.lazy = opts.lazy_pages; > > - ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); > + ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl, &pps_buf); > if (ret) > goto err_cure; > > diff --git a/criu/include/mem.h b/criu/include/mem.h > index bb897c599..aab8e183f 100644 > --- a/criu/include/mem.h > +++ b/criu/include/mem.h > @@ -4,6 +4,8 @@ > #include <stdbool.h> > #include "int.h" > #include "vma.pb-c.h" > +#include "pid.h" > +#include "proc_parse.h" > > struct parasite_ctl; > struct vm_area_list; > @@ -26,7 +28,8 @@ extern unsigned long dump_pages_args_size(struct vm_area_list *vmas); > extern int parasite_dump_pages_seized(struct pstree_item *item, > struct vm_area_list *vma_area_list, > struct mem_dump_ctl *mdc, > - struct parasite_ctl *ctl); > + struct parasite_ctl *ctl, > + struct proc_pid_stat* stat); > > #define PME_PRESENT (1ULL << 63) > #define PME_SWAP (1ULL << 62) > diff --git a/criu/mem.c b/criu/mem.c > index 4c6942a11..a1646fd7d 100644 > --- a/criu/mem.c > +++ b/criu/mem.c > @@ -30,9 +30,11 @@ > #include "fault-injection.h" > #include "prctl.h" > #include <compel/compel.h> > +#include "proc_parse.h" > > #include "protobuf.h" > #include "images/pagemap.pb-c.h" > +#include "images/stats.pb-c.h" > > static int task_reset_dirty_track(int pid) > { > @@ -294,7 +296,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, > struct parasite_dump_pages_args *args, > struct vm_area_list *vma_area_list, > struct mem_dump_ctl *mdc, > - struct parasite_ctl *ctl) > + struct parasite_ctl *ctl, > + struct proc_pid_stat *stat) > { > pmc_t pmc = PMC_INIT; > struct page_pipe *pp; > @@ -303,6 +306,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, > int ret = -1; > unsigned cpp_flags = 0; > unsigned long pmc_size; > + bool possible_pid_reuse = false; > > if (opts.check_only) > return 0; > @@ -360,6 +364,49 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, > xfer.parent = NULL + 1; > } > > + if (xfer.parent) { pls, move this code in a separate function. __parasite_dump_pages_seized() is big already. > + struct proc_pid_stat pps_buf; > + static StatsEntry *stats; > + unsigned long dump_ticks; unsigned long long > + unsigned long clock_ticks; > + > + clock_ticks = sysconf(_SC_CLK_TCK); > + if (clock_ticks == -1) { > + pr_perror("Failed to get clock ticks via sysconf"); > + goto out_xfer; > + } > + > + if (stat) { > + pps_buf = *stat; > + } else { > + ret = parse_pid_stat(item->pid->real, &pps_buf); > + if (ret < 0) > + goto out_xfer; > + } > + > + if (!stats) { > + stats = get_parent_stats(); > + if (!stats) > + goto out_xfer; > + } > + > + if (stats->dump->has_dump_uptime) { > + dump_ticks = stats->dump->dump_uptime/(USEC_PER_SEC / clock_ticks); > + > + if (pps_buf.start_time >= dump_ticks) { > + /* Print warning when we are not sure */ > + if (pps_buf.start_time == dump_ticks) > + pr_warn("Will do full redump for pid=%d due " \ > + "to possible pid reuse\n", > + item->pid->real); > + possible_pid_reuse = true; > + } > + } else > + pr_warn_once("Parent image has no dump timestamp, " \ > + "pid reuse detection OFF!\n"); > + } > + > + > /* > * Step 1 -- generate the pagemap > */ > @@ -386,7 +433,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, > else { > again: > ret = generate_iovs(vma_area, pp, map, &off, > - has_parent); > + has_parent && !possible_pid_reuse); > if (ret == -EAGAIN) { > BUG_ON(!(pp->flags & PP_CHUNK_MODE)); > > @@ -436,7 +483,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, > int parasite_dump_pages_seized(struct pstree_item *item, > struct vm_area_list *vma_area_list, > struct mem_dump_ctl *mdc, > - struct parasite_ctl *ctl) > + struct parasite_ctl *ctl, > + struct proc_pid_stat* stat) > { > int ret; > struct parasite_dump_pages_args *pargs; > @@ -463,7 +511,7 @@ int parasite_dump_pages_seized(struct pstree_item *item, > return -1; > } > > - ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl); > + ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl, stat); > if (ret) { > pr_err("Can't dump page with parasite\n"); > /* Parasite will unprotect VMAs after fail in fini() */ > diff --git a/criu/stats.c b/criu/stats.c > index 4823e48de..4d646c104 100644 > --- a/criu/stats.c > +++ b/criu/stats.c > @@ -213,7 +213,7 @@ void write_stats(int what) > display_stats(what, &stats); > } > > -__maybe_unused StatsEntry *get_parent_stats(void) > +StatsEntry *get_parent_stats(void) > { > struct cr_img *img; > StatsEntry *se; > -- > 2.14.3 >