vmsplice user-buffer(grabbed by process_vm_readv) to page-pipe

Submitted by Abhishek Dubey on July 4, 2019, 6:47 p.m.

Details

Message ID CAODppzGFTYT81vtWVJ9eTBDEO=e5juO3UHBOH4gk2XWhSs7eag@mail.gmail.com
State New
Series "vmsplice user-buffer(grabbed by process_vm_readv) to page-pipe"
Headers show

Commit Message

Abhishek Dubey July 4, 2019, 6:47 p.m.
This patch implements usage of process_vm_readv syscall to collect memory
pages from target process during pre-dump. process_vm_readv collects pages
in user-buffer, which is later vmspliced to page-pipes.

Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
---
 criu/cr-dump.c           |  3 +-
 criu/include/page-xfer.h |  2 +-
 criu/mem.c               | 88
++++++++++++++++++------------------------------
 criu/page-pipe.c         |  4 +--
 criu/page-xfer.c         | 62 ++++++++++++++++++++++++++++++++--
 criu/shmem.c             | 19 +++--------
 6 files changed, 102 insertions(+), 76 deletions(-)

 static int next_data_segment(int fd, unsigned long pfn,
@@ -678,6 +668,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct
shmem_info *si)
  int err, ret = -1;
  unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
  unsigned long pages[2] = {};
+ size_t off = 0;

  nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;

@@ -726,7 +717,7 @@ again:
  }

  if (ret == -EAGAIN) {
- ret = dump_pages(pp, &xfer);
+ ret = dump_pages(pp, &xfer, &off);
  if (ret)
  goto err_xfer;
  page_pipe_reinit(pp);
@@ -742,7 +733,7 @@ again:
  cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]);
  cnt_add(CNT_SHPAGES_WRITTEN, pages[1]);

- ret = dump_pages(pp, &xfer);
+ ret = dump_pages(pp, &xfer, &off);

 err_xfer:
  xfer.close(&xfer);

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 7f2e5ed..ee5f4f3 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1501,6 +1501,7 @@  static int cr_pre_dump_finish(int status)
  struct parasite_ctl *ctl = dmpi(item)->parasite_ctl;
  struct page_pipe *mem_pp;
  struct page_xfer xfer;
+ size_t off = 0;

  if (!ctl)
  continue;
@@ -1512,7 +1513,7 @@  static int cr_pre_dump_finish(int status)
  goto err;

  mem_pp = dmpi(item)->mem_pp;
- ret = page_xfer_dump_pages(&xfer, mem_pp);
+ ret = page_xfer_dump_pages(item->pid->real, &xfer, mem_pp, &off);

  xfer.close(&xfer);

diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index fa72273..74d42f2 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -47,7 +47,7 @@  struct page_xfer {

 extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned
long id);
 struct page_pipe;
-extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *);
+extern int page_xfer_dump_pages(int pid, struct page_xfer *, struct
page_pipe *, size_t *poff);
 extern int connect_to_page_server_to_send(void);
 extern int connect_to_page_server_to_recv(int epfd);
 extern int disconnect_from_page_server(void);
diff --git a/criu/mem.c b/criu/mem.c
index 6a1a87a..844d726 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -260,39 +260,7 @@  static struct parasite_dump_pages_args
*prep_dump_pages_args(struct parasite_ctl
  return args;
 }

-static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
-      struct parasite_dump_pages_args *args)
-{
- struct page_pipe_buf *ppb;
- int ret = 0;
-
- debug_show_page_pipe(pp);
-
- /* Step 2 -- grab pages into page-pipe */
- list_for_each_entry(ppb, &pp->bufs, l) {
- args->nr_segs = ppb->nr_segs;
- args->nr_pages = ppb->pages_in;
- pr_debug("PPB: %d pages %d segs %u pipe %d off\n",
- args->nr_pages, args->nr_segs, ppb->pipe_size, args->off);
-
- ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
- if (ret < 0)
- return -1;
- ret = compel_util_send_fd(ctl, ppb->p[1]);
- if (ret)
- return -1;
-
- ret = compel_rpc_sync(PARASITE_CMD_DUMPPAGES, ctl);
- if (ret < 0)
- return -1;
-
- args->off += args->nr_segs;
- }
-
- return 0;
-}
-
-static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
+static int xfer_pages( int pid, struct page_pipe *pp, struct page_xfer
*xfer, size_t *poff)
 {
  int ret;

@@ -301,7 +269,7 @@  static int xfer_pages(struct page_pipe *pp, struct
page_xfer *xfer)
  *           pre-dump action (see pre_dump_one_task)
  */
  timing_start(TIME_MEMWRITE);
- ret = page_xfer_dump_pages(xfer, pp);
+ ret = page_xfer_dump_pages(pid, xfer, pp, poff);
  timing_stop(TIME_MEMWRITE);

  return ret;
@@ -351,7 +319,7 @@  static int generate_vma_iovs(struct pstree_item *item,
struct vma_area *vma,
      struct page_pipe *pp, struct page_xfer *xfer,
      struct parasite_dump_pages_args *args,
      struct parasite_ctl *ctl, pmc_t *pmc,
-     bool has_parent, bool pre_dump)
+     bool has_parent, bool pre_dump, size_t *poff)
 {
  u64 off = 0;
  u64 *map;
@@ -361,6 +329,12 @@  static int generate_vma_iovs(struct pstree_item *item,
struct vma_area *vma,
  !vma_area_is(vma, VMA_ANON_SHARED))
  return 0;

+ if (!(vma->e->prot & PROT_READ)){
+ if(pre_dump)
+ return 0;
+ has_parent = false;
+ }
+
  if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
  if (pre_dump)
  return 0;
@@ -379,9 +353,7 @@  again:
  if (ret == -EAGAIN) {
  BUG_ON(!(pp->flags & PP_CHUNK_MODE));

- ret = drain_pages(pp, ctl, args);
- if (!ret)
- ret = xfer_pages(pp, xfer);
+ ret = xfer_pages(item->pid->real, pp, xfer, poff);
  if (!ret) {
  page_pipe_reinit(pp);
  goto again;
@@ -406,6 +378,7 @@  static int __parasite_dump_pages_seized(struct
pstree_item *item,
  unsigned long pmc_size;
  int possible_pid_reuse = 0;
  bool has_parent;
+ size_t poff = 0;

  pr_info("\n");
  pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES,
item->pid->real);
@@ -470,11 +443,12 @@  static int __parasite_dump_pages_seized(struct
pstree_item *item,
  /*
  * Step 1 -- generate the pagemap
  */
+ poff = 0;
  args->off = 0;
  has_parent = !!xfer.parent && !possible_pid_reuse;
  list_for_each_entry(vma_area, &vma_area_list->h, list) {
  ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
- &pmc, has_parent, mdc->pre_dump);
+ &pmc, has_parent, mdc->pre_dump, &poff);
  if (ret < 0)
  goto out_xfer;
  }
@@ -482,9 +456,8 @@  static int __parasite_dump_pages_seized(struct
pstree_item *item,
  if (mdc->lazy)
  memcpy(pargs_iovs(args), pp->iovs,
        sizeof(struct iovec) * pp->nr_iovs);
- ret = drain_pages(pp, ctl, args);
- if (!ret && !mdc->pre_dump)
- ret = xfer_pages(pp, &xfer);
+ if(!mdc->pre_dump)
+ ret = xfer_pages(item->pid->real, pp, &xfer, &poff);
  if (ret)
  goto out_xfer;

@@ -529,17 +502,18 @@  int parasite_dump_pages_seized(struct pstree_item
*item,
  *
  * Afterwards -- reprotect memory back.
  */
+ if(!mdc->pre_dump){
+ pargs->add_prot = PROT_READ;
+ ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
+ if (ret) {
+ pr_err("Can't dump unprotect vmas with parasite\n");
+ return ret;
+ }

- pargs->add_prot = PROT_READ;
- ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
- if (ret) {
- pr_err("Can't dump unprotect vmas with parasite\n");
- return ret;
- }
-
- if (fault_injected(FI_DUMP_PAGES)) {
- pr_err("fault: Dump VMA pages failure!\n");
- return -1;
+ if (fault_injected(FI_DUMP_PAGES)) {
+ pr_err("fault: Dump VMA pages failure!\n");
+ return -1;
+ }
  }

  ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
@@ -549,10 +523,12 @@  int parasite_dump_pages_seized(struct pstree_item
*item,
  return ret;
  }

- pargs->add_prot = 0;
- if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
- pr_err("Can't rollback unprotected vmas with parasite\n");
- ret = -1;
+ if(!mdc->pre_dump){
+ pargs->add_prot = 0;
+ if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
+ pr_err("Can't rollback unprotected vmas with parasite\n");
+ ret = -1;
+ }
  }

  return ret;
diff --git a/criu/page-pipe.c b/criu/page-pipe.c
index c32b893..c70ba70 100644
--- a/criu/page-pipe.c
+++ b/criu/page-pipe.c
@@ -33,7 +33,7 @@  static int __ppb_resize_pipe(struct page_pipe_buf *ppb,
unsigned long new_size)
 {
  int ret;

- ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
+ ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (new_size * PAGE_SIZE) + 1);
  if (ret < 0)
  return -1;

@@ -41,7 +41,7 @@  static int __ppb_resize_pipe(struct page_pipe_buf *ppb,
unsigned long new_size)
  BUG_ON(ret < ppb->pipe_size);

  pr_debug("Grow pipe %x -> %x\n", ppb->pipe_size, ret);
- ppb->pipe_size = ret;
+ ppb->pipe_size = ret - 1;

  return 0;
 }
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 9cdffd8..9262221 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -496,19 +496,75 @@  static inline u32 ppb_xfer_flags(struct page_xfer
*xfer, struct page_pipe_buf *p
  return PE_PRESENT;
 }

-int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
+static char userbuf[4 << 20];
+
+int page_xfer_dump_pages(int pid, struct page_xfer *xfer, struct page_pipe
*pp, size_t *poff)
 {
  struct page_pipe_buf *ppb;
  unsigned int cur_hole = 0;
- int ret;
+ unsigned int ret, ret2;
+ size_t off;
+ struct iovec *remoteiovs = pp->iovs;

  pr_debug("Transferring pages:\n");

+ off = *poff;
+
  list_for_each_entry(ppb, &pp->bufs, l) {
  unsigned int i;

  pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs);

+ size_t bufsize = sizeof(userbuf);
+ struct iovec bufvec = {.iov_len = bufsize};
+ bufvec.iov_base = userbuf;
+
+ ret = syscall(__NR_process_vm_readv, pid, &bufvec, 1, \
+ &remoteiovs[off], ppb->nr_segs, 0);
+ if (ret < 0) {
+ switch (errno) {
+ case EINVAL:
+ pr_debug("process_vm_readv: Invalid arguments\n");
+ break;
+ case EFAULT:
+ pr_debug("process_vm_readv: Unable to access remote iov\n");
+ break;
+ case ENOMEM:
+ pr_debug("process_vm_readv: Unable to allocate memory\n");
+ break;
+ case EPERM:
+ pr_debug("process_vm_readv: Insufficient privileges\n");
+ break;
+ case ESRCH:
+ pr_debug("process_vm_readv: Target process doesn't exist\n");
+ break;
+ default:
+ pr_debug("process_vm_readv: Uncategorised error\n");
+ }
+ return 0;
+ }
+
+ /* Handling partial reads due to modified mappings*/
+
+ if(ret != ppb->pages_in * PAGE_SIZE){
+ pr_debug("Can't read remote iovs (%d(%lu)/%d)\n", ret,\
+ (unsigned long int)PAGE_SIZE * ppb->pages_in, ppb->pages_in);
+ continue;
+ }
+
+ bufvec.iov_len = ret;
+ ret2 = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK);
+
+ if(ret2 == -1){
+ pr_debug("vmsplice: Failed to splice user buffer to pipe\n");
+ continue;
+ }
+
+ if(ret != ret2){
+ pr_debug("Partial splice from user buffer to pipe (%d)\n", ret2);
+ continue;
+ }
+
  for (i = 0; i < ppb->nr_segs; i++) {
  struct iovec iov = ppb->iov[i];
  u32 flags;
@@ -530,8 +586,10 @@  int page_xfer_dump_pages(struct page_xfer *xfer,
struct page_pipe *pp)
  ppb->p[0], iov.iov_len))
  return -1;
  }
+ off += ppb->nr_segs;
  }

+ *poff = off;
  return dump_holes(xfer, pp, &cur_hole, NULL);
 }

diff --git a/criu/shmem.c b/criu/shmem.c
index 03b088f..a797bde 100644
--- a/criu/shmem.c
+++ b/criu/shmem.c
@@ -629,19 +629,9 @@  int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
  return 0;
 }

-static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer)
+static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, size_t
*off)
 {
- struct page_pipe_buf *ppb;
-
- list_for_each_entry(ppb, &pp->bufs, l)
- if (vmsplice(ppb->p[1], ppb->iov, ppb->nr_segs,
- SPLICE_F_GIFT | SPLICE_F_NONBLOCK) !=
- ppb->pages_in * PAGE_SIZE) {
- pr_perror("Can't get shmem into page-pipe");
- return -1;
- }
-
- return page_xfer_dump_pages(xfer, pp);
+ return page_xfer_dump_pages(getpid(), xfer, pp, off);
 }


Comments

Radostin Stoyanov July 4, 2019, 9:14 p.m.
Hi Abhishek,

'git am' fails to apply this patch.

Would it be possible to resend it?

You can use the following command for that:

$ git send-email -1 --to=criu@openvz.org


On 04/07/2019 19:47, Abhishek Dubey wrote:
> This patch implements usage of process_vm_readv syscall to collect 
> memory pages from target process during pre-dump. process_vm_readv 
> collects pages in user-buffer, which is later vmspliced to page-pipes.
>
> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com 
> <mailto:dubeyabhishek777@gmail.com>>
> ---
>  criu/cr-dump.c           |  3 +-
>  criu/include/page-xfer.h |  2 +-
>  criu/mem.c               | 88 
> ++++++++++++++++++------------------------------
>  criu/page-pipe.c         |  4 +--
>  criu/page-xfer.c         | 62 ++++++++++++++++++++++++++++++++--
>  criu/shmem.c             | 19 +++--------
>  6 files changed, 102 insertions(+), 76 deletions(-)
>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 7f2e5ed..ee5f4f3 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1501,6 +1501,7 @@ static int cr_pre_dump_finish(int status)
>   struct parasite_ctl *ctl = dmpi(item)->parasite_ctl;
>   struct page_pipe *mem_pp;
>   struct page_xfer xfer;
> + size_t off = 0;
>
>   if (!ctl)
>   continue;
> @@ -1512,7 +1513,7 @@ static int cr_pre_dump_finish(int status)
>   goto err;
>
>   mem_pp = dmpi(item)->mem_pp;
> - ret = page_xfer_dump_pages(&xfer, mem_pp);
> + ret = page_xfer_dump_pages(item->pid->real, &xfer, mem_pp, &off);
>
>   xfer.close(&xfer);
>
> diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
> index fa72273..74d42f2 100644
> --- a/criu/include/page-xfer.h
> +++ b/criu/include/page-xfer.h
> @@ -47,7 +47,7 @@ struct page_xfer {
>
>  extern int open_page_xfer(struct page_xfer *xfer, int fd_type, 
> unsigned long id);
>  struct page_pipe;
> -extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *);
> +extern int page_xfer_dump_pages(int pid, struct page_xfer *, struct 
> page_pipe *, size_t *poff);
>  extern int connect_to_page_server_to_send(void);
>  extern int connect_to_page_server_to_recv(int epfd);
>  extern int disconnect_from_page_server(void);
> diff --git a/criu/mem.c b/criu/mem.c
> index 6a1a87a..844d726 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -260,39 +260,7 @@ static struct parasite_dump_pages_args 
> *prep_dump_pages_args(struct parasite_ctl
>   return args;
>  }
>
> -static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
> -      struct parasite_dump_pages_args *args)
> -{
> - struct page_pipe_buf *ppb;
> - int ret = 0;
> -
> - debug_show_page_pipe(pp);
> -
> - /* Step 2 -- grab pages into page-pipe */
> - list_for_each_entry(ppb, &pp->bufs, l) {
> - args->nr_segs = ppb->nr_segs;
> - args->nr_pages = ppb->pages_in;
> - pr_debug("PPB: %d pages %d segs %u pipe %d off\n",
> - args->nr_pages, args->nr_segs, ppb->pipe_size, args->off);
> -
> - ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
> - if (ret < 0)
> - return -1;
> - ret = compel_util_send_fd(ctl, ppb->p[1]);
> - if (ret)
> - return -1;
> -
> - ret = compel_rpc_sync(PARASITE_CMD_DUMPPAGES, ctl);
> - if (ret < 0)
> - return -1;
> -
> - args->off += args->nr_segs;
> - }
> -
> - return 0;
> -}
> -
> -static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
> +static int xfer_pages( int pid, struct page_pipe *pp, struct 
> page_xfer *xfer, size_t *poff)
>  {
>   int ret;
>
> @@ -301,7 +269,7 @@ static int xfer_pages(struct page_pipe *pp, struct 
> page_xfer *xfer)
>   *           pre-dump action (see pre_dump_one_task)
>   */
>   timing_start(TIME_MEMWRITE);
> - ret = page_xfer_dump_pages(xfer, pp);
> + ret = page_xfer_dump_pages(pid, xfer, pp, poff);
>   timing_stop(TIME_MEMWRITE);
>
>   return ret;
> @@ -351,7 +319,7 @@ static int generate_vma_iovs(struct pstree_item 
> *item, struct vma_area *vma,
>       struct page_pipe *pp, struct page_xfer *xfer,
>       struct parasite_dump_pages_args *args,
>       struct parasite_ctl *ctl, pmc_t *pmc,
> -     bool has_parent, bool pre_dump)
> +     bool has_parent, bool pre_dump, size_t *poff)
>  {
>   u64 off = 0;
>   u64 *map;
> @@ -361,6 +329,12 @@ static int generate_vma_iovs(struct pstree_item 
> *item, struct vma_area *vma,
>   !vma_area_is(vma, VMA_ANON_SHARED))
>   return 0;
>
> + if (!(vma->e->prot & PROT_READ)){
> + if(pre_dump)
> + return 0;
> + has_parent = false;
> + }
> +
>   if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
>   if (pre_dump)
>   return 0;
> @@ -379,9 +353,7 @@ again:
>   if (ret == -EAGAIN) {
>   BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>
> - ret = drain_pages(pp, ctl, args);
> - if (!ret)
> - ret = xfer_pages(pp, xfer);
> + ret = xfer_pages(item->pid->real, pp, xfer, poff);
>   if (!ret) {
>   page_pipe_reinit(pp);
>   goto again;
> @@ -406,6 +378,7 @@ static int __parasite_dump_pages_seized(struct 
> pstree_item *item,
>   unsigned long pmc_size;
>   int possible_pid_reuse = 0;
>   bool has_parent;
> + size_t poff = 0;
>
>   pr_info("\n");
>   pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, 
> item->pid->real);
> @@ -470,11 +443,12 @@ static int __parasite_dump_pages_seized(struct 
> pstree_item *item,
>   /*
>   * Step 1 -- generate the pagemap
>   */
> + poff = 0;
>   args->off = 0;
>   has_parent = !!xfer.parent && !possible_pid_reuse;
>   list_for_each_entry(vma_area, &vma_area_list->h, list) {
>   ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
> - &pmc, has_parent, mdc->pre_dump);
> + &pmc, has_parent, mdc->pre_dump, &poff);
>   if (ret < 0)
>   goto out_xfer;
>   }
> @@ -482,9 +456,8 @@ static int __parasite_dump_pages_seized(struct 
> pstree_item *item,
>   if (mdc->lazy)
>   memcpy(pargs_iovs(args), pp->iovs,
>         sizeof(struct iovec) * pp->nr_iovs);
> - ret = drain_pages(pp, ctl, args);
> - if (!ret && !mdc->pre_dump)
> - ret = xfer_pages(pp, &xfer);
> + if(!mdc->pre_dump)
> + ret = xfer_pages(item->pid->real, pp, &xfer, &poff);
>   if (ret)
>   goto out_xfer;
>
> @@ -529,17 +502,18 @@ int parasite_dump_pages_seized(struct 
> pstree_item *item,
>   *
>   * Afterwards -- reprotect memory back.
>   */
> + if(!mdc->pre_dump){
> + pargs->add_prot = PROT_READ;
> + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
> + if (ret) {
> + pr_err("Can't dump unprotect vmas with parasite\n");
> + return ret;
> + }
>
> - pargs->add_prot = PROT_READ;
> - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
> - if (ret) {
> - pr_err("Can't dump unprotect vmas with parasite\n");
> - return ret;
> - }
> -
> - if (fault_injected(FI_DUMP_PAGES)) {
> - pr_err("fault: Dump VMA pages failure!\n");
> - return -1;
> + if (fault_injected(FI_DUMP_PAGES)) {
> + pr_err("fault: Dump VMA pages failure!\n");
> + return -1;
> + }
>   }
>
>   ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, 
> ctl);
> @@ -549,10 +523,12 @@ int parasite_dump_pages_seized(struct 
> pstree_item *item,
>   return ret;
>   }
>
> - pargs->add_prot = 0;
> - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
> - pr_err("Can't rollback unprotected vmas with parasite\n");
> - ret = -1;
> + if(!mdc->pre_dump){
> + pargs->add_prot = 0;
> + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
> + pr_err("Can't rollback unprotected vmas with parasite\n");
> + ret = -1;
> + }
>   }
>
>   return ret;
> diff --git a/criu/page-pipe.c b/criu/page-pipe.c
> index c32b893..c70ba70 100644
> --- a/criu/page-pipe.c
> +++ b/criu/page-pipe.c
> @@ -33,7 +33,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf 
> *ppb, unsigned long new_size)
>  {
>   int ret;
>
> - ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
> + ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (new_size * PAGE_SIZE) + 1);
>   if (ret < 0)
>   return -1;
>
> @@ -41,7 +41,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf 
> *ppb, unsigned long new_size)
>   BUG_ON(ret < ppb->pipe_size);
>
>   pr_debug("Grow pipe %x -> %x\n", ppb->pipe_size, ret);
> - ppb->pipe_size = ret;
> + ppb->pipe_size = ret - 1;
>
>   return 0;
>  }
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> index 9cdffd8..9262221 100644
> --- a/criu/page-xfer.c
> +++ b/criu/page-xfer.c
> @@ -496,19 +496,75 @@ static inline u32 ppb_xfer_flags(struct 
> page_xfer *xfer, struct page_pipe_buf *p
>   return PE_PRESENT;
>  }
>
> -int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
> +static char userbuf[4 << 20];
> +
> +int page_xfer_dump_pages(int pid, struct page_xfer *xfer, struct 
> page_pipe *pp, size_t *poff)
>  {
>   struct page_pipe_buf *ppb;
>   unsigned int cur_hole = 0;
> - int ret;
> + unsigned int ret, ret2;
> + size_t off;
> + struct iovec *remoteiovs = pp->iovs;
>
>   pr_debug("Transferring pages:\n");
>
> + off = *poff;
> +
>   list_for_each_entry(ppb, &pp->bufs, l) {
>   unsigned int i;
>
>   pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs);
>
> + size_t bufsize = sizeof(userbuf);
> + struct iovec bufvec = {.iov_len = bufsize};
> + bufvec.iov_base = userbuf;
> +
> + ret = syscall(__NR_process_vm_readv, pid, &bufvec, 1, \
> + &remoteiovs[off], ppb->nr_segs, 0);
> + if (ret < 0) {
> + switch (errno) {
> + case EINVAL:
> + pr_debug("process_vm_readv: Invalid arguments\n");
> + break;
> + case EFAULT:
> + pr_debug("process_vm_readv: Unable to access remote iov\n");
> + break;
> + case ENOMEM:
> + pr_debug("process_vm_readv: Unable to allocate memory\n");
> + break;
> + case EPERM:
> + pr_debug("process_vm_readv: Insufficient privileges\n");
> + break;
> + case ESRCH:
> + pr_debug("process_vm_readv: Target process doesn't exist\n");
> + break;
> + default:
> + pr_debug("process_vm_readv: Uncategorised error\n");
> + }
> + return 0;
> + }
> +
> + /* Handling partial reads due to modified mappings*/
> +
> + if(ret != ppb->pages_in * PAGE_SIZE){
> + pr_debug("Can't read remote iovs (%d(%lu)/%d)\n", ret,\
> + (unsigned long int)PAGE_SIZE * ppb->pages_in, ppb->pages_in);
> + continue;
> + }
> +
> + bufvec.iov_len = ret;
> + ret2 = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK);
> +
> + if(ret2 == -1){
> + pr_debug("vmsplice: Failed to splice user buffer to pipe\n");
> + continue;
> + }
> +
> + if(ret != ret2){
> + pr_debug("Partial splice from user buffer to pipe (%d)\n", ret2);
> + continue;
> + }
> +
>   for (i = 0; i < ppb->nr_segs; i++) {
>   struct iovec iov = ppb->iov[i];
>   u32 flags;
> @@ -530,8 +586,10 @@ int page_xfer_dump_pages(struct page_xfer *xfer, 
> struct page_pipe *pp)
>   ppb->p[0], iov.iov_len))
>   return -1;
>   }
> + off += ppb->nr_segs;
>   }
>
> + *poff = off;
>   return dump_holes(xfer, pp, &cur_hole, NULL);
>  }
>
> diff --git a/criu/shmem.c b/criu/shmem.c
> index 03b088f..a797bde 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -629,19 +629,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 
> *map)
>   return 0;
>  }
>
> -static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer)
> +static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, 
> size_t *off)
>  {
> - struct page_pipe_buf *ppb;
> -
> - list_for_each_entry(ppb, &pp->bufs, l)
> - if (vmsplice(ppb->p[1], ppb->iov, ppb->nr_segs,
> - SPLICE_F_GIFT | SPLICE_F_NONBLOCK) !=
> - ppb->pages_in * PAGE_SIZE) {
> - pr_perror("Can't get shmem into page-pipe");
> - return -1;
> - }
> -
> - return page_xfer_dump_pages(xfer, pp);
> + return page_xfer_dump_pages(getpid(), xfer, pp, off);
>  }
>
>  static int next_data_segment(int fd, unsigned long pfn,
> @@ -678,6 +668,7 @@ static int do_dump_one_shmem(int fd, void *addr, 
> struct shmem_info *si)
>   int err, ret = -1;
>   unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
>   unsigned long pages[2] = {};
> + size_t off = 0;
>
>   nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
>
> @@ -726,7 +717,7 @@ again:
>   }
>
>   if (ret == -EAGAIN) {
> - ret = dump_pages(pp, &xfer);
> + ret = dump_pages(pp, &xfer, &off);
>   if (ret)
>   goto err_xfer;
>   page_pipe_reinit(pp);
> @@ -742,7 +733,7 @@ again:
>   cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]);
>   cnt_add(CNT_SHPAGES_WRITTEN, pages[1]);
>
> - ret = dump_pages(pp, &xfer);
> + ret = dump_pages(pp, &xfer, &off);
>
>  err_xfer:
>   xfer.close(&xfer);
> -- 
> 2.7.4
>
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Andrey Vagin July 24, 2019, 11:22 p.m.
or you can create a github pull-request...

On Thu, Jul 4, 2019 at 2:14 PM Radostin Stoyanov <rstoyanov1@gmail.com> wrote:
>
> Hi Abhishek,
>
> 'git am' fails to apply this patch.
>
> Would it be possible to resend it?
>
> You can use the following command for that:
>
> $ git send-email -1 --to=criu@openvz.org
>
>
> On 04/07/2019 19:47, Abhishek Dubey wrote:
>
> This patch implements usage of process_vm_readv syscall to collect memory pages from target process during pre-dump. process_vm_readv collects pages in user-buffer, which is later vmspliced to page-pipes.
>
> Signed-off-by: Abhishek Dubey <dubeyabhishek777@gmail.com>
> ---
>  criu/cr-dump.c           |  3 +-
>  criu/include/page-xfer.h |  2 +-
>  criu/mem.c               | 88 ++++++++++++++++++------------------------------
>  criu/page-pipe.c         |  4 +--
>  criu/page-xfer.c         | 62 ++++++++++++++++++++++++++++++++--
>  criu/shmem.c             | 19 +++--------
>  6 files changed, 102 insertions(+), 76 deletions(-)
>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 7f2e5ed..ee5f4f3 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1501,6 +1501,7 @@ static int cr_pre_dump_finish(int status)
>   struct parasite_ctl *ctl = dmpi(item)->parasite_ctl;
>   struct page_pipe *mem_pp;
>   struct page_xfer xfer;
> + size_t off = 0;
>
>   if (!ctl)
>   continue;
> @@ -1512,7 +1513,7 @@ static int cr_pre_dump_finish(int status)
>   goto err;
>
>   mem_pp = dmpi(item)->mem_pp;
> - ret = page_xfer_dump_pages(&xfer, mem_pp);
> + ret = page_xfer_dump_pages(item->pid->real, &xfer, mem_pp, &off);
>
>   xfer.close(&xfer);
>
> diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
> index fa72273..74d42f2 100644
> --- a/criu/include/page-xfer.h
> +++ b/criu/include/page-xfer.h
> @@ -47,7 +47,7 @@ struct page_xfer {
>
>  extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id);
>  struct page_pipe;
> -extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *);
> +extern int page_xfer_dump_pages(int pid, struct page_xfer *, struct page_pipe *, size_t *poff);
>  extern int connect_to_page_server_to_send(void);
>  extern int connect_to_page_server_to_recv(int epfd);
>  extern int disconnect_from_page_server(void);
> diff --git a/criu/mem.c b/criu/mem.c
> index 6a1a87a..844d726 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -260,39 +260,7 @@ static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
>   return args;
>  }
>
> -static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
> -      struct parasite_dump_pages_args *args)
> -{
> - struct page_pipe_buf *ppb;
> - int ret = 0;
> -
> - debug_show_page_pipe(pp);
> -
> - /* Step 2 -- grab pages into page-pipe */
> - list_for_each_entry(ppb, &pp->bufs, l) {
> - args->nr_segs = ppb->nr_segs;
> - args->nr_pages = ppb->pages_in;
> - pr_debug("PPB: %d pages %d segs %u pipe %d off\n",
> - args->nr_pages, args->nr_segs, ppb->pipe_size, args->off);
> -
> - ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
> - if (ret < 0)
> - return -1;
> - ret = compel_util_send_fd(ctl, ppb->p[1]);
> - if (ret)
> - return -1;
> -
> - ret = compel_rpc_sync(PARASITE_CMD_DUMPPAGES, ctl);
> - if (ret < 0)
> - return -1;
> -
> - args->off += args->nr_segs;
> - }
> -
> - return 0;
> -}
> -
> -static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
> +static int xfer_pages( int pid, struct page_pipe *pp, struct page_xfer *xfer, size_t *poff)
>  {
>   int ret;
>
> @@ -301,7 +269,7 @@ static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
>   *           pre-dump action (see pre_dump_one_task)
>   */
>   timing_start(TIME_MEMWRITE);
> - ret = page_xfer_dump_pages(xfer, pp);
> + ret = page_xfer_dump_pages(pid, xfer, pp, poff);
>   timing_stop(TIME_MEMWRITE);
>
>   return ret;
> @@ -351,7 +319,7 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>       struct page_pipe *pp, struct page_xfer *xfer,
>       struct parasite_dump_pages_args *args,
>       struct parasite_ctl *ctl, pmc_t *pmc,
> -     bool has_parent, bool pre_dump)
> +     bool has_parent, bool pre_dump, size_t *poff)
>  {
>   u64 off = 0;
>   u64 *map;
> @@ -361,6 +329,12 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>   !vma_area_is(vma, VMA_ANON_SHARED))
>   return 0;
>
> + if (!(vma->e->prot & PROT_READ)){
> + if(pre_dump)
> + return 0;
> + has_parent = false;
> + }
> +
>   if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
>   if (pre_dump)
>   return 0;
> @@ -379,9 +353,7 @@ again:
>   if (ret == -EAGAIN) {
>   BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>
> - ret = drain_pages(pp, ctl, args);
> - if (!ret)
> - ret = xfer_pages(pp, xfer);
> + ret = xfer_pages(item->pid->real, pp, xfer, poff);
>   if (!ret) {
>   page_pipe_reinit(pp);
>   goto again;
> @@ -406,6 +378,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   unsigned long pmc_size;
>   int possible_pid_reuse = 0;
>   bool has_parent;
> + size_t poff = 0;
>
>   pr_info("\n");
>   pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
> @@ -470,11 +443,12 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   /*
>   * Step 1 -- generate the pagemap
>   */
> + poff = 0;
>   args->off = 0;
>   has_parent = !!xfer.parent && !possible_pid_reuse;
>   list_for_each_entry(vma_area, &vma_area_list->h, list) {
>   ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
> - &pmc, has_parent, mdc->pre_dump);
> + &pmc, has_parent, mdc->pre_dump, &poff);
>   if (ret < 0)
>   goto out_xfer;
>   }
> @@ -482,9 +456,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   if (mdc->lazy)
>   memcpy(pargs_iovs(args), pp->iovs,
>         sizeof(struct iovec) * pp->nr_iovs);
> - ret = drain_pages(pp, ctl, args);
> - if (!ret && !mdc->pre_dump)
> - ret = xfer_pages(pp, &xfer);
> + if(!mdc->pre_dump)
> + ret = xfer_pages(item->pid->real, pp, &xfer, &poff);
>   if (ret)
>   goto out_xfer;
>
> @@ -529,17 +502,18 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>   *
>   * Afterwards -- reprotect memory back.
>   */
> + if(!mdc->pre_dump){
> + pargs->add_prot = PROT_READ;
> + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
> + if (ret) {
> + pr_err("Can't dump unprotect vmas with parasite\n");
> + return ret;
> + }
>
> - pargs->add_prot = PROT_READ;
> - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
> - if (ret) {
> - pr_err("Can't dump unprotect vmas with parasite\n");
> - return ret;
> - }
> -
> - if (fault_injected(FI_DUMP_PAGES)) {
> - pr_err("fault: Dump VMA pages failure!\n");
> - return -1;
> + if (fault_injected(FI_DUMP_PAGES)) {
> + pr_err("fault: Dump VMA pages failure!\n");
> + return -1;
> + }
>   }
>
>   ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
> @@ -549,10 +523,12 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>   return ret;
>   }
>
> - pargs->add_prot = 0;
> - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
> - pr_err("Can't rollback unprotected vmas with parasite\n");
> - ret = -1;
> + if(!mdc->pre_dump){
> + pargs->add_prot = 0;
> + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
> + pr_err("Can't rollback unprotected vmas with parasite\n");
> + ret = -1;
> + }
>   }
>
>   return ret;
> diff --git a/criu/page-pipe.c b/criu/page-pipe.c
> index c32b893..c70ba70 100644
> --- a/criu/page-pipe.c
> +++ b/criu/page-pipe.c
> @@ -33,7 +33,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)
>  {
>   int ret;
>
> - ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
> + ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (new_size * PAGE_SIZE) + 1);
>   if (ret < 0)
>   return -1;
>
> @@ -41,7 +41,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)
>   BUG_ON(ret < ppb->pipe_size);
>
>   pr_debug("Grow pipe %x -> %x\n", ppb->pipe_size, ret);
> - ppb->pipe_size = ret;
> + ppb->pipe_size = ret - 1;
>
>   return 0;
>  }
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> index 9cdffd8..9262221 100644
> --- a/criu/page-xfer.c
> +++ b/criu/page-xfer.c
> @@ -496,19 +496,75 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p
>   return PE_PRESENT;
>  }
>
> -int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
> +static char userbuf[4 << 20];
> +
> +int page_xfer_dump_pages(int pid, struct page_xfer *xfer, struct page_pipe *pp, size_t *poff)
>  {
>   struct page_pipe_buf *ppb;
>   unsigned int cur_hole = 0;
> - int ret;
> + unsigned int ret, ret2;
> + size_t off;
> + struct iovec *remoteiovs = pp->iovs;
>
>   pr_debug("Transferring pages:\n");
>
> + off = *poff;
> +
>   list_for_each_entry(ppb, &pp->bufs, l) {
>   unsigned int i;
>
>   pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs);
>
> + size_t bufsize = sizeof(userbuf);
> + struct iovec bufvec = {.iov_len = bufsize};
> + bufvec.iov_base = userbuf;
> +
> + ret = syscall(__NR_process_vm_readv, pid, &bufvec, 1, \
> + &remoteiovs[off], ppb->nr_segs, 0);
> + if (ret < 0) {
> + switch (errno) {
> + case EINVAL:
> + pr_debug("process_vm_readv: Invalid arguments\n");
> + break;
> + case EFAULT:
> + pr_debug("process_vm_readv: Unable to access remote iov\n");
> + break;
> + case ENOMEM:
> + pr_debug("process_vm_readv: Unable to allocate memory\n");
> + break;
> + case EPERM:
> + pr_debug("process_vm_readv: Insufficient privileges\n");
> + break;
> + case ESRCH:
> + pr_debug("process_vm_readv: Target process doesn't exist\n");
> + break;
> + default:
> + pr_debug("process_vm_readv: Uncategorised error\n");
> + }
> + return 0;
> + }
> +
> + /* Handling partial reads due to modified mappings*/
> +
> + if(ret != ppb->pages_in * PAGE_SIZE){
> + pr_debug("Can't read remote iovs (%d(%lu)/%d)\n", ret,\
> + (unsigned long int)PAGE_SIZE * ppb->pages_in, ppb->pages_in);
> + continue;
> + }
> +
> + bufvec.iov_len = ret;
> + ret2 = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK);
> +
> + if(ret2 == -1){
> + pr_debug("vmsplice: Failed to splice user buffer to pipe\n");
> + continue;
> + }
> +
> + if(ret != ret2){
> + pr_debug("Partial splice from user buffer to pipe (%d)\n", ret2);
> + continue;
> + }
> +
>   for (i = 0; i < ppb->nr_segs; i++) {
>   struct iovec iov = ppb->iov[i];
>   u32 flags;
> @@ -530,8 +586,10 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
>   ppb->p[0], iov.iov_len))
>   return -1;
>   }
> + off += ppb->nr_segs;
>   }
>
> + *poff = off;
>   return dump_holes(xfer, pp, &cur_hole, NULL);
>  }
>
> diff --git a/criu/shmem.c b/criu/shmem.c
> index 03b088f..a797bde 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -629,19 +629,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
>   return 0;
>  }
>
> -static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer)
> +static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, size_t *off)
>  {
> - struct page_pipe_buf *ppb;
> -
> - list_for_each_entry(ppb, &pp->bufs, l)
> - if (vmsplice(ppb->p[1], ppb->iov, ppb->nr_segs,
> - SPLICE_F_GIFT | SPLICE_F_NONBLOCK) !=
> - ppb->pages_in * PAGE_SIZE) {
> - pr_perror("Can't get shmem into page-pipe");
> - return -1;
> - }
> -
> - return page_xfer_dump_pages(xfer, pp);
> + return page_xfer_dump_pages(getpid(), xfer, pp, off);
>  }
>
>  static int next_data_segment(int fd, unsigned long pfn,
> @@ -678,6 +668,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si)
>   int err, ret = -1;
>   unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
>   unsigned long pages[2] = {};
> + size_t off = 0;
>
>   nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
>
> @@ -726,7 +717,7 @@ again:
>   }
>
>   if (ret == -EAGAIN) {
> - ret = dump_pages(pp, &xfer);
> + ret = dump_pages(pp, &xfer, &off);
>   if (ret)
>   goto err_xfer;
>   page_pipe_reinit(pp);
> @@ -742,7 +733,7 @@ again:
>   cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]);
>   cnt_add(CNT_SHPAGES_WRITTEN, pages[1]);
>
> - ret = dump_pages(pp, &xfer);
> + ret = dump_pages(pp, &xfer, &off);
>
>  err_xfer:
>   xfer.close(&xfer);
> --
> 2.7.4
>
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
>
>