shmem: use lseek(SEEK_DATA) instead of mincore

Submitted by Andrei Vagin on Oct. 29, 2016, 5:36 a.m.

Details

Message ID 1477719396-25657-1-git-send-email-avagin@openvz.org
State Superseded
Series "shmem: use lseek(SEEK_DATA) instead of mincore"
Headers show

Commit Message

Andrei Vagin Oct. 29, 2016, 5:36 a.m.
From: Andrei Vagin <avagin@virtuozzo.com>

When pages are swapped out we can't detect their presence
with mincore.

Pavel found that lseek(SEEK_DATA, SEEK_HOLE) can show which
pages are used.

Cc: Eugene Batalov <eabatalov89@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/shmem.c | 57 ++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/shmem.c b/criu/shmem.c
index 8af612c..5065abd 100644
--- a/criu/shmem.c
+++ b/criu/shmem.c
@@ -24,6 +24,11 @@ 
 #include "protobuf.h"
 #include "images/pagemap.pb-c.h"
 
+#ifndef SEEK_DATA
+#define SEEK_DATA	3
+#define SEEK_HOLE	4
+#endif
+
 /*
  * Hash table and routines for keeping shmid -> shmem_xinfo mappings 
  */
@@ -628,14 +633,40 @@  static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, void *addr)
 	return page_xfer_dump_pages(xfer, pp, (unsigned long)addr, true);
 }
 
+static int next_data_segment(int fd, unsigned long pfn,
+			unsigned long *next_data_pfn, unsigned long *next_hole_pfn)
+{
+	off_t off;
+
+	off = lseek(fd, pfn * PAGE_SIZE, SEEK_DATA);
+	if (off == (off_t) -1) {
+		if (errno == ENXIO) {
+			*next_data_pfn = ~0UL;
+			*next_hole_pfn = ~0UL;
+			return 0;
+		}
+		pr_perror("Unable to lseek(SEEK_DATA)");
+		return -1;
+	}
+	*next_data_pfn = off / PAGE_SIZE;
+
+	off = lseek(fd, off, SEEK_HOLE);
+	if (off == (off_t) -1) {
+		pr_perror("Unable to lseek(SEEK_HOLE)");
+		return -1;
+	}
+	*next_hole_pfn = off / PAGE_SIZE;
+
+	return 0;
+}
+
 static int dump_one_shmem(struct shmem_info *si)
 {
 	struct page_pipe *pp;
 	struct page_xfer xfer;
 	int err, ret = -1, fd;
-	unsigned char *mc_map = NULL;
 	void *addr = NULL;
-	unsigned long pfn, nrpages;
+	unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
 
 	pr_info("Dumping shared memory %ld\n", si->shmid);
 
@@ -644,7 +675,6 @@  static int dump_one_shmem(struct shmem_info *si)
 		goto err;
 
 	addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
-	close(fd);
 	if (addr == MAP_FAILED) {
 		pr_err("Can't map shmem 0x%lx (0x%lx-0x%lx)\n",
 				si->shmid, si->start, si->end);
@@ -652,13 +682,6 @@  static int dump_one_shmem(struct shmem_info *si)
 	}
 
 	nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
-	mc_map = xmalloc(nrpages * sizeof(*mc_map));
-	if (!mc_map)
-		goto err_unmap;
-	/* We can't rely only on PME bits for anon shmem */
-	err = mincore(addr, si->size, mc_map);
-	if (err)
-		goto err_unmap;
 
 	pp = create_page_pipe((nrpages + 1) / 2, NULL, PP_CHUNK_MODE);
 	if (!pp)
@@ -673,13 +696,21 @@  static int dump_one_shmem(struct shmem_info *si)
 		bool use_mc = true;
 		unsigned long pgaddr;
 
+		if (pfn >= next_hole_pfn &&
+		    next_data_segment(fd, pfn, &next_data_pnf, &next_hole_pfn))
+			goto err_xfer;
+
 		if (is_shmem_tracking_en()) {
 			pgstate = get_pstate(si->pstate_map, pfn);
 			use_mc = pgstate == PST_DONT_DUMP;
 		}
 
-		if (use_mc && !(mc_map[pfn] & PAGE_RSS))
-			continue;
+		if (use_mc) {
+			if (pfn < next_data_pnf)
+				pgstate = PST_ZERO;
+			else
+				pgstate = PST_DIRTY;
+		}
 
 		pgaddr = (unsigned long)addr + pfn * PAGE_SIZE;
 again:
@@ -709,7 +740,7 @@  err_pp:
 err_unmap:
 	munmap(addr,  si->size);
 err:
-	xfree(mc_map);
+	close_safe(&fd);
 	return ret;
 }
 

Comments

Eugene Batalov Oct. 29, 2016, 11:15 a.m.
Hello.

Looks good for me.

2016-10-29 8:36 GMT+03:00 Andrei Vagin <avagin@openvz.org>:

> From: Andrei Vagin <avagin@virtuozzo.com>
>
> When pages are swapped out we can't detect their presence
> with mincore.
>
> Pavel found that lseek(SEEK_DATA, SEEK_HOLE) can show which
> pages are used.
>
> Cc: Eugene Batalov <eabatalov89@gmail.com>
> Cc: Cyrill Gorcunov <gorcunov@openvz.org>
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/shmem.c | 57 ++++++++++++++++++++++++++++++
> ++++++++++++++-------------
>  1 file changed, 44 insertions(+), 13 deletions(-)
>
> diff --git a/criu/shmem.c b/criu/shmem.c
> index 8af612c..5065abd 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -24,6 +24,11 @@
>  #include "protobuf.h"
>  #include "images/pagemap.pb-c.h"
>
> +#ifndef SEEK_DATA
> +#define SEEK_DATA      3
> +#define SEEK_HOLE      4
> +#endif
> +
>  /*
>   * Hash table and routines for keeping shmid -> shmem_xinfo mappings
>   */
> @@ -628,14 +633,40 @@ static int dump_pages(struct page_pipe *pp, struct
> page_xfer *xfer, void *addr)
>         return page_xfer_dump_pages(xfer, pp, (unsigned long)addr, true);
>  }
>
> +static int next_data_segment(int fd, unsigned long pfn,
> +                       unsigned long *next_data_pfn, unsigned long
> *next_hole_pfn)
> +{
> +       off_t off;
> +
> +       off = lseek(fd, pfn * PAGE_SIZE, SEEK_DATA);
> +       if (off == (off_t) -1) {
> +               if (errno == ENXIO) {
> +                       *next_data_pfn = ~0UL;
> +                       *next_hole_pfn = ~0UL;
> +                       return 0;
> +               }
> +               pr_perror("Unable to lseek(SEEK_DATA)");
> +               return -1;
> +       }
> +       *next_data_pfn = off / PAGE_SIZE;
> +
> +       off = lseek(fd, off, SEEK_HOLE);
> +       if (off == (off_t) -1) {
> +               pr_perror("Unable to lseek(SEEK_HOLE)");
> +               return -1;
> +       }
> +       *next_hole_pfn = off / PAGE_SIZE;
> +
> +       return 0;
> +}
> +
>  static int dump_one_shmem(struct shmem_info *si)
>  {
>         struct page_pipe *pp;
>         struct page_xfer xfer;
>         int err, ret = -1, fd;
> -       unsigned char *mc_map = NULL;
>         void *addr = NULL;
> -       unsigned long pfn, nrpages;
> +       unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
>
>         pr_info("Dumping shared memory %ld\n", si->shmid);
>
> @@ -644,7 +675,6 @@ static int dump_one_shmem(struct shmem_info *si)
>                 goto err;
>
>         addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
> -       close(fd);
>         if (addr == MAP_FAILED) {
>                 pr_err("Can't map shmem 0x%lx (0x%lx-0x%lx)\n",
>                                 si->shmid, si->start, si->end);
> @@ -652,13 +682,6 @@ static int dump_one_shmem(struct shmem_info *si)
>         }
>
>         nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
> -       mc_map = xmalloc(nrpages * sizeof(*mc_map));
> -       if (!mc_map)
> -               goto err_unmap;
> -       /* We can't rely only on PME bits for anon shmem */
> -       err = mincore(addr, si->size, mc_map);
> -       if (err)
> -               goto err_unmap;
>
>         pp = create_page_pipe((nrpages + 1) / 2, NULL, PP_CHUNK_MODE);
>         if (!pp)
> @@ -673,13 +696,21 @@ static int dump_one_shmem(struct shmem_info *si)
>                 bool use_mc = true;
>                 unsigned long pgaddr;
>
> +               if (pfn >= next_hole_pfn &&
> +                   next_data_segment(fd, pfn, &next_data_pnf,
> &next_hole_pfn))
> +                       goto err_xfer;
> +
>                 if (is_shmem_tracking_en()) {
>                         pgstate = get_pstate(si->pstate_map, pfn);
>                         use_mc = pgstate == PST_DONT_DUMP;
>                 }
>
> -               if (use_mc && !(mc_map[pfn] & PAGE_RSS))
> -                       continue;
> +               if (use_mc) {
> +                       if (pfn < next_data_pnf)
> +                               pgstate = PST_ZERO;
> +                       else
> +                               pgstate = PST_DIRTY;
> +               }
>
>                 pgaddr = (unsigned long)addr + pfn * PAGE_SIZE;
>  again:
> @@ -709,7 +740,7 @@ err_pp:
>  err_unmap:
>         munmap(addr,  si->size);
>  err:
> -       xfree(mc_map);
> +       close_safe(&fd);
>         return ret;
>  }
>
> --
> 2.7.4
>
>
Pavel Emelianov Nov. 1, 2016, 11:34 p.m.
Applied, thanks