pcs7: shmem -- Inspect pages before the dump

Submitted by Andrei Vagin on Oct. 7, 2016, 11:42 p.m.

Details

Message ID 1475883727-11829-1-git-send-email-avagin@openvz.org
State Superseded
Series "pcs7: shmem -- Inspect pages before the dump"
Headers show

Commit Message

Andrei Vagin Oct. 7, 2016, 11:42 p.m.
From: Cyrill Gorcunov <gorcunov@virtuozzo.com>

When pages are swapped out we can't detect their presence
with mincore. Instead let's do a trick: walk over the page range
and touch the pages so that we can then use @pagemap to inspect
the pages' status.

https://jira.sw.ru/browse/PSBM-52138

Suggested-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/include/image.h |  2 --
 criu/shmem.c         | 33 +++++++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/image.h b/criu/include/image.h
index 65b7b0a..af59ea2 100644
--- a/criu/include/image.h
+++ b/criu/include/image.h
@@ -16,8 +16,6 @@ 
 #else
 #define PAGE_IMAGE_SIZE	4096
 #endif /* _ARCH_PPC64 */
-#define PAGE_RSS	1
-#define PAGE_ANON	2
 
 /*
  * Top bit set in the tgt id means we've remapped
diff --git a/criu/shmem.c b/criu/shmem.c
index ebd22ec..023428b 100644
--- a/criu/shmem.c
+++ b/criu/shmem.c
@@ -631,8 +631,8 @@  static int dump_one_shmem(struct shmem_info *si)
 {
 	struct page_pipe *pp;
 	struct page_xfer xfer;
-	int err, ret = -1, fd;
-	unsigned char *mc_map = NULL;
+	int err, ret = -1, fd, fd_map;
+	u64 *mc_map = NULL;
 	void *addr = NULL;
 	unsigned long pfn, nrpages;
 
@@ -651,13 +651,32 @@  static int dump_one_shmem(struct shmem_info *si)
 	}
 
 	nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	/*
+	 * Make sure PTEs are created in our space
+	 * so pagemap would show us the proper results.
+	 */
+	for (pfn = 0; pfn < nrpages; pfn++) {
+		volatile char v = *(char *)((unsigned long)addr + pfn * PAGE_SIZE);
+		(void)v;
+	}
+
 	mc_map = xmalloc(nrpages * sizeof(*mc_map));
 	if (!mc_map)
 		goto err_unmap;
-	/* We can't rely only on PME bits for anon shmem */
-	err = mincore(addr, si->size, mc_map);
-	if (err)
+
+	fd_map = open_proc(PROC_SELF, "pagemap");
+	if (fd_map < 0) {
 		goto err_unmap;
+	}
+	if (pread(fd_map, mc_map, nrpages * sizeof(*mc_map),
+		  PAGE_PFN((unsigned long)addr) * sizeof(u64)) != nrpages * sizeof(*mc_map)) {
+		pr_perror("Can't read shmem 0x%lx (0x%lx-0x%lx) pagemap\n",
+			  si->shmid, si->start, si->end);
+		close(fd_map);
+		goto err_unmap;
+	}
+	close(fd_map);
 
 	pp = create_page_pipe((nrpages + 1) / 2, NULL, PP_CHUNK_MODE);
 	if (!pp)
@@ -677,7 +696,9 @@  static int dump_one_shmem(struct shmem_info *si)
 			use_mc = pgstate == PST_DONT_DUMP;
 		}
 
-		if (use_mc && !(mc_map[pfn] & PAGE_RSS))
+		if (use_mc &&
+		   (!(mc_map[pfn] & (PME_PRESENT | PME_SWAP)) ||
+		    page_is_zero(mc_map[pfn])))
 			continue;
 
 		pgaddr = (unsigned long)addr + pfn * PAGE_SIZE;

Comments

Andrey Vagin Oct. 11, 2016, 4:28 p.m.
This patch is for criu-dev

On Sat, Oct 08, 2016 at 02:42:07AM +0300, Andrei Vagin wrote:
> From: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> 
> When pages are swapped out we can't detect their presence
> with mincore. Instead let's do a trick: walk over the page range
> and touch the pages so that we can then use @pagemap to inspect
> the pages' status.
> 
> https://jira.sw.ru/browse/PSBM-52138
> 
> Suggested-by: Andrei Vagin <avagin@openvz.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/include/image.h |  2 --
>  criu/shmem.c         | 33 +++++++++++++++++++++++++++------
>  2 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/criu/include/image.h b/criu/include/image.h
> index 65b7b0a..af59ea2 100644
> --- a/criu/include/image.h
> +++ b/criu/include/image.h
> @@ -16,8 +16,6 @@
>  #else
>  #define PAGE_IMAGE_SIZE	4096
>  #endif /* _ARCH_PPC64 */
> -#define PAGE_RSS	1
> -#define PAGE_ANON	2
>  
>  /*
>   * Top bit set in the tgt id means we've remapped
> diff --git a/criu/shmem.c b/criu/shmem.c
> index ebd22ec..023428b 100644
> --- a/criu/shmem.c
> +++ b/criu/shmem.c
> @@ -631,8 +631,8 @@ static int dump_one_shmem(struct shmem_info *si)
>  {
>  	struct page_pipe *pp;
>  	struct page_xfer xfer;
> -	int err, ret = -1, fd;
> -	unsigned char *mc_map = NULL;
> +	int err, ret = -1, fd, fd_map;
> +	u64 *mc_map = NULL;
>  	void *addr = NULL;
>  	unsigned long pfn, nrpages;
>  
> @@ -651,13 +651,32 @@ static int dump_one_shmem(struct shmem_info *si)
>  	}
>  
>  	nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
> +
> +	/*
> +	 * Make sure PTEs are created in our space
> +	 * so pagemap would show us the proper results.
> +	 */
> +	for (pfn = 0; pfn < nrpages; pfn++) {
> +		volatile char v = *(char *)((unsigned long)addr + pfn * PAGE_SIZE);
> +		(void)v;
> +	}
> +
>  	mc_map = xmalloc(nrpages * sizeof(*mc_map));
>  	if (!mc_map)
>  		goto err_unmap;
> -	/* We can't rely only on PME bits for anon shmem */
> -	err = mincore(addr, si->size, mc_map);
> -	if (err)
> +
> +	fd_map = open_proc(PROC_SELF, "pagemap");
> +	if (fd_map < 0) {
>  		goto err_unmap;
> +	}
> +	if (pread(fd_map, mc_map, nrpages * sizeof(*mc_map),
> +		  PAGE_PFN((unsigned long)addr) * sizeof(u64)) != nrpages * sizeof(*mc_map)) {
> +		pr_perror("Can't read shmem 0x%lx (0x%lx-0x%lx) pagemap\n",
> +			  si->shmid, si->start, si->end);
> +		close(fd_map);
> +		goto err_unmap;
> +	}
> +	close(fd_map);
>  
>  	pp = create_page_pipe((nrpages + 1) / 2, NULL, PP_CHUNK_MODE);
>  	if (!pp)
> @@ -677,7 +696,9 @@ static int dump_one_shmem(struct shmem_info *si)
>  			use_mc = pgstate == PST_DONT_DUMP;
>  		}
>  
> -		if (use_mc && !(mc_map[pfn] & PAGE_RSS))
> +		if (use_mc &&
> +		   (!(mc_map[pfn] & (PME_PRESENT | PME_SWAP)) ||
> +		    page_is_zero(mc_map[pfn])))
>  			continue;
>  
>  		pgaddr = (unsigned long)addr + pfn * PAGE_SIZE;
> -- 
> 2.7.4
>
Kirill Gorkunov Oct. 11, 2016, 10:15 p.m.
On Tue, Oct 11, 2016 at 09:28:59AM -0700, Andrei Vagin wrote:
> This patch is for criu-dev
> 
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>

Thanks!
Pavel Emelianov Oct. 17, 2016, 10:30 a.m.
On 10/08/2016 02:42 AM, Andrei Vagin wrote:
> From: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> 
> When pages are swapped out we can't detect their presence
> with mincore. Instead let's do a trick: walk over the page range
> and touch the pages so that we can then use @pagemap to inspect
> the pages' status.

I have a better (I think) idea :) What if we open the shmem file
and lseek it with SEEK_DATA? Even swapped-out pages are reported as
data, as far as I can tell from the kernel's shmem_seek_hole_data.

-- Pavel
Cyrill Gorcunov Oct. 17, 2016, 10:50 a.m.
On Mon, Oct 17, 2016 at 01:30:31PM +0300, Pavel Emelyanov wrote:
> On 10/08/2016 02:42 AM, Andrei Vagin wrote:
> > From: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> > 
> > When pages are swapped out we can't detect their presence
> > with mincore. Instead let's do a trick: walk over the page range
> > and touch the pages so that we can then use @pagemap to inspect
> > the pages' status.
> 
> I have some better (I think) idea :) What if we open the shmem file
> and lseek it with SEEK_DATA? Even swapped out pages are reported as
> data as far as I can read from kernel's shmem_seek_hole_data.

Will take a look.
Andrey Vagin Oct. 27, 2016, 3:02 a.m.
On Mon, Oct 17, 2016 at 01:30:31PM +0300, Pavel Emelyanov wrote:
> On 10/08/2016 02:42 AM, Andrei Vagin wrote:
> > From: Cyrill Gorcunov <gorcunov@virtuozzo.com>
> > 
> > When pages are swapped out we can't detect their presence
> > with mincore. Instead let's do a trick: walk over the page range
> > and touch the pages so that we can then use @pagemap to inspect
> > the pages' status.
> 
> I have some better (I think) idea :) What if we open the shmem file
> and lseek it with SEEK_DATA? Even swapped out pages are reported as
> data as far as I can read from kernel's shmem_seek_hole_data.

It is a cool idea, but I think it will be slower than what we have here.

We will need to call lseek for each page. A system call is more
expensive than mapping a zero page.

It will work better only in cases where we have big holes.

> 
> -- Pavel
>