pagemap: fix reading pages from socket for --remote case

Submitted by Mike Rapoport on April 21, 2017, 3:25 p.m.

Details

Message ID 1492788354-25397-1-git-send-email-rppt@linux.vnet.ibm.com
State New
Series "pagemap: fix reading pages from socket for --remote case"
Headers show

Commit Message

Mike Rapoport April 21, 2017, 3:25 p.m.
When --remote option is specified, read_local_page tries to pread from a
socket, and fails with "Illegal seek" error.
Restore single pread call for regular image files case and use read syscall
instead of pread for the --remote case.

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 criu/pagemap.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/pagemap.c b/criu/pagemap.c
index 6c741b4..7d25c6f 100644
--- a/criu/pagemap.c
+++ b/criu/pagemap.c
@@ -265,15 +265,23 @@  static int read_local_page(struct page_read *pr, unsigned long vaddr,
 		return -1;
 
 	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
-	while (1) {
+	if (!opts.remote) {
 		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
-		if (ret < 1) {
+		if (ret != len) {
 			pr_perror("Can't read mapping page %d", ret);
 			return -1;
 		}
-		curr += ret;
-		if (curr == len)
-			break;
+	} else {
+		while (1) {
+			ret = read(fd, buf + curr, len - curr);
+			if (ret < 0) {
+				pr_perror("Can't read mapping page %d", ret);
+				return -1;
+			}
+			curr += ret;
+			if (curr == len)
+				break;
+		}
 	}
 
 	if (opts.auto_dedup) {

Comments

Andrey Vagin April 22, 2017, 3:42 a.m.
Rodrigo, could you review this patch?

On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
> When --remote option is specified, read_local_page tries to pread from a
> socket, and fails with "Illegal seek" error.
> Restore single pread call for regular image files case and use read syscall
> instead of pread for the --remote case.
> 
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> ---
>  criu/pagemap.c | 18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..7d25c6f 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
>  
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> +	if (!opts.remote) {
>  		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> +		if (ret != len) {
>  			pr_perror("Can't read mapping page %d", ret);
>  			return -1;
>  		}
> -		curr += ret;
> -		if (curr == len)
> -			break;
> +	} else {
> +		while (1) {
> +			ret = read(fd, buf + curr, len - curr);
> +			if (ret < 0) {
> +				pr_perror("Can't read mapping page %d", ret);
> +				return -1;
> +			}
> +			curr += ret;
> +			if (curr == len)
> +				break;
> +		}
>  	}
>  
>  	if (opts.auto_dedup) {
> -- 
> 1.9.1
>
Andrey Vagin April 22, 2017, 3:50 a.m.
On Fri, Apr 21, 2017 at 08:42:44PM -0700, Andrei Vagin wrote:
> Rodrigo, could you review this patch?

And there are other problems:

[root@fc24 criu]# python test/zdtm.py run --remote -t zdtm/static/env00 -f ns
=== Run 1/1 ================ zdtm/static/env00

========================= Run zdtm/static/env00 in ns ==========================
make[1]: Nothing to be done for 'default'.
Start test
make[1]: Nothing to be done for 'default'.
./env00 --pidfile=env00.pid --outfile=env00.out --envname=ENV_00_TEST
Adding image cache
Adding image proxy
Run criu dump
Run criu restore
=[log]=> dump/zdtm/static/env00/29/1/restore.log
------------------------ grep Error ------------------------
RTNETLINK answers: File exists
(00.229999)      1: do_open_remote_image RDONLY path=route-9.img snapshot_id=dump/zdtm/static/env00/29/1
(00.230344)      1: 	Running ip route restore
Failed to restore: ftell: Illegal seek
(00.233615)      1: Error (criu/util.c:712): exited, status=255
(00.233662)      1: Error (criu/net.c:1479): IP tool failed on route restore
(00.233695)      1: Error (criu/net.c:2153): Can't create net_ns
(00.258027) Error (criu/cr-restore.c:1130): 105 killed by signal 9: Killed
(00.258139) Error (criu/mount.c:2960): mnt: Can't remove the directory /tmp/.criu.mntns.s9sRwU: No such file or directory
(00.258203) Error (criu/cr-restore.c:2059): Restoring FAILED.
------------------------ ERROR OVER ------------------------
################# Test zdtm/static/env00 FAIL at CRIU restore ##################


> 
> On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
> > When --remote option is specified, read_local_page tries to pread from a
> > socket, and fails with "Illegal seek" error.
> > Restore single pread call for regular image files case and use read syscall
> > instead of pread for the --remote case.
> > 
> > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> > ---
> >  criu/pagemap.c | 18 +++++++++++++-----
> >  1 file changed, 13 insertions(+), 5 deletions(-)
> > 
> > diff --git a/criu/pagemap.c b/criu/pagemap.c
> > index 6c741b4..7d25c6f 100644
> > --- a/criu/pagemap.c
> > +++ b/criu/pagemap.c
> > @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
> >  		return -1;
> >  
> >  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> > -	while (1) {
> > +	if (!opts.remote) {
> >  		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> > -		if (ret < 1) {
> > +		if (ret != len) {
> >  			pr_perror("Can't read mapping page %d", ret);
> >  			return -1;
> >  		}
> > -		curr += ret;
> > -		if (curr == len)
> > -			break;
> > +	} else {
> > +		while (1) {
> > +			ret = read(fd, buf + curr, len - curr);
> > +			if (ret < 0) {
> > +				pr_perror("Can't read mapping page %d", ret);
> > +				return -1;
> > +			}
> > +			curr += ret;
> > +			if (curr == len)
> > +				break;
> > +		}
> >  	}
> >  
> >  	if (opts.auto_dedup) {
> > -- 
> > 1.9.1
> > 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Mike Rapoport April 22, 2017, 1:37 p.m.
On April 22, 2017 6:50:39 AM GMT+03:00, Andrei Vagin <avagin@virtuozzo.com> wrote:
>On Fri, Apr 21, 2017 at 08:42:44PM -0700, Andrei Vagin wrote:
>> Rodrigo, could you review this patch?
>
>And there are other problems:

Let's move fix by fix :)

>[root@fc24 criu]# python test/zdtm.py run --remote -t zdtm/static/env00
>-f ns
>=== Run 1/1 ================ zdtm/static/env00
>
>========================= Run zdtm/static/env00 in ns
>==========================
>make[1]: Nothing to be done for 'default'.
>Start test
>make[1]: Nothing to be done for 'default'.
>./env00 --pidfile=env00.pid --outfile=env00.out --envname=ENV_00_TEST
>Adding image cache
>Adding image proxy
>Run criu dump
>Run criu restore
>=[log]=> dump/zdtm/static/env00/29/1/restore.log
>------------------------ grep Error ------------------------
>RTNETLINK answers: File exists
>(00.229999)      1: do_open_remote_image RDONLY path=route-9.img
>snapshot_id=dump/zdtm/static/env00/29/1
>(00.230344)      1: 	Running ip route restore
>Failed to restore: ftell: Illegal seek
>(00.233615)      1: Error (criu/util.c:712): exited, status=255
>(00.233662)      1: Error (criu/net.c:1479): IP tool failed on route
>restore
>(00.233695)      1: Error (criu/net.c:2153): Can't create net_ns
>(00.258027) Error (criu/cr-restore.c:1130): 105 killed by signal 9:
>Killed
>(00.258139) Error (criu/mount.c:2960): mnt: Can't remove the directory
>/tmp/.criu.mntns.s9sRwU: No such file or directory
>(00.258203) Error (criu/cr-restore.c:2059): Restoring FAILED.
>------------------------ ERROR OVER ------------------------
>################# Test zdtm/static/env00 FAIL at CRIU restore
>##################
>
>
>> 
>> On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
>> > When --remote option is specified, read_local_page tries to pread
>from a
>> > socket, and fails with "Illegal seek" error.
>> > Restore single pread call for regular image files case and use read
>syscall
>> > instead of pread for the --remote case.
>> > 
>> > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
>> > ---
>> >  criu/pagemap.c | 18 +++++++++++++-----
>> >  1 file changed, 13 insertions(+), 5 deletions(-)
>> > 
>> > diff --git a/criu/pagemap.c b/criu/pagemap.c
>> > index 6c741b4..7d25c6f 100644
>> > --- a/criu/pagemap.c
>> > +++ b/criu/pagemap.c
>> > @@ -265,15 +265,23 @@ static int read_local_page(struct page_read
>*pr, unsigned long vaddr,
>> >  		return -1;
>> >  
>> >  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n",
>pr->pid, pr->id, pr->cvaddr, pr->pi_off);
>> > -	while (1) {
>> > +	if (!opts.remote) {
>> >  		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
>> > -		if (ret < 1) {
>> > +		if (ret != len) {
>> >  			pr_perror("Can't read mapping page %d", ret);
>> >  			return -1;
>> >  		}
>> > -		curr += ret;
>> > -		if (curr == len)
>> > -			break;
>> > +	} else {
>> > +		while (1) {
>> > +			ret = read(fd, buf + curr, len - curr);
>> > +			if (ret < 0) {
>> > +				pr_perror("Can't read mapping page %d", ret);
>> > +				return -1;
>> > +			}
>> > +			curr += ret;
>> > +			if (curr == len)
>> > +				break;
>> > +		}
>> >  	}
>> >  
>> >  	if (opts.auto_dedup) {
>> > -- 
>> > 1.9.1
>> > 
>> _______________________________________________
>> CRIU mailing list
>> CRIU@openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
Rodrigo Bruno April 23, 2017, 1:29 a.m.
Hi,

the patch looks good. It avoids using 'pread' which is not supported for
remote images.

However, the patch that I sent for the mailing list introducing remote
images used 'read' instead of 'pread'.

If more code locations were reverted to 'pread', it might be necessary to
protect them as well.

Is there any reason for using 'pread' instead of 'read'? It seems to me
that the code always reads files forwards.
Additionally, I also made some experiments (including zdtm tests) back in
February using 'read' instead of 'pread' and it worked.

Best,
rodrigo


2017-04-22 4:42 GMT+01:00 Andrei Vagin <avagin@virtuozzo.com>:

> Rodrigo, could you review this patch?
>
> On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
> > When --remote option is specified, read_local_page tries to pread from a
> > socket, and fails with "Illegal seek" error.
> > Restore single pread call for regular image files case and use read
> syscall
> > instead of pread for the --remote case.
> >
> > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> > ---
> >  criu/pagemap.c | 18 +++++++++++++-----
> >  1 file changed, 13 insertions(+), 5 deletions(-)
> >
> > diff --git a/criu/pagemap.c b/criu/pagemap.c
> > index 6c741b4..7d25c6f 100644
> > --- a/criu/pagemap.c
> > +++ b/criu/pagemap.c
> > @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr,
> unsigned long vaddr,
> >               return -1;
> >
> >       pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
> pr->id, pr->cvaddr, pr->pi_off);
> > -     while (1) {
> > +     if (!opts.remote) {
> >               ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> > -             if (ret < 1) {
> > +             if (ret != len) {
> >                       pr_perror("Can't read mapping page %d", ret);
> >                       return -1;
> >               }
> > -             curr += ret;
> > -             if (curr == len)
> > -                     break;
> > +     } else {
> > +             while (1) {
> > +                     ret = read(fd, buf + curr, len - curr);
> > +                     if (ret < 0) {
> > +                             pr_perror("Can't read mapping page %d",
> ret);
> > +                             return -1;
> > +                     }
> > +                     curr += ret;
> > +                     if (curr == len)
> > +                             break;
> > +             }
> >       }
> >
> >       if (opts.auto_dedup) {
> > --
> > 1.9.1
> >
>
Pavel Emelianov April 24, 2017, 11:55 a.m.
On 04/23/2017 04:29 AM, Rodrigo Bruno wrote:
> Hi,
> 
> the patch looks good. It avoids using 'pread' which is not supported for remote images.
> 
> However, the patch that I sent for the mailing list introducing remote images used 'read' instead of 'pread'. 

Yes, sorry about that, it was me wrongly merging async reads with remotes :(

I have a question regarding this place, while we're at it. Here's the code from
maybe_read_page_local():

        if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC && !opts.remote)
                ret = enqueue_async_page(pr, vaddr, len, buf);
        else {
                ret = read_local_page(pr, vaddr, len, buf);

Why don't we enqueue the read request for opts.remote case?

> If more code locations were reverted to 'pread', it might be necessary to protect them as well.
> 
> Is there any reason for using 'pread' instead of 'read'? It seems to me that the code always reads files forwards. 
> Additionally, I also made some experiments (including zdtm tests) back in February using 'read' instead of 'pread' and it worked.
> 
> Best,
> rodrigo
> 
> 
> 2017-04-22 4:42 GMT+01:00 Andrei Vagin <avagin@virtuozzo.com <mailto:avagin@virtuozzo.com>>:
> 
>     Rodrigo, could you review this patch?
> 
>     On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
>     > When --remote option is specified, read_local_page tries to pread from a
>     > socket, and fails with "Illegal seek" error.
>     > Restore single pread call for regular image files case and use read syscall
>     > instead of pread for the --remote case.
>     >
>     > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com <mailto:rppt@linux.vnet.ibm.com>>
>     > ---
>     >  criu/pagemap.c | 18 +++++++++++++-----
>     >  1 file changed, 13 insertions(+), 5 deletions(-)
>     >
>     > diff --git a/criu/pagemap.c b/criu/pagemap.c
>     > index 6c741b4..7d25c6f 100644
>     > --- a/criu/pagemap.c
>     > +++ b/criu/pagemap.c
>     > @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>     >               return -1;
>     >
>     >       pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
>     > -     while (1) {
>     > +     if (!opts.remote) {
>     >               ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
>     > -             if (ret < 1) {
>     > +             if (ret != len) {
>     >                       pr_perror("Can't read mapping page %d", ret);
>     >                       return -1;
>     >               }
>     > -             curr += ret;
>     > -             if (curr == len)
>     > -                     break;
>     > +     } else {
>     > +             while (1) {
>     > +                     ret = read(fd, buf + curr, len - curr);
>     > +                     if (ret < 0) {
>     > +                             pr_perror("Can't read mapping page %d", ret);
>     > +                             return -1;
>     > +                     }
>     > +                     curr += ret;
>     > +                     if (curr == len)
>     > +                             break;
>     > +             }
>     >       }
>     >
>     >       if (opts.auto_dedup) {
>     > --
>     > 1.9.1
>     >
> 
>
Pavel Emelianov April 24, 2017, 12:10 p.m.
On 04/21/2017 06:25 PM, Mike Rapoport wrote:
> When --remote option is specified, read_local_page tries to pread from a
> socket, and fails with "Illegal seek" error.
> Restore single pread call for regular image files case and use read syscall
> instead of pread for the --remote case.
> 
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> ---
>  criu/pagemap.c | 18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..7d25c6f 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
>  
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> +	if (!opts.remote) {

Maybe it's better to introduce separate ->maybe_read_page callback for opts.remote case?
This would let us localize the cache/proxy pages reading code in one place.

-- Pavel

>  		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> +		if (ret != len) {
>  			pr_perror("Can't read mapping page %d", ret);
>  			return -1;
>  		}
> -		curr += ret;
> -		if (curr == len)
> -			break;
> +	} else {
> +		while (1) {
> +			ret = read(fd, buf + curr, len - curr);
> +			if (ret < 0) {
> +				pr_perror("Can't read mapping page %d", ret);
> +				return -1;
> +			}
> +			curr += ret;
> +			if (curr == len)
> +				break;
> +		}
>  	}
>  
>  	if (opts.auto_dedup) {
>
Pavel Emelianov April 24, 2017, 5 p.m.
On 04/24/2017 04:52 PM, Mike Rapoport wrote:
> On Mon, Apr 24, 2017 at 03:10:04PM +0300, Pavel Emelyanov wrote:
>> On 04/21/2017 06:25 PM, Mike Rapoport wrote:
>>> When --remote option is specified, read_local_page tries to pread from a
>>> socket, and fails with "Illegal seek" error.
>>> Restore single pread call for regular image files case and use read syscall
>>> instead of pread for the --remote case.
>>>
>>> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
>>> ---
>>>  criu/pagemap.c | 18 +++++++++++++-----
>>>  1 file changed, 13 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/criu/pagemap.c b/criu/pagemap.c
>>> index 6c741b4..7d25c6f 100644
>>> --- a/criu/pagemap.c
>>> +++ b/criu/pagemap.c
>>> @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>>>  		return -1;
>>>  
>>>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
>>> -	while (1) {
>>> +	if (!opts.remote) {
>>
>> Maybe it's better to introduce separate ->maybe_read_page callback for opts.remote case?
>> This would let us localize the cache/proxy pages reading code in one place.
> 
> Well, we could. E.g. something like the patch below (untested). Note that
> we already have maybe_read_page_remote for the remote lazy restore case,
> and, ideally, the cache/proxy and lazy cases should use the same code to
> read pages.

Yup. I like this patch :)
Rodrigo, what do you think?

> ----------------
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..353c469 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  {
>  	int fd = img_raw_fd(pr->pi);
>  	int ret;
> -	size_t curr = 0;
>  
>  	/*
>  	 * Flush any pending async requests if any not to break the
> @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
>  
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> -			pr_perror("Can't read mapping page %d", ret);
> -			return -1;
> -		}
> -		curr += ret;
> -		if (curr == len)
> -			break;
> +	ret = pread(fd, buf, len, pr->pi_off);
> +	if (ret != len) {
> +		pr_perror("Can't read mapping page %d", ret);
> +		return -1;
>  	}
>  
>  	if (opts.auto_dedup) {
> @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
>  	return ret;
>  }
>  
> +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr,
> +				     int nr, void *buf, unsigned flags)
> +{
> +	unsigned long len = nr * PAGE_SIZE;
> +	int fd = img_raw_fd(pr->pi);
> +	int ret;
> +	size_t curr = 0;
> +
> +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> +	while (1) {
> +		ret = read(fd, buf + curr, len - curr);
> +		if (ret < 0) {
> +			pr_perror("Can't read mapping page %d", ret);
> +			return -1;
> +		}
> +		curr += ret;
> +		if (curr == len)
> +			break;
> +	}
> +
> +	if (opts.auto_dedup) {
> +		ret = punch_hole(pr, pr->pi_off, len, false);
> +		if (ret == -1)
> +			return -1;
> +	}
> +
> +	if (ret == 0 && pr->io_complete)
> +		ret = pr->io_complete(pr, vaddr, nr);
> +
> +	pr->pi_off += len;
> +
> +	return ret;
> +}
> +
>  static int read_page_complete(int pid, unsigned long vaddr, int nr_pages, void *priv)
>  {
>  	int ret = 0;
> @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->id = ids++;
>  	pr->pid = pid;
>  
> -	if (remote)
> +	if (opts.remote)
> +		pr->maybe_read_page = maybe_read_page_img_cache;
> +	else if (remote)
>  		pr->maybe_read_page = maybe_read_page_remote;
>  	else
>  		pr->maybe_read_page = maybe_read_page_local;
> ----------------
> 
> 
>> -- Pavel
>>
>>>  		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
>>> -		if (ret < 1) {
>>> +		if (ret != len) {
>>>  			pr_perror("Can't read mapping page %d", ret);
>>>  			return -1;
>>>  		}
>>> -		curr += ret;
>>> -		if (curr == len)
>>> -			break;
>>> +	} else {
>>> +		while (1) {
>>> +			ret = read(fd, buf + curr, len - curr);
>>> +			if (ret < 0) {
>>> +				pr_perror("Can't read mapping page %d", ret);
>>> +				return -1;
>>> +			}
>>> +			curr += ret;
>>> +			if (curr == len)
>>> +				break;
>>> +		}
>>>  	}
>>>  
>>>  	if (opts.auto_dedup) {
>>>
>>
> 
> .
>
Rodrigo Bruno April 25, 2017, 7:54 a.m.
Hi,

2017-04-24 12:55 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:

> On 04/23/2017 04:29 AM, Rodrigo Bruno wrote:
> > Hi,
> >
> > the patch looks good. It avoids using 'pread' which is not supported for
> remote images.
> >
> > However, the patch that I sent for the mailing list introducing remote
> images used 'read' instead of 'pread'.
>
> Yes, sorry about that, it was me wrongly merging async reads with remotes
> :(
>
> I have a question regarding this place, while we're at it. Here's the code
> from
> maybe_read_page_local():
>
>         if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC && !opts.remote)
>                 ret = enqueue_async_page(pr, vaddr, len, buf);
>         else {
>                 ret = read_local_page(pr, vaddr, len, buf);
>
> Why don't we enqueue the read request for opts.remote case?
>

To be honest, I don't remember why I avoided async pages... I looked
through the code and I don't see any
particular reason for it not to work with remote images.

Anyway, just to clarify, what is the motivation behind async pages? Are we
avoiding any page read that might be unnecessary
in the future?


>
> > If more code locations were reverted to 'pread', it might be necessary
> to protect them as well.
> >
> > Is there any reason for using 'pread' instead of 'read'? It seems to me
> that the code always reads files forwards.
> > Additionally, I also made some experiments (including zdtm tests) back
> in February using 'read' instead of 'pread' and it worked.
> >
> > Best,
> > rodrigo
> >
> >
> > 2017-04-22 4:42 GMT+01:00 Andrei Vagin <avagin@virtuozzo.com <mailto:
> avagin@virtuozzo.com>>:
> >
> >     Rodrigo, could you review this patch?
> >
> >     On Fri, Apr 21, 2017 at 06:25:54PM +0300, Mike Rapoport wrote:
> >     > When --remote option is specified, read_local_page tries to pread
> from a
> >     > socket, and fails with "Illegal seek" error.
> >     > Restore single pread call for regular image files case and use
> read syscall
> >     > instead of pread for the --remote case.
> >     >
> >     > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com <mailto:
> rppt@linux.vnet.ibm.com>>
> >     > ---
> >     >  criu/pagemap.c | 18 +++++++++++++-----
> >     >  1 file changed, 13 insertions(+), 5 deletions(-)
> >     >
> >     > diff --git a/criu/pagemap.c b/criu/pagemap.c
> >     > index 6c741b4..7d25c6f 100644
> >     > --- a/criu/pagemap.c
> >     > +++ b/criu/pagemap.c
> >     > @@ -265,15 +265,23 @@ static int read_local_page(struct page_read
> *pr, unsigned long vaddr,
> >     >               return -1;
> >     >
> >     >       pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n",
> pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> >     > -     while (1) {
> >     > +     if (!opts.remote) {
> >     >               ret = pread(fd, buf + curr, len - curr, pr->pi_off +
> curr);
> >     > -             if (ret < 1) {
> >     > +             if (ret != len) {
> >     >                       pr_perror("Can't read mapping page %d", ret);
> >     >                       return -1;
> >     >               }
> >     > -             curr += ret;
> >     > -             if (curr == len)
> >     > -                     break;
> >     > +     } else {
> >     > +             while (1) {
> >     > +                     ret = read(fd, buf + curr, len - curr);
> >     > +                     if (ret < 0) {
> >     > +                             pr_perror("Can't read mapping page
> %d", ret);
> >     > +                             return -1;
> >     > +                     }
> >     > +                     curr += ret;
> >     > +                     if (curr == len)
> >     > +                             break;
> >     > +             }
> >     >       }
> >     >
> >     >       if (opts.auto_dedup) {
> >     > --
> >     > 1.9.1
> >     >
> >
> >
>
>
Rodrigo Bruno April 25, 2017, 7:56 a.m.
Hi,

2017-04-24 18:00 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:

> On 04/24/2017 04:52 PM, Mike Rapoport wrote:
> > On Mon, Apr 24, 2017 at 03:10:04PM +0300, Pavel Emelyanov wrote:
> >> On 04/21/2017 06:25 PM, Mike Rapoport wrote:
> >>> When --remote option is specified, read_local_page tries to pread from
> a
> >>> socket, and fails with "Illegal seek" error.
> >>> Restore single pread call for regular image files case and use read
> syscall
> >>> instead of pread for the --remote case.
> >>>
> >>> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> >>> ---
> >>>  criu/pagemap.c | 18 +++++++++++++-----
> >>>  1 file changed, 13 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/criu/pagemap.c b/criu/pagemap.c
> >>> index 6c741b4..7d25c6f 100644
> >>> --- a/criu/pagemap.c
> >>> +++ b/criu/pagemap.c
> >>> @@ -265,15 +265,23 @@ static int read_local_page(struct page_read *pr,
> unsigned long vaddr,
> >>>             return -1;
> >>>
> >>>     pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
> pr->id, pr->cvaddr, pr->pi_off);
> >>> -   while (1) {
> >>> +   if (!opts.remote) {
> >>
> >> Maybe it's better to introduce separate ->maybe_read_page callback for
> opts.remote case?
> >> This would let us localize the cache/proxy pages reading code in one
> place.
> >
> > Well, we could. E.g. something like the patch below (untested). Note that
> > we already have maybe_read_page_remote for the remote lazy restore case,
> > and, ideally, the cache/proxy and lazy cases should use the same code to
> > read pages.
>
> Yup. I like this patch :)
> Rodrigo, what do you think?
>


I agree.


>
> > ----------------
> > diff --git a/criu/pagemap.c b/criu/pagemap.c
> > index 6c741b4..353c469 100644
> > --- a/criu/pagemap.c
> > +++ b/criu/pagemap.c
> > @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr,
> unsigned long vaddr,
> >  {
> >       int fd = img_raw_fd(pr->pi);
> >       int ret;
> > -     size_t curr = 0;
> >
> >       /*
> >        * Flush any pending async requests if any not to break the
> > @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr,
> unsigned long vaddr,
> >               return -1;
> >
> >       pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
> pr->id, pr->cvaddr, pr->pi_off);
> > -     while (1) {
> > -             ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> > -             if (ret < 1) {
> > -                     pr_perror("Can't read mapping page %d", ret);
> > -                     return -1;
> > -             }
> > -             curr += ret;
> > -             if (curr == len)
> > -                     break;
> > +     ret = pread(fd, buf, len, pr->pi_off);
> > +     if (ret != len) {
> > +             pr_perror("Can't read mapping page %d", ret);
> > +             return -1;
> >       }
> >
> >       if (opts.auto_dedup) {
> > @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read
> *pr, unsigned long vaddr,
> >       return ret;
> >  }
> >
> > +static int maybe_read_page_img_cache(struct page_read *pr, unsigned
> long vaddr,
> > +                                  int nr, void *buf, unsigned flags)
> > +{
> > +     unsigned long len = nr * PAGE_SIZE;
> > +     int fd = img_raw_fd(pr->pi);
> > +     int ret;
> > +     size_t curr = 0;
> > +
> > +     pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
> pr->id, pr->cvaddr, pr->pi_off);
> > +     while (1) {
> > +             ret = read(fd, buf + curr, len - curr);
> > +             if (ret < 0) {
> > +                     pr_perror("Can't read mapping page %d", ret);
> > +                     return -1;
> > +             }
> > +             curr += ret;
> > +             if (curr == len)
> > +                     break;
> > +     }
> > +
> > +     if (opts.auto_dedup) {
> > +             ret = punch_hole(pr, pr->pi_off, len, false);
> > +             if (ret == -1)
> > +                     return -1;
> > +     }
> > +
> > +     if (ret == 0 && pr->io_complete)
> > +             ret = pr->io_complete(pr, vaddr, nr);
> > +
> > +     pr->pi_off += len;
> > +
> > +     return ret;
> > +}
> > +
> >  static int read_page_complete(int pid, unsigned long vaddr, int
> nr_pages, void *priv)
> >  {
> >       int ret = 0;
> > @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct
> page_read *pr, int pr_flags)
> >       pr->id = ids++;
> >       pr->pid = pid;
> >
> > -     if (remote)
> > +     if (opts.remote)
> > +             pr->maybe_read_page = maybe_read_page_img_cache;
> > +     else if (remote)
> >               pr->maybe_read_page = maybe_read_page_remote;
> >       else
> >               pr->maybe_read_page = maybe_read_page_local;
> > ----------------
> >
> >
> >> -- Pavel
> >>
> >>>             ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> >>> -           if (ret < 1) {
> >>> +           if (ret != len) {
> >>>                     pr_perror("Can't read mapping page %d", ret);
> >>>                     return -1;
> >>>             }
> >>> -           curr += ret;
> >>> -           if (curr == len)
> >>> -                   break;
> >>> +   } else {
> >>> +           while (1) {
> >>> +                   ret = read(fd, buf + curr, len - curr);
> >>> +                   if (ret < 0) {
> >>> +                           pr_perror("Can't read mapping page %d",
> ret);
> >>> +                           return -1;
> >>> +                   }
> >>> +                   curr += ret;
> >>> +                   if (curr == len)
> >>> +                           break;
> >>> +           }
> >>>     }
> >>>
> >>>     if (opts.auto_dedup) {
> >>>
> >>
> >
> > .
> >
>
>
Pavel Emelianov April 25, 2017, 10:57 a.m.
On 04/25/2017 10:54 AM, Rodrigo Bruno wrote:
> Hi,
> 
> 2017-04-24 12:55 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com <mailto:xemul@virtuozzo.com>>:
> 
>     On 04/23/2017 04:29 AM, Rodrigo Bruno wrote:
>     > Hi,
>     >
>     > the patch looks good. It avoids using 'pread' which is not supported for remote images.
>     >
>     > However, the patch that I sent for the mailing list introducing remote images used 'read' instead of 'pread'.
> 
>     Yes, sorry about that, it was me wrongly merging async reads with remotes :(
> 
>     I have a question regarding this place, while we're at it. Here's the code from
>     maybe_read_page_local():
> 
>             if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC && !opts.remote)
>                     ret = enqueue_async_page(pr, vaddr, len, buf);
>             else {
>                     ret = read_local_page(pr, vaddr, len, buf);
> 
>     Why don't we enqueue the read request for opts.remote case?
> 
> 
> To be honest, I don't remember why I avoided async pages... I looked through the code and I don't see any
> particular reason for it not to work with remote images.
> 
> Anyway, just to clarify, what is the motivation behind async pages? Are we avoiding any page read that might be unnecessary
> in the future? 

No, for on-disk files doing many small read()-s is still slow even if the data
is in cache. With async pages we merge all the reads together and then call
single preadv() doing all IO with one call.

-- Pavel
Mike Rapoport April 25, 2017, 3:38 p.m.
On Tue, Apr 25, 2017 at 01:38:01PM +0300, Mike Rapoport wrote:
> On Tue, Apr 25, 2017 at 08:56:19AM +0100, Rodrigo Bruno wrote:
> > Hi,
> > 
> > 2017-04-24 18:00 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:
> > 
> > > >>
> > > >> Maybe it's better to introduce separate ->maybe_read_page callback for
> > > opts.remote case?
> > > >> This would let us localize the cache/proxy pages reading code in one
> > > place.
> > > >
> > > > Well, we could. E.g. something like the patch below (untested). Note that
> > > > we already have maybe_read_page_remote for the remote lazy restore case,
> > > > and, ideally, the cache/proxy and lazy cases should use the same code to
> > > > read pages.
> > >
> > > Yup. I like this patch :)
> > > Rodrigo, what do you think?
> > >
> > 
> > I agree.
> 
> All right, below is the more formal version of the patch. I'd suggest to
> start with it to make --remote able to restore the memory. The next step
> related to --remote and the page-read would be to see if it's possible to
> use ASYNC with sockets, what should be refactored to enable it and how we
> reduce code and functionality duplication.
> 
> 
> From 645c2f8e5afe090ee362d3225b97e2b5f1bc95cb Mon Sep 17 00:00:00 2001
> From: Mike Rapoport <rppt@linux.vnet.ibm.com>
> Date: Fri, 21 Apr 2017 17:29:10 +0300
> Subject: [CRIU][PATCH v3] pagemap: fix reading pages from socket for --remote case
> 
> When --remote option is specified, read_local_page tries to pread from a
> socket, and fails with "Illegal seek" error.
> Restore single pread call for regular image files case and introduce
> maybe_read_page_img_cache version of maybe_read_page method.
> 
> Generally-approved-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>

travis-ci: https://travis-ci.org/rppt/criu/builds/225546422

> ---
> v3: use different ->maybe_read_page method for --remote case
> v2: simplify call to pread in non-remote case, it does not need 'curr'
> 
>  criu/pagemap.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..353c469 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  {
>  	int fd = img_raw_fd(pr->pi);
>  	int ret;
> -	size_t curr = 0;
> 
>  	/*
>  	 * Flush any pending async requests if any not to break the
> @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
> 
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> -			pr_perror("Can't read mapping page %d", ret);
> -			return -1;
> -		}
> -		curr += ret;
> -		if (curr == len)
> -			break;
> +	ret = pread(fd, buf, len, pr->pi_off);
> +	if (ret != len) {
> +		pr_perror("Can't read mapping page %d", ret);
> +		return -1;
>  	}
> 
>  	if (opts.auto_dedup) {
> @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
>  	return ret;
>  }
> 
> +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr,
> +				     int nr, void *buf, unsigned flags)
> +{
> +	unsigned long len = nr * PAGE_SIZE;
> +	int fd = img_raw_fd(pr->pi);
> +	int ret;
> +	size_t curr = 0;
> +
> +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> +	while (1) {
> +		ret = read(fd, buf + curr, len - curr);
> +		if (ret < 0) {
> +			pr_perror("Can't read mapping page %d", ret);
> +			return -1;
> +		}
> +		curr += ret;
> +		if (curr == len)
> +			break;
> +	}
> +
> +	if (opts.auto_dedup) {
> +		ret = punch_hole(pr, pr->pi_off, len, false);
> +		if (ret == -1)
> +			return -1;
> +	}
> +
> +	if (ret == 0 && pr->io_complete)
> +		ret = pr->io_complete(pr, vaddr, nr);
> +
> +	pr->pi_off += len;
> +
> +	return ret;
> +}
> +
>  static int read_page_complete(int pid, unsigned long vaddr, int nr_pages, void *priv)
>  {
>  	int ret = 0;
> @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->id = ids++;
>  	pr->pid = pid;
> 
> -	if (remote)
> +	if (opts.remote)
> +		pr->maybe_read_page = maybe_read_page_img_cache;
> +	else if (remote)
>  		pr->maybe_read_page = maybe_read_page_remote;
>  	else
>  		pr->maybe_read_page = maybe_read_page_local;
> -- 
> 1.9.1
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
>
Pavel Emelianov April 25, 2017, 6:55 p.m.
On 04/25/2017 01:38 PM, Mike Rapoport wrote:
> On Tue, Apr 25, 2017 at 08:56:19AM +0100, Rodrigo Bruno wrote:
>> Hi,
>>
>> 2017-04-24 18:00 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:
>>
>>>>>
>>>>> Maybe it's better to introduce separate ->maybe_read_page callback for
>>> opts.remote case?
>>>>> This would let us localize the cache/proxy pages reading code in one
>>> place.
>>>>
>>>> Well, we could. E.g. something like the patch below (untested). Note that
>>>> we already have maybe_read_page_remote for the remote lazy restore case,
>>>> and, ideally, the cache/proxy and lazy cases should use the same code to
>>>> read pages.
>>>
>>> Yup. I like this patch :)
>>> Rodrigo, what do you think?
>>>
>>
>> I agree.
> 
> All right, below is the more formal version of the patch. I'd suggest to
> start with it to make --remote able to restore the memory. The next step
> related to --remote and the page-read would be to see if it's possible to
> use ASYNC with sockets, what should be refactored to enable it and how we
> reduce code and functionality duplication.
> 
>  
>>From 645c2f8e5afe090ee362d3225b97e2b5f1bc95cb Mon Sep 17 00:00:00 2001
> From: Mike Rapoport <rppt@linux.vnet.ibm.com>
> Date: Fri, 21 Apr 2017 17:29:10 +0300
> Subject: [CRIU][PATCH v3] pagemap: fix reading pages from socket for --remote case
> 
> When --remote option is specified, read_local_page tries to pread from a
> socket, and fails with "Illegal seek" error.
> Restore single pread call for regular image files case and introduce
> maybe_read_page_img_cache version of maybe_read_page method.
> 
> Generally-approved-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>

Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>

And Rodrigo was right telling that the posision in the page-read seem to
be not-needed, as we always access pages images linearly. But that's another
story, I guess we'll need to revisit this a bit later.

> ---
> v3: use different ->maybe_read_page method for --remote case
> v2: simplify call to pread in non-remote case, it does not need 'curr'
> 
>  criu/pagemap.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..353c469 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  {
>  	int fd = img_raw_fd(pr->pi);
>  	int ret;
> -	size_t curr = 0;
>  
>  	/*
>  	 * Flush any pending async requests if any not to break the
> @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
>  
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> -			pr_perror("Can't read mapping page %d", ret);
> -			return -1;
> -		}
> -		curr += ret;
> -		if (curr == len)
> -			break;
> +	ret = pread(fd, buf, len, pr->pi_off);
> +	if (ret != len) {
> +		pr_perror("Can't read mapping page %d", ret);
> +		return -1;
>  	}
>  
>  	if (opts.auto_dedup) {
> @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
>  	return ret;
>  }
>  
> +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr,
> +				     int nr, void *buf, unsigned flags)
> +{
> +	unsigned long len = nr * PAGE_SIZE;
> +	int fd = img_raw_fd(pr->pi);
> +	int ret;
> +	size_t curr = 0;
> +
> +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> +	while (1) {
> +		ret = read(fd, buf + curr, len - curr);
> +		if (ret < 0) {
> +			pr_perror("Can't read mapping page %d", ret);
> +			return -1;
> +		}
> +		curr += ret;
> +		if (curr == len)
> +			break;
> +	}
> +
> +	if (opts.auto_dedup) {
> +		ret = punch_hole(pr, pr->pi_off, len, false);
> +		if (ret == -1)
> +			return -1;
> +	}
> +
> +	if (ret == 0 && pr->io_complete)
> +		ret = pr->io_complete(pr, vaddr, nr);
> +
> +	pr->pi_off += len;
> +
> +	return ret;
> +}
> +
>  static int read_page_complete(int pid, unsigned long vaddr, int nr_pages, void *priv)
>  {
>  	int ret = 0;
> @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->id = ids++;
>  	pr->pid = pid;
>  
> -	if (remote)
> +	if (opts.remote)
> +		pr->maybe_read_page = maybe_read_page_img_cache;
> +	else if (remote)
>  		pr->maybe_read_page = maybe_read_page_remote;
>  	else
>  		pr->maybe_read_page = maybe_read_page_local;
>
Andrey Vagin April 25, 2017, 8:47 p.m.
On Tue, Apr 25, 2017 at 01:38:01PM +0300, Mike Rapoport wrote:
> On Tue, Apr 25, 2017 at 08:56:19AM +0100, Rodrigo Bruno wrote:
> > Hi,
> > 
> > 2017-04-24 18:00 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:
> > 
> > > >>
> > > >> Maybe it's better to introduce separate ->maybe_read_page callback for
> > > opts.remote case?
> > > >> This would let us localize the cache/proxy pages reading code in one
> > > place.
> > > >
> > > > Well, we could. E.g. something like the patch below (untested). Note that
> > > > we already have maybe_read_page_remote for the remote lazy restore case,
> > > > and, ideally, the cache/proxy and lazy cases should use the same code to
> > > > read pages.
> > >
> > > Yup. I like this patch :)
> > > Rodrigo, what do you think?
> > >
> > 
> > I agree.
> 
> All right, below is the more formal version of the patch. I'd suggest to
> start with it to make --remote able to restore the memory. The next step
> related to --remote and the page-read would be to see if it's possible to
> use ASYNC with sockets, what should be refactored to enable it and how we
> reduce code and functionality duplication.
> 
>  
> From 645c2f8e5afe090ee362d3225b97e2b5f1bc95cb Mon Sep 17 00:00:00 2001
> From: Mike Rapoport <rppt@linux.vnet.ibm.com>
> Date: Fri, 21 Apr 2017 17:29:10 +0300
> Subject: [CRIU][PATCH v3] pagemap: fix reading pages from socket for --remote case
> 
> When --remote option is specified, read_local_page tries to pread from a
> socket, and fails with "Illegal seek" error.
> Restore single pread call for regular image files case and introduce
> maybe_read_page_img_cache version of maybe_read_page method.
> 
> Generally-approved-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> ---
> v3: use different ->maybe_read_page method for --remote case
> v2: simplify call to pread in non-remote case, it does not need 'curr'
> 
>  criu/pagemap.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/criu/pagemap.c b/criu/pagemap.c
> index 6c741b4..353c469 100644
> --- a/criu/pagemap.c
> +++ b/criu/pagemap.c
> @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  {
>  	int fd = img_raw_fd(pr->pi);
>  	int ret;
> -	size_t curr = 0;
>  
>  	/*
>  	 * Flush any pending async requests if any not to break the
> @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
>  		return -1;
>  
>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> -	while (1) {
> -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> -		if (ret < 1) {
> -			pr_perror("Can't read mapping page %d", ret);
> -			return -1;
> -		}
> -		curr += ret;
> -		if (curr == len)
> -			break;

Why do you remove this loop?

> +	ret = pread(fd, buf, len, pr->pi_off);
> +	if (ret != len) {
> +		pr_perror("Can't read mapping page %d", ret);
> +		return -1;
>  	}
>  
>  	if (opts.auto_dedup) {
> @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
>  	return ret;
>  }
>  
> +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr,
> +				     int nr, void *buf, unsigned flags)
> +{
> +	unsigned long len = nr * PAGE_SIZE;
> +	int fd = img_raw_fd(pr->pi);
> +	int ret;
> +	size_t curr = 0;
> +
> +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> +	while (1) {
> +		ret = read(fd, buf + curr, len - curr);
> +		if (ret < 0) {
> +			pr_perror("Can't read mapping page %d", ret);
> +			return -1;
> +		}
> +		curr += ret;
> +		if (curr == len)
> +			break;
> +	}
> +
> +	if (opts.auto_dedup) {
> +		ret = punch_hole(pr, pr->pi_off, len, false);
> +		if (ret == -1)
> +			return -1;
> +	}
> +
> +	if (ret == 0 && pr->io_complete)
> +		ret = pr->io_complete(pr, vaddr, nr);
> +
> +	pr->pi_off += len;
> +
> +	return ret;
> +}
> +
>  static int read_page_complete(int pid, unsigned long vaddr, int nr_pages, void *priv)
>  {
>  	int ret = 0;
> @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	pr->id = ids++;
>  	pr->pid = pid;
>  
> -	if (remote)
> +	if (opts.remote)
> +		pr->maybe_read_page = maybe_read_page_img_cache;
> +	else if (remote)
>  		pr->maybe_read_page = maybe_read_page_remote;
>  	else
>  		pr->maybe_read_page = maybe_read_page_local;
> -- 
> 1.9.1
>
Mike Rapoport April 25, 2017, 9:25 p.m.
On April 25, 2017 11:47:59 PM GMT+03:00, Andrei Vagin <avagin@virtuozzo.com> wrote:
>On Tue, Apr 25, 2017 at 01:38:01PM +0300, Mike Rapoport wrote:
>> On Tue, Apr 25, 2017 at 08:56:19AM +0100, Rodrigo Bruno wrote:
>> > Hi,
>> > 
>> > 2017-04-24 18:00 GMT+01:00 Pavel Emelyanov <xemul@virtuozzo.com>:
>> > 
>> > > >>
>> > > >> Maybe it's better to introduce separate ->maybe_read_page
>callback for
>> > > opts.remote case?
>> > > >> This would let us localize the cache/proxy pages reading code
>in one
>> > > place.
>> > > >
>> > > > Well, we could. E.g. something like the patch below (untested).
>Note that
>> > > > we already have maybe_read_page_remote for the remote lazy
>restore case,
>> > > > and, ideally, the cache/proxy and lazy cases should use the
>same code to
>> > > > read pages.
>> > >
>> > > Yup. I like this patch :)
>> > > Rodrigo, what do you think?
>> > >
>> > 
>> > I agree.
>> 
>> All right, below is the more formal version of the patch. I'd suggest
>to
>> start with it to make --remote able to restore the memory. The next
>step
>> related to --remote and the page-read would be to see if it's
>possible to
>> use ASYNC with sockets, what should be refactored to enable it and
>how we
>> reduce code and functionality duplication.
>> 
>>  
>> From 645c2f8e5afe090ee362d3225b97e2b5f1bc95cb Mon Sep 17 00:00:00
>2001
>> From: Mike Rapoport <rppt@linux.vnet.ibm.com>
>> Date: Fri, 21 Apr 2017 17:29:10 +0300
>> Subject: [CRIU][PATCH v3] pagemap: fix reading pages from socket for
>--remote case
>> 
>> When --remote option is specified, read_local_page tries to pread
>from a
>> socket, and fails with "Illegal seek" error.
>> Restore single pread call for regular image files case and introduce
>> maybe_read_page_img_cache version of maybe_read_page method.
>> 
>> Generally-approved-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
>> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
>> ---
>> v3: use different ->maybe_read_page method for --remote case
>> v2: simplify call to pread in non-remote case, it does not need
>'curr'
>> 
>>  criu/pagemap.c | 52
>+++++++++++++++++++++++++++++++++++++++++-----------
>>  1 file changed, 41 insertions(+), 11 deletions(-)
>> 
>> diff --git a/criu/pagemap.c b/criu/pagemap.c
>> index 6c741b4..353c469 100644
>> --- a/criu/pagemap.c
>> +++ b/criu/pagemap.c
>> @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr,
>unsigned long vaddr,
>>  {
>>  	int fd = img_raw_fd(pr->pi);
>>  	int ret;
>> -	size_t curr = 0;
>>  
>>  	/*
>>  	 * Flush any pending async requests if any not to break the
>> @@ -265,15 +264,10 @@ static int read_local_page(struct page_read
>*pr, unsigned long vaddr,
>>  		return -1;
>>  
>>  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
>pr->id, pr->cvaddr, pr->pi_off);
>> -	while (1) {
>> -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
>> -		if (ret < 1) {
>> -			pr_perror("Can't read mapping page %d", ret);
>> -			return -1;
>> -		}
>> -		curr += ret;
>> -		if (curr == len)
>> -			break;
>
>Why do you remove this loop?

This loop is introduced by the commit 86b9170ff3cd "page-read: Support reading by chunks" to allow read data from image-cache through socket.
pread and socket don't play well together, so read_local_page is used only with local images and data from the socket is read in maybe_read_page_image_cache.

>> +	ret = pread(fd, buf, len, pr->pi_off);
>> +	if (ret != len) {
>> +		pr_perror("Can't read mapping page %d", ret);
>> +		return -1;
>>  	}
>>  
>>  	if (opts.auto_dedup) {
>> @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct
>page_read *pr, unsigned long vaddr,
>>  	return ret;
>>  }
>>  
>> +static int maybe_read_page_img_cache(struct page_read *pr, unsigned
>long vaddr,
>> +				     int nr, void *buf, unsigned flags)
>> +{
>> +	unsigned long len = nr * PAGE_SIZE;
>> +	int fd = img_raw_fd(pr->pi);
>> +	int ret;
>> +	size_t curr = 0;
>> +
>> +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid,
>pr->id, pr->cvaddr, pr->pi_off);
>> +	while (1) {
>> +		ret = read(fd, buf + curr, len - curr);
>> +		if (ret < 0) {
>> +			pr_perror("Can't read mapping page %d", ret);
>> +			return -1;
>> +		}
>> +		curr += ret;
>> +		if (curr == len)
>> +			break;
>> +	}
>> +
>> +	if (opts.auto_dedup) {
>> +		ret = punch_hole(pr, pr->pi_off, len, false);
>> +		if (ret == -1)
>> +			return -1;
>> +	}
>> +
>> +	if (ret == 0 && pr->io_complete)
>> +		ret = pr->io_complete(pr, vaddr, nr);
>> +
>> +	pr->pi_off += len;
>> +
>> +	return ret;
>> +}
>> +
>>  static int read_page_complete(int pid, unsigned long vaddr, int
>nr_pages, void *priv)
>>  {
>>  	int ret = 0;
>> @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct
>page_read *pr, int pr_flags)
>>  	pr->id = ids++;
>>  	pr->pid = pid;
>>  
>> -	if (remote)
>> +	if (opts.remote)
>> +		pr->maybe_read_page = maybe_read_page_img_cache;
>> +	else if (remote)
>>  		pr->maybe_read_page = maybe_read_page_remote;
>>  	else
>>  		pr->maybe_read_page = maybe_read_page_local;
>> -- 
>> 1.9.1
>>
Andrey Vagin April 29, 2017, 5:26 a.m.
On Tue, Apr 25, 2017 at 09:55:50PM +0300, Pavel Emelyanov wrote:
> On 04/25/2017 01:38 PM, Mike Rapoport wrote:
> > On Tue, Apr 25, 2017 at 08:56:19AM +0100, Rodrigo Bruno wrote:
> >>From 645c2f8e5afe090ee362d3225b97e2b5f1bc95cb Mon Sep 17 00:00:00 2001
> > From: Mike Rapoport <rppt@linux.vnet.ibm.com>
> > Date: Fri, 21 Apr 2017 17:29:10 +0300
> > Subject: [CRIU][PATCH v3] pagemap: fix reading pages from socket for --remote case
> > 
> > When --remote option is specified, read_local_page tries to pread from a
> > socket, and fails with "Illegal seek" error.
> > Restore single pread call for regular image files case and introduce
> > maybe_read_page_img_cache version of maybe_read_page method.
> > 
> > Generally-approved-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
> > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> 
> Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>

Applied, thanks
> 
> And Rodrigo was right telling that the posision in the page-read seem to
> be not-needed, as we always access pages images linearly. But that's another
> story, I guess we'll need to revisit this a bit later.
> 
> > ---
> > v3: use different ->maybe_read_page method for --remote case
> > v2: simplify call to pread in non-remote case, it does not need 'curr'
> > 
> >  criu/pagemap.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 41 insertions(+), 11 deletions(-)
> > 
> > diff --git a/criu/pagemap.c b/criu/pagemap.c
> > index 6c741b4..353c469 100644
> > --- a/criu/pagemap.c
> > +++ b/criu/pagemap.c
> > @@ -255,7 +255,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
> >  {
> >  	int fd = img_raw_fd(pr->pi);
> >  	int ret;
> > -	size_t curr = 0;
> >  
> >  	/*
> >  	 * Flush any pending async requests if any not to break the
> > @@ -265,15 +264,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr,
> >  		return -1;
> >  
> >  	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> > -	while (1) {
> > -		ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr);
> > -		if (ret < 1) {
> > -			pr_perror("Can't read mapping page %d", ret);
> > -			return -1;
> > -		}
> > -		curr += ret;
> > -		if (curr == len)
> > -			break;
> > +	ret = pread(fd, buf, len, pr->pi_off);
> > +	if (ret != len) {
> > +		pr_perror("Can't read mapping page %d", ret);
> > +		return -1;
> >  	}
> >  
> >  	if (opts.auto_dedup) {
> > @@ -390,6 +384,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr,
> >  	return ret;
> >  }
> >  
> > +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr,
> > +				     int nr, void *buf, unsigned flags)
> > +{
> > +	unsigned long len = nr * PAGE_SIZE;
> > +	int fd = img_raw_fd(pr->pi);
> > +	int ret;
> > +	size_t curr = 0;
> > +
> > +	pr_debug("\tpr%d-%u Read page from self %lx/%"PRIx64"\n", pr->pid, pr->id, pr->cvaddr, pr->pi_off);
> > +	while (1) {
> > +		ret = read(fd, buf + curr, len - curr);
> > +		if (ret < 0) {
> > +			pr_perror("Can't read mapping page %d", ret);
> > +			return -1;
> > +		}
> > +		curr += ret;
> > +		if (curr == len)
> > +			break;
> > +	}
> > +
> > +	if (opts.auto_dedup) {
> > +		ret = punch_hole(pr, pr->pi_off, len, false);
> > +		if (ret == -1)
> > +			return -1;
> > +	}
> > +
> > +	if (ret == 0 && pr->io_complete)
> > +		ret = pr->io_complete(pr, vaddr, nr);
> > +
> > +	pr->pi_off += len;
> > +
> > +	return ret;
> > +}
> > +
> >  static int read_page_complete(int pid, unsigned long vaddr, int nr_pages, void *priv)
> >  {
> >  	int ret = 0;
> > @@ -803,7 +831,9 @@ int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
> >  	pr->id = ids++;
> >  	pr->pid = pid;
> >  
> > -	if (remote)
> > +	if (opts.remote)
> > +		pr->maybe_read_page = maybe_read_page_img_cache;
> > +	else if (remote)
> >  		pr->maybe_read_page = maybe_read_page_remote;
> >  	else
> >  		pr->maybe_read_page = maybe_read_page_local;
> > 
>