page-xfer: Be more verbose on error-paths

Submitted by Dmitry Safonov on June 22, 2017, 5:02 p.m.

Details

Message ID 20170622170215.31557-1-dsafonov@virtuozzo.com
State New
Series "page-xfer: Be more verbose on error-paths"
Headers show

Commit Message

Dmitry Safonov June 22, 2017, 5:02 p.m.
There is a lack of logging in page-xfer and I've an issue where
page-server closed session, but it's unclear why:

> (03.571793) Punch!/(nil)/4096/
> (03.572431) pr331:New bunch/(nil)/0/
> (03.572442) Session over

Log errors to help debugging page server.

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
---
 criu/page-xfer.c | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 49693bb179d6..aa8fccb8f86f 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -284,11 +284,14 @@  static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
 	u32 pages_id;
 
 	xfer->pmi = open_image(fd_type, O_DUMP, id);
-	if (!xfer->pmi)
+	if (!xfer->pmi) {
+		pr_err("Failed to open image file\n");
 		return -1;
+	}
 
 	xfer->pi = open_pages_image(O_DUMP, xfer->pmi, &pages_id);
 	if (!xfer->pi) {
+		pr_err("Failed to open pages image file\n");
 		close_image(xfer->pmi);
 		return -1;
 	}
@@ -329,6 +332,7 @@  static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
 		if (!xfer->parent) {
 			if (!opts.remote)
 				close(pfd);
+			pr_err("xmalloc() failed to allocate parent\n");
 			return -1;
 		}
 
@@ -468,9 +472,10 @@  int check_parent_local_xfer(int fd_type, int id)
 
 	if (opts.remote) {
 		pfd = get_curr_parent_snapshot_id_idx();
-		pr_err("Unable to get parent snapshot id\n");
-		if (pfd == -1)
+		if (pfd == -1) {
+			pr_err("Unable to get parent snapshot id\n");
 			return -1;
+		}
 	} else {
 		pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
 		if (pfd < 0 && errno == ENOENT)
@@ -499,8 +504,10 @@  static int page_server_check_parent(int sk, struct page_server_iov *pi)
 	id = decode_pm_id(pi->dst_id);
 
 	ret = check_parent_local_xfer(type, id);
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("check_parent_local_xfer() failed: %d\n", ret);
 		return -1;
+	}
 
 	if (write(sk, &ret, sizeof(ret)) != sizeof(ret)) {
 		pr_perror("Unable to send response");
@@ -580,8 +587,10 @@  static int page_server_open(int sk, struct page_server_iov *pi)
 
 	page_server_close();
 
-	if (open_page_local_xfer(&cxfer.loc_xfer, type, id))
+	if (open_page_local_xfer(&cxfer.loc_xfer, type, id)) {
+		pr_err("open_page_local_xfer() failed\n");
 		return -1;
+	}
 
 	cxfer.dst_id = pi->dst_id;
 
@@ -619,8 +628,10 @@  static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
 		return -1;
 
 	psi2iovec(pi, &iov);
-	if (lxfer->write_pagemap(lxfer, &iov, flags))
+	if (lxfer->write_pagemap(lxfer, &iov, flags)) {
+		pr_err("Failed to write_pagemap()\n");
 		return -1;
+	}
 
 	if (!(flags & PE_PRESENT))
 		return 0;
@@ -653,8 +664,10 @@  static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
 			return -1;
 		}
 
-		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk))
+		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk)) {
+			pr_err("Failed to write_pages()\n");
 			return -1;
+		}
 
 		len -= chunk;
 	}
@@ -674,8 +687,10 @@  static int page_server_get_pages(int sk, struct page_server_iov *pi)
 
 	ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr,
 			     &pi->nr_pages, PPB_LAZY);
-	if (ret)
+	if (ret) {
+		pr_err("page_pipe_read() failed: %d\n", ret);
 		return ret;
+	}
 
 	if (pi->nr_pages == 0) {
 		/* no iovs found means we've hit a zero page */
@@ -689,8 +704,10 @@  static int page_server_get_pages(int sk, struct page_server_iov *pi)
 		return -1;
 
 	ret = splice(pipe_read_dest.p[0], NULL, sk, NULL, len, SPLICE_F_MOVE);
-	if (ret != len)
+	if (ret != len) {
+		pr_perror("Failed to splice()");
 		return -1;
+	}
 
 	tcp_nodelay(sk, true);
 
@@ -729,11 +746,9 @@  static int page_server_serve(int sk)
 		u32 cmd;
 
 		ret = recv(sk, &pi, sizeof(pi), MSG_WAITALL);
-		if (!ret)
-			break;
 
 		if (ret != sizeof(pi)) {
-			pr_perror("Can't read pagemap from socket");
+			pr_perror("Can't read pageserver's command from socket, recv() = %d", ret);
 			ret = -1;
 			break;
 		}
@@ -788,8 +803,15 @@  static int page_server_serve(int sk)
 			break;
 		}
 
-		if (ret || (pi.cmd == PS_IOV_FLUSH_N_CLOSE))
+		if (ret) {
+			pr_err("Ending session due to ret code %d\n", ret);
 			break;
+		}
+
+		if (pi.cmd == PS_IOV_FLUSH_N_CLOSE) {
+			pr_info("Closing session\n");
+			break;
+		}
 	}
 
 	if (receiving_pages && !ret && !flushed) {

Comments

Dmitry Safonov June 22, 2017, 5:28 p.m.
On 06/22/2017 08:02 PM, Dmitry Safonov wrote:
> There is a lack of logging in page-xfer and I've an issue where
> page-server closed session, but it's unclear why:
> 
>> (03.571793) Punch!/(nil)/4096/
>> (03.572431) pr331:New bunch/(nil)/0/
>> (03.572442) Session over
> 
> Log errors to help debugging page server.
> 
> Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>

Please, drop it, I'll resend a new version.

> ---
>   criu/page-xfer.c | 48 +++++++++++++++++++++++++++++++++++-------------
>   1 file changed, 35 insertions(+), 13 deletions(-)
> 
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> index 49693bb179d6..aa8fccb8f86f 100644
> --- a/criu/page-xfer.c
> +++ b/criu/page-xfer.c
> @@ -284,11 +284,14 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>   	u32 pages_id;
>   
>   	xfer->pmi = open_image(fd_type, O_DUMP, id);
> -	if (!xfer->pmi)
> +	if (!xfer->pmi) {
> +		pr_err("Failed to open image file\n");
>   		return -1;
> +	}
>   
>   	xfer->pi = open_pages_image(O_DUMP, xfer->pmi, &pages_id);
>   	if (!xfer->pi) {
> +		pr_err("Failed to open pages image file\n");
>   		close_image(xfer->pmi);
>   		return -1;
>   	}
> @@ -329,6 +332,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>   		if (!xfer->parent) {
>   			if (!opts.remote)
>   				close(pfd);
> +			pr_err("xmalloc() failed to allocate parent\n");
>   			return -1;
>   		}
>   
> @@ -468,9 +472,10 @@ int check_parent_local_xfer(int fd_type, int id)
>   
>   	if (opts.remote) {
>   		pfd = get_curr_parent_snapshot_id_idx();
> -		pr_err("Unable to get parent snapshot id\n");
> -		if (pfd == -1)
> +		if (pfd == -1) {
> +			pr_err("Unable to get parent snapshot id\n");
>   			return -1;
> +		}
>   	} else {
>   		pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
>   		if (pfd < 0 && errno == ENOENT)
> @@ -499,8 +504,10 @@ static int page_server_check_parent(int sk, struct page_server_iov *pi)
>   	id = decode_pm_id(pi->dst_id);
>   
>   	ret = check_parent_local_xfer(type, id);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pr_err("check_parent_local_xfer() failed: %d\n", ret);
>   		return -1;
> +	}
>   
>   	if (write(sk, &ret, sizeof(ret)) != sizeof(ret)) {
>   		pr_perror("Unable to send response");
> @@ -580,8 +587,10 @@ static int page_server_open(int sk, struct page_server_iov *pi)
>   
>   	page_server_close();
>   
> -	if (open_page_local_xfer(&cxfer.loc_xfer, type, id))
> +	if (open_page_local_xfer(&cxfer.loc_xfer, type, id)) {
> +		pr_err("open_page_local_xfer() failed\n");
>   		return -1;
> +	}
>   
>   	cxfer.dst_id = pi->dst_id;
>   
> @@ -619,8 +628,10 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
>   		return -1;
>   
>   	psi2iovec(pi, &iov);
> -	if (lxfer->write_pagemap(lxfer, &iov, flags))
> +	if (lxfer->write_pagemap(lxfer, &iov, flags)) {
> +		pr_err("Failed to write_pagemap()\n");
>   		return -1;
> +	}
>   
>   	if (!(flags & PE_PRESENT))
>   		return 0;
> @@ -653,8 +664,10 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
>   			return -1;
>   		}
>   
> -		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk))
> +		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk)) {
> +			pr_err("Failed to write_pages()\n");
>   			return -1;
> +		}
>   
>   		len -= chunk;
>   	}
> @@ -674,8 +687,10 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
>   
>   	ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr,
>   			     &pi->nr_pages, PPB_LAZY);
> -	if (ret)
> +	if (ret) {
> +		pr_err("page_pipe_read() failed: %d\n", ret);
>   		return ret;
> +	}
>   
>   	if (pi->nr_pages == 0) {
>   		/* no iovs found means we've hit a zero page */
> @@ -689,8 +704,10 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
>   		return -1;
>   
>   	ret = splice(pipe_read_dest.p[0], NULL, sk, NULL, len, SPLICE_F_MOVE);
> -	if (ret != len)
> +	if (ret != len) {
> +		pr_perror("Failed to splice()");
>   		return -1;
> +	}
>   
>   	tcp_nodelay(sk, true);
>   
> @@ -729,11 +746,9 @@ static int page_server_serve(int sk)
>   		u32 cmd;
>   
>   		ret = recv(sk, &pi, sizeof(pi), MSG_WAITALL);
> -		if (!ret)
> -			break;
>   
>   		if (ret != sizeof(pi)) {
> -			pr_perror("Can't read pagemap from socket");
> +			pr_perror("Can't read pageserver's command from socket, recv() = %d", ret);
>   			ret = -1;
>   			break;
>   		}
> @@ -788,8 +803,15 @@ static int page_server_serve(int sk)
>   			break;
>   		}
>   
> -		if (ret || (pi.cmd == PS_IOV_FLUSH_N_CLOSE))
> +		if (ret) {
> +			pr_err("Ending session due to ret code %d\n", ret);
>   			break;
> +		}
> +
> +		if (pi.cmd == PS_IOV_FLUSH_N_CLOSE) {
> +			pr_info("Closing session\n");
> +			break;
> +		}
>   	}
>   
>   	if (receiving_pages && !ret && !flushed) {
>