[03/11] soccr: add support for half-closed sockets

Submitted by Andrei Vagin on Nov. 11, 2016, 7:10 a.m.

Details

Message ID 1478848211-23802-4-git-send-email-avagin@openvz.org
State Superseded
Series "tcp: add support of half closed tcp sockets"
Headers show

Commit Message

Andrei Vagin Nov. 11, 2016, 7:10 a.m.
From: Andrei Vagin <avagin@virtuozzo.com>

A socket is in one of the half-closed states if it has sent a fin packet
or has received a fin packet.

CRIU plays with fin packets to restore half-closed states too.

When we need to send a fin packet from a socket, we can call
shutdown(SHUT_WR). When a fin packet has to be restored in
a receive queue, criu generates a fin packet and sends it via
a raw ip socket.

The raw packet is sent with the SOCCR_MARK mark so that it can be
exempted from blocking.

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/Makefile.packages |   2 +-
 soccr/soccr.c          | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
 soccr/soccr.h          |  18 +++++-
 3 files changed, 180 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/Makefile.packages b/criu/Makefile.packages
index 53fbdae..886394f 100644
--- a/criu/Makefile.packages
+++ b/criu/Makefile.packages
@@ -19,7 +19,7 @@  REQ-DEB-PKG-NAMES	+= libcap-dev
 
 REQ-DEB-PKG-TEST-NAMES  += libaio-dev
 
-export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
+export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
 
 check-packages-failed:
 	$(warning Can not find some of the required libraries)
diff --git a/soccr/soccr.c b/soccr/soccr.c
index 7c6bfb1..af39eda 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -4,6 +4,9 @@ 
 #include <sys/ioctl.h>
 #include <errno.h>
 #include <linux/sockios.h>
+#include <libnet.h>
+#include <assert.h>
+
 #include "soccr.h"
 
 #ifndef SIOCOUTQNSD
@@ -11,6 +14,20 @@ 
 #define SIOCOUTQNSD     0x894B
 #endif
 
+enum {
+        TCPF_ESTABLISHED = (1 << 1),
+        TCPF_SYN_SENT    = (1 << 2),
+        TCPF_SYN_RECV    = (1 << 3),
+        TCPF_FIN_WAIT1   = (1 << 4),
+        TCPF_FIN_WAIT2   = (1 << 5),
+        TCPF_TIME_WAIT   = (1 << 6),
+        TCPF_CLOSE       = (1 << 7),
+        TCPF_CLOSE_WAIT  = (1 << 8),
+        TCPF_LAST_ACK    = (1 << 9),
+        TCPF_LISTEN      = (1 << 10),
+        TCPF_CLOSING     = (1 << 11),
+};
+
 static void (*log)(unsigned int loglevel, const char *format, ...)
 	__attribute__ ((__format__ (__printf__, 2, 3)));
 static unsigned int log_level = 0;
@@ -89,6 +106,11 @@  static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
 
 	switch (ti->tcpi_state) {
 	case TCP_ESTABLISHED:
+	case TCP_FIN_WAIT1:
+	case TCP_FIN_WAIT2:
+	case TCP_LAST_ACK:
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSING:
 	case TCP_CLOSE:
 		break;
 	default:
@@ -96,7 +118,7 @@  static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
 		return -1;
 	}
 
-	data->state = TCP_ESTABLISHED;
+	data->state = ti->tcpi_state;
 
 	if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
 		loge("Unable to get size of snd queue");
@@ -112,6 +134,14 @@  static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
 
 	data->unsq_len = size;
 
+	/* Don't account the fin packet. It doesn't countain real data. */
+	if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
+		assert(data->outq_len > 0);
+		data->outq_len--;
+		data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
+	}
+
+
 	if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
 		loge("Unable to get size of recv queue");
 		return -1;
@@ -325,12 +355,22 @@  static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
 int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk,
 		struct libsoccr_sk_data *data, unsigned data_size)
 {
+	int mstate = 1 << data->state;
+
 	if (!data || data_size < SOCR_DATA_MIN_SIZE)
 		return -1;
 
-	if (data->state != TCP_ESTABLISHED)
+	if (data->state == TCP_LISTEN)
 		return -1;
 
+	if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE))
+		data->inq_seq--;
+
+	/* outq_seq is adjusted due to not accointing the fin packet */
+	if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
+			TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
+		data->outq_seq--;
+
 	if (set_queue_seq(sk, TCP_RECV_QUEUE,
 				data->inq_seq - data->inq_len))
 		return -2;
@@ -400,6 +440,98 @@  int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
 	return 0;
 }
 
+static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size)
+{
+	int ret, exit_code = -1;
+	char errbuf[LIBNET_ERRBUF_SIZE];
+	int mark = SOCCR_MARK;;
+	int libnet_type;
+	libnet_t *l;
+
+	libnet_type = data->family == AF_INET6 ? LIBNET_RAW6 : LIBNET_RAW4;
+
+	l = libnet_init(
+		libnet_type,                            /* injection type */
+		NULL,                                   /* network interface */
+		errbuf);                                /* errbuf */
+	if (l == NULL)
+		return -1;
+
+	if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
+		goto err;
+
+	ret = libnet_build_tcp(
+		data->dst_port,		/* source port */
+		data->src_port,		/* destination port */
+		data->inq_seq,			/* sequence number */
+		data->outq_seq - data->outq_len,	/* acknowledgement num */
+		TH_FIN | TH_ACK,		/* control flags */
+		data->rcv_wnd,			/* window size */
+		0,				/* checksum */
+		10,				/* urgent pointer */
+		LIBNET_TCP_H + 20,		/* TCP packet size */
+		NULL,				/* payload */
+		0,				/* payload size */
+		l,				/* libnet handle */
+		0);				/* libnet id */
+	if (ret == -1) {
+		loge("Can't build TCP header: %s\n", libnet_geterror(l));
+		goto err;
+	}
+
+	if (data->family == AF_INET6) {
+		struct libnet_in6_addr src, dst;
+
+		memcpy(&dst, data->dst_addr, sizeof(dst));
+		memcpy(&src, data->src_addr, sizeof(src));
+
+		ret = libnet_build_ipv6(
+			0, 0,
+			LIBNET_TCP_H,	/* length */
+			IPPROTO_TCP,	/* protocol */
+			64,		/* hop limit */
+			dst,		/* source IP */
+			src,		/* destination IP */
+			NULL,		/* payload */
+			0,		/* payload size */
+			l,		/* libnet handle */
+			0);		/* libnet id */
+	} else if (data->family == AF_INET)
+		ret = libnet_build_ipv4(
+			LIBNET_IPV4_H + LIBNET_TCP_H + 20,	/* length */
+			0,			/* TOS */
+			242,			/* IP ID */
+			0,			/* IP Frag */
+			64,			/* TTL */
+			IPPROTO_TCP,		/* protocol */
+			0,			/* checksum */
+			data->dst_addr[0],	/* source IP */
+			data->src_addr[0],	/* destination IP */
+			NULL,			/* payload */
+			0,			/* payload size */
+			l,			/* libnet handle */
+			0);			/* libnet id */
+	else {
+		loge("Unknown socket family");
+		goto err;
+	}
+	if (ret == -1) {
+		loge("Can't build IP header: %s\n", libnet_geterror(l));
+		goto err;
+	}
+
+	ret = libnet_write(l);
+	if (ret == -1) {
+		loge("Unable to send a fin packet: %s", libnet_geterror(l));
+		goto err;
+	}
+
+	exit_code = 0;
+err:
+	libnet_destroy(l);
+	return exit_code;
+}
+
 int libsoccr_set_sk_data(struct libsoccr_sk *sk,
 		struct libsoccr_sk_data *data, unsigned data_size)
 {
@@ -411,13 +543,40 @@  int libsoccr_set_sk_data(struct libsoccr_sk *sk,
 			.rcv_wnd = data->rcv_wnd,
 			.rcv_wup = data->rcv_wup,
 		};
-	
+
+		if ((1 << data->state) & ((1 << TCP_CLOSE_WAIT) |
+				    (1 << TCP_LAST_ACK) |
+				    (1 << TCP_CLOSE))) {
+			wopt.rcv_wup--;
+			wopt.rcv_wnd++;
+		}
+
 		if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
 			loge("Unable to set window parameters");
 			return -1;
 		}
 	}
 
+	if (data->flags & SOCCR_FLAGS_ADDR) {
+		int mstate = 1 << data->state;
+
+		if (data->state == TCP_CLOSING) {
+			shutdown(sk->fd, SHUT_WR);
+		}
+		if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE)) {
+			if (send_fin(sk->fd, data, data_size) < 0)
+				return -1;
+		}
+
+		if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
+				TCPF_FIN_WAIT2 | TCPF_CLOSE)) {
+			shutdown(sk->fd, SHUT_WR);
+		}
+	} else if (data->state != TCP_ESTABLISHED) {
+		loge("Unable to restore a socket state: %d", data->state);
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/soccr/soccr.h b/soccr/soccr.h
index 4e272d5..a3a950b 100644
--- a/soccr/soccr.h
+++ b/soccr/soccr.h
@@ -5,6 +5,9 @@ 
 
 #include "config.h"
 
+/* All packets with this mark have not to be blocked. */
+#define SOCCR_MARK 0xC114
+
 #ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
 struct tcp_repair_window {
 	uint32_t   snd_wl1;
@@ -75,11 +78,18 @@  struct libsoccr_sk_data {
 	__u32	timestamp;
 
 	__u32	flags; /* SOCCR_FLAGS_... below */
-	__u32	snd_wl1;
+
+	__u32	snd_wl1;	/* SOCCR_FLAGS_WINDOW */
 	__u32	snd_wnd;
 	__u32	max_window;
 	__u32	rcv_wnd;
 	__u32	rcv_wup;
+
+	__u32   family;		/* SOCCR_FLAGS_ADDR */
+	__u32	src_port;
+	__u32	dst_port;
+	__u32	src_addr[4];
+	__u32	dst_addr[4];
 };
 
 /*
@@ -99,6 +109,12 @@  struct libsoccr_sk_data {
 #define SOCCR_FLAGS_WINDOW	0x1
 
 /*
+ * Source and destination addresses, which are required to restore
+ * a socket state.
+ */
+#define SOCCR_FLAGS_ADDR	0x2
+
+/*
  * These two calls pause and resume the socket for and after C/R
  * The first one returns an opaque handle that is to be used by all
  * the subsequent calls.

Comments

Pavel Emelianov Nov. 21, 2016, 7:36 p.m.
On 11/11/2016 10:10 AM, Andrei Vagin wrote:
> From: Andrei Vagin <avagin@virtuozzo.com>
> 
> A socket is in one of half-closed states, if it sent a fin packet
> or it received a fin packet.
> 
> CRIU plays with fin packets to restore half-closed states too.
> 
> When we need to sent a fin packet from a socket, we can call
> shutdown(SHUT_WR). When a fin packet has to be restore in
> a received queue, criu generate a fin packet and send it via
> a raw ip socket.
> 
> A raw packet is sent with the SOCCR_MARK mark to be able
> to not block it.
> 
> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> ---
>  criu/Makefile.packages |   2 +-
>  soccr/soccr.c          | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  soccr/soccr.h          |  18 +++++-
>  3 files changed, 180 insertions(+), 5 deletions(-)
> 
> diff --git a/criu/Makefile.packages b/criu/Makefile.packages
> index 53fbdae..886394f 100644
> --- a/criu/Makefile.packages
> +++ b/criu/Makefile.packages
> @@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES	+= libcap-dev
>  
>  REQ-DEB-PKG-TEST-NAMES  += libaio-dev
>  
> -export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
> +export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
>  
>  check-packages-failed:
>  	$(warning Can not find some of the required libraries)
> diff --git a/soccr/soccr.c b/soccr/soccr.c
> index 7c6bfb1..af39eda 100644
> --- a/soccr/soccr.c
> +++ b/soccr/soccr.c
> @@ -4,6 +4,9 @@
>  #include <sys/ioctl.h>
>  #include <errno.h>
>  #include <linux/sockios.h>
> +#include <libnet.h>
> +#include <assert.h>
> +
>  #include "soccr.h"
>  
>  #ifndef SIOCOUTQNSD
> @@ -11,6 +14,20 @@
>  #define SIOCOUTQNSD     0x894B
>  #endif
>  
> +enum {
> +        TCPF_ESTABLISHED = (1 << 1),
> +        TCPF_SYN_SENT    = (1 << 2),
> +        TCPF_SYN_RECV    = (1 << 3),
> +        TCPF_FIN_WAIT1   = (1 << 4),
> +        TCPF_FIN_WAIT2   = (1 << 5),
> +        TCPF_TIME_WAIT   = (1 << 6),
> +        TCPF_CLOSE       = (1 << 7),
> +        TCPF_CLOSE_WAIT  = (1 << 8),
> +        TCPF_LAST_ACK    = (1 << 9),
> +        TCPF_LISTEN      = (1 << 10),
> +        TCPF_CLOSING     = (1 << 11),
> +};
> +
>  static void (*log)(unsigned int loglevel, const char *format, ...)
>  	__attribute__ ((__format__ (__printf__, 2, 3)));
>  static unsigned int log_level = 0;
> @@ -89,6 +106,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>  
>  	switch (ti->tcpi_state) {
>  	case TCP_ESTABLISHED:
> +	case TCP_FIN_WAIT1:
> +	case TCP_FIN_WAIT2:
> +	case TCP_LAST_ACK:
> +	case TCP_CLOSE_WAIT:
> +	case TCP_CLOSING:
>  	case TCP_CLOSE:
>  		break;
>  	default:
> @@ -96,7 +118,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>  		return -1;
>  	}
>  
> -	data->state = TCP_ESTABLISHED;
> +	data->state = ti->tcpi_state;
>  
>  	if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
>  		loge("Unable to get size of snd queue");
> @@ -112,6 +134,14 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>  
>  	data->unsq_len = size;
>  
> +	/* Don't account the fin packet. It doesn't countain real data. */
> +	if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
> +		assert(data->outq_len > 0);

assert?! We have BUG_ON-s in criu for this, don't we?

> +		data->outq_len--;
> +		data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
> +	}
> +
> +
>  	if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
>  		loge("Unable to get size of recv queue");
>  		return -1;
> @@ -325,12 +355,22 @@ static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
>  int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk,
>  		struct libsoccr_sk_data *data, unsigned data_size)
>  {
> +	int mstate = 1 << data->state;
> +
>  	if (!data || data_size < SOCR_DATA_MIN_SIZE)
>  		return -1;
>  
> -	if (data->state != TCP_ESTABLISHED)
> +	if (data->state == TCP_LISTEN)
>  		return -1;

This doesn't look correct, we don't support more states here.

> +	if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE))
> +		data->inq_seq--;
> +
> +	/* outq_seq is adjusted due to not accointing the fin packet */
> +	if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
> +			TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
> +		data->outq_seq--;
> +
>  	if (set_queue_seq(sk, TCP_RECV_QUEUE,
>  				data->inq_seq - data->inq_len))
>  		return -2;
> @@ -400,6 +440,98 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
>  	return 0;
>  }
>  
> +static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size)
> +{
> +	int ret, exit_code = -1;
> +	char errbuf[LIBNET_ERRBUF_SIZE];
> +	int mark = SOCCR_MARK;;
> +	int libnet_type;
> +	libnet_t *l;
> +
> +	libnet_type = data->family == AF_INET6 ? LIBNET_RAW6 : LIBNET_RAW4;
> +
> +	l = libnet_init(
> +		libnet_type,                            /* injection type */
> +		NULL,                                   /* network interface */
> +		errbuf);                                /* errbuf */
> +	if (l == NULL)
> +		return -1;
> +
> +	if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
> +		goto err;
> +
> +	ret = libnet_build_tcp(
> +		data->dst_port,		/* source port */
> +		data->src_port,		/* destination port */
> +		data->inq_seq,			/* sequence number */
> +		data->outq_seq - data->outq_len,	/* acknowledgement num */
> +		TH_FIN | TH_ACK,		/* control flags */
> +		data->rcv_wnd,			/* window size */
> +		0,				/* checksum */
> +		10,				/* urgent pointer */
> +		LIBNET_TCP_H + 20,		/* TCP packet size */
> +		NULL,				/* payload */
> +		0,				/* payload size */
> +		l,				/* libnet handle */
> +		0);				/* libnet id */
> +	if (ret == -1) {
> +		loge("Can't build TCP header: %s\n", libnet_geterror(l));
> +		goto err;
> +	}
> +
> +	if (data->family == AF_INET6) {
> +		struct libnet_in6_addr src, dst;
> +
> +		memcpy(&dst, data->dst_addr, sizeof(dst));
> +		memcpy(&src, data->src_addr, sizeof(src));
> +
> +		ret = libnet_build_ipv6(
> +			0, 0,
> +			LIBNET_TCP_H,	/* length */
> +			IPPROTO_TCP,	/* protocol */
> +			64,		/* hop limit */
> +			dst,		/* source IP */
> +			src,		/* destination IP */
> +			NULL,		/* payload */
> +			0,		/* payload size */
> +			l,		/* libnet handle */
> +			0);		/* libnet id */
> +	} else if (data->family == AF_INET)
> +		ret = libnet_build_ipv4(
> +			LIBNET_IPV4_H + LIBNET_TCP_H + 20,	/* length */
> +			0,			/* TOS */
> +			242,			/* IP ID */
> +			0,			/* IP Frag */
> +			64,			/* TTL */
> +			IPPROTO_TCP,		/* protocol */
> +			0,			/* checksum */
> +			data->dst_addr[0],	/* source IP */
> +			data->src_addr[0],	/* destination IP */
> +			NULL,			/* payload */
> +			0,			/* payload size */
> +			l,			/* libnet handle */
> +			0);			/* libnet id */
> +	else {
> +		loge("Unknown socket family");
> +		goto err;
> +	}
> +	if (ret == -1) {
> +		loge("Can't build IP header: %s\n", libnet_geterror(l));
> +		goto err;
> +	}
> +
> +	ret = libnet_write(l);
> +	if (ret == -1) {
> +		loge("Unable to send a fin packet: %s", libnet_geterror(l));
> +		goto err;
> +	}
> +
> +	exit_code = 0;
> +err:
> +	libnet_destroy(l);
> +	return exit_code;
> +}
> +
>  int libsoccr_set_sk_data(struct libsoccr_sk *sk,
>  		struct libsoccr_sk_data *data, unsigned data_size)
>  {
> @@ -411,13 +543,40 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
>  			.rcv_wnd = data->rcv_wnd,
>  			.rcv_wup = data->rcv_wup,
>  		};
> -	
> +
> +		if ((1 << data->state) & ((1 << TCP_CLOSE_WAIT) |
> +				    (1 << TCP_LAST_ACK) |
> +				    (1 << TCP_CLOSE))) {
> +			wopt.rcv_wup--;
> +			wopt.rcv_wnd++;
> +		}
> +
>  		if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
>  			loge("Unable to set window parameters");
>  			return -1;
>  		}
>  	}
>  
> +	if (data->flags & SOCCR_FLAGS_ADDR) {
> +		int mstate = 1 << data->state;
> +
> +		if (data->state == TCP_CLOSING) {
> +			shutdown(sk->fd, SHUT_WR);
> +		}
> +		if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE)) {
> +			if (send_fin(sk->fd, data, data_size) < 0)
> +				return -1;
> +		}
> +
> +		if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
> +				TCPF_FIN_WAIT2 | TCPF_CLOSE)) {
> +			shutdown(sk->fd, SHUT_WR);
> +		}
> +	} else if (data->state != TCP_ESTABLISHED) {
> +		loge("Unable to restore a socket state: %d", data->state);
> +		return -1;
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/soccr/soccr.h b/soccr/soccr.h
> index 4e272d5..a3a950b 100644
> --- a/soccr/soccr.h
> +++ b/soccr/soccr.h
> @@ -5,6 +5,9 @@
>  
>  #include "config.h"
>  
> +/* All packets with this mark have not to be blocked. */
> +#define SOCCR_MARK 0xC114
> +
>  #ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
>  struct tcp_repair_window {
>  	uint32_t   snd_wl1;
> @@ -75,11 +78,18 @@ struct libsoccr_sk_data {
>  	__u32	timestamp;
>  
>  	__u32	flags; /* SOCCR_FLAGS_... below */
> -	__u32	snd_wl1;
> +
> +	__u32	snd_wl1;	/* SOCCR_FLAGS_WINDOW */
>  	__u32	snd_wnd;
>  	__u32	max_window;
>  	__u32	rcv_wnd;
>  	__u32	rcv_wup;
> +
> +	__u32   family;		/* SOCCR_FLAGS_ADDR */
> +	__u32	src_port;
> +	__u32	dst_port;
> +	__u32	src_addr[4];
> +	__u32	dst_addr[4];

I cannot find where libsoccr initializes these values.

>  };
>  
>  /*
> @@ -99,6 +109,12 @@ struct libsoccr_sk_data {
>  #define SOCCR_FLAGS_WINDOW	0x1
>  
>  /*
> + * Source and destination addresses, which are required to restore
> + * a socket state.
> + */
> +#define SOCCR_FLAGS_ADDR	0x2
> +
> +/*
>   * These two calls pause and resume the socket for and after C/R
>   * The first one returns an opaque handle that is to be used by all
>   * the subsequent calls.
>
Andrey Vagin Nov. 22, 2016, 1:45 a.m.
On Mon, Nov 21, 2016 at 10:36:29PM +0300, Pavel Emelyanov wrote:
> On 11/11/2016 10:10 AM, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin@virtuozzo.com>
> > 
> > A socket is in one of half-closed states, if it sent a fin packet
> > or it received a fin packet.
> > 
> > CRIU plays with fin packets to restore half-closed states too.
> > 
> > When we need to sent a fin packet from a socket, we can call
> > shutdown(SHUT_WR). When a fin packet has to be restore in
> > a received queue, criu generate a fin packet and send it via
> > a raw ip socket.
> > 
> > A raw packet is sent with the SOCCR_MARK mark to be able
> > to not block it.
> > 
> > Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> > ---
> >  criu/Makefile.packages |   2 +-
> >  soccr/soccr.c          | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  soccr/soccr.h          |  18 +++++-
> >  3 files changed, 180 insertions(+), 5 deletions(-)
> > 
> > diff --git a/criu/Makefile.packages b/criu/Makefile.packages
> > index 53fbdae..886394f 100644
> > --- a/criu/Makefile.packages
> > +++ b/criu/Makefile.packages
> > @@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES	+= libcap-dev
> >  
> >  REQ-DEB-PKG-TEST-NAMES  += libaio-dev
> >  
> > -export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
> > +export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
> >  
> >  check-packages-failed:
> >  	$(warning Can not find some of the required libraries)
> > diff --git a/soccr/soccr.c b/soccr/soccr.c
> > index 7c6bfb1..af39eda 100644
> > --- a/soccr/soccr.c
> > +++ b/soccr/soccr.c
> > @@ -4,6 +4,9 @@
> >  #include <sys/ioctl.h>
> >  #include <errno.h>
> >  #include <linux/sockios.h>
> > +#include <libnet.h>
> > +#include <assert.h>
> > +
> >  #include "soccr.h"
> >  
> >  #ifndef SIOCOUTQNSD
> > @@ -11,6 +14,20 @@
> >  #define SIOCOUTQNSD     0x894B
> >  #endif
> >  
> > +enum {
> > +        TCPF_ESTABLISHED = (1 << 1),
> > +        TCPF_SYN_SENT    = (1 << 2),
> > +        TCPF_SYN_RECV    = (1 << 3),
> > +        TCPF_FIN_WAIT1   = (1 << 4),
> > +        TCPF_FIN_WAIT2   = (1 << 5),
> > +        TCPF_TIME_WAIT   = (1 << 6),
> > +        TCPF_CLOSE       = (1 << 7),
> > +        TCPF_CLOSE_WAIT  = (1 << 8),
> > +        TCPF_LAST_ACK    = (1 << 9),
> > +        TCPF_LISTEN      = (1 << 10),
> > +        TCPF_CLOSING     = (1 << 11),
> > +};
> > +
> >  static void (*log)(unsigned int loglevel, const char *format, ...)
> >  	__attribute__ ((__format__ (__printf__, 2, 3)));
> >  static unsigned int log_level = 0;
> > @@ -89,6 +106,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> >  
> >  	switch (ti->tcpi_state) {
> >  	case TCP_ESTABLISHED:
> > +	case TCP_FIN_WAIT1:
> > +	case TCP_FIN_WAIT2:
> > +	case TCP_LAST_ACK:
> > +	case TCP_CLOSE_WAIT:
> > +	case TCP_CLOSING:
> >  	case TCP_CLOSE:
> >  		break;
> >  	default:
> > @@ -96,7 +118,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> >  		return -1;
> >  	}
> >  
> > -	data->state = TCP_ESTABLISHED;
> > +	data->state = ti->tcpi_state;
> >  
> >  	if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
> >  		loge("Unable to get size of snd queue");
> > @@ -112,6 +134,14 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> >  
> >  	data->unsq_len = size;
> >  
> > +	/* Don't account the fin packet. It doesn't countain real data. */
> > +	if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
> > +		assert(data->outq_len > 0);
> 
> assert?! We have BUG_ON-s in criu for this, don't we?

soccr is a separate library.

> 
> > +		data->outq_len--;
> > +		data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
> > +	}
> > +
> > +
> >  	if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
> >  		loge("Unable to get size of recv queue");
> >  		return -1;
> > @@ -325,12 +355,22 @@ static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
> >  int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk,
> >  		struct libsoccr_sk_data *data, unsigned data_size)
> >  {
> > +	int mstate = 1 << data->state;
> > +
> >  	if (!data || data_size < SOCR_DATA_MIN_SIZE)
> >  		return -1;
> >  
> > -	if (data->state != TCP_ESTABLISHED)
> > +	if (data->state == TCP_LISTEN)
> >  		return -1;
> 
> This doesn't look correct, we don't support more states here.

I don't understand what you want to say here. This set adds support for
other states.

> 
> > +	if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE))
> > +		data->inq_seq--;
> > +
> > +	/* outq_seq is adjusted due to not accointing the fin packet */
> > +	if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
> > +			TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
> > +		data->outq_seq--;
> > +
> >  	if (set_queue_seq(sk, TCP_RECV_QUEUE,
> >  				data->inq_seq - data->inq_len))
> >  		return -2;
> > @@ -400,6 +440,98 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
> >  	return 0;
> >  }
> >  
> > +static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size)
> > +{
> > +	int ret, exit_code = -1;
> > +	char errbuf[LIBNET_ERRBUF_SIZE];
> > +	int mark = SOCCR_MARK;;
> > +	int libnet_type;
> > +	libnet_t *l;
> > +
> > +	libnet_type = data->family == AF_INET6 ? LIBNET_RAW6 : LIBNET_RAW4;
> > +
> > +	l = libnet_init(
> > +		libnet_type,                            /* injection type */
> > +		NULL,                                   /* network interface */
> > +		errbuf);                                /* errbuf */
> > +	if (l == NULL)
> > +		return -1;
> > +
> > +	if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
> > +		goto err;
> > +
> > +	ret = libnet_build_tcp(
> > +		data->dst_port,		/* source port */
> > +		data->src_port,		/* destination port */
> > +		data->inq_seq,			/* sequence number */
> > +		data->outq_seq - data->outq_len,	/* acknowledgement num */
> > +		TH_FIN | TH_ACK,		/* control flags */
> > +		data->rcv_wnd,			/* window size */
> > +		0,				/* checksum */
> > +		10,				/* urgent pointer */
> > +		LIBNET_TCP_H + 20,		/* TCP packet size */
> > +		NULL,				/* payload */
> > +		0,				/* payload size */
> > +		l,				/* libnet handle */
> > +		0);				/* libnet id */
> > +	if (ret == -1) {
> > +		loge("Can't build TCP header: %s\n", libnet_geterror(l));
> > +		goto err;
> > +	}
> > +
> > +	if (data->family == AF_INET6) {
> > +		struct libnet_in6_addr src, dst;
> > +
> > +		memcpy(&dst, data->dst_addr, sizeof(dst));
> > +		memcpy(&src, data->src_addr, sizeof(src));
> > +
> > +		ret = libnet_build_ipv6(
> > +			0, 0,
> > +			LIBNET_TCP_H,	/* length */
> > +			IPPROTO_TCP,	/* protocol */
> > +			64,		/* hop limit */
> > +			dst,		/* source IP */
> > +			src,		/* destination IP */
> > +			NULL,		/* payload */
> > +			0,		/* payload size */
> > +			l,		/* libnet handle */
> > +			0);		/* libnet id */
> > +	} else if (data->family == AF_INET)
> > +		ret = libnet_build_ipv4(
> > +			LIBNET_IPV4_H + LIBNET_TCP_H + 20,	/* length */
> > +			0,			/* TOS */
> > +			242,			/* IP ID */
> > +			0,			/* IP Frag */
> > +			64,			/* TTL */
> > +			IPPROTO_TCP,		/* protocol */
> > +			0,			/* checksum */
> > +			data->dst_addr[0],	/* source IP */
> > +			data->src_addr[0],	/* destination IP */
> > +			NULL,			/* payload */
> > +			0,			/* payload size */
> > +			l,			/* libnet handle */
> > +			0);			/* libnet id */
> > +	else {
> > +		loge("Unknown socket family");
> > +		goto err;
> > +	}
> > +	if (ret == -1) {
> > +		loge("Can't build IP header: %s\n", libnet_geterror(l));
> > +		goto err;
> > +	}
> > +
> > +	ret = libnet_write(l);
> > +	if (ret == -1) {
> > +		loge("Unable to send a fin packet: %s", libnet_geterror(l));
> > +		goto err;
> > +	}
> > +
> > +	exit_code = 0;
> > +err:
> > +	libnet_destroy(l);
> > +	return exit_code;
> > +}
> > +
> >  int libsoccr_set_sk_data(struct libsoccr_sk *sk,
> >  		struct libsoccr_sk_data *data, unsigned data_size)
> >  {
> > @@ -411,13 +543,40 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
> >  			.rcv_wnd = data->rcv_wnd,
> >  			.rcv_wup = data->rcv_wup,
> >  		};
> > -	
> > +
> > +		if ((1 << data->state) & ((1 << TCP_CLOSE_WAIT) |
> > +				    (1 << TCP_LAST_ACK) |
> > +				    (1 << TCP_CLOSE))) {
> > +			wopt.rcv_wup--;
> > +			wopt.rcv_wnd++;
> > +		}
> > +
> >  		if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
> >  			loge("Unable to set window parameters");
> >  			return -1;
> >  		}
> >  	}
> >  
> > +	if (data->flags & SOCCR_FLAGS_ADDR) {
> > +		int mstate = 1 << data->state;
> > +
> > +		if (data->state == TCP_CLOSING) {
> > +			shutdown(sk->fd, SHUT_WR);
> > +		}
> > +		if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE)) {
> > +			if (send_fin(sk->fd, data, data_size) < 0)
> > +				return -1;
> > +		}
> > +
> > +		if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
> > +				TCPF_FIN_WAIT2 | TCPF_CLOSE)) {
> > +			shutdown(sk->fd, SHUT_WR);
> > +		}
> > +	} else if (data->state != TCP_ESTABLISHED) {
> > +		loge("Unable to restore a socket state: %d", data->state);
> > +		return -1;
> > +	}
> > +
> >  	return 0;
> >  }
> >  
> > diff --git a/soccr/soccr.h b/soccr/soccr.h
> > index 4e272d5..a3a950b 100644
> > --- a/soccr/soccr.h
> > +++ b/soccr/soccr.h
> > @@ -5,6 +5,9 @@
> >  
> >  #include "config.h"
> >  
> > +/* All packets with this mark have not to be blocked. */
> > +#define SOCCR_MARK 0xC114
> > +
> >  #ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
> >  struct tcp_repair_window {
> >  	uint32_t   snd_wl1;
> > @@ -75,11 +78,18 @@ struct libsoccr_sk_data {
> >  	__u32	timestamp;
> >  
> >  	__u32	flags; /* SOCCR_FLAGS_... below */
> > -	__u32	snd_wl1;
> > +
> > +	__u32	snd_wl1;	/* SOCCR_FLAGS_WINDOW */
> >  	__u32	snd_wnd;
> >  	__u32	max_window;
> >  	__u32	rcv_wnd;
> >  	__u32	rcv_wup;
> > +
> > +	__u32   family;		/* SOCCR_FLAGS_ADDR */
> > +	__u32	src_port;
> > +	__u32	dst_port;
> > +	__u32	src_addr[4];
> > +	__u32	dst_addr[4];
> 
> I cannot find where libsoccr initializes these values.

It's initialized from criu.

> 
> >  };
> >  
> >  /*
> > @@ -99,6 +109,12 @@ struct libsoccr_sk_data {
> >  #define SOCCR_FLAGS_WINDOW	0x1
> >  
> >  /*
> > + * Source and destination addresses, which are required to restore
> > + * a socket state.
> > + */
> > +#define SOCCR_FLAGS_ADDR	0x2
> > +
> > +/*
> >   * These two calls pause and resume the socket for and after C/R
> >   * The first one returns an opaque handle that is to be used by all
> >   * the subsequent calls.
> > 
>
Andrey Vagin Nov. 22, 2016, 2:06 a.m.
On Mon, Nov 21, 2016 at 05:45:20PM -0800, Andrei Vagin wrote:
> On Mon, Nov 21, 2016 at 10:36:29PM +0300, Pavel Emelyanov wrote:
> > On 11/11/2016 10:10 AM, Andrei Vagin wrote:
> > > From: Andrei Vagin <avagin@virtuozzo.com>
> > > 
> > > A socket is in one of half-closed states, if it sent a fin packet
> > > or it received a fin packet.
> > > 
> > > CRIU plays with fin packets to restore half-closed states too.
> > > 
> > > When we need to sent a fin packet from a socket, we can call
> > > shutdown(SHUT_WR). When a fin packet has to be restore in
> > > a received queue, criu generate a fin packet and send it via
> > > a raw ip socket.
> > > 
> > > A raw packet is sent with the SOCCR_MARK mark to be able
> > > to not block it.
> > > 
> > > Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
> > > ---
> > >  criu/Makefile.packages |   2 +-
> > >  soccr/soccr.c          | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
> > >  soccr/soccr.h          |  18 +++++-
> > >  3 files changed, 180 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/criu/Makefile.packages b/criu/Makefile.packages
> > > index 53fbdae..886394f 100644
> > > --- a/criu/Makefile.packages
> > > +++ b/criu/Makefile.packages
> > > @@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES	+= libcap-dev
> > >  
> > >  REQ-DEB-PKG-TEST-NAMES  += libaio-dev
> > >  
> > > -export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
> > > +export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
> > >  
> > >  check-packages-failed:
> > >  	$(warning Can not find some of the required libraries)
> > > diff --git a/soccr/soccr.c b/soccr/soccr.c
> > > index 7c6bfb1..af39eda 100644
> > > --- a/soccr/soccr.c
> > > +++ b/soccr/soccr.c
> > > @@ -4,6 +4,9 @@
> > >  #include <sys/ioctl.h>
> > >  #include <errno.h>
> > >  #include <linux/sockios.h>
> > > +#include <libnet.h>
> > > +#include <assert.h>
> > > +
> > >  #include "soccr.h"
> > >  
> > >  #ifndef SIOCOUTQNSD
> > > @@ -11,6 +14,20 @@
> > >  #define SIOCOUTQNSD     0x894B
> > >  #endif
> > >  
> > > +enum {
> > > +        TCPF_ESTABLISHED = (1 << 1),
> > > +        TCPF_SYN_SENT    = (1 << 2),
> > > +        TCPF_SYN_RECV    = (1 << 3),
> > > +        TCPF_FIN_WAIT1   = (1 << 4),
> > > +        TCPF_FIN_WAIT2   = (1 << 5),
> > > +        TCPF_TIME_WAIT   = (1 << 6),
> > > +        TCPF_CLOSE       = (1 << 7),
> > > +        TCPF_CLOSE_WAIT  = (1 << 8),
> > > +        TCPF_LAST_ACK    = (1 << 9),
> > > +        TCPF_LISTEN      = (1 << 10),
> > > +        TCPF_CLOSING     = (1 << 11),
> > > +};
> > > +
> > >  static void (*log)(unsigned int loglevel, const char *format, ...)
> > >  	__attribute__ ((__format__ (__printf__, 2, 3)));
> > >  static unsigned int log_level = 0;
> > > @@ -89,6 +106,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> > >  
> > >  	switch (ti->tcpi_state) {
> > >  	case TCP_ESTABLISHED:
> > > +	case TCP_FIN_WAIT1:
> > > +	case TCP_FIN_WAIT2:
> > > +	case TCP_LAST_ACK:
> > > +	case TCP_CLOSE_WAIT:
> > > +	case TCP_CLOSING:
> > >  	case TCP_CLOSE:
> > >  		break;
> > >  	default:
> > > @@ -96,7 +118,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> > >  		return -1;
> > >  	}
> > >  
> > > -	data->state = TCP_ESTABLISHED;
> > > +	data->state = ti->tcpi_state;
> > >  
> > >  	if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
> > >  		loge("Unable to get size of snd queue");
> > > @@ -112,6 +134,14 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> > >  
> > >  	data->unsq_len = size;
> > >  
> > > +	/* Don't account the fin packet. It doesn't countain real data. */
> > > +	if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
> > > +		assert(data->outq_len > 0);
> > 
> > assert?! We have BUG_ON-s in criu for this, don't we?
> 
> soccr is a separate libary.
> 

What I mean is that it may be better to use more general facilities in
libraries.
Pavel Emelianov Nov. 22, 2016, 11:47 a.m.
On 11/22/2016 04:45 AM, Andrei Vagin wrote:
> On Mon, Nov 21, 2016 at 10:36:29PM +0300, Pavel Emelyanov wrote:
>> On 11/11/2016 10:10 AM, Andrei Vagin wrote:
>>> From: Andrei Vagin <avagin@virtuozzo.com>
>>>
>>> A socket is in one of half-closed states, if it sent a fin packet
>>> or it received a fin packet.
>>>
>>> CRIU plays with fin packets to restore half-closed states too.
>>>
>>> When we need to sent a fin packet from a socket, we can call
>>> shutdown(SHUT_WR). When a fin packet has to be restore in
>>> a received queue, criu generate a fin packet and send it via
>>> a raw ip socket.
>>>
>>> A raw packet is sent with the SOCCR_MARK mark to be able
>>> to not block it.
>>>
>>> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
>>> ---
>>>  criu/Makefile.packages |   2 +-
>>>  soccr/soccr.c          | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>>  soccr/soccr.h          |  18 +++++-
>>>  3 files changed, 180 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/criu/Makefile.packages b/criu/Makefile.packages
>>> index 53fbdae..886394f 100644
>>> --- a/criu/Makefile.packages
>>> +++ b/criu/Makefile.packages
>>> @@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES	+= libcap-dev
>>>  
>>>  REQ-DEB-PKG-TEST-NAMES  += libaio-dev
>>>  
>>> -export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
>>> +export LIBS		+= -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
>>>  
>>>  check-packages-failed:
>>>  	$(warning Can not find some of the required libraries)
>>> diff --git a/soccr/soccr.c b/soccr/soccr.c
>>> index 7c6bfb1..af39eda 100644
>>> --- a/soccr/soccr.c
>>> +++ b/soccr/soccr.c
>>> @@ -4,6 +4,9 @@
>>>  #include <sys/ioctl.h>
>>>  #include <errno.h>
>>>  #include <linux/sockios.h>
>>> +#include <libnet.h>
>>> +#include <assert.h>
>>> +
>>>  #include "soccr.h"
>>>  
>>>  #ifndef SIOCOUTQNSD
>>> @@ -11,6 +14,20 @@
>>>  #define SIOCOUTQNSD     0x894B
>>>  #endif
>>>  
>>> +enum {
>>> +        TCPF_ESTABLISHED = (1 << 1),
>>> +        TCPF_SYN_SENT    = (1 << 2),
>>> +        TCPF_SYN_RECV    = (1 << 3),
>>> +        TCPF_FIN_WAIT1   = (1 << 4),
>>> +        TCPF_FIN_WAIT2   = (1 << 5),
>>> +        TCPF_TIME_WAIT   = (1 << 6),
>>> +        TCPF_CLOSE       = (1 << 7),
>>> +        TCPF_CLOSE_WAIT  = (1 << 8),
>>> +        TCPF_LAST_ACK    = (1 << 9),
>>> +        TCPF_LISTEN      = (1 << 10),
>>> +        TCPF_CLOSING     = (1 << 11),
>>> +};
>>> +
>>>  static void (*log)(unsigned int loglevel, const char *format, ...)
>>>  	__attribute__ ((__format__ (__printf__, 2, 3)));
>>>  static unsigned int log_level = 0;
>>> @@ -89,6 +106,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>>>  
>>>  	switch (ti->tcpi_state) {
>>>  	case TCP_ESTABLISHED:
>>> +	case TCP_FIN_WAIT1:
>>> +	case TCP_FIN_WAIT2:
>>> +	case TCP_LAST_ACK:
>>> +	case TCP_CLOSE_WAIT:
>>> +	case TCP_CLOSING:
>>>  	case TCP_CLOSE:
>>>  		break;
>>>  	default:
>>> @@ -96,7 +118,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>>>  		return -1;
>>>  	}
>>>  
>>> -	data->state = TCP_ESTABLISHED;
>>> +	data->state = ti->tcpi_state;
>>>  
>>>  	if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
>>>  		loge("Unable to get size of snd queue");
>>> @@ -112,6 +134,14 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
>>>  
>>>  	data->unsq_len = size;
>>>  
>>> +	/* Don't account the fin packet. It doesn't countain real data. */
>>> +	if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
>>> +		assert(data->outq_len > 0);
>>
>> assert?! We have BUG_ON-s in criu for this, don't we?
> 
> soccr is a separate libary.
> 
>>
>>> +		data->outq_len--;
>>> +		data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
>>> +	}
>>> +
>>> +
>>>  	if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
>>>  		loge("Unable to get size of recv queue");
>>>  		return -1;
>>> @@ -325,12 +355,22 @@ static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
>>>  int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk,
>>>  		struct libsoccr_sk_data *data, unsigned data_size)
>>>  {
>>> +	int mstate = 1 << data->state;
>>> +
>>>  	if (!data || data_size < SOCR_DATA_MIN_SIZE)
>>>  		return -1;
>>>  
>>> -	if (data->state != TCP_ESTABLISHED)
>>> +	if (data->state == TCP_LISTEN)
>>>  		return -1;
>>
>> This doesn't look correct, we don't support more states here.
> 
> I don't understand what you want to say here. This set adds support for
> other states.

Ah, so the cover letter subject is "Add support for all other states", isn't it?

>>
>>> +	if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE))
>>> +		data->inq_seq--;
>>> +
>>> +	/* outq_seq is adjusted due to not accointing the fin packet */
>>> +	if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
>>> +			TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
>>> +		data->outq_seq--;
>>> +
>>>  	if (set_queue_seq(sk, TCP_RECV_QUEUE,
>>>  				data->inq_seq - data->inq_len))
>>>  		return -2;
>>> @@ -400,6 +440,98 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
>>>  	return 0;
>>>  }
>>>  
>>> +static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size)
>>> +{
>>> +	int ret, exit_code = -1;
>>> +	char errbuf[LIBNET_ERRBUF_SIZE];
>>> +	int mark = SOCCR_MARK;;
>>> +	int libnet_type;
>>> +	libnet_t *l;
>>> +
>>> +	libnet_type = data->family == AF_INET6 ? LIBNET_RAW6 : LIBNET_RAW4;
>>> +
>>> +	l = libnet_init(
>>> +		libnet_type,                            /* injection type */
>>> +		NULL,                                   /* network interface */
>>> +		errbuf);                                /* errbuf */
>>> +	if (l == NULL)
>>> +		return -1;
>>> +
>>> +	if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
>>> +		goto err;
>>> +
>>> +	ret = libnet_build_tcp(
>>> +		data->dst_port,		/* source port */
>>> +		data->src_port,		/* destination port */
>>> +		data->inq_seq,			/* sequence number */
>>> +		data->outq_seq - data->outq_len,	/* acknowledgement num */
>>> +		TH_FIN | TH_ACK,		/* control flags */
>>> +		data->rcv_wnd,			/* window size */
>>> +		0,				/* checksum */
>>> +		10,				/* urgent pointer */
>>> +		LIBNET_TCP_H + 20,		/* TCP packet size */
>>> +		NULL,				/* payload */
>>> +		0,				/* payload size */
>>> +		l,				/* libnet handle */
>>> +		0);				/* libnet id */
>>> +	if (ret == -1) {
>>> +		loge("Can't build TCP header: %s\n", libnet_geterror(l));
>>> +		goto err;
>>> +	}
>>> +
>>> +	if (data->family == AF_INET6) {
>>> +		struct libnet_in6_addr src, dst;
>>> +
>>> +		memcpy(&dst, data->dst_addr, sizeof(dst));
>>> +		memcpy(&src, data->src_addr, sizeof(src));
>>> +
>>> +		ret = libnet_build_ipv6(
>>> +			0, 0,
>>> +			LIBNET_TCP_H,	/* length */
>>> +			IPPROTO_TCP,	/* protocol */
>>> +			64,		/* hop limit */
>>> +			dst,		/* source IP */
>>> +			src,		/* destination IP */
>>> +			NULL,		/* payload */
>>> +			0,		/* payload size */
>>> +			l,		/* libnet handle */
>>> +			0);		/* libnet id */
>>> +	} else if (data->family == AF_INET)
>>> +		ret = libnet_build_ipv4(
>>> +			LIBNET_IPV4_H + LIBNET_TCP_H + 20,	/* length */
>>> +			0,			/* TOS */
>>> +			242,			/* IP ID */
>>> +			0,			/* IP Frag */
>>> +			64,			/* TTL */
>>> +			IPPROTO_TCP,		/* protocol */
>>> +			0,			/* checksum */
>>> +			data->dst_addr[0],	/* source IP */
>>> +			data->src_addr[0],	/* destination IP */
>>> +			NULL,			/* payload */
>>> +			0,			/* payload size */
>>> +			l,			/* libnet handle */
>>> +			0);			/* libnet id */
>>> +	else {
>>> +		loge("Unknown socket family");
>>> +		goto err;
>>> +	}
>>> +	if (ret == -1) {
>>> +		loge("Can't build IP header: %s\n", libnet_geterror(l));
>>> +		goto err;
>>> +	}
>>> +
>>> +	ret = libnet_write(l);
>>> +	if (ret == -1) {
>>> +		loge("Unable to send a fin packet: %s", libnet_geterror(l));
>>> +		goto err;
>>> +	}
>>> +
>>> +	exit_code = 0;
>>> +err:
>>> +	libnet_destroy(l);
>>> +	return exit_code;
>>> +}
>>> +
>>>  int libsoccr_set_sk_data(struct libsoccr_sk *sk,
>>>  		struct libsoccr_sk_data *data, unsigned data_size)
>>>  {
>>> @@ -411,13 +543,40 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
>>>  			.rcv_wnd = data->rcv_wnd,
>>>  			.rcv_wup = data->rcv_wup,
>>>  		};
>>> -	
>>> +
>>> +		if ((1 << data->state) & ((1 << TCP_CLOSE_WAIT) |
>>> +				    (1 << TCP_LAST_ACK) |
>>> +				    (1 << TCP_CLOSE))) {
>>> +			wopt.rcv_wup--;
>>> +			wopt.rcv_wnd++;
>>> +		}
>>> +
>>>  		if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
>>>  			loge("Unable to set window parameters");
>>>  			return -1;
>>>  		}
>>>  	}
>>>  
>>> +	if (data->flags & SOCCR_FLAGS_ADDR) {
>>> +		int mstate = 1 << data->state;
>>> +
>>> +		if (data->state == TCP_CLOSING) {
>>> +			shutdown(sk->fd, SHUT_WR);
>>> +		}
>>> +		if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE)) {
>>> +			if (send_fin(sk->fd, data, data_size) < 0)
>>> +				return -1;
>>> +		}
>>> +
>>> +		if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
>>> +				TCPF_FIN_WAIT2 | TCPF_CLOSE)) {
>>> +			shutdown(sk->fd, SHUT_WR);
>>> +		}
>>> +	} else if (data->state != TCP_ESTABLISHED) {
>>> +		loge("Unable to restore a socket state: %d", data->state);
>>> +		return -1;
>>> +	}
>>> +
>>>  	return 0;
>>>  }
>>>  
>>> diff --git a/soccr/soccr.h b/soccr/soccr.h
>>> index 4e272d5..a3a950b 100644
>>> --- a/soccr/soccr.h
>>> +++ b/soccr/soccr.h
>>> @@ -5,6 +5,9 @@
>>>  
>>>  #include "config.h"
>>>  
>>> +/* All packets with this mark have not to be blocked. */
>>> +#define SOCCR_MARK 0xC114
>>> +
>>>  #ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
>>>  struct tcp_repair_window {
>>>  	uint32_t   snd_wl1;
>>> @@ -75,11 +78,18 @@ struct libsoccr_sk_data {
>>>  	__u32	timestamp;
>>>  
>>>  	__u32	flags; /* SOCCR_FLAGS_... below */
>>> -	__u32	snd_wl1;
>>> +
>>> +	__u32	snd_wl1;	/* SOCCR_FLAGS_WINDOW */
>>>  	__u32	snd_wnd;
>>>  	__u32	max_window;
>>>  	__u32	rcv_wnd;
>>>  	__u32	rcv_wup;
>>> +
>>> +	__u32   family;		/* SOCCR_FLAGS_ADDR */
>>> +	__u32	src_port;
>>> +	__u32	dst_port;
>>> +	__u32	src_addr[4];
>>> +	__u32	dst_addr[4];
>>
>> I cannot find where libsoccr initializes these values.
> 
> It's initialized from criu.

OK, so this is an argument that the libsoccr user passes in. Let's keep
it outside of libsoccr_sk_data, as the latter is an abstract blob that
is passed between libsoccr calls. Addresses are better set with
separate call(s), using struct sockaddr-s.

-- Pavel