[PATCHv3,1/2] unix: sysctl -- Preserve max_dgram_qlen value

Submitted by Alexander Mikhalitsyn on Nov. 1, 2019, noon

Details

Message ID 20191101120004.16933-2-alexander.mikhalitsyn@virtuozzo.com
State New
Series "Preserve max_dgram_qlen value patch update"
Headers show

Commit Message

Alexander Mikhalitsyn Nov. 1, 2019, noon
The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
we already noticed that systemd inside a container may change its value
(for example it sets it to 512 by now instead of kernel's default
value 10), thus we need keep it inside image and restore then.

Based-on-patch-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
---
 criu/net.c          | 102 +++++++++++++++++++++++++++++++++++++++++++-
 images/netdev.proto |   1 +
 2 files changed, 102 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/criu/net.c b/criu/net.c
index fe9b51ad..843f74d7 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -210,6 +210,15 @@  char *devconfs6[] = {
 #define MAX_CONF_OPT_PATH IFNAMSIZ+60
 #define MAX_STR_CONF_LEN 200
 
+static char *unix_conf_entries[] = {
+	"max_dgram_qlen",
+};
+
+#define CONF_UNIX_BASE		"net/unix"
+#define CONF_UNIX_FMT		CONF_UNIX_BASE"/%s"
+#define MAX_CONF_UNIX_OPT_PATH	32
+#define MAX_CONF_UNIX_PATH	(sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
+
 static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto,
 		struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size,
 		char **devconfs, SysctlEntry **def_conf)
@@ -339,6 +348,72 @@  static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr
 			devconfs6, def_conf);
 }
 
+static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
+{
+	int i, ret = -1, flags = op == CTL_READ ? CTL_FLAGS_OPTIONAL : 0;
+	char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { };
+	struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { };
+	SysctlEntry **conf = *rconf;
+
+	if (*n != ARRAY_SIZE(unix_conf_entries)) {
+		pr_err("unix: Unexpected entries in config (%u %u)\n",
+		       (unsigned)*n, (unsigned)ARRAY_SIZE(unix_conf_entries));
+		return -EINVAL;
+	}
+
+	if (opts.weak_sysctls)
+		flags = CTL_FLAGS_OPTIONAL;
+
+	for (i = 0; i < *n; i++) {
+		snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT,
+			 unix_conf_entries[i]);
+		req[i].name = path[i];
+		req[i].flags = flags;
+
+		switch (conf[i]->type) {
+		case SYSCTL_TYPE__CTL_32:
+			req[i].type = CTL_32;
+			req[i].arg = &conf[i]->iarg;
+			break;
+		default:
+			pr_err("unix: Unknown config type %d\n",
+			       (unsigned)conf[i]->type);
+			return -1;
+		}
+	}
+
+	ret = sysctl_op(req, *n, op, CLONE_NEWNET);
+	if (ret < 0) {
+		pr_err("unix: Failed to %s %s/<confs>\n",
+		       (op == CTL_READ) ? "read" : "write",
+		       CONF_UNIX_BASE);
+		return -1;
+	}
+
+	if (op == CTL_READ) {
+		bool has_entries = false;
+
+		for (i = 0; i < *n; i++) {
+			if (req[i].flags & CTL_FLAGS_HAS) {
+				conf[i]->has_iarg = true;
+				if (!has_entries)
+					has_entries = true;
+			}
+		}
+
+		/*
+		 * Zap the whole section of data.
+		 * Unix conf is optional.
+		 */
+		if (!has_entries) {
+			*n = 0;
+			*rconf = NULL;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * I case if some entry is missing in
  * the kernel, simply write DEVCONFS_UNUSED
@@ -1824,6 +1899,8 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	int ret = -1;
 	int i;
 	NetnsEntry netns = NETNS_ENTRY__INIT;
+	SysctlEntry *unix_confs = NULL;
+	size_t sizex = ARRAY_SIZE(unix_conf_entries);
 	SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL;
 	int size4 = ARRAY_SIZE(devconfs4);
 	SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL;
@@ -1840,7 +1917,8 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	o_buf = buf = xmalloc(
 			i * (sizeof(NetnsId*) + sizeof(NetnsId)) +
 			size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
-			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2
+			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
+			sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry))
 		     );
 	if (!buf)
 		goto out;
@@ -1896,6 +1974,16 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 		}
 	}
 
+	netns.n_unix_conf = sizex;
+	netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*));
+	unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry));
+
+	for (i = 0; i < sizex; i++) {
+		sysctl_entry__init(&unix_confs[i]);
+		netns.unix_conf[i] = &unix_confs[i];
+		netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32;
+	}
+
 	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
 	if (ret < 0)
 		goto err_free;
@@ -1910,6 +1998,12 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	if (ret < 0)
 		goto err_free;
 
+	ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ);
+	if (ret < 0)
+		goto err_free;
+
+
+
 	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
 err_free:
 	xfree(o_buf);
@@ -2148,6 +2242,12 @@  static int restore_netns_conf(struct ns_id *ns)
 		ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL);
 	}
 
+	if ((netns)->unix_conf) {
+		ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE);
+		if (ret)
+			goto out;
+	}
+
 	ns->net.netns = netns;
 out:
 	return ret;
diff --git a/images/netdev.proto b/images/netdev.proto
index 476a92ce..ae9c9953 100644
--- a/images/netdev.proto
+++ b/images/netdev.proto
@@ -71,4 +71,5 @@  message netns_entry {
 
 	repeated netns_id nsids		= 7;
 	optional string	ext_key		= 8;
+	repeated sysctl_entry unix_conf	= 9;
 }

Comments

Andrei Vagin Nov. 3, 2019, 4:55 p.m.
On Fri, Nov 01, 2019 at 12:00:20PM +0000, Alexander Mikhalitsyn wrote:
> The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
> we already noticed that systemd inside a container may change its value
> (for example it sets it to 512 by now instead of kernel's default
> value 10), thus we need keep it inside image and restore then.
> 
> Based-on-patch-by: Cyrill Gorcunov <gorcunov@gmail.com>

This might mean that the author should be Cyrill.

https://github.com/torvalds/linux/blob/master/Documentation/process/submitting-patches.rst#12-when-to-use-acked-by-cc-and-co-developed-by

Cyrill, could you review this patch?

> Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
> ---
>  criu/net.c          | 102 +++++++++++++++++++++++++++++++++++++++++++-
>  images/netdev.proto |   1 +
>  2 files changed, 102 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/net.c b/criu/net.c
> index fe9b51ad..843f74d7 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -210,6 +210,15 @@ char *devconfs6[] = {
>  #define MAX_CONF_OPT_PATH IFNAMSIZ+60
>  #define MAX_STR_CONF_LEN 200
>  
> +static char *unix_conf_entries[] = {
> +	"max_dgram_qlen",
> +};
> +
> +#define CONF_UNIX_BASE		"net/unix"
> +#define CONF_UNIX_FMT		CONF_UNIX_BASE"/%s"
> +#define MAX_CONF_UNIX_OPT_PATH	32
> +#define MAX_CONF_UNIX_PATH	(sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
> +
>  static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto,
>  		struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size,
>  		char **devconfs, SysctlEntry **def_conf)
> @@ -339,6 +348,72 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr
>  			devconfs6, def_conf);
>  }
>  
> +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
> +{
> +	int i, ret = -1, flags = op == CTL_READ ? CTL_FLAGS_OPTIONAL : 0;
> +	char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { };
> +	struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { };
> +	SysctlEntry **conf = *rconf;
> +
> +	if (*n != ARRAY_SIZE(unix_conf_entries)) {
> +		pr_err("unix: Unexpected entries in config (%u %u)\n",
> +		       (unsigned)*n, (unsigned)ARRAY_SIZE(unix_conf_entries));
> +		return -EINVAL;
> +	}
> +
> +	if (opts.weak_sysctls)
> +		flags = CTL_FLAGS_OPTIONAL;
> +
> +	for (i = 0; i < *n; i++) {
> +		snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT,
> +			 unix_conf_entries[i]);
> +		req[i].name = path[i];
> +		req[i].flags = flags;
> +
> +		switch (conf[i]->type) {
> +		case SYSCTL_TYPE__CTL_32:
> +			req[i].type = CTL_32;
> +			req[i].arg = &conf[i]->iarg;
> +			break;
> +		default:
> +			pr_err("unix: Unknown config type %d\n",
> +			       (unsigned)conf[i]->type);
> +			return -1;
> +		}
> +	}
> +
> +	ret = sysctl_op(req, *n, op, CLONE_NEWNET);
> +	if (ret < 0) {
> +		pr_err("unix: Failed to %s %s/<confs>\n",
> +		       (op == CTL_READ) ? "read" : "write",
> +		       CONF_UNIX_BASE);
> +		return -1;
> +	}
> +
> +	if (op == CTL_READ) {
> +		bool has_entries = false;
> +
> +		for (i = 0; i < *n; i++) {
> +			if (req[i].flags & CTL_FLAGS_HAS) {
> +				conf[i]->has_iarg = true;
> +				if (!has_entries)
> +					has_entries = true;
> +			}
> +		}
> +
> +		/*
> +		 * Zap the whole section of data.
> +		 * Unix conf is optional.
> +		 */
> +		if (!has_entries) {
> +			*n = 0;
> +			*rconf = NULL;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * I case if some entry is missing in
>   * the kernel, simply write DEVCONFS_UNUSED
> @@ -1824,6 +1899,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	int ret = -1;
>  	int i;
>  	NetnsEntry netns = NETNS_ENTRY__INIT;
> +	SysctlEntry *unix_confs = NULL;
> +	size_t sizex = ARRAY_SIZE(unix_conf_entries);
>  	SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL;
>  	int size4 = ARRAY_SIZE(devconfs4);
>  	SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL;
> @@ -1840,7 +1917,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	o_buf = buf = xmalloc(
>  			i * (sizeof(NetnsId*) + sizeof(NetnsId)) +
>  			size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> -			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2
> +			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> +			sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry))

need to add a comment which explains what all these mean

>  		     );
>  	if (!buf)
>  		goto out;
> @@ -1896,6 +1974,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  		}
>  	}
>  
> +	netns.n_unix_conf = sizex;
> +	netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*));
> +	unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry));
> +
> +	for (i = 0; i < sizex; i++) {
> +		sysctl_entry__init(&unix_confs[i]);
> +		netns.unix_conf[i] = &unix_confs[i];
> +		netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32;
> +	}
> +
>  	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
>  	if (ret < 0)
>  		goto err_free;
> @@ -1910,6 +1998,12 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>  	if (ret < 0)
>  		goto err_free;
>  
> +	ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ);
> +	if (ret < 0)
> +		goto err_free;
> +
> +
> +

too many empty lines

>  	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
>  err_free:
>  	xfree(o_buf);
> @@ -2148,6 +2242,12 @@ static int restore_netns_conf(struct ns_id *ns)
>  		ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL);
>  	}
>  
> +	if ((netns)->unix_conf) {
> +		ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE);
> +		if (ret)
> +			goto out;
> +	}
> +
>  	ns->net.netns = netns;
>  out:
>  	return ret;
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 476a92ce..ae9c9953 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -71,4 +71,5 @@ message netns_entry {
>  
>  	repeated netns_id nsids		= 7;
>  	optional string	ext_key		= 8;
> +	repeated sysctl_entry unix_conf	= 9;
>  }
> -- 
> 2.17.1
> 
> 
> _______________________________________________
> CRIU mailing list
> CRIU@openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
Cyrill Gorcunov Nov. 3, 2019, 5:10 p.m.
On Sun, Nov 03, 2019 at 08:55:16AM -0800, Andrei Vagin wrote:
> On Fri, Nov 01, 2019 at 12:00:20PM +0000, Alexander Mikhalitsyn wrote:
> > The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
> > we already noticed that systemd inside a container may change its value
> > (for example it sets it to 512 by now instead of kernel's default
> > value 10), thus we need keep it inside image and restore then.
> > 
> > Based-on-patch-by: Cyrill Gorcunov <gorcunov@gmail.com>
> 
> This might mean that the author should be Cyrill.
> 
> https://github.com/torvalds/linux/blob/master/Documentation/process/submitting-patches.rst#12-when-to-use-acked-by-cc-and-co-developed-by
> 
> Cyrill, could you review this patch?

The patch is based on mine, Alexander has updated it and ported
to the latest criu-dev. So after such heavy rework I think it
is fine if he is an author. The patch itself (except the nits
you've pointed) looks ok to me.
Andrei Vagin Nov. 25, 2019, 7:45 a.m.
On Sun, Nov 03, 2019 at 08:55:16AM -0800, Andrei Vagin wrote:
> On Fri, Nov 01, 2019 at 12:00:20PM +0000, Alexander Mikhalitsyn wrote:
> > The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
> > we already noticed that systemd inside a container may change its value
> > (for example it sets it to 512 by now instead of kernel's default
> > value 10), thus we need keep it inside image and restore then.
> > 
> > Based-on-patch-by: Cyrill Gorcunov <gorcunov@gmail.com>
> 
> This might mean that the author should be Cyrill.
> 
> https://github.com/torvalds/linux/blob/master/Documentation/process/submitting-patches.rst#12-when-to-use-acked-by-cc-and-co-developed-by
> 
> Cyrill, could you review this patch?
> 
> > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
> > ---
> >  criu/net.c          | 102 +++++++++++++++++++++++++++++++++++++++++++-
> >  images/netdev.proto |   1 +
> >  2 files changed, 102 insertions(+), 1 deletion(-)
> > 
> > diff --git a/criu/net.c b/criu/net.c
> > index fe9b51ad..843f74d7 100644
> > --- a/criu/net.c
> > +++ b/criu/net.c
> > @@ -210,6 +210,15 @@ char *devconfs6[] = {
> >  #define MAX_CONF_OPT_PATH IFNAMSIZ+60
> >  #define MAX_STR_CONF_LEN 200
> >  
> > +static char *unix_conf_entries[] = {
> > +	"max_dgram_qlen",
> > +};
> > +
> > +#define CONF_UNIX_BASE		"net/unix"
> > +#define CONF_UNIX_FMT		CONF_UNIX_BASE"/%s"
> > +#define MAX_CONF_UNIX_OPT_PATH	32

Need to add a comment which explains this "-2"

> > +#define MAX_CONF_UNIX_PATH	(sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
> > +
Andrei Vagin Nov. 25, 2019, 7:48 a.m.
On Sun, Nov 03, 2019 at 08:55:16AM -0800, Andrei Vagin wrote:
> > @@ -1824,6 +1899,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> >  	int ret = -1;
> >  	int i;
> >  	NetnsEntry netns = NETNS_ENTRY__INIT;
> > +	SysctlEntry *unix_confs = NULL;
> > +	size_t sizex = ARRAY_SIZE(unix_conf_entries);
> >  	SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL;
> >  	int size4 = ARRAY_SIZE(devconfs4);
> >  	SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL;
> > @@ -1840,7 +1917,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> >  	o_buf = buf = xmalloc(
> >  			i * (sizeof(NetnsId*) + sizeof(NetnsId)) +
> >  			size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> > -			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2
> > +			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> > +			sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry))
> 
> need to add a comment which explains what all these mean
> 

You can ignore this comment.
Alexander Mikhalitsyn Nov. 26, 2019, 9:26 a.m.
On Sun, 24 Nov 2019 23:45:18 -0800
Andrei Vagin <avagin@gmail.com> wrote:

> On Sun, Nov 03, 2019 at 08:55:16AM -0800, Andrei Vagin wrote:
> > On Fri, Nov 01, 2019 at 12:00:20PM +0000, Alexander Mikhalitsyn wrote:
> > > The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
> > > we already noticed that systemd inside a container may change its value
> > > (for example it sets it to 512 by now instead of kernel's default
> > > value 10), thus we need keep it inside image and restore then.
> > > 
> > > Based-on-patch-by: Cyrill Gorcunov <gorcunov@gmail.com>
> > 
> > This might mean that the author should be Cyrill.
> > 
> > https://github.com/torvalds/linux/blob/master/Documentation/process/submitting-patches.rst#12-when-to-use-acked-by-cc-and-co-developed-by
> > 
> > Cyrill, could you review this patch?
> > 
> > > Signed-off-by: Alexander Mikhalitsyn <alexander@mihalicyn.com>
> > > Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
> > > ---
> > >  criu/net.c          | 102 +++++++++++++++++++++++++++++++++++++++++++-
> > >  images/netdev.proto |   1 +
> > >  2 files changed, 102 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/criu/net.c b/criu/net.c
> > > index fe9b51ad..843f74d7 100644
> > > --- a/criu/net.c
> > > +++ b/criu/net.c
> > > @@ -210,6 +210,15 @@ char *devconfs6[] = {
> > >  #define MAX_CONF_OPT_PATH IFNAMSIZ+60
> > >  #define MAX_STR_CONF_LEN 200
> > >  
> > > +static char *unix_conf_entries[] = {
> > > +	"max_dgram_qlen",
> > > +};
> > > +
> > > +#define CONF_UNIX_BASE		"net/unix"
> > > +#define CONF_UNIX_FMT		CONF_UNIX_BASE"/%s"
> > > +#define MAX_CONF_UNIX_OPT_PATH	32
> 
> Need to add a comment which explains this "-2"
> 
> > > +#define MAX_CONF_UNIX_PATH	(sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
> > > +
done

https://github.com/checkpoint-restore/criu/pull/850