[4/6] unix: sysctl -- Preserve max_dgram_qlen value

Submitted by Cyrill Gorcunov on Aug. 31, 2018, 10:10 a.m.

Details

Message ID 20180831101004.26940-5-gorcunov@gmail.com
State New
Series "unix: Setup sysct and fix path resolving"
Headers show

Commit Message

Cyrill Gorcunov Aug. 31, 2018, 10:10 a.m.
The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
we already noticed that systemd inside a container may change its value
(for example it sets it to 512 by now instead of kernel's default
value 10), thus we need keep it inside image and restore then.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/net.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/criu/net.c b/criu/net.c
index 177380efdcfc..0a87c7a120b4 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -208,6 +208,15 @@  char *devconfs6[] = {
 #define MAX_CONF_OPT_PATH IFNAMSIZ+60
 #define MAX_STR_CONF_LEN 200
 
+static char *unix_conf_entries[] = {
+	"max_dgram_qlen",
+};
+
+#define CONF_UNIX_BASE		"net/unix"
+#define CONF_UNIX_FMT		CONF_UNIX_BASE"/%s"
+#define MAX_CONF_UNIX_OPT_PATH	32
+#define MAX_CONF_UNIX_PATH	(sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
+
 static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto,
 		struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size,
 		char **devconfs, SysctlEntry **def_conf)
@@ -337,6 +346,72 @@  static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr
 			devconfs6, def_conf);
 }
 
+static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
+{
+	int i, ret = -1, flags = op == CTL_READ ? CTL_FLAGS_OPTIONAL : 0;
+	char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { };
+	struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { };
+	SysctlEntry **conf = *rconf;
+
+	if (*n != ARRAY_SIZE(unix_conf_entries)) {
+		pr_err("unix: Unexpected entries in config (%u %u)\n",
+		       (unsigned)*n, (unsigned)ARRAY_SIZE(unix_conf_entries));
+		return -EINVAL;
+	}
+
+	if (opts.weak_sysctls)
+		flags = CTL_FLAGS_OPTIONAL;
+
+	for (i = 0; i < *n; i++) {
+		snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT,
+			 unix_conf_entries[i]);
+		req[i].name = path[i];
+		req[i].flags = flags;
+
+		switch (conf[i]->type) {
+		case SYSCTL_TYPE__CTL_32:
+			req[i].type = CTL_32;
+			req[i].arg = &conf[i]->iarg;
+			break;
+		default:
+			pr_err("unix: Unknown config type %d\n",
+			       (unsigned)conf[i]->type);
+			return -1;
+		}
+	}
+
+	ret = sysctl_op(req, *n, op, CLONE_NEWNET);
+	if (ret < 0) {
+		pr_err("unix: Failed to %s %s/<confs>\n",
+		       (op == CTL_READ) ? "read" : "write",
+		       CONF_UNIX_BASE);
+		return -1;
+	}
+
+	if (op == CTL_READ) {
+		bool has_entries = false;
+
+		for (i = 0; i < *n; i++) {
+			if (req[i].flags & CTL_FLAGS_HAS) {
+				conf[i]->has_iarg = true;
+				if (!has_entries)
+					has_entries = true;
+			}
+		}
+
+		/*
+		 * Zap the whole section of data.
+		 * Unix conf is optional.
+		 */
+		if (!has_entries) {
+			*n = 0;
+			*rconf = NULL;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * I case if some entry is missing in
  * the kernel, simply write DEVCONFS_UNUSED
@@ -1822,6 +1897,8 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	int ret = -1;
 	int i;
 	NetnsEntry netns = NETNS_ENTRY__INIT;
+	SysctlEntry *unix_confs = NULL;
+	size_t sizex = ARRAY_SIZE(unix_conf_entries);
 	SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL;
 	int size4 = ARRAY_SIZE(devconfs4);
 	SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL;
@@ -1838,7 +1915,8 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	o_buf = buf = xmalloc(
 			i * (sizeof(NetnsId*) + sizeof(NetnsId)) +
 			size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
-			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2
+			size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
+			sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry))
 		     );
 	if (!buf)
 		goto out;
@@ -1894,6 +1972,16 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 		}
 	}
 
+	netns.n_unix_conf = sizex;
+	netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*));
+	unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry));
+
+	for (i = 0; i < sizex; i++) {
+		sysctl_entry__init(&unix_confs[i]);
+		netns.unix_conf[i] = &unix_confs[i];
+		netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32;
+	}
+
 	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
 	if (ret < 0)
 		goto err_free;
@@ -1908,6 +1996,10 @@  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	if (ret < 0)
 		goto err_free;
 
+	ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ);
+	if (ret < 0)
+		goto err_free;
+
 	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
 err_free:
 	xfree(o_buf);
@@ -2117,6 +2209,12 @@  static int restore_netns_conf(struct ns_id *ns)
 		ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL);
 	}
 
+	if ((netns)->unix_conf) {
+		ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE);
+		if (ret)
+			goto out;
+	}
+
 	ns->net.netns = netns;
 out:
 	return ret;

Comments

Dmitry Safonov Sept. 13, 2018, 1:54 p.m.
Hi Cyrill, sorry about late small nits,

2018-08-31 11:10 GMT+01:00 Cyrill Gorcunov <gorcunov@gmail.com>:
> The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and
> we already noticed that systemd inside a container may change its value
> (for example it sets it to 512 by now instead of kernel's default
> value 10), thus we need keep it inside image and restore then.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
>  criu/net.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 99 insertions(+), 1 deletion(-)
>
> diff --git a/criu/net.c b/criu/net.c
> index 177380efdcfc..0a87c7a120b4 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -208,6 +208,15 @@ char *devconfs6[] = {
>  #define MAX_CONF_OPT_PATH IFNAMSIZ+60
>  #define MAX_STR_CONF_LEN 200
>
> +static char *unix_conf_entries[] = {
> +       "max_dgram_qlen",
> +};
> +
> +#define CONF_UNIX_BASE         "net/unix"
> +#define CONF_UNIX_FMT          CONF_UNIX_BASE"/%s"
> +#define MAX_CONF_UNIX_OPT_PATH 32
> +#define MAX_CONF_UNIX_PATH     (sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2)
> +
>  static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto,
>                 struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size,
>                 char **devconfs, SysctlEntry **def_conf)
> @@ -337,6 +346,72 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr
>                         devconfs6, def_conf);
>  }
>
> +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
> +{
> +       int i, ret = -1, flags = op == CTL_READ ? CTL_FLAGS_OPTIONAL : 0;

`i' differs by type from n,
ret is used only in one place, where could be omitted,
flags? Ewww

> +       char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { };
> +       struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { };
> +       SysctlEntry **conf = *rconf;
> +
> +       if (*n != ARRAY_SIZE(unix_conf_entries)) {
> +               pr_err("unix: Unexpected entries in config (%u %u)\n",
> +                      (unsigned)*n, (unsigned)ARRAY_SIZE(unix_conf_entries));

Maybe %zu and omit additional cast?

> +               return -EINVAL;
> +       }
> +
> +       if (opts.weak_sysctls)

Can we do (opts.weak_sysctls || op == CTL_READ)?

> +               flags = CTL_FLAGS_OPTIONAL;
> +
> +       for (i = 0; i < *n; i++) {
> +               snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT,
> +                        unix_conf_entries[i]);
> +               req[i].name = path[i];
> +               req[i].flags = flags;
> +
> +               switch (conf[i]->type) {
> +               case SYSCTL_TYPE__CTL_32:
> +                       req[i].type = CTL_32;
> +                       req[i].arg = &conf[i]->iarg;
> +                       break;
> +               default:
> +                       pr_err("unix: Unknown config type %d\n",
> +                              (unsigned)conf[i]->type);

Hehe, %d (unsigned)

> +                       return -1;
> +               }
> +       }
> +
> +       ret = sysctl_op(req, *n, op, CLONE_NEWNET);
> +       if (ret < 0) {
> +               pr_err("unix: Failed to %s %s/<confs>\n",
> +                      (op == CTL_READ) ? "read" : "write",
> +                      CONF_UNIX_BASE);
> +               return -1;
> +       }
> +
> +       if (op == CTL_READ) {
> +               bool has_entries = false;
> +
> +               for (i = 0; i < *n; i++) {
> +                       if (req[i].flags & CTL_FLAGS_HAS) {
> +                               conf[i]->has_iarg = true;
> +                               if (!has_entries)
> +                                       has_entries = true;

Is (!has_entries) check here to save processor cache
from being invalidated? ;-)

> +                       }
> +               }
> +
> +               /*
> +                * Zap the whole section of data.
> +                * Unix conf is optional.
> +                */
> +               if (!has_entries) {
> +                       *n = 0;
> +                       *rconf = NULL;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  /*
>   * I case if some entry is missing in
>   * the kernel, simply write DEVCONFS_UNUSED
> @@ -1822,6 +1897,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>         int ret = -1;
>         int i;
>         NetnsEntry netns = NETNS_ENTRY__INIT;
> +       SysctlEntry *unix_confs = NULL;
> +       size_t sizex = ARRAY_SIZE(unix_conf_entries);
>         SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL;
>         int size4 = ARRAY_SIZE(devconfs4);
>         SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL;
> @@ -1838,7 +1915,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>         o_buf = buf = xmalloc(
>                         i * (sizeof(NetnsId*) + sizeof(NetnsId)) +
>                         size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> -                       size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2
> +                       size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 +
> +                       sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry))
>                      );
>         if (!buf)
>                 goto out;
> @@ -1894,6 +1972,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>                 }
>         }
>
> +       netns.n_unix_conf = sizex;
> +       netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*));
> +       unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry));
> +
> +       for (i = 0; i < sizex; i++) {
> +               sysctl_entry__init(&unix_confs[i]);
> +               netns.unix_conf[i] = &unix_confs[i];
> +               netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32;
> +       }
> +
>         ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
>         if (ret < 0)
>                 goto err_free;
> @@ -1908,6 +1996,10 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
>         if (ret < 0)
>                 goto err_free;
>
> +       ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ);
> +       if (ret < 0)
> +               goto err_free;
> +
>         ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
>  err_free:
>         xfree(o_buf);
> @@ -2117,6 +2209,12 @@ static int restore_netns_conf(struct ns_id *ns)
>                 ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL);
>         }
>
> +       if ((netns)->unix_conf) {
> +               ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE);
> +               if (ret)
> +                       goto out;
> +       }
> +
>         ns->net.netns = netns;
>  out:
>         return ret;

Thanks,
             Dmitry
Cyrill Gorcunov Sept. 17, 2018, 9:03 a.m.
On Thu, Sep 13, 2018 at 02:54:24PM +0100, Dmitry Safonov wrote:
> Hi Cyrill, sorry about late small nits,

Thanks for looking at!

> >
> > +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
> > +{
> > +       int i, ret = -1, flags = op == CTL_READ ? CTL_FLAGS_OPTIONAL : 0;
> 
> `i' differs by type from n,
> ret is used only in one place, where could be omitted,
> flags? Ewww

It is done on a purpose to match other functions in this file.

> 
> > +       char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { };
> > +       struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { };
> > +       SysctlEntry **conf = *rconf;
> > +
> > +       if (*n != ARRAY_SIZE(unix_conf_entries)) {
> > +               pr_err("unix: Unexpected entries in config (%u %u)\n",
> > +                      (unsigned)*n, (unsigned)ARRAY_SIZE(unix_conf_entries));
> 
> Maybe %zu and omit additional cast?

It will make like shorted for sure but nothing else :) It won't
chage the code generated. I think we need a more significant
cleanup of the whole criu/net.c file, which I plan to make
and will zap this nit as well :)

> 
> > +               return -EINVAL;
> > +       }
> > +
> > +       if (opts.weak_sysctls)
> 
> Can we do (opts.weak_sysctls || op == CTL_READ)?

Same reason => to match other functions.

> > +                       pr_err("unix: Unknown config type %d\n",
> > +                              (unsigned)conf[i]->type);
> 
> Hehe, %d (unsigned)

Yup, and this is not an error :-)

> > +       if (op == CTL_READ) {
> > +               bool has_entries = false;
> > +
> > +               for (i = 0; i < *n; i++) {
> > +                       if (req[i].flags & CTL_FLAGS_HAS) {
> > +                               conf[i]->has_iarg = true;
> > +                               if (!has_entries)
> > +                                       has_entries = true;
> 
> Is (!has_entries) check here to save processor cache
> from being invalidated? ;-)

:-)